Skip to content

Commit

Permalink
Created using Colaboratory
Browse files Browse the repository at this point in the history
  • Loading branch information
Vaibhavs10 committed Feb 10, 2023
1 parent 27dade7 commit 2d749a3
Showing 1 changed file with 219 additions and 0 deletions.
219 changes: 219 additions & 0 deletions Whisper_translate_with_🤗transformers_pipeline.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyNc6C55bNNEjls6hK10Usqh",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/Vaibhavs10/notebooks/blob/main/Whisper_translate_with_%F0%9F%A4%97transformers_pipeline.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oW1KgNjEgtvZ",
"outputId": "403edb60-8922-4d1b-ff04-d2b291efc89d"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.8/dist-packages (4.26.1)\n",
"Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (2.9.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.21.6)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.12.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.13.2)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.64.1)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers) (2.25.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (23.0)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
"Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (2023.1.0)\n",
"Requirement already satisfied: dill<0.3.7 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.3.6)\n",
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from datasets) (0.70.14)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from datasets) (1.3.5)\n",
"Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.18.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
"Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from datasets) (3.2.0)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.4.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (1.26.14)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2022.12.7)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (4.0.0)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2022.7.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n"
]
}
],
"source": [
"%pip install -q transformers datasets"
]
},
{
"cell_type": "code",
"source": [
"from transformers import pipeline\n",
"from datasets import load_dataset"
],
"metadata": {
"id": "BLCTDydZgwvJ"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Whisper-small speech-recognition pipeline, configured with task=translate and language=german.\n",
"pipe = pipeline(\n",
"    task=\"automatic-speech-recognition\",\n",
"    model=\"openai/whisper-small\",\n",
"    generate_kwargs=dict(task=\"translate\", language=\"german\"),\n",
")"
],
"metadata": {
"id": "_fGRIMlMhF_A"
},
"execution_count": 26,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Stream the German (\"de\") test split of Common Voice 11 rather than downloading it in full.\n",
"cv11 = load_dataset(\n",
"    \"mozilla-foundation/common_voice_11_0\",\n",
"    \"de\",\n",
"    split=\"test\",\n",
"    streaming=True,\n",
")"
],
"metadata": {
"id": "GbIJMykEEuVO"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Peek at the first streamed example to see which fields it carries.\n",
"next(iter(cv11))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-xP3eHKDLVwS",
"outputId": "f147a031-3cc4-4e1f-de2a-424bea9e4a05"
},
"execution_count": 31,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Reading metadata...: 16082it [00:00, 24809.25it/s]\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'client_id': '0052c07533a6976233ad5926d950b523002c4d8cdd9ae8726dbfec385951bd22aa707a742c49afe20c7d6cb9515dbaddac5b4d6fe8ebddcfbec46a2d3180a3a1',\n",
" 'path': 'common_voice_de_17922420.mp3',\n",
" 'audio': {'path': 'common_voice_de_17922420.mp3',\n",
" 'array': array([ 0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,\n",
" -9.0749630e-12, 5.6385865e-09, 7.3282314e-09], dtype=float32),\n",
" 'sampling_rate': 48000},\n",
" 'sentence': 'Zieht euch bitte draußen die Schuhe aus.',\n",
" 'up_votes': 2,\n",
" 'down_votes': 0,\n",
" 'age': '',\n",
" 'gender': '',\n",
" 'accent': '',\n",
" 'locale': 'de',\n",
" 'segment': ''}"
]
},
"metadata": {},
"execution_count": 31
}
]
},
{
"cell_type": "code",
"source": [
"# Fetch the first example once: each iter(cv11) call on the streaming dataset\n",
"# starts a fresh pass over it, so calling it per field reads the same example twice.\n",
"first_audio = next(iter(cv11))[\"audio\"]\n",
"\n",
"# Input format for the ASR pipeline: raw waveform plus its sampling rate.\n",
"test_speech = {\n",
"    \"raw\": first_audio[\"array\"],\n",
"    \"sampling_rate\": first_audio[\"sampling_rate\"],\n",
"}"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZZlzI4iWFD7C",
"outputId": "f390e005-9720-4e8d-8d65-61f3d3714b7b"
},
"execution_count": 28,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Reading metadata...: 16082it [00:00, 23627.15it/s]\n",
"Reading metadata...: 16082it [00:00, 32748.99it/s]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Pass a shallow copy so this cell stays re-runnable even if the pipeline\n",
"# consumes keys from its input dict while preprocessing.\n",
"pipe(\n",
"    test_speech.copy(),\n",
"    return_timestamps=True,\n",
"    chunk_length_s=30,\n",
"    stride_length_s=[6, 0],\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1RDKFmrViDGP",
"outputId": "84d38b06-8639-4285-86d2-7b40f0497ca1"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'text': ' Please take off your shoes.',\n",
" 'chunks': [{'text': ' Please take off your shoes.', 'timestamp': (0.0, 3.0)}]}"
]
},
"metadata": {},
"execution_count": 30
}
]
}
]
}

0 comments on commit 2d749a3

Please sign in to comment.