From be2b2c8e005b3bca8abfad1cf99722ce93d90a25 Mon Sep 17 00:00:00 2001
From: Vaibhav Srivastav
Date: Wed, 4 Jan 2023 19:30:45 +0100
Subject: [PATCH] Created using Colaboratory

---
 Whisper_Large_8bit_loading_w_bnb.ipynb | 877 +++++++++++++++++++++++++
 1 file changed, 877 insertions(+)
 create mode 100644 Whisper_Large_8bit_loading_w_bnb.ipynb

diff --git a/Whisper_Large_8bit_loading_w_bnb.ipynb b/Whisper_Large_8bit_loading_w_bnb.ipynb
new file mode 100644
index 0000000..896f784
--- /dev/null
+++ b/Whisper_Large_8bit_loading_w_bnb.ipynb
@@ -0,0 +1,877 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyP+MiRyiV5xQ5ZbVq2Qh2cH",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "gpuClass": "standard"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "Open In Colab"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Whisper Large inference in 8-bit mode\n",
+        "\n",
+        "Loading the model in 8-bit enables faster, more memory-efficient inference for large models. Read more about it [here](https://huggingface.co/blog/hf-bitsandbytes-integration).\n",
+        "\n",
+        "Compiled by: [Vaibhav (VB) Srivastav](https://twitter.com/reach_vb)"
+      ],
+      "metadata": {
+        "id": "YnVYTxOBJMmR"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "We'll first install the necessary packages. We need ffmpeg to decode the `mp3` files from the CV11 dataset, and transformers, bitsandbytes and accelerate to load the model in 8-bit mode."
+ ], + "metadata": { + "id": "KgGPly6_Lrhm" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JKOGmCPxrfJ4", + "outputId": "b4a4a392-53ab-44aa-8616-047507375c87" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\r0% [Working]\r \rHit:1 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease\n", + "\r0% [Connecting to archive.ubuntu.com (185.125.190.36)] [Connecting to security.\r0% [1 InRelease gpgv 3,626 B] [Waiting for headers] [Connecting to security.ubu\r \rHit:2 http://archive.ubuntu.com/ubuntu bionic InRelease\n", + "\r0% [1 InRelease gpgv 3,626 B] [Waiting for headers] [Waiting for headers] [Wait\r \rHit:3 http://security.ubuntu.com/ubuntu bionic-security InRelease\n", + "\r0% [1 InRelease gpgv 3,626 B] [Waiting for headers] [Waiting for headers] [Wait\r \rHit:4 http://archive.ubuntu.com/ubuntu bionic-updates InRelease\n", + "Hit:5 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease\n", + "Hit:6 http://archive.ubuntu.com/ubuntu bionic-backports InRelease\n", + "Ign:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n", + "Hit:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n", + "Hit:9 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n", + "Hit:10 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n", + "Hit:11 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n", + "Hit:12 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n", + "Hit:13 http://ppa.launchpad.net/jonathonf/ffmpeg-4/ubuntu bionic InRelease\n", + "Reading package lists... Done\n", + "Hit:1 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease\n", + "Hit:2 http://ppa.launchpad.net/cran/libgit2/ubuntu bionic InRelease\n", + "Hit:3 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease\n", + "Hit:4 http://security.ubuntu.com/ubuntu bionic-security InRelease\n", + "Hit:5 http://archive.ubuntu.com/ubuntu bionic InRelease\n", + "Hit:6 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu bionic InRelease\n", + "Hit:7 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n", + "Hit:8 http://archive.ubuntu.com/ubuntu bionic-updates InRelease\n", + "Ign:9 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 InRelease\n", + "Hit:10 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 InRelease\n", + "Hit:11 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 Release\n", + "Hit:12 http://archive.ubuntu.com/ubuntu bionic-backports InRelease\n", + "Hit:13 http://ppa.launchpad.net/jonathonf/ffmpeg-4/ubuntu bionic InRelease\n", + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "32 packages can be upgraded. Run 'apt list --upgradable' to see them.\n", + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "ffmpeg is already the newest version (7:4.3.2-0york0~18.04).\n", + "The following package was automatically installed and is no longer required:\n", + " libnvidia-common-460\n", + "Use 'apt autoremove' to remove it.\n", + "0 upgraded, 0 newly installed, 0 to remove and 32 not upgraded.\n", + " Installing build dependencies ... 
\u001b[?25l\u001b[?25hdone\n",
+            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+            "  Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n"
+          ]
+        }
+      ],
+      "source": [
+        "!add-apt-repository -y ppa:jonathonf/ffmpeg-4 && apt update && apt install -y ffmpeg\n",
+        "!pip install --quiet datasets git+https://github.com/huggingface/transformers evaluate huggingface_hub jiwer bitsandbytes accelerate"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Since we'll be running inference on the CV11 dataset, we need to authenticate ourselves first (CV11 requires accepting its terms and conditions)."
+      ],
+      "metadata": {
+        "id": "NvR6u52ZL9yb"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!git config --global credential.helper store\n",
+        "from huggingface_hub import login\n",
+        "\n",
+        "login()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 331
+        },
+        "id": "tBSPoZggrtc8",
+        "outputId": "1d9fed6b-2345-4eb1-923d-c4624373cc7d"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Token is valid.\n",
+            "Your token has been saved in your configured git credential helpers (store).\n",
+            "Your token has been saved to /root/.huggingface/token\n",
+            "Login successful\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "To reduce memory and time overhead, we'll load the dataset in streaming mode and fetch one data point at a time during inference. This is especially useful for larger datasets."
+      ],
+      "metadata": {
+        "id": "nmQdxHxXMPNL"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from datasets import load_dataset\n",
+        "\n",
+        "dataset = load_dataset(\n",
+        "    \"mozilla-foundation/common_voice_11_0\", \"en\", revision=\"streaming\", split=\"test\", streaming=True, use_auth_token=True\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "_MeHLH1Qrv6_"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Load the model and processor in 8-bit mode by passing `load_in_8bit=True`.\n",
+        "\n",
+        "Note: this is the only change you need to make to run the model in 8-bit mode."
+      ],
+      "metadata": {
+        "id": "Jk-efqH5MeFg"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "from transformers import WhisperForConditionalGeneration, WhisperProcessor\n",
+        "\n",
+        "model = WhisperForConditionalGeneration.from_pretrained(\"openai/whisper-large\", device_map=\"auto\", load_in_8bit=True)\n",
+        "processor = WhisperProcessor.from_pretrained(\"openai/whisper-large\", load_in_8bit=True)"
+      ],
+      "metadata": {
+        "id": "UdiPC_3w_UyR"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
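+    {
+      "cell_type": "markdown",
+      "source": [
+        "Optionally, you can sanity-check the memory savings. As a rough sketch: recent versions of `transformers` expose `get_memory_footprint()`, which reports how much memory the loaded weights occupy; with `load_in_8bit=True` this should be roughly half of what the fp16 checkpoint would take."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Rough sanity check: assumes a transformers version that provides get_memory_footprint().\n",
+        "# With 8-bit weights this should be roughly half of the fp16 footprint.\n",
+        "print(f\"Model footprint: {model.get_memory_footprint() / 1024**3:.2f} GB\")"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },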
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Next, preprocess the dataset so the audio is sampled at 16 kHz, since Whisper expects 16 kHz input."
+      ],
+      "metadata": {
+        "id": "IJtaj3IpMwWF"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from datasets import Audio\n",
+        "\n",
+        "dataset = dataset.take(10)\n",
+        "\n",
+        "# resample to 16kHz\n",
+        "dataset = dataset.cast_column(\"audio\", Audio(sampling_rate=16000))"
+      ],
+      "metadata": {
+        "id": "NN39fYPbEbkC"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Voilà! Time to run the inference loop!"
+      ],
+      "metadata": {
+        "id": "ZyXEYmF3M4C5"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%%time\n",
+        "\n",
+        "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+        "\n",
+        "for data in dataset:\n",
+        "    inputs = processor.feature_extractor(data[\"audio\"][\"array\"], return_tensors=\"pt\", sampling_rate=16_000).input_features.half().to(device)\n",
+        "    forced_decoder_ids = processor.get_decoder_prompt_ids(language=\"en\", task=\"transcribe\")\n",
+        "    predicted_ids = model.generate(inputs, forced_decoder_ids=forced_decoder_ids)\n",
+        "    text = processor.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True, normalize=False)[0]\n",
+        "    print(text)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Sbwt2HOPEVpn",
+        "outputId": "bfea6799-9226-4b80-dab1-39dbf936d5ab"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Reading metadata...: 16354it [00:00, 66038.23it/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            " Joe Keaton disapproved of films and Buster also had reservations about the medium.\n",
+            " She'll be alright.\n",
+            " Six.\n",
+            " All is well that ends well.\n",
+            " It is a busy market town that serves a large, surrounded area.\n",
+            " the team had Olympic champion Carolina Marin in the squad for the season\n",
+            " Do you mean it?\n",
+            " The new patch is less invasive than the old one, but still causes regression.\n",
+            " How is Mozilla going to handle ambiguities like Q and Q?\n",
+            " Wish you a safe and happy holiday.\n",
+            "CPU times: user 42.3 s, sys: 1.27 s, total: 43.6 s\n",
+            "Wall time: 43.7 s\n"
+          ]
+        }
+      ]
+    },
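+    {
+      "cell_type": "markdown",
+      "source": [
+        "Since `evaluate` and `jiwer` are already installed, here's a rough sketch of how you could score these transcriptions against the CV11 reference text (the `sentence` column). It simply lowercases both sides instead of applying Whisper's full English text normalizer, and it re-runs generation over the same 10 streamed clips, so treat the resulting WER as a ballpark figure."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Rough WER sketch: assumes `dataset`, `model`, `processor` and `device` from the cells above.\n",
+        "# Crude lowercasing is used in place of Whisper's English normalizer, so the number is approximate.\n",
+        "import evaluate\n",
+        "\n",
+        "wer_metric = evaluate.load(\"wer\")  # backed by jiwer, installed earlier\n",
+        "\n",
+        "references, predictions = [], []\n",
+        "for data in dataset:\n",
+        "    inputs = processor.feature_extractor(data[\"audio\"][\"array\"], return_tensors=\"pt\", sampling_rate=16_000).input_features.half().to(device)\n",
+        "    forced_decoder_ids = processor.get_decoder_prompt_ids(language=\"en\", task=\"transcribe\")\n",
+        "    predicted_ids = model.generate(inputs, forced_decoder_ids=forced_decoder_ids)\n",
+        "    text = processor.tokenizer.batch_decode(predicted_ids, skip_special_tokens=True, normalize=False)[0]\n",
+        "    predictions.append(text.strip().lower())\n",
+        "    references.append(data[\"sentence\"].strip().lower())\n",
+        "\n",
+        "wer = wer_metric.compute(predictions=predictions, references=references)\n",
+        "print(f\"WER on these clips: {100 * wer:.2f}%\")"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },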
+    {
+      "cell_type": "code",
+      "source": [
+        "!nvidia-smi"
+      ],
+      "metadata": {
+        "id": "Lq91uTlgM-Xp",
+        "outputId": "c7e6aab2-9996-40e2-8855-669b6c69b705",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Wed Dec  7 15:28:37 2022       \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
+            "|-------------------------------+----------------------+----------------------+\n",
+            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
+            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
+            "|                               |                      |               MIG M. |\n",
+            "|===============================+======================+======================|\n",
+            "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
+            "| N/A   57C    P0    29W /  70W |   6410MiB / 15109MiB |      0%      Default |\n",
+            "|                               |                      |                  N/A |\n",
+            "+-------------------------------+----------------------+----------------------+\n",
+            "                                                                               \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| Processes:                                                                  |\n",
+            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
+            "|        ID   ID                                                   Usage      |\n",
+            "|=============================================================================|\n",
+            "+-----------------------------------------------------------------------------+\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file