diff --git a/text_to_sound_with_audioLDM_and_diffusers.ipynb b/text_to_sound_with_audioLDM_and_diffusers.ipynb new file mode 100644 index 0000000..fceb732 --- /dev/null +++ b/text_to_sound_with_audioLDM_and_diffusers.ipynb @@ -0,0 +1,504 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNoFVI4orNtOxbQrciMwQ6R", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "gpuClass": "standard", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "108d647daa9a497ca7bdb3e43c3ef805": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9ef20e7aae9646df9be62a01037c7566", + "IPY_MODEL_0082af0cd39348d493e68e18580e231e", + "IPY_MODEL_7ead70c7d7114157b46f5c583a7ea510" + ], + "layout": "IPY_MODEL_047268df6d70464a8c355e740ef3893e" + } + }, + "9ef20e7aae9646df9be62a01037c7566": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d54c373ef1c84f9392ba215ea4e4e870", + "placeholder": "​", + "style": "IPY_MODEL_2bbeedf4d045488bb8c177977405fc6f", + "value": "100%" + } + }, + 
"0082af0cd39348d493e68e18580e231e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_12e7ad2191df4295bb30cd8a7d495a38", + "max": 10, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1180e6a81af34046995a582c6a87bb07", + "value": 10 + } + }, + "7ead70c7d7114157b46f5c583a7ea510": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f368250f528e4fd887f5afcd3e56d196", + "placeholder": "​", + "style": "IPY_MODEL_169af8e45d14425a8679126bf1703690", + "value": " 10/10 [00:01<00:00, 10.99it/s]" + } + }, + "047268df6d70464a8c355e740ef3893e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d54c373ef1c84f9392ba215ea4e4e870": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2bbeedf4d045488bb8c177977405fc6f": { + "model_module": "@jupyter-widgets/controls", + 
"model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "12e7ad2191df4295bb30cd8a7d495a38": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1180e6a81af34046995a582c6a87bb07": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f368250f528e4fd887f5afcd3e56d196": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "169af8e45d14425a8679126bf1703690": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + 
"cell_type": "markdown", + "source": [ + "# Text-to-Sound w/ AudioLDM and 🤗 Diffusers\n", + "\n", + "Want to experiment with [AudioLDM](https://audioldm.github.io)? Create audio at the speed of thought with blazingly fast inference powered by Diffusers.\n", + "\n", + "AudioLDM is now part of the latest release of diffusers, so you can just install it via `pip install diffusers` ⚡️" + ], + "metadata": { + "id": "zyVtUbSiEjbT" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "NOtiaLeW_ODH" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install git+https://github.com/huggingface/diffusers transformers accelerate xformers" + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "from diffusers import AudioLDMPipeline\n", + "\n", + "repo_id = \"cvssp/audioldm\"\n", + "\n", + "pipe = AudioLDMPipeline.from_pretrained(repo_id, \n", + " torch_dtype=torch.float16).to(\"cuda\")\n", + "\n", + "pipe.enable_xformers_memory_efficient_attention()" + ], + "metadata": { + "id": "MzCFYWmi_iCb" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "prompt = \"Techno music with a strong, upbeat tempo and high melodic riffs\"\n", + "audio = pipe(prompt, num_inference_steps=10, audio_length_in_s=5.0).audios[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "108d647daa9a497ca7bdb3e43c3ef805", + "9ef20e7aae9646df9be62a01037c7566", + "0082af0cd39348d493e68e18580e231e", + "7ead70c7d7114157b46f5c583a7ea510", + "047268df6d70464a8c355e740ef3893e", + "d54c373ef1c84f9392ba215ea4e4e870", + "2bbeedf4d045488bb8c177977405fc6f", + "12e7ad2191df4295bb30cd8a7d495a38", + "1180e6a81af34046995a582c6a87bb07", + "f368250f528e4fd887f5afcd3e56d196", + "169af8e45d14425a8679126bf1703690" + ] + }, + "id": "5HZOpGZfBgnu", + "outputId": "19a1993c-756a-4f19-a3cd-67aff2d59b18" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": 
"display_data", + "data": { + "text/plain": [ + " 0%| | 0/10 [00:00" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + } + ] +} \ No newline at end of file