IgnoranceAI · piercecohen1 · Apr 2, 2023 · Apr 2, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,7 @@
+# Ignore uploads and outputs
+uploads/
+outputs/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/app.py b/app.py
@@ -4,13 +4,16 @@
 import uuid
 from flask import Flask, request, jsonify, send_file, render_template
 
+# Set API Keys using environment variables
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
+
+# Local testing only: uncomment to hardcode API keys (never commit real keys)
+# OPENAI_API_KEY = ""
+# ELEVENLABS_API_KEY = ""
 
-# Add your OpenAI API key
-OPENAI_API_KEY = ""
 openai.api_key = OPENAI_API_KEY
 
-# Add your ElevenLabs API key
-ELEVENLABS_API_KEY = ""
 ELEVENLABS_VOICE_STABILITY = 0.30
 ELEVENLABS_VOICE_SIMILARITY = 0.75
 
@@ -66,11 +69,13 @@ def generate_reply(conversation: list) -> str:
     return response["choices"][0]["message"]["content"]
 
 
-def generate_audio(text: str, output_path: str = "") -> str:
-    """Converts
+def generate_audio(text: str, voice_name: str, output_path: str = "") -> str:
+    """Converts text to audio.
 
     :param text: The text to convert to audio.
     :type text : str
+    :param voice_name: The name of the voice to use.
+    :type voice_name : str
     :param output_path: The location to save the finished mp3 file.
     :type output_path: str
     :returns: The output path for the successfully saved file.
@@ -79,9 +84,10 @@ def generate_audio(text: str, output_path: str = "") -> str:
     """
     voices = ELEVENLABS_ALL_VOICES
     try:
-        voice_id = next(filter(lambda v: v["name"] == ELEVENLABS_VOICE_NAME, voices))["voice_id"]
+        voice_id = next(filter(lambda v: v["name"] == voice_name, voices))["voice_id"]
     except StopIteration:
         voice_id = voices[0]["voice_id"]
+
     url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
     headers = {
         "xi-api-key": ELEVENLABS_API_KEY,
@@ -95,16 +101,16 @@ def generate_audio(text: str, output_path: str = "") -> str:
         }
     }
     response = requests.post(url, json=data, headers=headers)
+
     with open(output_path, "wb") as output:
         output.write(response.content)
-    return output_path
 
+    return output_path
 
 @app.route('/')
 def index():
     """Render the index page."""
-    return render_template('index.html', voice=ELEVENLABS_VOICE_NAME)
-
+    return render_template('index.html', voices=ELEVENLABS_ALL_VOICES, selected_voice=ELEVENLABS_VOICE_NAME)
 
 @app.route('/transcribe', methods=['POST'])
 def transcribe():
@@ -123,15 +129,16 @@ def transcribe():
 @app.route('/ask', methods=['POST'])
 def ask():
     """Generate a ChatGPT response from the given conversation, then convert it to audio using ElevenLabs."""
-    conversation = request.get_json(force=True).get("conversation", "")
+    input_data = request.get_json(force=True)
+    conversation = input_data.get("conversation", "")
+    voice_name = input_data.get("voice_name", ELEVENLABS_VOICE_NAME)
     reply = generate_reply(conversation)
     reply_file = f"{uuid.uuid4()}.mp3"
     reply_path = f"outputs/{reply_file}"
     os.makedirs(os.path.dirname(reply_path), exist_ok=True)
-    generate_audio(reply, output_path=reply_path)
+    generate_audio(text=reply, voice_name=voice_name, output_path=reply_path)
     return jsonify({'text': reply, 'audio': f"/listen/{reply_file}"})
 
-
 @app.route('/listen/<filename>')
 def listen(filename):
     """Return the audio file located at the given filename."""

diff --git a/templates/index.html b/templates/index.html
@@ -1,43 +1,55 @@
 <!DOCTYPE html>
 <html data-bs-theme="dark">
-<head>
-    <title>Ask {{voice}}</title>
-    <meta charset="utf-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-    <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
-    <style>
-        .audio-player {
-            margin-top: 20px;
-            margin-bottom: 40px;
-        }
-    </style>
-    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
-    <script src="https://unpkg.com/[email protected]/dist/index.umd.js"></script>
-</head>
-<body>
-    <div class="container text-center mt-5">
-        <div class="row">
-            <div class="col-12 col-md-6 col-xl-4 mx-auto">
-                <img class="mb-3" src="/static/hugh.png" alt="" style="height:250px">
-                <h1 class="mb-4">Ask {{voice}}</h1>
-                <form>
-                    <div class="form-group mb-3">
-                        <textarea id="transcription-box" class="form-control" rows="3" placeholder="Type a question or press record."></textarea>
+    <head>
+        <title>Ask {{ selected_voice }}</title> {# index() passes selected_voice; "voice" is no longer in the context #}
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+        <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
+        <style>
+            .audio-player {
+                margin-top: 20px;
+                margin-bottom: 40px;
+            }
+            .voices-container {
+                display: inline-block;
+                margin-bottom: 20px;
+            }
+        </style>
+        <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
+        <script src="https://unpkg.com/[email protected]/dist/index.umd.js"></script>
+    </head>
+    <body>
+        <div class="container text-center mt-5">
+            <div class="row">
+                <div class="col-12 col-md-6 col-xl-4 mx-auto">
+                    <img class="mb-3" src="/static/hugh.png" alt="" style="height:250px">
+                    <h1 class="mb-4">Ask {{ selected_voice }}</h1> {# "voice" only exists inside the options loop below #}
+                    <div class="voices-container">
+                        <span>Select Voice:</span>
+                        <select id="voices-select">
+                            {% for voice in voices %}
+                            <option value="{{ voice.name }}" {% if voice.name == selected_voice %}selected{% endif %}>{{ voice.name}}</option>
+                            {% endfor %}
+                        </select>
                     </div>
-                    <div class="form-group d-flex justify-content-between">
-                        <button type="button" class="btn btn-danger" style="width:48%" id="record-button">Record</button>
-                        <button type="button" class="btn btn-primary" style="width:48%" id="ask-button">Ask</button>
+                    <form>
+                        <div class="form-group mb-3">
+                            <textarea id="transcription-box" class="form-control" rows="3" placeholder="Type a question or press record."></textarea>
+                        </div>
+                        <div class="form-group d-flex justify-content-between">
+                            <button type="button" class="btn btn-danger" style="width:48%" id="record-button">Record</button>
+                            <button type="button" class="btn btn-primary" style="width:48%" id="ask-button">Ask</button>
+                        </div>
+                    </form>
+                    <div class="audio-player">
+                        <audio id="audio-element"></audio>
+                    </div>
+                    <div class="response">
+                        <p id="response-text"></p>
                     </div>
-                </form>
-                <div class="audio-player">
-                    <audio id="audio-element"></audio>
-                </div>
-                <div class="response">
-                    <p id="response-text"></p>
                 </div>
             </div>
         </div>
-    </div>
     <script>
         $(document).ready(() => {
             // Get references to HTML elements
@@ -61,37 +73,54 @@ <h1 class="mb-4">Ask {{voice}}</h1>
 
             // Ask question and play audio when ask button is clicked
             askButton.click(() => {
+                // Disable buttons and set the ask button's text to "Thinking..."
                 disableButton(recordButton);
                 disableButton(askButton);
                 askButton[0].innerHTML = "Thinking...";
-                conversation.push({role: "user", "content": transcriptionBox.val()})
-                const textData = { conversation: conversation };
+
+                // Get the selected voice name
+                const voiceName = $("#voices-select").val();
+
+                // Add the user's message to the conversation
+                conversation.push({role: "user", "content": transcriptionBox.val()});
+
+                // Prepare data to send with the request
+                const textData = { conversation: conversation, voice_name: voiceName };
+
+                // Make a POST request to the /ask endpoint with the conversation and voice name
                 $.ajax({
                     type: 'POST',
                     url: '/ask',
                     data: JSON.stringify(textData),
                     contentType: 'application/json',
                     success: (data) => {
                         const fileURL = data.audio;
+
+                        // Clear the response text and add the assistant's response to the conversation
                         $("#response-text")[0].innerHTML = "";
-                        conversation.push({role: "assistant", "content": data.text})
+                        conversation.push({role: "assistant", "content": data.text});
+
+                        // Type the assistant's response with TypeIt
                         new TypeIt("#response-text", {
-                          strings: data.text,
-                          speed: 39,
-                          waitUntilVisible: false,
-                          cursorChar: "▊",
+                            strings: data.text,
+                            speed: 39,
+                            waitUntilVisible: false,
+                            cursorChar: "▊",
                         }).go();
+
+                        // Play the audio response
                         audioElement.src = fileURL;
                         audioElement.controls = true;
                         audioElement.play();
+
+                        // Enable buttons and reset the ask button's text
                         enableButton(recordButton);
                         enableButton(askButton);
                         askButton[0].innerHTML = "Ask";
                     }
                 });
             });
 
-
             // Initialize MediaRecorder
             let chunks = [];
             let recording = false;