Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Select ElevenLabs Voice Through the Web App UI #2

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Ignore uploads and outputs
uploads/
outputs/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
33 changes: 20 additions & 13 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import uuid
from flask import Flask, request, jsonify, send_file, render_template

# Read API keys from the environment so secrets never live in the source tree.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

# Uncomment the following lines to hardcode API keys (local testing only;
# never commit real keys).
# OPENAI_API_KEY = ""
# ELEVENLABS_API_KEY = ""

# The keys must not be reassigned to "" below this point: doing so would
# silently discard the values read from the environment above.
openai.api_key = OPENAI_API_KEY

# ElevenLabs voice tuning constants (their use is outside this view).
ELEVENLABS_VOICE_STABILITY = 0.30
ELEVENLABS_VOICE_SIMILARITY = 0.75

Expand Down Expand Up @@ -66,11 +69,13 @@ def generate_reply(conversation: list) -> str:
return response["choices"][0]["message"]["content"]


def generate_audio(text: str, output_path: str = "") -> str:
"""Converts
def generate_audio(text: str, voice_name: str, output_path: str = "") -> str:
"""Converts text to audio.

:param text: The text to convert to audio.
:type text : str
:param voice_name: The name of the voice to use.
:type voice_name : str
:param output_path: The location to save the finished mp3 file.
:type output_path: str
:returns: The output path for the successfully saved file.
Expand All @@ -79,9 +84,10 @@ def generate_audio(text: str, output_path: str = "") -> str:
"""
voices = ELEVENLABS_ALL_VOICES
try:
voice_id = next(filter(lambda v: v["name"] == ELEVENLABS_VOICE_NAME, voices))["voice_id"]
voice_id = next(filter(lambda v: v["name"] == voice_name, voices))["voice_id"]
except StopIteration:
voice_id = voices[0]["voice_id"]

url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
headers = {
"xi-api-key": ELEVENLABS_API_KEY,
Expand All @@ -95,16 +101,16 @@ def generate_audio(text: str, output_path: str = "") -> str:
}
}
response = requests.post(url, json=data, headers=headers)

with open(output_path, "wb") as output:
output.write(response.content)
return output_path

return output_path

@app.route('/')
def index():
    """Render the index page.

    The template receives the list of available voices for the selector, the
    name of the voice to preselect, and ``voice`` for the page title/heading.
    """
    return render_template(
        'index.html',
        voices=ELEVENLABS_ALL_VOICES,
        selected_voice=ELEVENLABS_VOICE_NAME,
        # index.html still renders "Ask {{voice}}" outside the voices loop,
        # so keep supplying it; otherwise the heading renders as "Ask ".
        voice=ELEVENLABS_VOICE_NAME,
    )

@app.route('/transcribe', methods=['POST'])
def transcribe():
Expand All @@ -123,15 +129,16 @@ def transcribe():
@app.route('/ask', methods=['POST'])
def ask():
    """Generate a ChatGPT response from the given conversation, then convert it to audio using ElevenLabs.

    Expects a JSON body with ``conversation`` and, optionally, ``voice_name``;
    when ``voice_name`` is absent the configured default voice is used.

    :returns: JSON with the reply text and the ``/listen/...`` URL of the audio.
    """
    # Parse the request body once and read both fields from it.
    input_data = request.get_json(force=True)
    conversation = input_data.get("conversation", "")
    voice_name = input_data.get("voice_name", ELEVENLABS_VOICE_NAME)

    reply = generate_reply(conversation)

    # A random file name keeps concurrent requests from overwriting each other.
    reply_file = f"{uuid.uuid4()}.mp3"
    reply_path = f"outputs/{reply_file}"
    os.makedirs(os.path.dirname(reply_path), exist_ok=True)

    # Single synthesis call; voice_name is a required argument of generate_audio.
    generate_audio(text=reply, voice_name=voice_name, output_path=reply_path)
    return jsonify({'text': reply, 'audio': f"/listen/{reply_file}"})


@app.route('/listen/<filename>')
def listen(filename):
"""Return the audio file located at the given filename."""
Expand Down
111 changes: 70 additions & 41 deletions templates/index.html
Original file line number Diff line number Diff line change
@@ -1,43 +1,55 @@
<!DOCTYPE html>
<html data-bs-theme="dark">
<head>
<title>Ask {{voice}}</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
<style>
.audio-player {
margin-top: 20px;
margin-bottom: 40px;
}
</style>
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script src="https://unpkg.com/[email protected]/dist/index.umd.js"></script>
</head>
<body>
<div class="container text-center mt-5">
<div class="row">
<div class="col-12 col-md-6 col-xl-4 mx-auto">
<img class="mb-3" src="/static/hugh.png" alt="" style="height:250px">
<h1 class="mb-4">Ask {{voice}}</h1>
<form>
<div class="form-group mb-3">
<textarea id="transcription-box" class="form-control" rows="3" placeholder="Type a question or press record."></textarea>
<head>
<title>Ask {{voice}}</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
<style>
.audio-player {
margin-top: 20px;
margin-bottom: 40px;
}
.voices-container {
display: inline-block;
margin-bottom: 20px;
}
</style>
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<script src="https://unpkg.com/[email protected]/dist/index.umd.js"></script>
</head>
<body>
<div class="container text-center mt-5">
<div class="row">
<div class="col-12 col-md-6 col-xl-4 mx-auto">
<img class="mb-3" src="/static/hugh.png" alt="" style="height:250px">
<h1 class="mb-4">Ask {{voice}}</h1>
<div class="voices-container">
<span>Select Voice:</span>
<select id="voices-select">
{% for voice in voices %}
<option value="{{ voice.name }}" {% if voice.name == selected_voice %}selected{% endif %}>{{ voice.name}}</option>
{% endfor %}
</select>
</div>
<div class="form-group d-flex justify-content-between">
<button type="button" class="btn btn-danger" style="width:48%" id="record-button">Record</button>
<button type="button" class="btn btn-primary" style="width:48%" id="ask-button">Ask</button>
<form>
<div class="form-group mb-3">
<textarea id="transcription-box" class="form-control" rows="3" placeholder="Type a question or press record."></textarea>
</div>
<div class="form-group d-flex justify-content-between">
<button type="button" class="btn btn-danger" style="width:48%" id="record-button">Record</button>
<button type="button" class="btn btn-primary" style="width:48%" id="ask-button">Ask</button>
</div>
</form>
<div class="audio-player">
<audio id="audio-element"></audio>
</div>
<div class="response">
<p id="response-text"></p>
</div>
</form>
<div class="audio-player">
<audio id="audio-element"></audio>
</div>
<div class="response">
<p id="response-text"></p>
</div>
</div>
</div>
</div>
<script>
$(document).ready(() => {
// Get references to HTML elements
Expand All @@ -61,37 +73,54 @@ <h1 class="mb-4">Ask {{voice}}</h1>

// Ask question and play audio when ask button is clicked
askButton.click(() => {
    // Re-enable both buttons and restore the ask button's label.
    // Used on success AND failure so the UI can never stay stuck on "Thinking...".
    const resetButtons = () => {
        enableButton(recordButton);
        enableButton(askButton);
        askButton[0].innerHTML = "Ask";
    };

    // Disable buttons and set the ask button's text to "Thinking..."
    disableButton(recordButton);
    disableButton(askButton);
    askButton[0].innerHTML = "Thinking...";

    // Get the selected voice name
    const voiceName = $("#voices-select").val();

    // Add the user's message to the conversation (exactly once)
    conversation.push({role: "user", "content": transcriptionBox.val()});

    // Prepare data to send with the request
    const textData = { conversation: conversation, voice_name: voiceName };

    // Make a POST request to the /ask endpoint with the conversation and voice name
    $.ajax({
        type: 'POST',
        url: '/ask',
        data: JSON.stringify(textData),
        contentType: 'application/json',
        success: (data) => {
            const fileURL = data.audio;

            // Clear the response text and add the assistant's response to the conversation
            $("#response-text")[0].innerHTML = "";
            conversation.push({role: "assistant", "content": data.text});

            // Type the assistant's response with TypeIt
            new TypeIt("#response-text", {
                strings: data.text,
                speed: 39,
                waitUntilVisible: false,
                cursorChar: "▊",
            }).go();

            // Play the audio response
            audioElement.src = fileURL;
            audioElement.controls = true;
            audioElement.play();

            // Enable buttons and reset the ask button's text
            resetButtons();
        },
        error: () => {
            // Drop the unanswered user message so a retry does not send it twice,
            // then give control back to the user.
            conversation.pop();
            resetButtons();
        }
    });
});


// Initialize MediaRecorder
let chunks = [];
let recording = false;
Expand Down