-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
163 lines (142 loc) · 5.91 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import asyncio
import io
import logging
import os
import subprocess
import sys
import threading
import requests
import time
import sounddevice as sd
import numpy as np
from scipy.io.wavfile import write
from VoiceAssistant.voice import speak, play_sound
import webrtcvad
import noisereduce as nr
from rasa.core.agent import Agent
# Make stdout emit UTF-8 so non-ASCII text printed by this script (for example
# transcription results) does not fail on consoles with a legacy code page.
# reconfigure() changes the encoding in place and keeps the stream's existing
# buffering; a freshly constructed TextIOWrapper defaults to
# line_buffering=False, which would delay the status messages printed below.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')
else:
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', line_buffering=True)

# Directory containing this script; every helper path below is resolved from it.
current_dir = os.path.dirname(os.path.realpath(__file__))

# Paths are built from separate components instead of "a\b" literals: sequences
# such as "\S" or "\i" are invalid string escapes and trigger warnings.
path_to_python = os.path.join(current_dir, "venv", "Scripts", "python.exe")
path_to_init_server = os.path.join(current_dir, "PhoWhisper_Server", "init_server.py")
path_to_activate_ww = os.path.join(current_dir, "WakeWord", "ww_interact.py")
rising_sound = "Rising.mp3"
path_to_rasa_model = os.path.join(
    current_dir, "VoiceAssistant", "Rasa", "models", "demo_model2.tar.gz"
)

# Rasa agent shared by the functions below; populated by load_rasa_model().
agent = None
def start_server():
    """Run the transcription server script in a child process and block until it exits.

    The child is launched with the interpreter at ``path_to_python`` and the
    script at ``path_to_init_server``.
    """
    server_command = [path_to_python, path_to_init_server]
    subprocess.Popen(server_command).wait()
def detect_wake_word():
    """Run the wake-word script and report whether it exited successfully.

    Blocks until the child process started from ``path_to_activate_ww``
    terminates.

    Returns:
        True when the child's exit code is 0, False otherwise.
    """
    listener = subprocess.Popen([path_to_python, path_to_activate_ww])
    exit_code = listener.wait()
    return exit_code == 0
def record_audio(filename='audio.wav', fs=16000, max_silence_duration=0.7):
    """Record from the default microphone until the speaker pauses, then save a WAV file.

    Audio is read in 20 ms mono int16 frames and each frame is classified by
    webrtcvad. Recording stops once ``max_silence_duration`` seconds of
    *consecutive* non-speech frames have been seen; any speech frame resets
    that counter, so short pauses inside an utterance do not end the recording.

    Args:
        filename: Path of the WAV file to write.
        fs: Sample rate in Hz, used for the input stream, the VAD and the WAV
            header.
        max_silence_duration: Length of continuous silence, in seconds, that
            ends the recording.

    Returns:
        ``filename`` when speech was captured and the file was written,
        otherwise ``None`` (no speech detected, nothing captured, or the file
        could not be written).
    """
    print("Start Recording...")
    start_time = time.time()
    vad = webrtcvad.Vad(3)  # 3 is the most aggressive non-speech filtering mode
    frame_duration = 0.02  # Frame length in seconds
    frame_size = int(fs * frame_duration)
    num_silent_frames = 0
    speech_detected = False
    audio_data = []

    stream = sd.InputStream(samplerate=fs, channels=1, dtype='int16', blocksize=frame_size)
    try:
        with stream:
            while num_silent_frames * frame_duration < max_silence_duration:
                frame, overflowed = stream.read(frame_size)
                if overflowed:
                    print("Overflow detected in stream.read().")
                if vad.is_speech(frame.tobytes(), fs):
                    speech_detected = True
                    # Speech ends the current pause, so the silence budget starts over.
                    num_silent_frames = 0
                    audio_data.append(frame)
                else:
                    num_silent_frames += 1
                    # Keep the pause itself, except for the frame that hits the limit.
                    if num_silent_frames * frame_duration < max_silence_duration:
                        audio_data.append(frame)
        print("Maximum silence duration reached. Stopping recording.")
    except Exception as e:
        # Best effort: keep whatever was captured before the failure.
        print(f"An error occurred during recording: {e}")

    # A recording made only of silent frames carries nothing to transcribe.
    if not audio_data or not speech_detected:
        print("No audio data captured.")
        return None

    # Concatenate all captured frames into one 1-D sample array.
    recorded_audio = np.concatenate([x.flatten() for x in audio_data], axis=0)
    print("Record in {:.2f} seconds.".format(time.time() - start_time))

    # NOTE: noise reduction is currently disabled; the raw samples are saved.
    try:
        write(filename, fs, recorded_audio)
        print(f"Audio recorded and saved to {filename}")
        return filename
    except Exception as e:
        print(f"Error during noise reduction or file writing: {e}")
        return None
def send_audio_to_server(audio_path):
    """Upload an audio file to the transcription server and return the recognised text.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        The string found at ``transcription.text`` in the JSON reply, or an
        empty string when that field is missing.

    Raises:
        OSError: If the audio file cannot be opened.
        requests.RequestException: If the HTTP request fails.
        ValueError: If the reply body is not valid JSON.
    """
    url = 'http://localhost:5000/transcribe'  # Change this URL to your Flask server's URL
    # The context manager closes the file handle even when the request fails;
    # an open handle would also block later deletion of the file on Windows.
    with open(audio_path, 'rb') as audio_file:
        response = requests.post(url, files={'file': audio_file})
    transcription_result = response.json().get('transcription', {}).get('text', '')
    return transcription_result
def load_rasa_model():
    """Announce start-up, then load the Rasa model into the module-level ``agent``.

    Returns:
        The loaded agent, or ``None`` when loading raised an exception (the
        error is printed rather than propagated).
    """
    global agent
    speak("Tôi đang khởi động, bạn chờ chút nhé")
    try:
        agent = Agent.load(path_to_rasa_model)
        print("Rasa model loaded successfully.")
    except Exception as load_error:
        print(f"Failed to load Rasa model: {load_error}")
        return None
    return agent
async def get_response(command):
    """Pass ``command`` to the module-level Rasa ``agent`` and return its reply.

    Args:
        command: Text to hand to ``agent.handle_text``.

    Returns:
        Whatever ``agent.handle_text`` produces, or ``None`` if the call raised
        (the error is printed rather than propagated).
    """
    try:
        return await agent.handle_text(command)
    except Exception as err:
        print(f"Cannot get response: {err}")
    return None
def in_conversation():
    """Run conversation turns until the user stops talking or the session ends.

    Each turn plays the prompt sound, records a command, has it transcribed,
    sends the text to the Rasa agent and speaks every reply that carries text.
    The function returns when:

    * ``record_audio`` yields no recording,
    * a reply is marked with the ``session_end`` event, or
    * transcription or response handling raises (the error is printed).

    The temporary recording is deleted at the end of every turn.
    """
    # A loop rather than self-recursion, so long conversations cannot exhaust the stack.
    while True:
        print("Conversation started")
        play_sound(rising_sound)
        start_time = time.time()
        audio_path = record_audio()  # Record the audio command
        print("Return result in {:.2f} seconds.".format(time.time() - start_time))
        if audio_path is None:
            print("No audio detected or maximum silence reached, going back to sleep.")
            return

        start_time = time.time()
        speak("Vâng ạ")
        try:
            # Send the audio to the server and get the transcription
            result = send_audio_to_server(audio_path)
            print("Received result in {:.2f} seconds.".format(time.time() - start_time))
            print("Transcription Result:", result)

            # get_response is a coroutine function: it must be driven by an
            # event loop, otherwise only an un-awaited coroutine object comes back.
            responses = asyncio.run(get_response(result)) or []
            for message in responses:
                text = message.get('text')
                if text:  # Replies without a text payload have nothing to say aloud
                    speak(text)
                if message.get('event') == 'session_end':
                    return
            print("Received result in {:.2f} seconds.".format(time.time() - start_time))
        except Exception as e:
            print(f"Failed to send audio to the server: {e}")
            return
        finally:
            # Clean up on every exit path; the file may already be gone.
            try:
                os.remove(audio_path)
            except OSError:
                pass
def main():
    """Start the transcription server, load the Rasa model, then serve wake-word requests forever."""
    # Daemon thread: it is killed automatically once the main program exits.
    threading.Thread(target=start_server, daemon=True).start()

    if agent is None:
        load_rasa_model()

    while True:
        # Blocks until the wake-word process exits; exit code 0 means it was heard.
        woke_up = detect_wake_word()
        if woke_up:
            speak("Em đây")
            in_conversation()
        time.sleep(1)


if __name__ == "__main__":
    main()