-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkeyword_spotting_service.py
73 lines (53 loc) · 1.9 KB
/
keyword_spotting_service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow.keras as keras
import librosa as lb
import numpy as np
# Path of the saved Keras model handed to keras.models.load_model() when the
# service singleton is first created.
TRAINED_MODEL = "model.h5"
# Number of leading audio samples preprocess() keeps from each clip.
# NOTE(review): librosa.load() resamples to 22050 Hz unless sr is given, so
# 16000 samples is roughly 0.73 s of audio, not 1 s - confirm this matches
# the settings the model was trained with.
NUM_SAMPLES_TO_CONSIDER = 16000
def preprocess(file_path, n_mfcc=13, n_fft=2048, hop_length=512):
    """Load an audio file and return its MFCCs, one row per analysis frame.

    The signal is forced to exactly ``NUM_SAMPLES_TO_CONSIDER`` samples before
    feature extraction: longer clips are truncated and shorter clips are
    zero-padded at the end, so the returned matrix always has the same number
    of frames for a given ``n_fft``/``hop_length``.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to extract per frame.
        n_fft: FFT window length, in samples.
        hop_length: Step between successive frames, in samples.

    Returns:
        The transposed MFCC matrix returned by ``librosa.feature.mfcc``
        (frames along the first axis, coefficients along the second).
    """
    # Load the audio file (librosa resamples to its default rate here).
    signal, sr = lb.load(file_path)

    # Ensure a consistent length: truncate long clips, zero-pad short ones.
    if len(signal) >= NUM_SAMPLES_TO_CONSIDER:
        signal = signal[:NUM_SAMPLES_TO_CONSIDER]
    else:
        missing = NUM_SAMPLES_TO_CONSIDER - len(signal)
        signal = np.pad(signal, (0, missing), mode="constant")

    # Extract the MFCCs. The audio must be passed as ``y=``: recent librosa
    # releases reject positional arguments here. ``sr`` is forwarded so the
    # mel filter bank matches the rate the signal was actually loaded at.
    mfccs = lb.feature.mfcc(
        y=signal,
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )
    return mfccs.T
class _Keyword_Spotting_Service:
model = None
_mappings = [
"down",
"eight",
"go",
"happy",
"house",
"marvin",
"nine",
"right",
"seven",
"sheila"
]
_instance = None
def predict(self, file_path):
# Extract the MFCCs
MFCCs = preprocess(file_path)
# Convert 2D MFCCs to 4D
MFCCs = MFCCs[np.newaxis, ..., np.newaxis]
# Make Predictions
prediction = self.model.predict(MFCCs)
predicted_index = np.argmax(prediction)
predicted_keyword = self._mappings[predicted_index]
return predicted_keyword
def Keyword_spotting_service():
    """Return the shared ``_Keyword_Spotting_Service`` instance.

    On the first call the Keras model at ``TRAINED_MODEL`` is loaded and a
    single instance is created and cached; later calls return that same
    instance without reloading the model.

    Returns:
        The cached ``_Keyword_Spotting_Service`` instance.
    """
    # Ensure that we only have one instance
    if _Keyword_Spotting_Service._instance is None:
        # Load the model BEFORE publishing the instance: if loading raises,
        # nothing is cached and the next call retries, instead of handing out
        # an instance whose model is still None.
        model = keras.models.load_model(TRAINED_MODEL)
        _Keyword_Spotting_Service.model = model
        _Keyword_Spotting_Service._instance = _Keyword_Spotting_Service()
    return _Keyword_Spotting_Service._instance
if __name__ == "__main__":
    # Smoke test: classify three sample clips from the Test directory.
    kss = Keyword_spotting_service()

    # Build the paths with os.path.join so they resolve on every platform;
    # a literal backslash is only a directory separator on Windows.
    sample_files = ("happy.wav", "marvin.wav", "eight.wav")
    keywords = [kss.predict(os.path.join("Test", name)) for name in sample_files]

    print(f"Predicted Keyword: {', '.join(keywords)}")