-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_detailed_text_watson.py
83 lines (64 loc) · 2.63 KB
/
extract_detailed_text_watson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
'''
Extract text from speech using IBM's Watson Speech to Text engine
Run as:
python extract_detailed_text_watson.py --dir [directory] --cat [category]
Juan Terven
Diana Cordova
Oct 2018
'''
import os
import pandas as pd
import subprocess
from watson_developer_cloud import SpeechToTextV1
import json
from natsort import natsorted
import argparse
# Dataset root directory and category name (same roles as --dir / --cat).
# NOTE(review): main() rebinds `directory` and `selected_cat` as local names
# taken from the parsed arguments, so it never reads these module-level values.
directory = '/datasets/Our_dataset'
selected_cat = 'CNN'
# Credentials handed to SpeechToTextV1 in main(). They are empty strings in the
# source; presumably they must be filled in before running -- confirm.
IBM_USERNAME = ""
IBM_PASSWORD = ""
def main(args):
    """Transcribe the pending .wav files of one category with Watson.

    A .wav file in ``<videos_dir>/<category>`` is considered pending when no
    ``.json`` file with the same stem exists next to it. For every pending
    file the audio is sent to the Watson Speech to Text service (Spanish
    broadband model, with timestamps and word confidences), the matching
    ``Link`` value from ``<videos_dir>/<category>.xlsx`` is attached to the
    result under the ``'link'`` key (empty string when no row matches), and
    the result is saved as ``<stem>.json`` beside the audio file.

    Args:
        args: Object with ``videos_dir`` (dataset root directory) and
            ``category`` (sub-folder / spreadsheet name) attributes, as
            produced by the argument parser of this script.
    """
    directory = args.videos_dir
    selected_cat = args.category
    category_dir = os.path.join(directory, selected_cat)

    speech_to_text = SpeechToTextV1(username=IBM_USERNAME, password=IBM_PASSWORD)

    # Collect the wave files that do not have a JSON result yet, so the script
    # can be re-run to resume an interrupted batch.
    audio_names = natsorted([
        name for name in os.listdir(category_dir)
        if name.endswith(".wav")
        and not os.path.isfile(os.path.join(category_dir, name[:-4] + '.json'))
    ])
    print('found', len(audio_names), 'files')

    # Read spreadsheet
    df = pd.read_excel(os.path.join(directory, selected_cat + '.xlsx'))

    for audio_name in audio_names:
        stem = audio_name[:-4]

        # Look the file up in the spreadsheet. The stem is matched as a
        # literal substring (regex=False): file names may contain characters
        # such as '.', '(' or '+' that would otherwise be interpreted as a
        # regular expression. Empty cells count as "no match" (na=False).
        data = df[df['Video'].str.contains(stem, regex=False, na=False)]
        link = ''
        if data.shape[0] == 0:
            print('Not found in spredsheet:', audio_name)
        else:
            link = data.iloc[0]['Link']

        # Extract text using Watson
        print('Extracting detailed text using Watson for', audio_name)
        audio_path = os.path.join(category_dir, audio_name)
        with open(audio_path, "rb") as audio_file:
            result = speech_to_text.recognize(
                audio_file,
                content_type="audio/wav",
                model='es-ES_BroadbandModel',
                timestamps=True,
                word_confidence=True,
            ).get_result()

        # add the link to the results
        result['link'] = link

        # Save the JSON file. The presence of <stem>.json is what marks a file
        # as processed, so write to a temporary name first and rename it only
        # after the dump succeeded; an interrupted run then never leaves a
        # truncated .json that later runs would skip.
        out_json_path = audio_path[:-4] + '.json'
        tmp_json_path = out_json_path + '.tmp'
        with open(tmp_json_path, 'w') as outfile:
            json.dump(result, outfile)
        os.replace(tmp_json_path, out_json_path)
if __name__ == "__main__":
    # Parse input arguments. When --dir / --cat are omitted, fall back to the
    # module-level `directory` / `selected_cat` values instead of passing None
    # to main(), which would fail while building paths.
    parser = argparse.ArgumentParser(
        description='Extract detailed text from audio files using IBM Watson')
    parser.add_argument('--dir', dest='videos_dir', default=directory,
                        help='Directory with videos', type=str)
    parser.add_argument('--cat', dest='category', default=selected_cat,
                        help='Video category', type=str)
    args = parser.parse_args()
    main(args)