hetpandya · yaboiksa · Apr 22, 2023
diff --git a/setup.py b/setup.py
@@ -14,19 +14,19 @@
     author_email="[email protected]",
     license="MIT",
     install_requires=[
-        "librosa==0.7.2",
-        "youtube-dl",
+        "librosa>=0.7.2",
+        "yt-dlp",
         "tqdm",
         "pandas",
         "pydub",
-        "scikit-learn==0.19.2",
+        "scikit-learn>=0.19.2",
         "webrtcvad",
         "scipy>=1.0.0",
-        "numba==0.48",
+        "numba>=0.48",
         "inflect",
         "numpy>=1.14.0",
-        "unidecode==0.4.20",
-        "vtt_to_srt3",
+        "unidecode>=0.4.20",
+        "vtt_to_srt3==0.1.8.9",
         "youtube-transcript-api>=0.4.1"
     ],
     packages=["youtube_tts_data_generator"],

diff --git a/youtube_tts_data_generator/youtube_tts_data_generator.py b/youtube_tts_data_generator/youtube_tts_data_generator.py
@@ -1,4 +1,4 @@
-import youtube_dl
+import yt_dlp
 import os
 import errno
 import warnings
@@ -21,6 +21,8 @@
 import re
 from vtt_to_srt.vtt_to_srt import read_text_file, convert_content
 
+import soundfile as sf
+
 
 class NoSubtitleWarning(UserWarning):
     pass
@@ -301,15 +303,13 @@ def download(self, links_txt):
                 for ix in range(len(links)):
                     link = links[ix]
                     video_id = self.get_video_id(link)
-
                     if video_id != []:
                         filename = f"{self.name}{ix+1}.mp4"
                         wav_file = filename.replace(".mp4", ".wav")
                         self.ydl_opts["outtmpl"] = os.path.join(
                             self.download_dir, filename
                         )
-
-                        with youtube_dl.YoutubeDL(self.ydl_opts) as ydl:
+                        with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
                             try:
                                 trans = (
                                     YouTubeTranscriptApi.list_transcripts(video_id)
@@ -330,6 +330,7 @@ def download(self, links_txt):
                                         ),
                                     ),
                                     "w",
+                                    encoding="utf-8",
                                 ).write(json_formatted)
                                 ydl.download([link])
                                 print(
@@ -345,15 +346,18 @@ def download(self, links_txt):
                                     f"WARNING - video {link} does not have subtitles. Skipping..",
                                     NoSubtitleWarning,
                                 )
-
+                        # Extracted audio lands at "<filename>.wav" (e.g. x1.mp4.wav); rename it to wav_file.
+                        downloaded_wav = os.path.join(self.download_dir, filename + ".wav")
+                        if os.path.exists(downloaded_wav):  # absent when the video was skipped
+                            os.replace(downloaded_wav, os.path.join(self.download_dir, wav_file))
                         del self.ydl_opts["outtmpl"]
                     else:
                         warnings.warn(
                             f"WARNING - video {link} does not seem to be a valid YouTube url. Skipping..",
                             InvalidURLWarning,
                         )
                 if self.wav_filenames != []:
-                    with open(self.filenames_txt, "w") as f:
+                    with open(self.filenames_txt, "w", encoding="utf-8") as f:
                         lines = "filename,subtitle,trim_mins_begin,trim_mins_end\n"
                         for wav in self.wav_filenames:
                             lines += f"{wav},{wav.replace('.wav','')}.{self.dataset_lang}.json,0,0\n"
@@ -738,9 +742,21 @@ def finalize_dataset(self, min_audio_length=5, max_audio_length=14):
                 and trimmed_length <= max_audio_length
             ):
                 self.len_dataset += trimmed_length
+                """
                 librosa.output.write_wav(
-                    os.path.join(self.dest_dir, "wavs", audio), silence_removed, sr
+                    os.path.join(self.dest_dir, "wavs", audio), 
+                    silence_removed, 
+                    sr
+                )
+                """
+                sf.write(
+                    os.path.join(self.dest_dir, "wavs", audio), 
+                    silence_removed, 
+                    sr,
+                    format='wav',
+                    subtype='PCM_16'
                 )
+
                 filtered_audios.append(audio)
                 filtered_txts.append(audio.replace(".wav", ".txt"))