-
Notifications
You must be signed in to change notification settings - Fork 116
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
217 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
kaldi | ||
openfst* | ||
pychain | ||
lhotse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Job-dispatch commands used by the recipe. Edit this file to match the kind
# of queue you are using.
#
# * queue.pl works with GridEngine (qsub); slurm.pl works with slurm.
# * If you have no queueing system and want to run on a local machine, change
#   all instances of 'queue.pl' to 'run.pl' -- but be careful and run the
#   commands one by one: most recipes will exhaust the memory on your machine.
# * Different queues are configured differently, with different queue names
#   and different ways of specifying things like memory. To account for these
#   differences, create and edit conf/queue.conf to match your queue's
#   configuration. Search for conf/queue.conf in
#   http://kaldi-asr.org/doc/queue.html for more information, or search for
#   the string 'default_config' in utils/queue.pl or utils/slurm.pl.

# Local-machine alternative (uncomment these and comment out the block below):
#export train_cmd="run.pl --mem 4G"
#export cuda_cmd="run.pl --mem 4G --gpu 1"
#export decode_cmd="run.pl --mem 4G"

# JHU setup (copy queue-freegpu.pl from ESPnet into utils/)
train_cmd="queue.pl --mem 4G"
cuda_cmd="queue-freegpu.pl --mem 8G --gpu 1 --config conf/gpu.conf"
decode_cmd="queue.pl --mem 4G"
export train_cmd cuda_cmd decode_cmd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Default configuration | ||
# NOTE(review): this looks like a Kaldi queue.pl config; per the Kaldi queue docs, | ||
# `$0` in an `option` line is presumably replaced by the value given on the command line -- confirm. | ||
# Base qsub invocation that the option lines below add flags to. | ||
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* | ||
# --mem <value>: request that amount as both mem_free and ram_free. | ||
option mem=* -l mem_free=$0,ram_free=$0 | ||
option mem=0 # Do not add anything to qsub_opts | ||
# --num-threads <n>: request n slots in the `smp` parallel environment. | ||
option num_threads=* -pe smp $0 | ||
option num_threads=1 # Do not add anything to qsub_opts | ||
# --max-jobs-run <n>: passed to qsub as `-tc <n>`. | ||
option max_jobs_run=* -tc $0 | ||
# gpu defaults to 0, and gpu=0 adds no extra qsub flags. | ||
default gpu=0 | ||
option gpu=0 | ||
# --gpu <n>: request n GPUs on hosts matching `c*`, submitted to queue g.q. | ||
option gpu=* -l 'hostname=c*,gpu=$0' -q g.q |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright (c) Yiming Wang | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import argparse | ||
import logging | ||
import os | ||
import sys | ||
from concurrent.futures import ProcessPoolExecutor | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
|
||
|
||
# Log to stdout. Verbosity is taken from the LOGLEVEL environment variable
# (case-insensitive) and falls back to INFO when it is unset.
_log_level = os.environ.get("LOGLEVEL", "INFO").upper()
logging.basicConfig(
    stream=sys.stdout,
    level=_log_level,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
)
logger = logging.getLogger(__name__)
|
||
|
||
def get_parser():
    """Build the command-line parser for the data preparation script.

    Recognized options (all optional):
      --data-dir  data directory (str, default "data")
      --seed      random seed (int, default 1)
      --nj        number of jobs for features extraction (int, default 1)

    :return: the configured ``argparse.ArgumentParser``.
    """
    arg_parser = argparse.ArgumentParser(
        description="data preparation for the MobvoiHotwords corpus"
    )
    # (flag, add_argument keyword arguments), registered in this order
    option_specs = (
        ("--data-dir", {"default": "data", "type": str, "help": "data directory"}),
        ("--seed", {"default": 1, "type": int, "help": "random seed"}),
        (
            "--nj",
            {
                "default": 1,
                "type": int,
                "help": "number of jobs for features extraction",
            },
        ),
    )
    for flag, spec in option_specs:
        arg_parser.add_argument(flag, **spec)
    return arg_parser
|
||
|
||
def main(args):
    """Download MobvoiHotwords, build its manifests and extract MFCC features.

    For every partition returned by ``prepare_mobvoihotwords`` a cut set is
    built from its recordings and supervisions, features are computed and
    stored under ``<data-dir>/feats_<partition>*``, and the resulting cuts are
    written to ``<data-dir>/cuts_<partition>.json.gz``. Partitions whose name
    contains "train" are extracted four times (original, reverberated, and
    speed-perturbed by 1.1 and 0.9) and the four cut sets are combined; other
    partitions are extracted once without augmentation.

    :param args: parsed command-line arguments; ``data_dir``, ``seed`` and
        ``nj`` are read.
    :raises ImportError: if the Lhotse copy under espresso/tools cannot be
        imported (the original import error is chained as the cause).
    """
    try:
        # TODO use pip install once it's available
        from espresso.tools.lhotse import (
            CutSet,
            LilcomFilesWriter,
            Mfcc,
            MfccConfig,
            WavAugmenter,
        )
        from espresso.tools.lhotse.manipulation import combine
        from espresso.tools.lhotse.recipes.mobvoihotwords import (
            download_and_untar,
            prepare_mobvoihotwords,
        )
    except ImportError as err:
        raise ImportError(
            "Please install Lhotse by `make lhotse` after entering espresso/tools"
        ) from err

    root_dir = Path(args.data_dir)
    corpus_dir = root_dir / "MobvoiHotwords"
    output_dir = root_dir

    # Download and extract the corpus
    download_and_untar(root_dir)

    # Prepare manifests
    mobvoihotwords_manifests = prepare_mobvoihotwords(corpus_dir, output_dir)
    logger.info(
        "train/dev/test size: %d/%d/%d",
        len(mobvoihotwords_manifests["train"]["recordings"]),
        len(mobvoihotwords_manifests["dev"]["recordings"]),
        len(mobvoihotwords_manifests["test"]["recordings"]),
    )

    # Data augmentation
    np.random.seed(args.seed)
    # equivalent to Kaldi's mfcc_hires config
    mfcc = Mfcc(
        config=MfccConfig(num_mel_bins=40, num_ceps=40, low_freq=20, high_freq=-400)
    )
    num_jobs = args.nj

    def extract(cuts, feats_dir, executor, augmenter=None, id_prefix=None):
        # Compute and store features for `cuts` under `feats_dir`; when
        # `id_prefix` is given, every resulting cut id is prefixed with it so
        # that augmented copies do not clash with the original cuts.
        with LilcomFilesWriter(feats_dir) as storage:
            result = cuts.compute_and_store_features(
                extractor=mfcc,
                storage=storage,
                augmenter=augmenter,
                executor=executor,
            )
        if id_prefix is not None:
            result = CutSet.from_cuts(
                cut.with_id(id_prefix + cut.id) for cut in result.cuts
            )
        return result

    for partition, manifests in mobvoihotwords_manifests.items():
        cut_set = CutSet.from_manifests(
            recordings=manifests["recordings"],
            supervisions=manifests["supervisions"],
        )
        sampling_rate = next(iter(cut_set)).sampling_rate
        feats_prefix = f"{output_dir}/feats_{partition}"
        with ProcessPoolExecutor(num_jobs) as ex:
            if "train" in partition:
                # original set
                cut_set_orig = extract(cut_set, f"{feats_prefix}_orig", ex)
                # augmented with reverberation
                cut_set_rev = extract(
                    cut_set,
                    f"{feats_prefix}_rev",
                    ex,
                    augmenter=WavAugmenter(effect_chain=reverb()),
                    id_prefix="rev-",
                )
                # augmented with speed perturbation
                cut_set_sp1p1 = extract(
                    cut_set,
                    f"{feats_prefix}_sp1.1",
                    ex,
                    augmenter=WavAugmenter(
                        effect_chain=speed(sampling_rate=sampling_rate, factor=1.1)
                    ),
                    id_prefix="sp1.1-",
                )
                cut_set_sp0p9 = extract(
                    cut_set,
                    f"{feats_prefix}_sp0.9",
                    ex,
                    augmenter=WavAugmenter(
                        effect_chain=speed(sampling_rate=sampling_rate, factor=0.9)
                    ),
                    id_prefix="sp0.9-",
                )
                # combine the original and augmented sets together
                cut_set = combine(
                    cut_set_orig, cut_set_rev, cut_set_sp1p1, cut_set_sp0p9
                )
            else:  # no augmentations for dev and test sets
                cut_set = extract(cut_set, feats_prefix, ex)
        mobvoihotwords_manifests[partition]["cuts"] = cut_set
        cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
|
||
|
||
def reverb(*args, **kwargs):
    """Build the reverberation effect chain used for wav augmentation.

    Any positional or keyword arguments are accepted but ignored.

    :return: an ``augment.EffectChain`` with a ``reverb`` effect followed by
        ``channels``.
    """
    import augment

    def _sample_third_arg():
        # each call draws a fresh integer from [1, 30)
        return np.random.randint(1, 30)

    chain = augment.EffectChain()
    # reverb turns the signal into two channels; calling `channels` without
    # parameters merges them back into one
    chain.reverb(50, 50, _sample_third_arg).channels()
    return chain
|
||
|
||
def speed(sampling_rate: int, factor: float):
    """Build a speed perturbation effect chain for wav augmentation.

    :param sampling_rate: sampling rate passed to the ``rate`` effect that
        follows ``speed`` (resampling is needed for speed).
    :param factor: speed perturbation factor
    :return: an ``augment.EffectChain`` with ``speed`` followed by ``rate``.
    """
    import augment

    def _fixed_factor():
        # the factor is handed over as a callable, like in the original chain
        return factor

    chain = augment.EffectChain()
    # The speed effect changes the sampling rate, so a rate effect follows to
    # compensate. Both effects get the 'quick' option ("-q") to speed things up.
    chain.speed("-q", _fixed_factor).rate("-q", sampling_rate)
    return chain
|
||
|
||
if __name__ == "__main__":
    # Script entry point: parse the command line and run the preparation.
    main(get_parser().parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Environment setup for the recipe. MAIN_ROOT is resolved relative to the | ||
# current working directory (two levels up), so the result depends on where this file is sourced from. | ||
MAIN_ROOT=$PWD/../.. | ||
export KALDI_ROOT=$MAIN_ROOT/espresso/tools/kaldi | ||
|
||
# BEGIN from kaldi path.sh | ||
# Source Kaldi's tools/env.sh only if it exists. | ||
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh | ||
# Put the local utils/ directory, Kaldi's openfst and sctk binaries, and the current directory ahead of the existing PATH. | ||
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH | ||
# common_path.sh is mandatory: print an error to stderr and exit with status 1 if it is missing. | ||
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 | ||
. $KALDI_ROOT/tools/config/common_path.sh | ||
export LC_ALL=C | ||
# END | ||
|
||
# NOTE(review): hard-coded Anaconda location under the user's home directory -- adjust if Python is installed elsewhere. | ||
export PATH=~/anaconda3/bin:$PATH | ||
# Prepend the repository root, espresso/ and espresso/tools to PATH. | ||
export PATH=$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$PATH | ||
# Add the openfst libraries under espresso/tools to the dynamic loader search path. | ||
export LD_LIBRARY_PATH=$MAIN_ROOT/espresso/tools/openfst/lib:$LD_LIBRARY_PATH | ||
# Add the repository root, espresso, espresso/tools, and the lhotse and pychain directories under espresso/tools to Python's module search path. | ||
export PYTHONPATH=$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$MAIN_ROOT/espresso/tools/lhotse:$MAIN_ROOT/espresso/tools/pychain:$PYTHONPATH | ||
# Any non-empty value makes Python's stdout/stderr unbuffered. | ||
export PYTHONUNBUFFERED=1 |