-
Notifications
You must be signed in to change notification settings - Fork 116
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
kaldi | ||
openfst* | ||
pychain | ||
lhotse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# you can change cmd.sh depending on what type of queue you are using. | ||
# If you have no queueing system and want to run on a local machine, you | ||
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run | ||
# commands one by one: most recipes will exhaust the memory on your | ||
# machine). queue.pl works with GridEngine (qsub). slurm.pl works | ||
# with slurm. Different queues are configured differently, with different | ||
# queue names and different ways of specifying things like memory; | ||
# to account for these differences you can create and edit the file | ||
# conf/queue.conf to match your queue's configuration. Search for | ||
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, | ||
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. | ||
|
||
# Local-machine alternative (no queueing system): uncomment the three run.pl
# lines below and comment out the queue.pl-based exports further down.
#export train_cmd="run.pl --mem 4G" | ||
#export cuda_cmd="run.pl --mem 4G --gpu 1" | ||
#export decode_cmd="run.pl --mem 4G" | ||
|
||
# JHU setup (copy queue-freegpu.pl from ESPnet into utils/) | ||
# train_cmd and decode_cmd submit through queue.pl with --mem 4G; cuda_cmd
# submits through queue-freegpu.pl with --mem 8G, one GPU, and the queue
# options read from conf/gpu.conf.
export train_cmd="queue.pl --mem 4G" | ||
export cuda_cmd="queue-freegpu.pl --mem 8G --gpu 1 --config conf/gpu.conf" | ||
export decode_cmd="queue.pl --mem 4G" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Default configuration | ||
# The `command` line gives the qsub invocation and the flags passed on it.
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64* | ||
# Each `option name=pattern` line lists the extra qsub flags for that option.
# NOTE(review): `$0` presumably stands for the value supplied with the option;
# confirm against 'default_config' in utils/queue.pl.
option mem=* -l mem_free=$0,ram_free=$0 | ||
option mem=0 # Do not add anything to qsub_opts | ||
option num_threads=* -pe smp $0 | ||
option num_threads=1 # Do not add anything to qsub_opts | ||
option max_jobs_run=* -tc $0 | ||
# gpu defaults to 0; gpu=0 has no extra flags listed, while any other value
# adds a hostname/gpu resource request and selects the g.q queue.
default gpu=0 | ||
option gpu=0 | ||
option gpu=* -l 'hostname=c*,gpu=$0' -q g.q |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright (c) Yiming Wang | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import argparse | ||
import logging | ||
import os | ||
import sys | ||
from concurrent.futures import ProcessPoolExecutor | ||
from pathlib import Path | ||
|
||
|
||
# Send log records to stdout. The threshold is taken from the LOGLEVEL
# environment variable (upper-cased), falling back to INFO when it is unset.
logging.basicConfig(
    stream=sys.stdout,
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||
|
||
def get_parser():
    """
    Builds the command-line parser for this data preparation script.

    No options are registered yet; the ``fmt: off`` / ``fmt: on`` markers
    delimit the region where argument definitions are meant to go.

    :return: the configured ``argparse.ArgumentParser``.
    """
    description = "data preparation for the MobvoiHotwords corpus"
    cli_parser = argparse.ArgumentParser(description=description)
    # fmt: off
    # fmt: on

    return cli_parser
|
||
|
||
def main(args):
    """
    Downloads MobvoiHotwords, prepares its manifests and extracts MFCC features.

    For every partition returned by ``prepare_mobvoihotwords`` a cut set is
    built from its recordings and supervisions, MFCC features (40 mel bins,
    40 cepstra) are computed and stored through ``LilcomFilesWriter`` under
    ``data/feats_<partition>*``, and the resulting cuts are written to
    ``data/cuts_<partition>.json.gz``.

    Partitions whose name contains ``"train"`` are extracted four times:
    unmodified, reverberated, and speed-perturbed by 1.1x and 0.9x. The
    augmented cuts receive the id prefixes ``rev-``, ``sp1.1-`` and ``sp0.9-``
    and are merged with the unmodified ones. Other partitions are extracted
    once, without augmentation, and padded.

    :param args: parsed command-line arguments (currently unused).
    :raises ImportError: if the Lhotse package under espresso/tools cannot be imported.
    """
    try:
        # TODO use pip install once it's available
        from espresso.tools.lhotse import (
            CutSet,
            LilcomFilesWriter,
            Mfcc,
            MfccConfig,
            WavAugmenter,
        )
        from espresso.tools.lhotse.recipes.mobvoihotwords import (
            download_and_untar,
            prepare_mobvoihotwords,
        )
        from espresso.tools.lhotse.utils import fastcopy
    except ImportError as err:
        # Keep the original failure attached so the real cause stays visible.
        raise ImportError(
            "Please install Lhotse by `make lhotse` after entering espresso/tools"
        ) from err

    root_dir = Path("data")
    corpus_dir = root_dir / "MobvoiHotwords"
    output_dir = root_dir

    # Download and extract the corpus
    download_and_untar(root_dir)

    # Prepare manifests
    mobvoihotwords_manifests = prepare_mobvoihotwords(corpus_dir, output_dir)
    logger.info(
        "train/dev/test size: {}/{}/{}".format(
            len(mobvoihotwords_manifests["train"]["recordings"]),
            len(mobvoihotwords_manifests["dev"]["recordings"]),
            len(mobvoihotwords_manifests["test"]["recordings"]),
        )
    )

    # Data augmentation
    mfcc_hires_config = fastcopy(
        MfccConfig(), num_mel_bins=40, num_ceps=40, low_freq=20, high_freq=-400
    )
    num_jobs = 1
    for partition, manifests in mobvoihotwords_manifests.items():
        cut_set = CutSet.from_manifests(
            recordings=manifests["recordings"],
            supervisions=manifests["supervisions"],
        )
        with ProcessPoolExecutor(num_jobs) as ex:
            if "train" in partition:
                # Unmodified copy of the training data.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_orig") as storage:
                    cut_set_orig = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=None,
                        executor=ex,
                    )
                # Reverberated copy. The keyword was previously misspelled as
                # `excutor`, unlike the `executor=` used by the other calls.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_rev") as storage:
                    cut_set_rev = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=WavAugmenter(effect_chain=reverb()),
                        executor=ex,
                    )
                # Prefix the ids so augmented cuts do not collide with the
                # originals when the sets are merged below.
                # NOTE(review): this iterates `.cuts` and reads `cut.id`, so it
                # presumes iteration yields cut objects -- confirm against the
                # Lhotse version in espresso/tools.
                cut_set_rev = CutSet(
                    {
                        ("rev-" + cut.id): cut.with_id("rev-" + cut.id)
                        for cut in cut_set_rev.cuts
                    }
                )
                # 1.1x speed-perturbed copy.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp1.1") as storage:
                    cut_set_sp1p1 = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=WavAugmenter(
                            effect_chain=speed_perturb(times=1.1, sampling_rate=16000)
                        ),
                        executor=ex,
                    )
                cut_set_sp1p1 = CutSet(
                    {
                        ("sp1.1-" + cut.id): cut.with_id("sp1.1-" + cut.id)
                        for cut in cut_set_sp1p1.cuts
                    }
                )
                # 0.9x speed-perturbed copy.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp0.9") as storage:
                    cut_set_sp0p9 = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=WavAugmenter(
                            effect_chain=speed_perturb(times=0.9, sampling_rate=16000)
                        ),
                        executor=ex,
                    )
                cut_set_sp0p9 = CutSet(
                    {
                        ("sp0.9-" + cut.id): cut.with_id("sp0.9-" + cut.id)
                        for cut in cut_set_sp0p9.cuts
                    }
                )
                # Merge the original and the three augmented copies.
                # NOTE(review): `from_cuts` is handed a dict here -- confirm it
                # accepts a mapping rather than an iterable of cuts.
                cut_set = CutSet.from_cuts(
                    {
                        **cut_set_orig.cuts,
                        **cut_set_rev.cuts,
                        **cut_set_sp1p1.cuts,
                        **cut_set_sp0p9.cuts,
                    }
                )
            else:
                # Non-training partitions: no augmentation.
                # NOTE(review): only this branch calls `.pad()`; confirm that
                # leaving the training cuts unpadded is intended.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}") as storage:
                    cut_set = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=None,
                        executor=ex,
                    ).pad()
        mobvoihotwords_manifests[partition]["cuts"] = cut_set
        cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
|
||
|
||
def reverb(*args, **kwargs):
    """
    Returns a reverb effect for wav augmentation.

    Positional and keyword arguments are accepted but not used.

    :return: an ``augment.EffectChain`` with a reverb effect followed by ``channels``.
    """
    import augment
    # `np` is not imported at file level, so import it here, next to
    # `augment`; without this the random parameter below raises NameError.
    import numpy as np

    effect_chain = augment.EffectChain()
    # Reverb it makes the signal to have two channels,
    # which we combine into 1 by running `channels` w/o parameters.
    # The third reverb parameter is passed as a callable that draws an integer
    # from [1, 30) each time it is called.
    # NOTE(review): the three parameters presumably follow sox's reverb
    # effect (reverberance, HF damping, room scale) -- confirm.
    effect_chain.reverb(50, 50, lambda: np.random.randint(1, 30)).channels()
    return effect_chain
|
||
|
||
def speed_perturb(times: float, sampling_rate: int):
    """
    Builds a wav-augmentation effect chain that perturbs speed by a factor of <times>.

    :param times: target speed relative to the original speed
    :param sampling_rate: sampling rate the chain resamples to (the pitch effect
        alters the sampling ratio, so a rate effect follows it).
    :return: an ``augment.EffectChain`` holding the pitch and rate effects.
    """
    import augment

    def _pitch_amount():
        # Evaluated by the chain on demand, hence a callable rather than a value.
        return (times - 1.0) * 100

    chain = augment.EffectChain()
    # Both effects get the 'quick' ("-q") option to speed things up; rate
    # compensates for the sampling-ratio change introduced by pitch.
    pitched = chain.pitch("-q", _pitch_amount)
    pitched.rate("-q", sampling_rate)
    return chain
|
||
|
||
if __name__ == "__main__":
    # Script entry point: parse the command line and run the preparation.
    main(get_parser().parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Environment setup for this recipe. MAIN_ROOT is derived from $PWD, so this
# file must be sourced from the recipe directory.
# All path expansions are quoted so directories containing spaces work.
MAIN_ROOT="$PWD/../.."
export KALDI_ROOT="$MAIN_ROOT/espresso/tools/kaldi"

# BEGIN from kaldi path.sh
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH="$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH"
[ ! -f "$KALDI_ROOT/tools/config/common_path.sh" ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. "$KALDI_ROOT/tools/config/common_path.sh"
export LC_ALL=C
# END

# $HOME replaces ~ because tilde expansion does not happen inside quotes.
export PATH="$HOME/anaconda3/bin:$PATH"
export PATH="$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$PATH"
# ${VAR:+:$VAR} appends the previous value only when it is non-empty. This
# avoids an empty trailing entry, which the dynamic linker treats as the
# current working directory.
export LD_LIBRARY_PATH="$MAIN_ROOT/espresso/tools/openfst/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PYTHONPATH="$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$MAIN_ROOT/espresso/tools/lhotse:$MAIN_ROOT/espresso/tools/pychain${PYTHONPATH:+:$PYTHONPATH}"
export PYTHONUNBUFFERED=1