Skip to content

Commit

Permalink
add a data prep example for lhotse
Browse files Browse the repository at this point in the history
  • Loading branch information
freewym committed Nov 6, 2020
1 parent 1b0c26c commit f21dd7c
Show file tree
Hide file tree
Showing 5 changed files with 216 additions and 0 deletions.
1 change: 1 addition & 0 deletions espresso/tools/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
kaldi
openfst*
pychain
lhotse
20 changes: 20 additions & 0 deletions examples/mobvoihotwords/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

# Local-machine setup (no queueing system): uncomment these three lines and
# comment out the queue-based exports below.
#export train_cmd="run.pl --mem 4G"
#export cuda_cmd="run.pl --mem 4G --gpu 1"
#export decode_cmd="run.pl --mem 4G"

# JHU setup (copy queue-freegpu.pl from ESPnet into utils/)
# The GPU command points at conf/gpu.conf through --config.
export train_cmd="queue.pl --mem 4G"
export cuda_cmd="queue-freegpu.pl --mem 8G --gpu 1 --config conf/gpu.conf"
export decode_cmd="queue.pl --mem 4G"
10 changes: 10 additions & 0 deletions examples/mobvoihotwords/conf/gpu.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Default configuration
# NOTE(review): this file is referenced as `--config conf/gpu.conf` by the
# cuda_cmd definition in cmd.sh; presumably each `option name=*` line maps a
# command-line switch to extra qsub arguments, with `$0` standing for the
# value given on the command line — confirm against the Kaldi queue docs.
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
# GPU handling: gpu defaults to 0; a gpu=0 request adds no extra arguments,
# any other value adds the hostname/gpu resource request and the g.q queue.
default gpu=0
option gpu=0
option gpu=* -l 'hostname=c*,gpu=$0' -q g.q
169 changes: 169 additions & 0 deletions examples/mobvoihotwords/local/data_prep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
#!/usr/bin/env python3
# Copyright (c) Yiming Wang
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import logging
import os
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path


# Configure root logging once at import time: records go to stdout, and the
# verbosity is taken from the LOGLEVEL environment variable (INFO if unset).
logging.basicConfig(
    stream=sys.stdout,
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
)
# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)


def get_parser():
    """Build the command-line parser for this data preparation script.

    No options are registered yet; the ``fmt`` markers below reserve the
    place where argument definitions are meant to go.

    :return: an ``argparse.ArgumentParser`` carrying only the description.
    """
    description = "data preparation for the MobvoiHotwords corpus"
    cli = argparse.ArgumentParser(description=description)
    # fmt: off
    # fmt: on

    return cli


def main(args):
    """Download MobvoiHotwords, build manifests and store MFCC features.

    Everything is written under ``data/``. For each partition returned by
    ``prepare_mobvoihotwords`` a cut set is built from its recordings and
    supervisions, features are computed and stored in
    ``data/feats_<partition>[_<variant>]``, and the resulting cut set is
    saved as ``data/cuts_<partition>.json.gz``.

    Partitions whose name contains ``"train"`` get four feature variants
    (original, reverberated, speed 1.1 and speed 0.9); the augmented cuts are
    re-keyed with the ``rev-`` / ``sp1.1-`` / ``sp0.9-`` id prefixes and all
    four variants are merged. Other partitions get plain features and are
    padded via ``.pad()``.

    :param args: parsed command-line arguments (currently unused).
    :raises ImportError: if the Lhotse modules cannot be imported.
    """
    try:
        # TODO use pip install once it's available
        from espresso.tools.lhotse import (
            CutSet,
            LilcomFilesWriter,
            Mfcc,
            MfccConfig,
            WavAugmenter,
        )
        from espresso.tools.lhotse.recipes.mobvoihotwords import (
            download_and_untar,
            prepare_mobvoihotwords,
        )
        from espresso.tools.lhotse.utils import fastcopy
    except ImportError as err:
        # Chain the original error so the actual missing module stays visible.
        raise ImportError(
            "Please install Lhotse by `make lhotse` after entering espresso/tools"
        ) from err

    root_dir = Path("data")
    corpus_dir = root_dir / "MobvoiHotwords"
    output_dir = root_dir

    # Download and extract the corpus
    download_and_untar(root_dir)

    # Prepare manifests
    mobvoihotwords_manifests = prepare_mobvoihotwords(corpus_dir, output_dir)
    logger.info(
        "train/dev/test size: %s/%s/%s",
        len(mobvoihotwords_manifests["train"]["recordings"]),
        len(mobvoihotwords_manifests["dev"]["recordings"]),
        len(mobvoihotwords_manifests["test"]["recordings"]),
    )

    # Data augmentation
    mfcc_hires_config = fastcopy(
        MfccConfig(), num_mel_bins=40, num_ceps=40, low_freq=20, high_freq=-400
    )
    num_jobs = 1
    for partition, manifests in mobvoihotwords_manifests.items():
        cut_set = CutSet.from_manifests(
            recordings=manifests["recordings"],
            supervisions=manifests["supervisions"],
        )
        with ProcessPoolExecutor(num_jobs) as ex:
            if "train" in partition:
                # Un-augmented copy of the training data.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_orig") as storage:
                    cut_set_orig = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=None,
                        executor=ex,
                    )

                # Reverberated copy. The keyword was misspelled `excutor` in
                # this and the two speed-perturbed calls; it must be
                # `executor`, as in the un-augmented call above.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_rev") as storage:
                    cut_set_rev = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=WavAugmenter(effect_chain=reverb()),
                        executor=ex,
                    )
                # NOTE(review): `.cuts` is iterated here as a collection of
                # cuts but unpacked with `**` as a mapping in the merge below;
                # confirm both uses against the Lhotse version in use.
                cut_set_rev = CutSet(
                    {
                        ("rev-" + cut.id): cut.with_id("rev-" + cut.id)
                        for cut in cut_set_rev.cuts
                    }
                )

                # Speed-perturbed copy (factor 1.1).
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp1.1") as storage:
                    cut_set_sp1p1 = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=WavAugmenter(
                            effect_chain=speed_perturb(times=1.1, sampling_rate=16000)
                        ),
                        executor=ex,
                    )
                cut_set_sp1p1 = CutSet(
                    {
                        ("sp1.1-" + cut.id): cut.with_id("sp1.1-" + cut.id)
                        for cut in cut_set_sp1p1.cuts
                    }
                )

                # Speed-perturbed copy (factor 0.9).
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp0.9") as storage:
                    cut_set_sp0p9 = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=WavAugmenter(
                            effect_chain=speed_perturb(times=0.9, sampling_rate=16000)
                        ),
                        executor=ex,
                    )
                cut_set_sp0p9 = CutSet(
                    {
                        ("sp0.9-" + cut.id): cut.with_id("sp0.9-" + cut.id)
                        for cut in cut_set_sp0p9.cuts
                    }
                )

                # Merge the four variants; the id prefixes keep keys distinct.
                cut_set = CutSet.from_cuts(
                    {
                        **cut_set_orig.cuts,
                        **cut_set_rev.cuts,
                        **cut_set_sp1p1.cuts,
                        **cut_set_sp0p9.cuts,
                    }
                )
            else:
                # Non-training partitions: plain features, then padding.
                with LilcomFilesWriter(f"{output_dir}/feats_{partition}") as storage:
                    cut_set = cut_set.compute_and_store_features(
                        extractor=Mfcc(config=mfcc_hires_config),
                        storage=storage,
                        augmenter=None,
                        executor=ex,
                    ).pad()
        mobvoihotwords_manifests[partition]["cuts"] = cut_set
        cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")


def reverb(*args, **kwargs):
    """
    Returns a reverb effect for wav augmentation.

    The third reverb parameter is supplied as a callable that draws a fresh
    random integer in ``[1, 30)`` each time it is invoked.

    :param args: unused; accepted for call compatibility.
    :param kwargs: unused; accepted for call compatibility.
    :return: an ``augment.EffectChain`` with ``reverb`` followed by ``channels``.
    """
    import augment
    # numpy is imported here because the file never imports it at module
    # level; without it the lambda below raises NameError when it is called.
    import numpy as np

    effect_chain = augment.EffectChain()
    # Reverb it makes the signal to have two channels,
    # which we combine into 1 by running `channels` w/o parameters
    effect_chain.reverb(50, 50, lambda: np.random.randint(1, 30)).channels()
    return effect_chain


def speed_perturb(times: float, sampling_rate: int):
    """
    Build a <times> speed perturbation effect chain for wav augmentation.

    :param times: resulting times of speed relative to the original speed
    :param sampling_rate: a sampling rate value for which the effect will be
        created (resampling is needed for pitch).
    :return: an ``augment.EffectChain`` with ``pitch`` followed by ``rate``.
    """
    import augment

    def pitch_amount():
        # NOTE(review): confirm that (times - 1) * 100 is the value the pitch
        # effect needs to produce the intended speed change.
        return (times - 1.0) * 100

    chain = augment.EffectChain()
    # The pitch effect changes the sampling ratio, so a rate effect is applied
    # to its result to compensate. Both use the 'quick' ("-q") option to
    # speed things up.
    pitched = chain.pitch("-q", pitch_amount)
    pitched.rate("-q", sampling_rate)
    return chain


if __name__ == "__main__":
    # Script entry point: parse the command line and hand the resulting
    # namespace straight to main().
    main(get_parser().parse_args())
16 changes: 16 additions & 0 deletions examples/mobvoihotwords/path.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Environment setup for this recipe: locates the repository root relative to
# the current directory and extends PATH, LD_LIBRARY_PATH and PYTHONPATH.
# NOTE(review): presumably meant to be sourced from the recipe directory,
# since every path is derived from $PWD — confirm against the calling scripts.
# All expansions are quoted so that directories containing whitespace work.
MAIN_ROOT="$PWD/../.."
export KALDI_ROOT="$MAIN_ROOT/espresso/tools/kaldi"

# BEGIN from kaldi path.sh
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH="$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH"
# Abort early when the Kaldi tools have not been set up.
[ ! -f "$KALDI_ROOT/tools/config/common_path.sh" ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. "$KALDI_ROOT/tools/config/common_path.sh"
export LC_ALL=C
# END

# The tilde is left unquoted on purpose so the shell still expands it.
export PATH=~/anaconda3/bin:"$PATH"
export PATH="$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$PATH"
export LD_LIBRARY_PATH="$MAIN_ROOT/espresso/tools/openfst/lib:$LD_LIBRARY_PATH"
export PYTHONPATH="$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$MAIN_ROOT/espresso/tools/lhotse:$MAIN_ROOT/espresso/tools/pychain:$PYTHONPATH"
# Disable Python output buffering.
export PYTHONUNBUFFERED=1

0 comments on commit f21dd7c

Please sign in to comment.