add a data prep example for lhotse

freewym · Nov 6, 2020 · 1f09058 · 1f09058
1 parent 1b0c26c
commit 1f09058
Show file tree

Hide file tree

Showing 6 changed files with 217 additions and 1 deletion.
diff --git a/espresso/data/asr_k2_dataset.py b/espresso/data/asr_k2_dataset.py
@@ -115,7 +115,7 @@ def __init__(
             [cut.num_frames if cut.has_features else cut.num_samples for cut in cuts]
         )
         self.tgt_sizes = None
-        first_cut = cuts[self.cut_ids[0]]
+        first_cut = next(iter(cuts))
         # assume all cuts have no supervisions if the first one does not
         if len(first_cut.supervisions) > 0:
             assert len(first_cut.supervisions) == 1, "Only single-supervision cuts are allowed"

diff --git a/espresso/tools/.gitignore b/espresso/tools/.gitignore
@@ -1,3 +1,4 @@
 kaldi
 openfst*
 pychain
+lhotse
diff --git a/examples/mobvoihotwords/cmd.sh b/examples/mobvoihotwords/cmd.sh
@@ -0,0 +1,20 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+# Local-machine setup (no queueing system): uncomment these three lines
+# and comment out the queue-based exports below.
+#export train_cmd="run.pl --mem 4G"
+#export cuda_cmd="run.pl --mem 4G --gpu 1"
+#export decode_cmd="run.pl --mem 4G"
+
+# JHU setup (copy queue-freegpu.pl from ESPnet into utils/)
+# cuda_cmd reads its queue options from conf/gpu.conf.
+export train_cmd="queue.pl --mem 4G"
+export cuda_cmd="queue-freegpu.pl --mem 8G --gpu 1 --config conf/gpu.conf"
+export decode_cmd="queue.pl --mem 4G"
diff --git a/examples/mobvoihotwords/conf/gpu.conf b/examples/mobvoihotwords/conf/gpu.conf
@@ -0,0 +1,10 @@
+# Default configuration
+command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
+option mem=* -l mem_free=$0,ram_free=$0
+option mem=0          # Do not add anything to qsub_opts
+option num_threads=* -pe smp $0
+option num_threads=1  # Do not add anything to qsub_opts
+option max_jobs_run=* -tc $0
+default gpu=0
+option gpu=0
+option gpu=* -l 'hostname=c*,gpu=$0' -q g.q
diff --git a/examples/mobvoihotwords/local/data_prep.py b/examples/mobvoihotwords/local/data_prep.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+# Copyright (c) Yiming Wang
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import logging
+import os
+import sys
+from concurrent.futures import ProcessPoolExecutor
+from pathlib import Path
+
+import numpy as np
+
+
+# Configure root logging to write timestamped records to stdout. The level is
+# read from the LOGLEVEL environment variable and defaults to INFO.
+logging.basicConfig(
+    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    level=os.environ.get("LOGLEVEL", "INFO").upper(),
+    stream=sys.stdout,
+)
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+def get_parser():
+    """
+    Builds the command-line parser for MobvoiHotwords data preparation.
+
+    The parser accepts ``--data-dir`` (str, default "data"), ``--seed``
+    (int, default 1) and ``--nj`` (int, default 1).
+    :return: the configured ``argparse.ArgumentParser``
+    """
+    arg_parser = argparse.ArgumentParser(
+        description="data preparation for the MobvoiHotwords corpus"
+    )
+    # (flag, default, type, help) -- registered in this order
+    option_specs = (
+        ("--data-dir", "data", str, "data directory"),
+        ("--seed", 1, int, "random seed"),
+        ("--nj", 1, int, "number of jobs for features extraction"),
+    )
+    for flag, default_value, value_type, help_text in option_specs:
+        arg_parser.add_argument(
+            flag, default=default_value, type=value_type, help=help_text
+        )
+
+    return arg_parser
+
+
+def main(args):
+    """
+    Downloads MobvoiHotwords, prepares its manifests and extracts MFCC features.
+
+    For every partition returned by ``prepare_mobvoihotwords`` a ``CutSet`` is
+    built from the recordings and supervisions, features are computed and
+    stored under ``<data_dir>/feats_<partition>*``, and the resulting cuts are
+    written to ``<data_dir>/cuts_<partition>.json.gz``. Partitions whose name
+    contains "train" are additionally augmented with reverberation and with
+    speed perturbation (factors 1.1 and 0.9); the ids of the augmented cuts
+    are prefixed with "rev-", "sp1.1-" and "sp0.9-" before all four sets are
+    combined.
+    :param args: parsed command-line arguments; ``data_dir``, ``seed`` and ``nj`` are read
+    :raises ImportError: if Lhotse cannot be imported from ``espresso.tools.lhotse``
+    """
+    try:
+        # TODO use pip install once it's available
+        from espresso.tools.lhotse import CutSet, Mfcc, MfccConfig, LilcomFilesWriter, WavAugmenter
+        from espresso.tools.lhotse.manipulation import combine
+        from espresso.tools.lhotse.recipes.mobvoihotwords import download_and_untar, prepare_mobvoihotwords
+    except ImportError as err:
+        raise ImportError(
+            "Please install Lhotse by `make lhotse` after entering espresso/tools"
+        ) from err
+
+    root_dir = Path(args.data_dir)
+    corpus_dir = root_dir / "MobvoiHotwords"
+    output_dir = root_dir
+
+    # Download and extract the corpus
+    download_and_untar(root_dir)
+
+    # Prepare manifests
+    mobvoihotwords_manifests = prepare_mobvoihotwords(corpus_dir, output_dir)
+    logger.info(
+        "train/dev/test size: {}/{}/{}".format(
+            len(mobvoihotwords_manifests["train"]["recordings"]),
+            len(mobvoihotwords_manifests["dev"]["recordings"]),
+            len(mobvoihotwords_manifests["test"]["recordings"])
+        )
+    )
+
+    # Data augmentation
+    # seed numpy's global RNG, which the reverb effect draws from
+    np.random.seed(args.seed)
+    # equivalent to Kaldi's mfcc_hires config
+    mfcc = Mfcc(config=MfccConfig(num_mel_bins=40, num_ceps=40, low_freq=20, high_freq=-400))
+    num_jobs = args.nj
+    for partition, manifests in mobvoihotwords_manifests.items():
+        cut_set = CutSet.from_manifests(
+            recordings=manifests["recordings"],
+            supervisions=manifests["supervisions"],
+        )
+        # the speed effect needs the sampling rate to resample back to it
+        sampling_rate = next(iter(cut_set)).sampling_rate
+        with ProcessPoolExecutor(num_jobs) as ex:
+            if "train" in partition:
+                # original set
+                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_orig") as storage:
+                    cut_set_orig = cut_set.compute_and_store_features(
+                        extractor=mfcc,
+                        storage=storage,
+                        augmenter=None,
+                        executor=ex,
+                    )
+                # augmented with reverberation
+                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_rev") as storage:
+                    cut_set_rev = cut_set.compute_and_store_features(
+                        extractor=mfcc,
+                        storage=storage,
+                        augmenter=WavAugmenter(effect_chain=reverb()),
+                        executor=ex,
+                    )
+                    # prefix the ids so they do not clash with the original cuts
+                    cut_set_rev = CutSet.from_cuts(
+                        cut.with_id("rev-" + cut.id) for cut in cut_set_rev.cuts
+                    )
+                # augmented with speed perturbation
+                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp1.1") as storage:
+                    cut_set_sp1p1 = cut_set.compute_and_store_features(
+                        extractor=mfcc,
+                        storage=storage,
+                        augmenter=WavAugmenter(
+                            effect_chain=speed(sampling_rate=sampling_rate, factor=1.1)
+                        ),
+                        executor=ex,
+                    )
+                    cut_set_sp1p1 = CutSet.from_cuts(
+                        cut.with_id("sp1.1-" + cut.id) for cut in cut_set_sp1p1.cuts
+                    )
+                with LilcomFilesWriter(f"{output_dir}/feats_{partition}_sp0.9") as storage:
+                    cut_set_sp0p9 = cut_set.compute_and_store_features(
+                        extractor=mfcc,
+                        storage=storage,
+                        augmenter=WavAugmenter(
+                            effect_chain=speed(sampling_rate=sampling_rate, factor=0.9)
+                        ),
+                        executor=ex,
+                    )
+                    cut_set_sp0p9 = CutSet.from_cuts(
+                        cut.with_id("sp0.9-" + cut.id) for cut in cut_set_sp0p9.cuts
+                    )
+                # combine the original and augmented sets together
+                cut_set = combine(
+                    cut_set_orig, cut_set_rev, cut_set_sp1p1, cut_set_sp0p9
+                )
+            else:  # no augmentations for dev and test sets
+                with LilcomFilesWriter(f"{output_dir}/feats_{partition}") as storage:
+                    cut_set = cut_set.compute_and_store_features(
+                        extractor=mfcc,
+                        storage=storage,
+                        augmenter=None,
+                        executor=ex,
+                    )
+            # keep the cuts alongside the manifests and save them to disk
+            mobvoihotwords_manifests[partition]["cuts"] = cut_set
+            cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
+
+
+def reverb(*args, **kwargs):
+    """
+    Returns a reverb effect chain for wav augmentation.
+
+    Any positional or keyword arguments are accepted but not used.
+    """
+    import augment
+    chain = augment.EffectChain()
+    # Reverb makes the signal have two channels,
+    # which we combine into one by running `channels` without parameters.
+    with_reverb = chain.reverb(50, 50, lambda: np.random.randint(1, 30))
+    with_reverb.channels()
+    return chain
+
+
+def speed(sampling_rate: int, factor: float):
+    """
+    Returns a speed perturbation effect chain with <factor> for wav augmentation.
+    :param sampling_rate: a sampling rate value for which the effect will be created (resampling is needed for speed).
+    :param factor: speed perturbation factor
+    """
+    import augment
+    chain = augment.EffectChain()
+    # The speed effect changes the sampling rate, so a rate effect follows it
+    # to compensate. The 'quick' option ("-q") is passed to both the speed and
+    # the rate effects to speed things up.
+    perturbed = chain.speed("-q", lambda: factor)
+    perturbed.rate("-q", sampling_rate)
+    return chain
+
+
+if __name__ == "__main__":
+    # Parse the command line and run the data preparation.
+    main(get_parser().parse_args())
diff --git a/examples/mobvoihotwords/path.sh b/examples/mobvoihotwords/path.sh
@@ -0,0 +1,16 @@
+# Environment setup for this recipe; the repository root is taken to be two
+# levels above the current working directory.
+MAIN_ROOT=$PWD/../..
+export KALDI_ROOT=$MAIN_ROOT/espresso/tools/kaldi
+
+# BEGIN from kaldi path.sh
+# Source Kaldi's tools/env.sh only if it exists.
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
+# Exit with an error message if Kaldi's common_path.sh is missing.
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
+# END
+
+# NOTE(review): assumes Anaconda is installed at ~/anaconda3 -- adjust to your setup.
+export PATH=~/anaconda3/bin:$PATH
+export PATH=$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$PATH
+export LD_LIBRARY_PATH=$MAIN_ROOT/espresso/tools/openfst/lib:$LD_LIBRARY_PATH
+# Make the repository, espresso, and the lhotse and pychain checkouts under
+# espresso/tools importable from Python.
+export PYTHONPATH=$MAIN_ROOT:$MAIN_ROOT/espresso:$MAIN_ROOT/espresso/tools:$MAIN_ROOT/espresso/tools/lhotse:$MAIN_ROOT/espresso/tools/pychain:$PYTHONPATH
+export PYTHONUNBUFFERED=1
+export PYTHONUNBUFFERED=1