Nightly decoding of LibriSpeech test-clean/test-other in the pretrained-model CI.

Summary of this patch:
  * .github/scripts: new helper scripts that install kaldifeat, download the
    test-clean/test-other archives, prepare lhotse manifests and compute fbank
    features (outputs live under ~/tmp so the workflows can cache them).
  * .github/scripts/run-librispeech-*.sh: when GITHUB_EVENT_NAME is "schedule",
    link the pretrained checkpoint as exp/epoch-999.pt and run decode.py.
  * .github/workflows/run-librispeech-*.yml: add a daily cron trigger
    (15:50 UTC), the cache/download/manifest/fbank steps, and steps that
    print and upload the decoding results on scheduled runs.
  * .github/workflows/run-pretrained-*.yml: call install-kaldifeat.sh instead
    of the inlined clone/cmake/make commands.
  * pruned_transducer_stateless/beam_search.py: silence warnings raised while
    splitting top-k indexes into hypothesis/token indexes.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm context lines against the
target tree before applying. The "index" hashes are those of the original
patch and do not reflect the review fixes folded in here.

diff --git a/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh b/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
new file mode 100755
index 0000000000..a4a6cd8d7f
--- /dev/null
+++ b/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+# This script computes fbank features for the test-clean and test-other datasets.
+# The computed features are saved to ~/tmp/fbank-libri and are
+# cached for later runs
+
+export PYTHONPATH=$PWD:$PYTHONPATH
+echo $PYTHONPATH
+
+mkdir -p ~/tmp/fbank-libri
+cd egs/librispeech/ASR
+mkdir -p data
+cd data
+[ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
+cd ..
+./local/compute_fbank_librispeech.py
+ls -lh data/fbank/
diff --git a/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh b/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
new file mode 100755
index 0000000000..3efcc13e30
--- /dev/null
+++ b/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# This script downloads the test-clean and test-other datasets
+# of LibriSpeech and unzip them to the folder ~/tmp/download,
+# which is cached by GitHub actions for later runs.
+#
+# You will find directories ~/tmp/download/LibriSpeech after running
+# this script.
+
+mkdir -p ~/tmp/download
+cd egs/librispeech/ASR
+ln -s ~/tmp/download .
+cd download
+wget -q --no-check-certificate https://www.openslr.org/resources/12/test-clean.tar.gz
+tar xf test-clean.tar.gz
+rm test-clean.tar.gz
+
+wget -q --no-check-certificate https://www.openslr.org/resources/12/test-other.tar.gz
+tar xf test-other.tar.gz
+rm test-other.tar.gz
+pwd
+ls -lh
+ls -lh LibriSpeech
diff --git a/.github/scripts/install-kaldifeat.sh b/.github/scripts/install-kaldifeat.sh
new file mode 100755
index 0000000000..6666a5064f
--- /dev/null
+++ b/.github/scripts/install-kaldifeat.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# This script installs kaldifeat into the directory ~/tmp/kaldifeat
+# which is cached by GitHub actions for later runs.
+
+mkdir -p ~/tmp
+cd ~/tmp
+git clone https://github.com/csukuangfj/kaldifeat
+cd kaldifeat
+mkdir build
+cd build
+cmake -DCMAKE_BUILD_TYPE=Release ..
+make -j2 _kaldifeat
diff --git a/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh b/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
new file mode 100755
index 0000000000..e0b87e0fc9
--- /dev/null
+++ b/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# This script assumes that test-clean and test-other are downloaded
+# to egs/librispeech/ASR/download/LibriSpeech and generates manifest
+# files in egs/librispeech/ASR/data/manifests
+
+cd egs/librispeech/ASR
+[ ! -e download ] && ln -s ~/tmp/download .
+mkdir -p data/manifests
+lhotse prepare librispeech -j 2 -p test-clean -p test-other ./download/LibriSpeech data/manifests
+ls -lh data/manifests
diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
index 2387a16e2a..59e9edf41e 100755
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
@@ -45,3 +45,31 @@ for method in modified_beam_search beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p pruned_transducer_stateless/exp
+  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh pruned_transducer_stateless/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search fast_beam_search; do
+    log "Decoding with $method"
+
+    ./pruned_transducer_stateless/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir pruned_transducer_stateless/exp
+  done
+
+  rm pruned_transducer_stateless/exp/*.pt
+fi
diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
index ee86109964..1b62caab8c 100755
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
@@ -49,3 +49,31 @@ for method in modified_beam_search beam_search fast_beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p pruned_transducer_stateless2/exp
+  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless2/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh pruned_transducer_stateless2/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search fast_beam_search; do
+    log "Decoding with $method"
+
+    ./pruned_transducer_stateless2/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir pruned_transducer_stateless2/exp
+  done
+
+  rm pruned_transducer_stateless2/exp/*.pt
+fi
diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
index d28e888e73..1177e5a86e 100755
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
@@ -49,3 +49,31 @@ for method in modified_beam_search beam_search fast_beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p pruned_transducer_stateless3/exp
+  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh pruned_transducer_stateless3/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search fast_beam_search; do
+    log "Decoding with $method"
+
+    ./pruned_transducer_stateless3/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir pruned_transducer_stateless3/exp
+  done
+
+  rm pruned_transducer_stateless3/exp/*.pt
+fi
diff --git a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
index 102547c8b8..d2a2d3c029 100755
--- a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
+++ b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
@@ -45,3 +45,31 @@ for method in modified_beam_search beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p transducer_stateless2/exp
+  ln -s $PWD/$repo/exp/pretrained.pt transducer_stateless2/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh transducer_stateless2/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search modified_beam_search; do
+    log "Decoding with $method"
+
+    ./transducer_stateless2/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir transducer_stateless2/exp
+  done
+
+  rm transducer_stateless2/exp/*.pt
+fi
diff --git a/.github/workflows/run-librispeech-2022-03-12.yml b/.github/workflows/run-librispeech-2022-03-12.yml
index 135285f15e..39c6fd24f7 100644
--- a/.github/workflows/run-librispeech-2022-03-12.yml
+++ b/.github/workflows/run-librispeech-2022-03-12.yml
@@ -24,9 +24,18 @@ on:
   pull_request:
     types: [labeled]
 
+  schedule:
+    # minute (0-59)
+    # hour (0-23)
+    # day of the month (1-31)
+    # month (1-12)
+    # day of the week (0-6)
+    # nightly build at 15:50 UTC time every day
+    - cron: "50 15 * * *"
+
 jobs:
   run_librispeech_2022_03_12:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
+    if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'schedule'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -63,20 +72,78 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Cache LibriSpeech test-clean and test-other datasets
+        id: libri-test-clean-and-test-other-data
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/download
+          key: cache-libri-test-clean-and-test-other
+
+      - name: Download LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
+
+      - name: Prepare manifests for LibriSpeech test-clean and test-other
+        shell: bash
+        run: |
+          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
+
+      - name: Cache LibriSpeech test-clean and test-other fbank features
+        id: libri-test-clean-and-test-other-fbank
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/fbank-libri
+          key: cache-libri-fbank-test-clean-and-test-other
+
+      - name: Compute fbank for LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
+
 
       - name: Inference with pre-trained model
         shell: bash
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
         run: |
+          mkdir -p egs/librispeech/ASR/data
+          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
+          ls -lh egs/librispeech/ASR/data/*
+
           sudo apt-get -qq install git-lfs tree sox
           export PYTHONPATH=$PWD:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
+
           .github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
+
+      - name: Display decoding results
+        if: github.event_name == 'schedule'
+        shell: bash
+        run: |
+          cd egs/librispeech/ASR/
+          tree ./pruned_transducer_stateless/exp
+
+          cd pruned_transducer_stateless
+          echo "results for pruned_transducer_stateless"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===fast_beam_search==="
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+      - name: Upload decoding results for pruned_transducer_stateless
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless-2022-03-12
+          path: egs/librispeech/ASR/pruned_transducer_stateless/exp/
diff --git a/.github/workflows/run-librispeech-2022-04-29.yml b/.github/workflows/run-librispeech-2022-04-29.yml
index 129e30698e..ffaee25f18 100644
--- a/.github/workflows/run-librispeech-2022-04-29.yml
+++ b/.github/workflows/run-librispeech-2022-04-29.yml
@@ -24,9 +24,18 @@ on:
   pull_request:
     types: [labeled]
 
+  schedule:
+    # minute (0-59)
+    # hour (0-23)
+    # day of the month (1-31)
+    # month (1-12)
+    # day of the week (0-6)
+    # nightly build at 15:50 UTC time every day
+    - cron: "50 15 * * *"
+
 jobs:
   run_librispeech_2022_04_29:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
+    if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'schedule'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -63,18 +72,50 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Cache LibriSpeech test-clean and test-other datasets
+        id: libri-test-clean-and-test-other-data
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/download
+          key: cache-libri-test-clean-and-test-other
+
+      - name: Download LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
+
+      - name: Prepare manifests for LibriSpeech test-clean and test-other
+        shell: bash
+        run: |
+          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
+
+      - name: Cache LibriSpeech test-clean and test-other fbank features
+        id: libri-test-clean-and-test-other-fbank
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/fbank-libri
+          key: cache-libri-fbank-test-clean-and-test-other
+
+      - name: Compute fbank for LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
 
       - name: Inference with pre-trained model
         shell: bash
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
         run: |
+          mkdir -p egs/librispeech/ASR/data
+          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
+          ls -lh egs/librispeech/ASR/data/*
+
           sudo apt-get -qq install git-lfs tree sox
           export PYTHONPATH=$PWD:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
@@ -83,3 +124,45 @@ jobs:
           .github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
 
           .github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
+
+      - name: Display decoding results
+        if: github.event_name == 'schedule'
+        shell: bash
+        run: |
+          cd egs/librispeech/ASR
+          tree pruned_transducer_stateless2/exp
+          cd pruned_transducer_stateless2
+          echo "results for pruned_transducer_stateless2"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===fast_beam_search==="
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          cd ../
+          tree pruned_transducer_stateless3/exp
+          cd pruned_transducer_stateless3
+          echo "results for pruned_transducer_stateless3"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===fast_beam_search==="
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+      - name: Upload decoding results for pruned_transducer_stateless2
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless2-2022-04-29
+          path: egs/librispeech/ASR/pruned_transducer_stateless2/exp/
+
+      - name: Upload decoding results for pruned_transducer_stateless3
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless3-2022-04-29
+          path: egs/librispeech/ASR/pruned_transducer_stateless3/exp/
diff --git a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
index 5871f926d8..c52b543d8b 100644
--- a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
+++ b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
@@ -24,9 +24,18 @@ on:
   pull_request:
     types: [labeled]
 
+  schedule:
+    # minute (0-59)
+    # hour (0-23)
+    # day of the month (1-31)
+    # month (1-12)
+    # day of the week (0-6)
+    # nightly build at 15:50 UTC time every day
+    - cron: "50 15 * * *"
+
 jobs:
   run_librispeech_2022_04_19:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
+    if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'schedule'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -63,20 +72,77 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Cache LibriSpeech test-clean and test-other datasets
+        id: libri-test-clean-and-test-other-data
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/download
+          key: cache-libri-test-clean-and-test-other
+
+      - name: Download LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
+
+      - name: Prepare manifests for LibriSpeech test-clean and test-other
+        shell: bash
+        run: |
+          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
+
+      - name: Cache LibriSpeech test-clean and test-other fbank features
+        id: libri-test-clean-and-test-other-fbank
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/fbank-libri
+          key: cache-libri-fbank-test-clean-and-test-other
+
+      - name: Compute fbank for LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
 
       - name: Inference with pre-trained model
         shell: bash
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
         run: |
+          mkdir -p egs/librispeech/ASR/data
+          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
+          ls -lh egs/librispeech/ASR/data/*
+
           sudo apt-get -qq install git-lfs tree sox
           export PYTHONPATH=$PWD:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
+
           .github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
+
+      - name: Display decoding results
+        if: github.event_name == 'schedule'
+        shell: bash
+        run: |
+          cd egs/librispeech/ASR/
+          tree ./transducer_stateless2/exp
+
+          cd transducer_stateless2
+          echo "results for transducer_stateless2"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===modified_beam_search==="
+          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+      - name: Upload decoding results for transducer_stateless2
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-transducer_stateless2-2022-04-19
+          path: egs/librispeech/ASR/transducer_stateless2/exp/
diff --git a/.github/workflows/run-pretrained-conformer-ctc.yml b/.github/workflows/run-pretrained-conformer-ctc.yml
index 6575ceb654..69f15060b1 100644
--- a/.github/workflows/run-pretrained-conformer-ctc.yml
+++ b/.github/workflows/run-pretrained-conformer-ctc.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
index 80ab356e61..438f6e8827 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
index d2231750c7..f50ac2af78 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
index a84e804c63..659dbc9da1 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
index 7fa48d15a5..f4e56bd6cc 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless.yml b/.github/workflows/run-pretrained-transducer-stateless.yml
index 678e793395..ca355e7783 100644
--- a/.github/workflows/run-pretrained-transducer-stateless.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer.yml b/.github/workflows/run-pretrained-transducer.yml
index 781783bcfb..f1b051047a 100644
--- a/.github/workflows/run-pretrained-transducer.yml
+++ b/.github/workflows/run-pretrained-transducer.yml
@@ -62,13 +62,6 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py b/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py
index 100aeaa6e3..5d1e9b4716 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py
@@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import warnings
 from dataclasses import dataclass
 from typing import Dict, List, Optional
 
@@ -565,8 +566,10 @@ def modified_beam_search(
         for i in range(batch_size):
             topk_log_probs, topk_indexes = ragged_log_probs[i].topk(beam)
 
-            topk_hyp_indexes = (topk_indexes // vocab_size).tolist()
-            topk_token_indexes = (topk_indexes % vocab_size).tolist()
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                topk_hyp_indexes = (topk_indexes // vocab_size).tolist()
+                topk_token_indexes = (topk_indexes % vocab_size).tolist()
 
             for k in range(len(topk_hyp_indexes)):
                 hyp_idx = topk_hyp_indexes[k]
@@ -679,8 +682,10 @@ def _deprecated_modified_beam_search(
-        topk_hyp_indexes = topk_indexes // logits.size(-1)
-        topk_token_indexes = topk_indexes % logits.size(-1)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            topk_hyp_indexes = topk_indexes // logits.size(-1)
+            topk_token_indexes = topk_indexes % logits.size(-1)
 
         topk_hyp_indexes = topk_hyp_indexes.tolist()
         topk_token_indexes = topk_token_indexes.tolist()
 
         for i in range(len(topk_hyp_indexes)):
             hyp = A[topk_hyp_indexes[i]]