diff --git a/.gitignore b/.gitignore index 20d341ea72..9646a9cdf8 100644 --- a/.gitignore +++ b/.gitignore @@ -116,7 +116,7 @@ tmp/ logs/ # Examples - mock data !examples/distilbert_text_classification/input/*.csv -!examples/_tests_distilbert_text_classification/input/*.csv +!examples/_tests_nlp_classification/input/*.csv examples/logs/ notebooks/ diff --git a/bin/_check_codestyle.sh b/bin/_check_codestyle.sh index 02d640f845..f761e3674e 100755 --- a/bin/_check_codestyle.sh +++ b/bin/_check_codestyle.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash -set -e + +# Cause the script to exit if a single command fails +set -eo pipefail -v # Parse -s flag which tells us that we should skip inplace yapf echo 'parse -s flag' @@ -14,27 +16,28 @@ echo 'isort: `isort -rc --check-only --settings-path ./setup.cfg`' isort -rc --check-only --settings-path ./setup.cfg # stop the build if there are any unexpected flake8 issues -echo 'flake8: `bash ./bin/flake8.sh`' -bash ./bin/flake8.sh --count \ +echo 'flake8: `bash ./bin/_flake8.sh`' +bash ./bin/_flake8.sh --count \ --config=./setup.cfg \ --show-source \ --statistics # exit-zero treats all errors as warnings. -echo 'flake8 (warnings): `flake8`' -flake8 . --count --exit-zero \ - --max-complexity=10 \ +echo 'flake8 (warnings): `bash ./bin/_flake8.sh`' +bash ./bin/_flake8.sh --count \ --config=./setup.cfg \ - --statistics + --show-source \ + --statistics \ + --exit-zero # test to make sure the code is yapf compliant if [[ -f ${skip_inplace} ]]; then - echo 'yapf: `bash ./bin/yapf.sh --all`' - bash ./bin/yapf.sh --all + echo 'yapf: `bash ./bin/_yapf.sh --all`' + bash ./bin/_yapf.sh --all else - echo 'yapf: `bash ./bin/yapf.sh --all-in-place`' - bash ./bin/yapf.sh --all-in-place + echo 'yapf: `bash ./bin/_yapf.sh --all-in-place`' + bash ./bin/_yapf.sh --all-in-place fi echo 'pytest: `pytest`' -pytest ./catalyst +pytest . 
diff --git a/bin/_flake8.sh b/bin/_flake8.sh new file mode 100644 index 0000000000..e824eee331 --- /dev/null +++ b/bin/_flake8.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails +set -eo pipefail -v + +# this stops git rev-parse from failing if we run this from the .git directory +builtin cd "$(dirname "${BASH_SOURCE:-$0}")" + +ROOT="$(git rev-parse --show-toplevel)" +builtin cd "$ROOT" || exit 1 + + +flake8 "$@" diff --git a/bin/_yapf.sh b/bin/_yapf.sh new file mode 100755 index 0000000000..0aadbfc9ef --- /dev/null +++ b/bin/_yapf.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails +set -eo pipefail -v + +# this stops git rev-parse from failing if we run this from the .git directory +builtin cd "$(dirname "${BASH_SOURCE:-$0}")" + +ROOT="$(git rev-parse --show-toplevel)" +builtin cd "$ROOT" || exit 1 + + +YAPF_FLAGS=( + '--style' "$ROOT/setup.cfg" + '--recursive' + '--parallel' +) + +YAPF_EXCLUDES=( + '--exclude' 'docker/*' +) + +# Format specified files +format() { + yapf --in-place "${YAPF_FLAGS[@]}" -- "$@" +} + +# Format all files, and print the diff to stdout for travis. +format_all() { + yapf --diff "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" ./**/*.py +} + +format_all_in_place() { + yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" ./**/*.py +} + +# This flag formats individual files. --files *must* be the first command line +# arg to use this option. +if [[ "$1" == '--files' ]]; then + format "${@:2}" + # If `--all` is passed, then any further arguments are ignored and the + # entire python directory is formatted. +elif [[ "$1" == '--all' ]]; then + format_all +elif [[ "$1" == '--all-in-place' ]]; then + format_all_in_place +else + # Format only the files that changed in last commit. 
+ exit 1 +fi diff --git a/bin/flake8.sh b/bin/flake8.sh deleted file mode 100644 index da179defbf..0000000000 --- a/bin/flake8.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash - -# Cause the script to exit if a single command fails -set -eo pipefail -v - -# this stops git rev-parse from failing if we run this from the .git directory -builtin cd "$(dirname "${BASH_SOURCE:-$0}")" - -ROOT="$(git rev-parse --show-toplevel)" -builtin cd "$ROOT" || exit 1 - -git remote add 'upstream' 'https://github.com/catalyst-team/catalyst' || true - -# Only fetch master since that's the branch we're diffing against. -git fetch upstream master - -MERGEBASE="$(git merge-base upstream/master HEAD)" -if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' &>/dev/null; then - flake8 "$@" $(git diff --name-only --diff-filter=AM "$MERGEBASE" -- '*.py') -fi diff --git a/bin/tests/check_dl.sh b/bin/tests/check_dl.sh deleted file mode 100755 index 3366ba999e..0000000000 --- a/bin/tests/check_dl.sh +++ /dev/null @@ -1,407 +0,0 @@ -#!/usr/bin/env bash -pip install tifffile #TODO: check if really required - -mkdir -p data -# gdrive -# gdrive_download 1N82zh0kzmnzqRvUyMgVOGsCoS1kHf3RP ./data/isbi.tar.gz -# aws -wget https://catalyst-ai.s3-eu-west-1.amazonaws.com/isbi.tar.gz -O ./data/isbi.tar.gz -tar -xf ./data/isbi.tar.gz -C ./data/ - -# @TODO: fix macos fail with sed -set -e - -# imports check -(set -e; for f in examples/_tests_scripts/*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) -#(set -e; for f in examples/_tests_scripts/dl_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) -#(set -e; for f in examples/_tests_scripts/z_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) - - -################################ pipeline 00 ################################ -rm -rf ./examples/logs - - -################################ pipeline 01 ################################ -echo 'pipeline 01' -EXPDIR=./examples/_tests_mnist_stages 
-LOGDIR=./examples/logs/_tests_mnist_stages1 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config1.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') -assert metrics.get('stage1.3', 'loss') < 2.1 -""" - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/trace.py \ - ${LOGDIR} - -rm -rf $LOGDIR - - -################################ pipeline 02 ################################ -echo 'pipeline 02' -EXPDIR=./examples/_tests_mnist_stages -LOGDIR=./examples/logs/_tests_mnist_stages1 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config2.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') -assert metrics.get('stage1.3', 'loss') < 2.1 -""" - - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config3.yml \ - --resume=${LOGDIR}/checkpoints/best.pth \ - --out_dir=${LOGDIR}/:str \ - --out_prefix="/predictions/":str - -cat $LOGFILE -python -c """ -import numpy as np -data = np.load('${LOGDIR}/predictions/infer.logits.npy') -assert data.shape == (10000, 10) -""" - -rm -rf $LOGDIR - - -################################ pipeline 03 ################################ -echo 'pipeline 03' -EXPDIR=./examples/_tests_mnist_stages -LOGDIR=./examples/logs/_tests_mnist_stages1 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config4.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') -assert metrics.get('stage1.3', 'loss') < 2.1 -""" - -rm -rf ${LOGDIR} - - -################################ pipeline 04 ################################ -echo 'pipeline 04' -EXPDIR=./examples/_tests_mnist_stages -LOGDIR=./examples/logs/_tests_mnist_stages1 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config5.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -assert metrics.get('stage2.3', 'loss') < metrics.get('stage2.1', 'loss') -assert metrics.get('stage2.3', 'loss') < 2.1 -""" - -rm -rf ${LOGDIR} - - -################################ pipeline 05 ################################ -echo 'pipeline 05' -# LrFinder -EXPDIR=./examples/_tests_mnist_stages -LOGDIR=./examples/logs/_tests_mnist_stages1 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config6.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -assert metrics.get('stage2.3', 'loss') < metrics.get('stage2.1', 'loss') -assert metrics.get('stage2.3', 'loss') < 14.5 -""" - -rm -rf ${LOGDIR} - - -################################ pipeline 06 ################################ -echo 'pipeline 06' -EXPDIR=./examples/_tests_mnist_stages -LOGDIR=./examples/logs/_tests_mnist_stages_finder -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config_finder.yml \ - --logdir=${LOGDIR} & - -sleep 30 -kill %1 - -rm -rf ${LOGDIR} - - -################################ pipeline 07 ################################ -echo 'pipeline 07' -EXPDIR=./examples/_tests_mnist_stages2 -LOGDIR=./examples/logs/_tests_mnist_stages2 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config1.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -# assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') -assert metrics.get('best', 'loss') < 2.35 -""" - -rm -rf ${LOGDIR} - - -################################ pipeline 08 ################################ -echo 'pipeline 08' -EXPDIR=./examples/_tests_mnist_stages2 -LOGDIR=./examples/logs/_tests_mnist_stages2 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config2.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') -assert metrics.get('stage1.3', 'loss') < 2.35 -""" - -rm -rf ${LOGDIR} - - -################################ pipeline 09 ################################ -echo 'pipeline 09' -EXPDIR=./examples/_tests_mnist_stages2 -LOGDIR=./examples/logs/_tests_mnist_stages2 -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config3.yml \ - --logdir=${LOGDIR} \ - --check - -if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') -# assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') -assert metrics.get('stage1.3', 'loss') < 2.33 -""" - -rm -rf ${LOGDIR} - - -################################ pipeline 10 ################################ -echo 'pipeline 10' -EXPDIR=./examples/_tests_mnist_stages2 -LOGDIR=./examples/logs/_tests_mnist_stages_finder -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config_finder.yml \ - --logdir=${LOGDIR} & - -sleep 30 -kill %1 - -rm -rf ${LOGDIR} - -################################ pipeline 11 ################################ -# SEGMENTATION -echo 'pipeline 11 - SEGMENTATION' -EXPDIR=./examples/_test_segmentation -LOGDIR=./examples/logs/_test_segmentation -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -## load the data -# mkdir -p ./examples/_test_segmentation/data -# cd ./examples/_test_segmentation/data/ -# download-gdrive 1iYaNijLmzsrMlAdMoUEhhJuo-5bkeAuj segmentation_data.zip -# extract-archive segmentation_data.zip -# cd ../../.. - -## train -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --configs ${EXPDIR}/config.yml ${EXPDIR}/transforms.yml \ - --logdir=${LOGDIR} \ - --stages/data_params/image_path=./examples/_test_segmentation/data/segmentation_data/train:str \ - --stages/data_params/mask_path=./examples/_test_segmentation/data/segmentation_data/train_masks:str \ - --check - -## check metrics -if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics = Safict.load('$LOGFILE') - -iou = metrics.get('last', 'iou') -loss = metrics.get('last', 'loss') - -print('iou', iou) -print('loss', loss) - -assert iou > 0.8, f'iou must be > 0.8, got {iou}' -assert loss < 0.2, f'loss must be < 0.2, got {loss}' -""" - -## remove logs -rm -rf ./examples/logs/_test_segmentation - -################################ pipeline 12 ################################ -# GAN -echo 'pipeline 12 - GAN' -EXPDIR=./examples/mnist_gan -LOGDIR=./examples/logs/mnist_gan -LOGFILE=${LOGDIR}/checkpoints/_metrics.json - -PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ - python catalyst/dl/scripts/run.py \ - --expdir=${EXPDIR} \ - --config=${EXPDIR}/config.yml \ - --logdir=${LOGDIR} \ - --stages/state_params/num_epochs=11:int - -if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then - echo "File $LOGFILE does not exist" - exit 1 -fi - -cat $LOGFILE -python -c """ -from safitty import Safict -metrics=Safict.load('$LOGFILE') - -loss_g = metrics.get('last', 'loss_g') -loss_d_real = metrics.get('last', 'loss_d_real') -loss_d_fake = metrics.get('last', 'loss_d_fake') -loss_d = metrics.get('last', 'loss_d') - -print('loss_g', loss_g) -print('loss_d_real', loss_d_real) -print('loss_d_fake', loss_d_fake) -print('loss_d', loss_d) - -# assert 0.9 < loss_g < 1.5 -# assert 0.3 < loss_d_real < 0.6 -# assert 0.28 < loss_d_fake < 0.58 -# assert 0.3 < loss_d < 0.6 -assert loss_g < 2.0 -assert loss_d_real < 0.9 -assert loss_d_fake < 0.9 -assert loss_d < 0.9 -""" - -rm -rf ${LOGDIR} - -################################ pipeline 99 ################################ -rm -rf ./examples/logs diff --git a/bin/tests/check_dl_all.sh b/bin/tests/check_dl_all.sh new file mode 100755 index 0000000000..09128dfa00 --- /dev/null +++ b/bin/tests/check_dl_all.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a 
single command fails +set -eo pipefail -v + + +#################################### DL ##################################### +echo './bin/tests/check_dl_base.sh' +bash ./bin/tests/check_dl_base.sh + + +#################################### CV ##################################### +echo './bin/tests/check_dl_cv.sh' +bash ./bin/tests/check_dl_cv.sh + + +#################################### GAN #################################### +if [[ "$CUDA_VISIBLE_DEVICES" == "" || "$CUDA_VISIBLE_DEVICES" == "0" ]]; then + echo './bin/tests/check_dl_gan.sh' + bash ./bin/tests/check_dl_gan.sh +fi + + +#################################### NLP #################################### +echo './bin/tests/check_dl_nlp.sh' +bash ./bin/tests/check_dl_nlp.sh diff --git a/bin/tests/check_dl_base.sh b/bin/tests/check_dl_base.sh new file mode 100755 index 0000000000..a31c11a4f9 --- /dev/null +++ b/bin/tests/check_dl_base.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails +set -eo pipefail -v + + +################################ pipeline 00 ################################ +pip install tifffile #TODO: check if really required + +rm -rf ./examples/logs + + +################################ pipeline 01 ################################ +# imports check +(set -e; for f in examples/_tests_scripts/*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) +# (set -e; for f in examples/_tests_scripts/dl_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) +# (set -e; for f in examples/_tests_scripts/z_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) + + +################################ pipeline 99 ################################ +rm -rf ./examples/logs diff --git a/bin/tests/check_dl_cv.sh b/bin/tests/check_dl_cv.sh new file mode 100755 index 0000000000..738102eede --- /dev/null +++ b/bin/tests/check_dl_cv.sh @@ -0,0 +1,459 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails +set -eo pipefail -v + 
+ +################################ pipeline 00 ################################ +rm -rf ./examples/logs + + +################################ pipeline 01 ################################ +echo 'pipeline 01' +EXPDIR=./examples/_tests_cv_classification +LOGDIR=./examples/logs/_tests_cv_classification +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config1.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 01' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +assert metrics.get('stage1.3', 'loss') < 2.1 +""" + +echo 'pipeline 01 - trace' +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/trace.py \ + ${LOGDIR} + +rm -rf $LOGDIR + + +################################ pipeline 02 ################################ +echo 'pipeline 02' +EXPDIR=./examples/_tests_cv_classification +LOGDIR=./examples/logs/_tests_cv_classification +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config2.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 02' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +assert metrics.get('stage1.3', 'loss') < 2.1 +""" + +if [[ "$USE_DDP" == "0" ]]; then + PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config2_infer.yml \ + --resume=${LOGDIR}/checkpoints/best.pth \ + --out_dir=${LOGDIR}/:str \ + --out_prefix="/predictions/":str + + cat $LOGFILE + echo 'pipeline 02 - infer' + python -c """ +import numpy as np +data = np.load('${LOGDIR}/predictions/infer.logits.npy') +print(data.shape) +assert data.shape == (10000, 10) +""" + + rm -rf $LOGDIR +fi + + +################################ pipeline 03 ################################ +echo 'pipeline 03' +EXPDIR=./examples/_tests_cv_classification +LOGDIR=./examples/logs/_tests_cv_classification +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config3.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 03' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +assert metrics.get('stage1.3', 'loss') < 2.1 +""" + +rm -rf ${LOGDIR} + + +################################ pipeline 04 ################################ +echo 'pipeline 04' +EXPDIR=./examples/_tests_cv_classification +LOGDIR=./examples/logs/_tests_cv_classification +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config4.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 04' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +assert metrics.get('stage2.3', 'loss') < metrics.get('stage2.1', 'loss') +assert metrics.get('stage2.3', 'loss') < 2.1 +""" + +rm -rf ${LOGDIR} + + +################################ pipeline 05 ################################ +echo 'pipeline 05' +EXPDIR=./examples/_tests_cv_classification +LOGDIR=./examples/logs/_tests_cv_classification +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config5.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 05' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +# assert metrics.get('stage2.3', 'loss') < metrics.get('stage2.1', 'loss') +assert metrics.get('stage2.3', 'loss') < 27.0 +""" + +rm -rf ${LOGDIR} + + +################################ pipeline 06 ################################ +if [[ "$USE_DDP" == "0" ]]; then + echo 'pipeline 06 - LrFinder' + EXPDIR=./examples/_tests_cv_classification + LOGDIR=./examples/logs/_tests_cv_classification + LOGFILE=${LOGDIR}/checkpoints/_metrics.json + + PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config6_finder.yml \ + --logdir=${LOGDIR} & + + sleep 30 + kill %1 + + if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 + fi + + rm -rf ${LOGDIR} +fi + + +################################ pipeline 11 ################################ +echo 'pipeline 11' +EXPDIR=./examples/_tests_cv_classification_transforms +LOGDIR=./examples/logs/_tests_cv_classification_transforms +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config1.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 11' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +# assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +assert metrics.get('best', 'loss') < 2.35 +""" + +rm -rf ${LOGDIR} + + +################################ pipeline 12 ################################ +echo 'pipeline 12' +EXPDIR=./examples/_tests_cv_classification_transforms +LOGDIR=./examples/logs/_tests_cv_classification_transforms +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config2.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 12' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +# assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +assert metrics.get('stage1.3', 'loss') < 2.35 +""" + +rm -rf ${LOGDIR} + + +################################ pipeline 13 ################################ +echo 'pipeline 13' +EXPDIR=./examples/_tests_cv_classification_transforms +LOGDIR=./examples/logs/_tests_cv_classification_transforms +LOGFILE=${LOGDIR}/checkpoints/_metrics.json + +PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config3.yml \ + --logdir=${LOGDIR} \ + --check + +if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 +fi + +cat $LOGFILE +echo 'pipeline 13' +python -c """ +from safitty import Safict +metrics = Safict.load('$LOGFILE') +# assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +assert metrics.get('stage1.3', 'loss') < 2.33 +""" + +rm -rf ${LOGDIR} + + +################################ pipeline 14 ################################ +if [[ "$USE_DDP" == "0" ]]; then + echo 'pipeline 14' + EXPDIR=./examples/_tests_cv_classification_transforms + LOGDIR=./examples/logs/_tests_cv_classification_transforms + LOGFILE=${LOGDIR}/checkpoints/_metrics.json + + PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/config4_finder.yml \ + --logdir=${LOGDIR} & + + sleep 30 + kill %1 + + if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 + fi + + rm -rf ${LOGDIR} +fi + + +################################# pipeline 15 ################################ +#echo 'pipeline 15' +#EXPDIR=./examples/_tests_cv_classification_transforms +#LOGDIR=./examples/logs/_tests_cv_classification_transforms +#LOGFILE=${LOGDIR}/checkpoints/_metrics.json +# +#PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ +# python catalyst/dl/scripts/run.py \ +# --expdir=${EXPDIR} \ +# --config=${EXPDIR}/config5_fp16.yml \ +# --logdir=${LOGDIR} \ +# --check +# +#if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then +# echo "File $LOGFILE does not exist" +# exit 1 +#fi +# +#cat $LOGFILE +#echo 'pipeline 15' +#python -c """ +#from safitty import Safict +#metrics = Safict.load('$LOGFILE') +## assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +#assert metrics.get('stage1.3', 'loss') < 2.33 +#""" +# +#rm -rf ${LOGDIR} + + +################################# pipeline 16 ################################ +#echo 'pipeline 16' +#EXPDIR=./examples/_tests_cv_classification_transforms +#LOGDIR=./examples/logs/_tests_cv_classification_transforms +#LOGFILE=${LOGDIR}/checkpoints/_metrics.json +# +#PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ +# python catalyst/dl/scripts/run.py \ +# --expdir=${EXPDIR} \ +# --config=${EXPDIR}/config6_fp16.yml \ +# --logdir=${LOGDIR} \ +# --check +# +#if [[ ! (-f "$LOGFILE" && -r "$LOGFILE") ]]; then +# echo "File $LOGFILE does not exist" +# exit 1 +#fi +# +#cat $LOGFILE +#echo 'pipeline 16' +#python -c """ +#from safitty import Safict +#metrics = Safict.load('$LOGFILE') +## assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +#assert metrics.get('stage1.3', 'loss') < 2.33 +#""" +# +#rm -rf ${LOGDIR} + + +################################# pipeline 17 ################################ +#echo 'pipeline 17' +#EXPDIR=./examples/_tests_cv_classification_transforms +#LOGDIR=./examples/logs/_tests_cv_classification_transforms +#LOGFILE=${LOGDIR}/checkpoints/_metrics.json +# +#PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ +# python catalyst/dl/scripts/run.py \ +# --expdir=${EXPDIR} \ +# --config=${EXPDIR}/config7_fp16.yml \ +# --logdir=${LOGDIR} \ +# --check +# +#if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then +# echo "File $LOGFILE does not exist" +# exit 1 +#fi +# +#cat $LOGFILE +#echo 'pipeline 17' +#python -c """ +#from safitty import Safict +#metrics = Safict.load('$LOGFILE') +## assert metrics.get('stage1.3', 'loss') < metrics.get('stage1.1', 'loss') +#assert metrics.get('stage1.3', 'loss') < 2.33 +#""" +# +#rm -rf ${LOGDIR} + + +################################# pipeline 21 ################################ +## SEGMENTATION +#echo 'pipeline 21 - SEGMENTATION' +#EXPDIR=./examples/_tests_cv_segmentation +#LOGDIR=./examples/logs/_tests_cv_segmentation +#LOGFILE=${LOGDIR}/checkpoints/_metrics.json +# +### train +#PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ +# python catalyst/dl/scripts/run.py \ +# --expdir=${EXPDIR} \ +# --configs ${EXPDIR}/config.yml ${EXPDIR}/transforms.yml \ +# --logdir=${LOGDIR} \ +# --stages/data_params/image_path=./data/segmentation_data/train:str \ +# --stages/data_params/mask_path=./data/segmentation_data/train_masks:str \ +# --check +# +### check metrics +#if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then +# echo "File $LOGFILE does not exist" +# exit 1 +#fi +# +#cat $LOGFILE +#echo 'pipeline 21 - SEGMENTATION' +#python -c """ +#from safitty import Safict +#metrics = Safict.load('$LOGFILE') +# +#iou = metrics.get('last', 'iou') +#loss = metrics.get('last', 'loss') +# +#print('iou', iou) +#print('loss', loss) +# +#assert iou > 0.8, f'iou must be > 0.8, got {iou}' +#assert loss < 0.32, f'loss must be < 0.32, got {loss}' +#""" +# +### remove logs +#rm -rf ./examples/logs/_tests_cv_segmentation + + +################################ pipeline 99 ################################ +rm -rf ./examples/logs diff --git a/bin/tests/check_dl_gan.sh b/bin/tests/check_dl_gan.sh new file mode 100755 index 0000000000..7053c1cc15 --- /dev/null +++ b/bin/tests/check_dl_gan.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails +set -eo pipefail -v + + +################################ pipeline 00 ################################ +rm -rf ./examples/logs + +################################ pipeline 01 ################################ +if [[ "$USE_APEX" == "0" ]]; then + # GAN + echo 'pipeline 01 - GAN' + EXPDIR=./examples/mnist_gans + LOGDIR=./examples/logs/mnist_gans + LOGFILE=${LOGDIR}/checkpoints/_metrics.json + + PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ + python catalyst/dl/scripts/run.py \ + --expdir=${EXPDIR} \ + --config=${EXPDIR}/configs/vanilla_gan.yml \ + --logdir=${LOGDIR} \ + --stages/state_params/num_epochs=11:int + + if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then + echo "File $LOGFILE does not exist" + exit 1 + fi + + cat $LOGFILE + echo 'pipeline 01 - GAN' + python -c """ +from safitty import Safict +metrics=Safict.load('$LOGFILE') + +loss_g = metrics.get('last', 'loss_g') +loss_d_real = metrics.get('last', 'loss_d_real') +loss_d_fake = metrics.get('last', 'loss_d_fake') +loss_d = metrics.get('last', 'loss_d') + +print('loss_g', loss_g) +print('loss_d_real', loss_d_real) +print('loss_d_fake', loss_d_fake) +print('loss_d', loss_d) + +# assert 0.9 < loss_g < 1.5 +# assert 0.3 < loss_d_real < 0.6 +# assert 0.28 < loss_d_fake < 0.58 +# assert 0.3 < loss_d < 0.6 +assert loss_g < 2.7 +assert loss_d_real < 1.0 +assert loss_d_fake < 1.0 +assert loss_d < 1.0 +""" + + rm -rf ${LOGDIR} +fi +################################ pipeline 99 ################################ +rm -rf ./examples/logs diff --git a/bin/tests/check_nlp.sh b/bin/tests/check_dl_nlp.sh similarity index 56% rename from bin/tests/check_nlp.sh rename to bin/tests/check_dl_nlp.sh index 5cbaa221a0..707bb70e50 100755 --- a/bin/tests/check_nlp.sh +++ b/bin/tests/check_dl_nlp.sh @@ -1,16 +1,17 @@ #!/usr/bin/env bash -# @TODO: fix macos fail with sed -set -e +# Cause the script to exit if a single command fails +set -eo pipefail -v + echo "check distilbert_text_classification" -LOGFILE=./examples/logs/_tests_distilbert_text_classification/checkpoints/_metrics.json +LOGFILE=./examples/logs/_tests_nlp_classification/checkpoints/_metrics.json PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ python catalyst/dl/scripts/run.py \ --expdir=./examples/distilbert_text_classification \ - --config=./examples/_tests_distilbert_text_classification/config1_basic.yml \ - --logdir=./examples/logs/_tests_distilbert_text_classification \ + --config=./examples/_tests_nlp_classification/config1_basic.yml \ + --logdir=./examples/logs/_tests_nlp_classification \ --check if [[ ! 
(-f "$LOGFILE" && -r "$LOGFILE") ]]; then @@ -25,13 +26,13 @@ metrics = Safict.load('$LOGFILE') assert metrics.get('train_val.3', 'loss') < metrics.get('train_val.1', 'loss') assert metrics.get('train_val.3', 'loss') < 2.0 """ -rm -rf ./examples/logs/_tests_distilbert_text_classification +rm -rf ./examples/logs/_tests_nlp_classification echo "train small_max_seq_length" PYTHONPATH=./examples:./catalyst:${PYTHONPATH} \ python catalyst/dl/scripts/run.py \ --expdir=./examples/distilbert_text_classification \ - --config=./examples/_tests_distilbert_text_classification/config2_small_max_seq_length.yml \ - --logdir=./examples/logs/_tests_distilbert_text_classification \ + --config=./examples/_tests_nlp_classification/config2_small_max_seq_length.yml \ + --logdir=./examples/logs/_tests_nlp_classification \ --check -rm -rf ./examples/logs/_tests_distilbert_text_classification \ No newline at end of file +rm -rf ./examples/logs/_tests_nlp_classification \ No newline at end of file diff --git a/bin/tests/check_ddpg.sh b/bin/tests/check_rl_ddpg.sh similarity index 100% rename from bin/tests/check_ddpg.sh rename to bin/tests/check_rl_ddpg.sh diff --git a/bin/tests/check_dqn.sh b/bin/tests/check_rl_dqn.sh similarity index 100% rename from bin/tests/check_dqn.sh rename to bin/tests/check_rl_dqn.sh diff --git a/bin/tests/check_ppo.sh b/bin/tests/check_rl_ppo.sh similarity index 100% rename from bin/tests/check_ppo.sh rename to bin/tests/check_rl_ppo.sh diff --git a/bin/tests/check_reinforce.sh b/bin/tests/check_rl_reinforce.sh similarity index 100% rename from bin/tests/check_reinforce.sh rename to bin/tests/check_rl_reinforce.sh diff --git a/bin/tests/check_sac.sh b/bin/tests/check_rl_sac.sh similarity index 100% rename from bin/tests/check_sac.sh rename to bin/tests/check_rl_sac.sh diff --git a/bin/tests/check_td3.sh b/bin/tests/check_rl_td3.sh similarity index 100% rename from bin/tests/check_td3.sh rename to bin/tests/check_rl_td3.sh diff --git a/bin/yapf.sh b/bin/yapf.sh 
deleted file mode 100755 index bdea230448..0000000000 --- a/bin/yapf.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env bash - -# Cause the script to exit if a single command fails -set -eo pipefail -v - -# this stops git rev-parse from failing if we run this from the .git directory -builtin cd "$(dirname "${BASH_SOURCE:-$0}")" - -ROOT="$(git rev-parse --show-toplevel)" -builtin cd "$ROOT" || exit 1 - -git remote add 'upstream' 'https://github.com/catalyst-team/catalyst' || true - -# Only fetch master since that's the branch we're diffing against. -git fetch upstream master - -YAPF_FLAGS=( - '--style' "$ROOT/setup.cfg" - '--recursive' - '--parallel' -) - -YAPF_EXCLUDES=( - '--exclude' 'docker/*' -) - -# Format specified files -format() { - yapf --in-place "${YAPF_FLAGS[@]}" -- "$@" -} - -# Format files that differ from main branch. Ignores dirs that are not slated -# for autoformat yet. -format_changed() { - # The `if` guard ensures that the list of filenames is not empty, which - # could cause yapf to receive 0 positional arguments, making it hang - # waiting for STDIN. - # - # `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that - # exist on both branches. - MERGEBASE="$(git merge-base upstream/master HEAD)" - - if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' &>/dev/null; then - git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' | xargs -P 5 \ - yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}" - fi -} - -# Format all files, and print the diff to stdout for travis. -format_all() { - yapf --diff "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" ./**/**/*.py -} - -format_all_in_place() { - yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" ./**/**/*.py -} - -# This flag formats individual files. --files *must* be the first command line -# arg to use this option. 
-if [[ "$1" == '--files' ]]; then - format "${@:2}" - # If `--all` is passed, then any further arguments are ignored and the - # entire python directory is formatted. -elif [[ "$1" == '--all' ]]; then - format_all -elif [[ "$1" == '--all-in-place' ]]; then - format_all_in_place -else - # Format only the files that changed in last commit. - format_changed -fi - -if ! git diff --quiet &>/dev/null; then - echo 'Reformatted changed files. Please review and stage the changes.' - echo 'Files updated:' - echo - - git --no-pager diff --name-only - - exit 1 -fi diff --git a/catalyst/core/callbacks/checkpoint.py b/catalyst/core/callbacks/checkpoint.py index a9ad36b9f9..8c3e6851b6 100644 --- a/catalyst/core/callbacks/checkpoint.py +++ b/catalyst/core/callbacks/checkpoint.py @@ -111,9 +111,10 @@ def load_checkpoint(*, filename, state: _State): print(f"=> loading checkpoint {filename}") checkpoint = utils.load_checkpoint(filename) - state.epoch = checkpoint["epoch"] - state.stage_epoch = checkpoint["stage_epoch"] - state.stage = checkpoint["stage"] + if not state.stage.startswith("infer"): + state.epoch = checkpoint["epoch"] + state.stage_epoch = checkpoint["stage_epoch"] + state.stage = checkpoint["stage"] utils.unpack_checkpoint( checkpoint, diff --git a/catalyst/core/runner.py b/catalyst/core/runner.py index 1b3facbc07..c4e000b5f3 100644 --- a/catalyst/core/runner.py +++ b/catalyst/core/runner.py @@ -338,7 +338,9 @@ def _run_epoch(self, stage: str, epoch: int): def _run_stage(self, stage: str): self._prepare_for_stage(stage) + # checkpoint loading self._run_event("stage", moment="start") + while self.state.stage_epoch < self.state.num_epochs: self._run_event("epoch", moment="start") utils.set_global_seed( diff --git a/catalyst/dl/experiment/config.py b/catalyst/dl/experiment/config.py index 0b649ebfe0..85dd35e008 100644 --- a/catalyst/dl/experiment/config.py +++ b/catalyst/dl/experiment/config.py @@ -1,7 +1,6 @@ from typing import Any, Callable, Dict, List, Mapping, Union # 
isort:skip from collections import OrderedDict from copy import deepcopy -from pathlib import Path import safitty @@ -118,19 +117,21 @@ def stages(self) -> List[str]: """Experiment's stage names""" stages_keys = list(self.stages_config.keys()) - # Change start `stages_keys` if resume data were founded - state_params = self.get_state_params(stages_keys[0]) - resume, resume_dir = [state_params.get(key, None) - for key in ["resume", "resume_dir"]] - - if resume_dir is not None: - resume = resume_dir / str(resume) - - if resume is not None and Path(resume).is_file(): - checkpoint = utils.load_checkpoint(resume) - start_stage = checkpoint["stage"] - start_idx = stages_keys.index(start_stage) - stages_keys = stages_keys[start_idx:] + # @TODO: return the feature + # # Change start `stages_keys` if resume data were founded + # state_params = self.get_state_params(stages_keys[0]) + # resume, resume_dir = [ + # state_params.get(key, None) for key in ["resume", "resume_dir"] + # ] + # + # if resume_dir is not None: + # resume = resume_dir / str(resume) + # + # if resume is not None and Path(resume).is_file(): + # checkpoint = utils.load_checkpoint(resume) + # start_stage = checkpoint["stage"] + # start_idx = stages_keys.index(start_stage) + # stages_keys = stages_keys[start_idx:] return stages_keys diff --git a/catalyst/dl/scripts/run.py b/catalyst/dl/scripts/run.py index 485a7be2e3..7581869ce7 100644 --- a/catalyst/dl/scripts/run.py +++ b/catalyst/dl/scripts/run.py @@ -2,6 +2,7 @@ import argparse from argparse import ArgumentParser +import os from pathlib import Path import safitty @@ -45,28 +46,30 @@ def build_args(parser: ArgumentParser): metavar="PATH", help="path to latest checkpoint" ) - utils.boolean_flag( - parser, - "autoresume", - default=False, + parser.add_argument( + "--autoresume", + type=str, help=( - "try automatically resume from logdir//last_full.pth " + "try automatically resume from logdir//{best,last}_full.pth " "if --resume is empty" - ) + ), + 
required=False, + choices=["best", "last"], + default=None ) parser.add_argument("--seed", type=int, default=42) utils.boolean_flag( parser, "apex", - default=True, + default=os.getenv("USE_APEX", "1") == "1", help="Enable/disable using of Apex extension" ) utils.boolean_flag( parser, - "data-parallel", - shorthand="dp", - default=False, - help="Force using of DataParallel" + "distributed", + shorthand="ddp", + default=os.getenv("USE_DDP", "0") == "1", + help="Run inn distributed mode" ) utils.boolean_flag(parser, "verbose", default=None) utils.boolean_flag(parser, "check", default=None) @@ -114,7 +117,7 @@ def main_worker(args, unknown_args): def main(args, unknown_args): """Run the ``catalyst-dl run`` script""" - distributed_run(args.data_parallel, main_worker, args, unknown_args) + distributed_run(args.distributed, main_worker, args, unknown_args) if __name__ == "__main__": diff --git a/catalyst/utils/config.py b/catalyst/utils/config.py index 21412df433..a45cd91546 100644 --- a/catalyst/utils/config.py +++ b/catalyst/utils/config.py @@ -254,11 +254,12 @@ def parse_config_args(*, config, args, unknown_args): continue config["args"][key] = value - if safitty.get(config, "args", "autoresume", default=False) and \ + autoresume = safitty.get(config, "args", "autoresume") + if autoresume is not None and \ safitty.get(config, "args", "logdir") is not None and \ safitty.get(config, "args", "resume") is None: logdir = Path(safitty.get(config, "args", "logdir")) - checkpoint_filename = logdir / "checkpoints" / "last_full.pth" + checkpoint_filename = logdir / "checkpoints" / f"{autoresume}_full.pth" if checkpoint_filename.is_file(): config["args"]["resume"] = str(checkpoint_filename) return config, args diff --git a/catalyst/utils/distributed.py b/catalyst/utils/distributed.py index 6d36b2db67..58bc5509a1 100644 --- a/catalyst/utils/distributed.py +++ b/catalyst/utils/distributed.py @@ -32,12 +32,13 @@ def get_rank() -> int: def is_apex_available() -> bool: + env_apex = 
os.getenv("USE_APEX", "1") == "1" try: import apex # noqa: F401 from apex import amp # noqa: F401 - return True + return True and env_apex except ImportError: - return False + return False and env_apex def assert_fp16_available() -> None: @@ -96,7 +97,7 @@ def get_distributed_params(): local_rank, rank = [v and int(v) for v in [local_rank, rank]] world_size = int(os.getenv("WORLD_SIZE", world_size)) - return OrderedDict( + output = OrderedDict( local_rank=local_rank, start_rank=start_rank, rank=rank, @@ -105,6 +106,8 @@ def get_distributed_params(): master_port=os.environ["MASTER_PORT"], ) + return output + def get_distributed_env( local_rank, rank, world_size, use_cuda_visible_devices=True @@ -113,19 +116,19 @@ def get_distributed_env( env["RANK"] = str(rank) env["WORLD_SIZE"] = str(world_size) env["LOCAL_RANK"] = str(local_rank) - available_gpus = utils.get_available_gpus() if use_cuda_visible_devices: + available_gpus = utils.get_available_gpus() env["LOCAL_RANK"] = "0" env["CUDA_VISIBLE_DEVICES"] = str(available_gpus[local_rank]) return env -def distributed_run(data_parallel, worker_fn, *args, **kwargs): +def distributed_run(distributed, worker_fn, *args, **kwargs): distributed_params = get_distributed_params() local_rank = distributed_params["local_rank"] world_size = distributed_params["world_size"] - if data_parallel or world_size <= 1: + if not distributed or world_size <= 1: worker_fn(*args, **kwargs) elif local_rank is not None: torch.cuda.set_device(int(local_rank)) @@ -199,8 +202,11 @@ def process_components( if utils.is_wrapped_with_ddp(model): pass + # distributed data parallel run (ddp) (with apex support) elif get_rank() >= 0: - assert isinstance(model, nn.Module) + assert isinstance(model, nn.Module), \ + "No support for dixtributed KV model yet" + local_rank = distributed_params.pop("local_rank", 0) device = f"cuda:{local_rank}" model = utils.maybe_recursive_call(model, "to", device=device) @@ -217,18 +223,32 @@ def process_components( if syncbn: 
model = apex.parallel.convert_syncbn_model(model) else: - model = torch.nn.parallel.DistributedDataParallel( + model = nn.parallel.DistributedDataParallel( model, device_ids=[local_rank], output_device=local_rank ) - elif torch.cuda.device_count() > 1: - if isinstance(model, nn.Module): - model = torch.nn.DataParallel(model) - elif isinstance(model, dict): - model = {k: torch.nn.DataParallel(v) for k, v in model.items()} - elif use_apex: - model, optimizer = initialize_apex( - model, optimizer, **distributed_params - ) + # data parallel run (dp) (with apex support) + else: + # apex issue https://github.com/deepset-ai/FARM/issues/210 + can_use_apex = \ + (use_apex and torch.cuda.device_count() == 1) \ + or ( + torch.cuda.device_count() > 1 + and distributed_params.get("opt_level", "O0") == "O1" + ) + + if can_use_apex: + assert isinstance(model, nn.Module), \ + "No support for apex KV model yet" + + model, optimizer = initialize_apex( + model, optimizer, **distributed_params + ) + + if torch.cuda.device_count() > 1: + if isinstance(model, nn.Module): + model = nn.DataParallel(model) + elif isinstance(model, dict): + model = {k: nn.DataParallel(v) for k, v in model.items()} model: Model = utils.maybe_recursive_call(model, "to", device=device) diff --git a/catalyst/utils/torch.py b/catalyst/utils/torch.py index ce06c39c9f..089d7dae9a 100644 --- a/catalyst/utils/torch.py +++ b/catalyst/utils/torch.py @@ -135,7 +135,7 @@ def get_available_gpus(): """ if "CUDA_VISIBLE_DEVICES" in os.environ: result = os.environ["CUDA_VISIBLE_DEVICES"].split(",") - result = [int(id_) for id_ in result if id_ != ""] + result = [id_ for id_ in result if id_ != ""] # invisible GPUs # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars if -1 in result: diff --git a/docs/conf.py b/docs/conf.py index 2f7f7072d8..80de5cbb98 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -114,13 +114,9 @@ def get_version(mode: str = "full") -> str: # Ignoring Third-party packages 
-autodoc_mock_imports = ["alchemy", - "neptune", - "wandb", - "gym", - "gridfs", - "pymongo", - "redis"] +autodoc_mock_imports = [ + "alchemy", "neptune", "wandb", "gym", "gridfs", "pymongo", "redis" +] # The name of the Pygments (syntax highlighting) style to use. pygments_style = None diff --git a/examples/_tests_mnist_stages/README.md b/examples/_tests_cv_classification/README.md similarity index 100% rename from examples/_tests_mnist_stages/README.md rename to examples/_tests_cv_classification/README.md diff --git a/examples/_tests_mnist_stages2/__init__.py b/examples/_tests_cv_classification/__init__.py similarity index 100% rename from examples/_tests_mnist_stages2/__init__.py rename to examples/_tests_cv_classification/__init__.py diff --git a/examples/_tests_mnist_stages/config1.yml b/examples/_tests_cv_classification/config1.yml similarity index 93% rename from examples/_tests_mnist_stages/config1.yml rename to examples/_tests_cv_classification/config1.yml index 45c83d163f..d84c5717d6 100644 --- a/examples/_tests_mnist_stages/config1.yml +++ b/examples/_tests_cv_classification/config1.yml @@ -2,8 +2,8 @@ model_params: model: SimpleNet args: - expdir: "_tests_mnist_stages" - logdir: "./logs/mnist_simple" + expdir: "_tests_cv_classification" + logdir: "./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages/config2.yml b/examples/_tests_cv_classification/config2.yml similarity index 91% rename from examples/_tests_mnist_stages/config2.yml rename to examples/_tests_cv_classification/config2.yml index dcedb6d4ba..964d225c2d 100644 --- a/examples/_tests_mnist_stages/config2.yml +++ b/examples/_tests_cv_classification/config2.yml @@ -2,8 +2,8 @@ model_params: model: SimpleNet args: - expdir: "_tests_mnist_stages" - logdir: "./logs/mnist_simple" + expdir: "_tests_cv_classification" + logdir: "./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages/config3.yml b/examples/_tests_cv_classification/config2_infer.yml similarity 
index 86% rename from examples/_tests_mnist_stages/config3.yml rename to examples/_tests_cv_classification/config2_infer.yml index 3b51a12aeb..7cbb4067e2 100644 --- a/examples/_tests_mnist_stages/config3.yml +++ b/examples/_tests_cv_classification/config2_infer.yml @@ -2,7 +2,7 @@ model_params: model: SimpleNet args: - expdir: "_tests_mnist_stages" + expdir: "_tests_cv_classification" stages: diff --git a/examples/_tests_mnist_stages/config4.yml b/examples/_tests_cv_classification/config3.yml similarity index 92% rename from examples/_tests_mnist_stages/config4.yml rename to examples/_tests_cv_classification/config3.yml index 6f16e15741..909b91132c 100644 --- a/examples/_tests_mnist_stages/config4.yml +++ b/examples/_tests_cv_classification/config3.yml @@ -2,8 +2,8 @@ model_params: model: SimpleNet args: - expdir: "_tests_mnist_stages" - logdir: "./logs/mnist_simple" + expdir: "_tests_cv_classification" + logdir: "./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages/config6.yml b/examples/_tests_cv_classification/config4.yml similarity index 91% rename from examples/_tests_mnist_stages/config6.yml rename to examples/_tests_cv_classification/config4.yml index 5f2589d06b..d03a099016 100644 --- a/examples/_tests_mnist_stages/config6.yml +++ b/examples/_tests_cv_classification/config4.yml @@ -2,8 +2,8 @@ model_params: model: SimpleNet args: - expdir: "_tests_mnist_stages" - logdir: "./logs/mnist_simple" + expdir: "_tests_cv_classification" + logdir: "./logs/cv_classification" stages: @@ -16,7 +16,7 @@ stages: minimize_metric: False criterion_params: - criterion: SymmetricCrossEntropyLoss + criterion: CrossEntropyLoss scheduler_params: scheduler: MultiStepLR diff --git a/examples/_tests_mnist_stages/config5.yml b/examples/_tests_cv_classification/config5.yml similarity index 90% rename from examples/_tests_mnist_stages/config5.yml rename to examples/_tests_cv_classification/config5.yml index 355120683f..e76a6fb198 100644 --- 
a/examples/_tests_mnist_stages/config5.yml +++ b/examples/_tests_cv_classification/config5.yml @@ -2,8 +2,8 @@ model_params: model: SimpleNet args: - expdir: "_tests_mnist_stages" - logdir: "./logs/mnist_simple" + expdir: "_tests_cv_classification" + logdir: "./logs/cv_classification" stages: @@ -16,7 +16,7 @@ stages: minimize_metric: False criterion_params: - criterion: CrossEntropyLoss + criterion: SymmetricCrossEntropyLoss scheduler_params: scheduler: MultiStepLR diff --git a/examples/_tests_mnist_stages/config_finder.yml b/examples/_tests_cv_classification/config6_finder.yml similarity index 88% rename from examples/_tests_mnist_stages/config_finder.yml rename to examples/_tests_cv_classification/config6_finder.yml index b95b205a4d..0b622402ff 100644 --- a/examples/_tests_mnist_stages/config_finder.yml +++ b/examples/_tests_cv_classification/config6_finder.yml @@ -2,8 +2,8 @@ model_params: model: SimpleNet args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: "./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages/experiment.py b/examples/_tests_cv_classification/experiment.py similarity index 100% rename from examples/_tests_mnist_stages/experiment.py rename to examples/_tests_cv_classification/experiment.py diff --git a/examples/_tests_mnist_stages2/model.py b/examples/_tests_cv_classification/model.py similarity index 100% rename from examples/_tests_mnist_stages2/model.py rename to examples/_tests_cv_classification/model.py diff --git a/examples/_tests_mnist_stages2/README.md b/examples/_tests_cv_classification_transforms/README.md similarity index 100% rename from examples/_tests_mnist_stages2/README.md rename to examples/_tests_cv_classification_transforms/README.md diff --git a/examples/_tests_cv_classification_transforms/__init__.py b/examples/_tests_cv_classification_transforms/__init__.py new file mode 100644 index 0000000000..3797f3c89b --- /dev/null +++ 
b/examples/_tests_cv_classification_transforms/__init__.py @@ -0,0 +1,6 @@ +# flake8: noqa +from catalyst.dl import registry, SupervisedRunner as Runner +from .experiment import Experiment +from .model import SimpleNet + +registry.Model(SimpleNet) diff --git a/examples/_tests_mnist_stages2/config1.yml b/examples/_tests_cv_classification_transforms/config1.yml similarity index 94% rename from examples/_tests_mnist_stages2/config1.yml rename to examples/_tests_cv_classification_transforms/config1.yml index 05a62711d5..630cdb6700 100644 --- a/examples/_tests_mnist_stages2/config1.yml +++ b/examples/_tests_cv_classification_transforms/config1.yml @@ -5,8 +5,8 @@ runner_params: input_key: image args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: "./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages2/config2.yml b/examples/_tests_cv_classification_transforms/config2.yml similarity index 95% rename from examples/_tests_mnist_stages2/config2.yml rename to examples/_tests_cv_classification_transforms/config2.yml index 61a05f2e67..339f53cdee 100644 --- a/examples/_tests_mnist_stages2/config2.yml +++ b/examples/_tests_cv_classification_transforms/config2.yml @@ -5,8 +5,8 @@ runner_params: input_key: image args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: "./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages2/config3.yml b/examples/_tests_cv_classification_transforms/config3.yml similarity index 95% rename from examples/_tests_mnist_stages2/config3.yml rename to examples/_tests_cv_classification_transforms/config3.yml index a5bf679682..7c6541fa94 100644 --- a/examples/_tests_mnist_stages2/config3.yml +++ b/examples/_tests_cv_classification_transforms/config3.yml @@ -5,8 +5,8 @@ runner_params: input_key: &input_key image args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: 
"./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages2/config_finder.yml b/examples/_tests_cv_classification_transforms/config4_finder.yml similarity index 91% rename from examples/_tests_mnist_stages2/config_finder.yml rename to examples/_tests_cv_classification_transforms/config4_finder.yml index 6d565cfb68..351851d014 100644 --- a/examples/_tests_mnist_stages2/config_finder.yml +++ b/examples/_tests_cv_classification_transforms/config4_finder.yml @@ -5,8 +5,8 @@ runner_params: input_key: image args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: "./logs/cv_classification" stages: diff --git a/examples/_tests_mnist_stages2/config_fp16_O1.yml b/examples/_tests_cv_classification_transforms/config5_fp16.yml similarity index 95% rename from examples/_tests_mnist_stages2/config_fp16_O1.yml rename to examples/_tests_cv_classification_transforms/config5_fp16.yml index 209808a0f3..8868d3914e 100644 --- a/examples/_tests_mnist_stages2/config_fp16_O1.yml +++ b/examples/_tests_cv_classification_transforms/config5_fp16.yml @@ -5,8 +5,8 @@ runner_params: input_key: image args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: "./logs/cv_classification" distributed_params: opt_level: O1 diff --git a/examples/_tests_mnist_stages2/config_fp16_O2.yml b/examples/_tests_cv_classification_transforms/config6_fp16.yml similarity index 95% rename from examples/_tests_mnist_stages2/config_fp16_O2.yml rename to examples/_tests_cv_classification_transforms/config6_fp16.yml index 292ae9859f..12de52819b 100644 --- a/examples/_tests_mnist_stages2/config_fp16_O2.yml +++ b/examples/_tests_cv_classification_transforms/config6_fp16.yml @@ -5,8 +5,8 @@ runner_params: input_key: image args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: "./logs/cv_classification" distributed_params: opt_level: O2 diff --git 
a/examples/_tests_mnist_stages2/config_fp16_O3.yml b/examples/_tests_cv_classification_transforms/config7_fp16.yml similarity index 94% rename from examples/_tests_mnist_stages2/config_fp16_O3.yml rename to examples/_tests_cv_classification_transforms/config7_fp16.yml index 2865c6e2e6..547943451b 100644 --- a/examples/_tests_mnist_stages2/config_fp16_O3.yml +++ b/examples/_tests_cv_classification_transforms/config7_fp16.yml @@ -5,8 +5,8 @@ runner_params: input_key: image args: - expdir: "mnist_simple" - logdir: "./logs/mnist_simple" + expdir: "cv_classification" + logdir: "./logs/cv_classification" distributed_params: opt_level: O3 diff --git a/examples/_tests_mnist_stages2/experiment.py b/examples/_tests_cv_classification_transforms/experiment.py similarity index 100% rename from examples/_tests_mnist_stages2/experiment.py rename to examples/_tests_cv_classification_transforms/experiment.py diff --git a/examples/_tests_mnist_stages/model.py b/examples/_tests_cv_classification_transforms/model.py similarity index 92% rename from examples/_tests_mnist_stages/model.py rename to examples/_tests_cv_classification_transforms/model.py index d483f7e386..3ed11782e8 100644 --- a/examples/_tests_mnist_stages/model.py +++ b/examples/_tests_cv_classification_transforms/model.py @@ -1,10 +1,7 @@ import torch.nn as nn import torch.nn.functional as F -from catalyst.dl import registry - -@registry.Model class SimpleNet(nn.Module): def __init__(self): super().__init__() diff --git a/examples/_test_segmentation/README.md b/examples/_tests_cv_segmentation/README.md similarity index 74% rename from examples/_test_segmentation/README.md rename to examples/_tests_cv_segmentation/README.md index 0e6942814d..3bd005234c 100644 --- a/examples/_test_segmentation/README.md +++ b/examples/_tests_cv_segmentation/README.md @@ -16,7 +16,7 @@ pip install -U "catalyst[cv]" # for zsh ```bash mkdir -p data -cd ./_test_segmentation/data/ +cd ./_tests_cv_segmentation/data/ download-gdrive 
1iYaNijLmzsrMlAdMoUEhhJuo-5bkeAuj segmentation_data.zip extract-archive segmentation_data.zip cd ../.. @@ -26,8 +26,8 @@ cd ../.. ```bash catalyst-dl run --configs \ - ./_test_segmentation/config.yml \ - ./_test_segmentation/transforms.yml \ + ./_tests_cv_segmentation/config.yml \ + ./_tests_cv_segmentation/transforms.yml \ --verbose ``` @@ -36,13 +36,13 @@ catalyst-dl run --configs \ For more information about docker image goto `catalyst/docker`. ```bash -export LOGDIR=$(pwd)/logs/_test_segmentation +export LOGDIR=$(pwd)/logs/_tests_cv_segmentation docker run -it --rm \ -v $(pwd):/workspace \ -v $LOGDIR:/logdir/ \ catalyst-base \ catalyst-dl run --configs \ - ./_test_segmentation/config.yml \ - ./_test_segmentation/transforms.yml \ + ./_tests_cv_segmentation/config.yml \ + ./_tests_cv_segmentation/transforms.yml \ --verbose --logdir=/logdir ``` diff --git a/examples/_test_segmentation/__init__.py b/examples/_tests_cv_segmentation/__init__.py similarity index 100% rename from examples/_test_segmentation/__init__.py rename to examples/_tests_cv_segmentation/__init__.py diff --git a/examples/_test_segmentation/config.yml b/examples/_tests_cv_segmentation/config.yml similarity index 87% rename from examples/_test_segmentation/config.yml rename to examples/_tests_cv_segmentation/config.yml index a458e1f4fd..cb623271c8 100644 --- a/examples/_test_segmentation/config.yml +++ b/examples/_tests_cv_segmentation/config.yml @@ -8,8 +8,8 @@ runner_params: input_target_key: mask args: - expdir: "_test_segmentation" - logdir: "./logs/_test_segmentation" + expdir: "_tests_cv_segmentation" + logdir: "./logs/_tests_cv_segmentation" seed: &seed 42 stages: @@ -17,8 +17,8 @@ stages: data_params: batch_size: 8 num_workers: 0 - image_path: ./_test_segmentation/data/segmentation_data/train - mask_path: ./_test_segmentation/data/segmentation_data/train_masks + image_path: ./_tests_cv_segmentation/data/segmentation_data/train + mask_path: 
./_tests_cv_segmentation/data/segmentation_data/train_masks valid_size: 0.2 state_params: diff --git a/examples/_test_segmentation/dataset.py b/examples/_tests_cv_segmentation/dataset.py similarity index 100% rename from examples/_test_segmentation/dataset.py rename to examples/_tests_cv_segmentation/dataset.py diff --git a/examples/_test_segmentation/experiment.py b/examples/_tests_cv_segmentation/experiment.py similarity index 100% rename from examples/_test_segmentation/experiment.py rename to examples/_tests_cv_segmentation/experiment.py diff --git a/examples/_test_segmentation/transforms.yml b/examples/_tests_cv_segmentation/transforms.yml similarity index 100% rename from examples/_test_segmentation/transforms.yml rename to examples/_tests_cv_segmentation/transforms.yml diff --git a/examples/_tests_mnist_stages/__init__.py b/examples/_tests_mnist_stages/__init__.py deleted file mode 100644 index 85bd25de1b..0000000000 --- a/examples/_tests_mnist_stages/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# flake8: noqa -from catalyst.dl import SupervisedRunner as Runner -from .experiment import Experiment -from .model import SimpleNet diff --git a/examples/_tests_distilbert_text_classification/README.md b/examples/_tests_nlp_classification/README.md similarity index 100% rename from examples/_tests_distilbert_text_classification/README.md rename to examples/_tests_nlp_classification/README.md diff --git a/examples/_tests_distilbert_text_classification/config1_basic.yml b/examples/_tests_nlp_classification/config1_basic.yml similarity index 90% rename from examples/_tests_distilbert_text_classification/config1_basic.yml rename to examples/_tests_nlp_classification/config1_basic.yml index 9b8b81cf37..b9c1933679 100644 --- a/examples/_tests_distilbert_text_classification/config1_basic.yml +++ b/examples/_tests_nlp_classification/config1_basic.yml @@ -11,9 +11,9 @@ model_params: args: # where to look for __init__.py file - expdir: '_tests_distilbert_text_classification' + 
expdir: '_tests_nlp_classification' # store logs in this subfolder - baselogdir: './logs/_tests_distilbert_text_classification' + baselogdir: './logs/_tests_nlp_classification' # common settings for all stages stages: @@ -21,7 +21,7 @@ stages: data_params: batch_size: 24 num_workers: 1 - path_to_data: './examples/_tests_distilbert_text_classification/input' + path_to_data: './examples/_tests_nlp_classification/input' train_filename: "train.csv" valid_filename: "valid.csv" text_field: "text" diff --git a/examples/_tests_distilbert_text_classification/config2_small_max_seq_length.yml b/examples/_tests_nlp_classification/config2_small_max_seq_length.yml similarity index 90% rename from examples/_tests_distilbert_text_classification/config2_small_max_seq_length.yml rename to examples/_tests_nlp_classification/config2_small_max_seq_length.yml index 64c419f77e..47e886d262 100644 --- a/examples/_tests_distilbert_text_classification/config2_small_max_seq_length.yml +++ b/examples/_tests_nlp_classification/config2_small_max_seq_length.yml @@ -11,9 +11,9 @@ model_params: args: # where to look for __init__.py file - expdir: '_tests_distilbert_text_classification' + expdir: '_tests_nlp_classification' # store logs in this subfolder - baselogdir: './logs/_tests_distilbert_text_classification' + baselogdir: './logs/_tests_nlp_classification' # common settings for all stages stages: @@ -21,7 +21,7 @@ stages: data_params: batch_size: 24 num_workers: 1 - path_to_data: './examples/_tests_distilbert_text_classification/input' + path_to_data: './examples/_tests_nlp_classification/input' train_filename: "train.csv" valid_filename: "valid.csv" text_field: "text" diff --git a/examples/_tests_distilbert_text_classification/input/train.csv b/examples/_tests_nlp_classification/input/train.csv similarity index 100% rename from examples/_tests_distilbert_text_classification/input/train.csv rename to examples/_tests_nlp_classification/input/train.csv diff --git 
a/examples/_tests_distilbert_text_classification/input/valid.csv b/examples/_tests_nlp_classification/input/valid.csv similarity index 100% rename from examples/_tests_distilbert_text_classification/input/valid.csv rename to examples/_tests_nlp_classification/input/valid.csv diff --git a/examples/_tests_scripts/z_segmentation.py b/examples/_tests_scripts/z_segmentation.py index 905933fcc2..12cd7bae6f 100644 --- a/examples/_tests_scripts/z_segmentation.py +++ b/examples/_tests_scripts/z_segmentation.py @@ -172,7 +172,9 @@ def get_loaders(transform): # lovasz LovaszLossBinary criterion +model = Unet(num_classes=1, in_channels=1, num_channels=32, num_blocks=2) criterion = LovaszLossBinary() +optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) runner.train( model=model, @@ -185,8 +187,6 @@ def get_loaders(transform): ) # Multiclasses checks -model = Unet(num_classes=2, in_channels=1, num_channels=32, num_blocks=2) - # lovasz LovaszLossMultiClass criterion data_transform = transforms.Compose([ @@ -210,7 +210,9 @@ def get_loaders(transform): loaders = get_loaders(data_transform) +model = Unet(num_classes=2, in_channels=1, num_channels=32, num_blocks=2) criterion = LovaszLossMultiClass() +optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) runner.train( model=model, @@ -251,7 +253,9 @@ def transform_targets(x): loaders = get_loaders(data_transform) +model = Unet(num_classes=2, in_channels=1, num_channels=32, num_blocks=2) criterion = LovaszLossMultiLabel() +optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) runner.train( model=model, diff --git a/examples/mnist_gans/configs/conditional_gan.yml b/examples/mnist_gans/configs/conditional_gan.yml index 446719269e..53039600f4 100644 --- a/examples/mnist_gans/configs/conditional_gan.yml +++ b/examples/mnist_gans/configs/conditional_gan.yml @@ -26,10 +26,10 @@ model_params: generator: model: SimpleCGenerator noise_dim: &noise_dim 16 - num_classes: &n_classes 10 + num_classes: &num_classes 10 discriminator: 
model: SimpleCDiscriminator - num_classes: *n_classes + num_classes: *num_classes args: @@ -57,7 +57,7 @@ stages: value: 0. output_key: &fake_targets "fake_targets" - transform: OneHotTargetTransform - num_classes: *n_classes + num_classes: *num_classes input_key: *class_targets output_key: *class_targets_one_hot diff --git a/examples/mnist_gans/configs/conditional_wasserstein_gan_gp.yml b/examples/mnist_gans/configs/conditional_wasserstein_gan_gp.yml index 99dca3c284..dc5ae7ceff 100644 --- a/examples/mnist_gans/configs/conditional_wasserstein_gan_gp.yml +++ b/examples/mnist_gans/configs/conditional_wasserstein_gan_gp.yml @@ -26,10 +26,10 @@ model_params: generator: model: SimpleCGenerator noise_dim: &noise_dim 16 - num_classes: &n_classes 10 + num_classes: &num_classes 10 critic: model: SimpleCDiscriminator - num_classes: *n_classes + num_classes: *num_classes args: @@ -51,7 +51,7 @@ stages: tensor_size: [*noise_dim] output_key: *noise_input - transform: OneHotTargetTransform - num_classes: *n_classes + num_classes: *num_classes input_key: *class_targets output_key: *class_targets_one_hot diff --git a/teamcity/ddpg.sh b/teamcity/ddpg.sh index ccf5a2c566..331fa4d1d6 100644 --- a/teamcity/ddpg.sh +++ b/teamcity/ddpg.sh @@ -7,5 +7,5 @@ pip install -r requirements/requirements.txt echo "pip install -r requirements/requirements-rl.txt" pip install -r requirements/requirements-rl.txt -echo "./bin/tests/check_ddpg.sh" -./bin/tests/check_ddpg.sh \ No newline at end of file +echo "./bin/tests/check_rl_ddpg.sh" +OMP_NUM_THREADS="1" MKL_NUM_THREADS="1" bash ./bin/tests/check_rl_ddpg.sh \ No newline at end of file diff --git a/teamcity/dqn.sh b/teamcity/dqn.sh index 98457596f3..34c0568e9f 100644 --- a/teamcity/dqn.sh +++ b/teamcity/dqn.sh @@ -7,5 +7,5 @@ pip install -r requirements/requirements.txt echo "pip install -r requirements/requirements-rl.txt" pip install -r requirements/requirements-rl.txt -echo "./bin/tests/check_dqn.sh" -./bin/tests/check_dqn.sh \ No newline at 
end of file +echo "./bin/tests/check_rl_dqn.sh" +OMP_NUM_THREADS="1" MKL_NUM_THREADS="1" bash ./bin/tests/check_rl_dqn.sh \ No newline at end of file diff --git a/teamcity/examples.sh b/teamcity/examples.sh index 99b85c8b24..d33a6f31d2 100644 --- a/teamcity/examples.sh +++ b/teamcity/examples.sh @@ -1,6 +1,17 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails +set -eo pipefail -v + + +################################### LINUX ################################### + echo 'apt-get update && apt-get install wget' apt-get update && apt-get install wget + +################################### PYTHON ################################## + echo 'pip install -r requirements/requirements.txt' pip install -r requirements/requirements.txt @@ -16,8 +27,54 @@ pip install -r requirements/requirements-rl.txt echo 'pip install alchemy-catalyst' pip install alchemy-catalyst -echo './bin/tests/check_dl.sh' -./bin/tests/check_dl.sh -echo './bin/tests/check_nlp.sh' -./bin/tests/check_nlp.sh +################################### ENV ##################################### +export OMP_NUM_THREADS="1" # exported so the child test scripts launched below inherit the thread caps +export MKL_NUM_THREADS="1" + + +################################### APEX #################################### +#git clone https://github.com/NVIDIA/apex apex_last +#pip install -v --no-cache-dir \ +# --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex_last +pip install -v --no-cache-dir \ + --global-option="--cpp_ext" --global-option="--cuda_ext" \ + git+https://github.com/NVIDIA/apex + +################################### DATA #################################### +rm -rf ./data + +# load the data +mkdir -p data +bash bin/scripts/download-gdrive 1N82zh0kzmnzqRvUyMgVOGsCoS1kHf3RP ./data/isbi.tar.gz +tar -xf ./data/isbi.tar.gz -C ./data/ + +# mkdir -p data +# bash bin/scripts/download-gdrive 1iYaNijLmzsrMlAdMoUEhhJuo-5bkeAuj ./data/segmentation_data.zip +# unzip -qqo ./data/segmentation_data.zip -d ./data 2> /dev/null || true + + +################################### CPU 
###################################### +echo './bin/tests/check_dl_all.sh CPU' +USE_APEX="0" CUDA_VISIBLE_DEVICES="" bash ./bin/tests/check_dl_all.sh + + +################################### GPU ###################################### +echo './bin/tests/check_dl_all.sh GPU' +USE_APEX="0" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh +USE_APEX="1" CUDA_VISIBLE_DEVICES="0" bash ./bin/tests/check_dl_all.sh + + +################################### GPU2 #################################### +echo './bin/tests/check_dl_all.sh GPU2' +USE_APEX="0" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh +USE_APEX="0" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh +USE_APEX="1" USE_DDP="0" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh +USE_APEX="1" USE_DDP="1" CUDA_VISIBLE_DEVICES="0,1" \ + bash ./bin/tests/check_dl_all.sh + +################################### DATA #################################### +rm -rf ./data diff --git a/teamcity/ppo.sh b/teamcity/ppo.sh index 863019b2a4..655540204b 100644 --- a/teamcity/ppo.sh +++ b/teamcity/ppo.sh @@ -7,5 +7,5 @@ pip install -r requirements/requirements.txt echo "pip install -r requirements/requirements-rl.txt" pip install -r requirements/requirements-rl.txt -echo "./bin/tests/check_ppo.sh" -./bin/tests/check_ppo.sh \ No newline at end of file +echo "./bin/tests/check_rl_ppo.sh" +OMP_NUM_THREADS="1" MKL_NUM_THREADS="1" bash ./bin/tests/check_rl_ppo.sh \ No newline at end of file diff --git a/teamcity/reinforce.sh b/teamcity/reinforce.sh index c56a90dfdd..49d37a0631 100644 --- a/teamcity/reinforce.sh +++ b/teamcity/reinforce.sh @@ -7,5 +7,5 @@ pip install -r requirements/requirements.txt echo "pip install -r requirements/requirements-rl.txt" pip install -r requirements/requirements-rl.txt -echo "./bin/tests/check_reinforce.sh" -./bin/tests/check_reinforce.sh \ No newline at end of file +echo "./bin/tests/check_rl_reinforce.sh" +OMP_NUM_THREADS="1" 
MKL_NUM_THREADS="1" bash ./bin/tests/check_rl_reinforce.sh \ No newline at end of file diff --git a/teamcity/sac.sh b/teamcity/sac.sh index 0e9f1e4004..89400a7147 100644 --- a/teamcity/sac.sh +++ b/teamcity/sac.sh @@ -7,5 +7,5 @@ pip install -r requirements/requirements.txt echo "pip install -r requirements/requirements-rl.txt" pip install -r requirements/requirements-rl.txt -echo "./bin/tests/check_sac.sh" -./bin/tests/check_sac.sh \ No newline at end of file +echo "./bin/tests/check_rl_sac.sh" +OMP_NUM_THREADS="1" MKL_NUM_THREADS="1" bash ./bin/tests/check_rl_sac.sh \ No newline at end of file diff --git a/teamcity/td3.sh b/teamcity/td3.sh index 56613c12d1..5b6af1d451 100644 --- a/teamcity/td3.sh +++ b/teamcity/td3.sh @@ -7,5 +7,5 @@ pip install -r requirements/requirements.txt echo "pip install -r requirements/requirements-rl.txt" pip install -r requirements/requirements-rl.txt -echo "./bin/tests/check_td3.sh" -./bin/tests/check_td3.sh \ No newline at end of file +echo "./bin/tests/check_rl_td3.sh" +OMP_NUM_THREADS="1" MKL_NUM_THREADS="1" bash ./bin/tests/check_rl_td3.sh \ No newline at end of file