From 47c1820795f413696a81791a5097069137ac88d4 Mon Sep 17 00:00:00 2001
From: Nicole Deflaux
Date: Fri, 5 Mar 2021 06:37:08 -0800
Subject: [PATCH] Remove Python package version pins.

Also:

* added package `plotnine` per https://github.com/DataBiosphere/terra-docker/issues/126
* replaced package `tensorflow` with `tensorflow_cpu` to get rid of the warnings
  about GPUs being unavailable on Terra Cloud Runtimes
* added package `google-resumable-media` as an explicit dependency to ensure a more
  recent version of it is used; pandas-gbq depends on it for table uploads
* the `--use_rest_api` flag is now needed for the `%%bigquery` magic
  * As of release [google-cloud-bigquery 1.26.0 (2020-07-20)](https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#1260-2020-07-20)
    the BigQuery Python client uses the BigQuery Storage client by default.
  * This currently causes an error on Terra Cloud Runtimes: `the user does not have
    'bigquery.readsessions.create' permission for ''`.
  * To work around this, we must uninstall the dependency `google-cloud-bigquery-storage`
    so that the `--use_rest_api` flag can be used with `%%bigquery` to fall back to the
    older, slower mechanism for data transfer.
---
 .github/workflows/test-terra-jupyter-aou.yml   |  14 +-
 .github/workflows/test-terra-jupyter-gatk.yml  | 117 ++++++++++++++++
 .github/workflows/test-terra-jupyter-hail.yml  | 117 ++++++++++++++++
 .../workflows/test-terra-jupyter-python.yml    |  14 +-
 terra-jupyter-aou/Dockerfile                   |   7 +-
 .../tests/gatk_smoke_test.ipynb                |  95 +++++++++++++
 .../tests/hail_smoke_test.ipynb                |  95 +++++++++++++
 terra-jupyter-python/Dockerfile                | 129 ++++++++++--------
 terra-jupyter-python/tests/smoke_test.ipynb    |  24 ++--
 terra-jupyter-python/tests/smoke_test.py       |   5 +-
 10 files changed, 535 insertions(+), 82 deletions(-)
 create mode 100644 .github/workflows/test-terra-jupyter-gatk.yml
 create mode 100644 .github/workflows/test-terra-jupyter-hail.yml
 create mode 100644 terra-jupyter-gatk/tests/gatk_smoke_test.ipynb
 create mode 100644 terra-jupyter-hail/tests/hail_smoke_test.ipynb

diff --git a/.github/workflows/test-terra-jupyter-aou.yml b/.github/workflows/test-terra-jupyter-aou.yml
index 7a88ad38..4edd66a0 100644
--- a/.github/workflows/test-terra-jupyter-aou.yml
+++ b/.github/workflows/test-terra-jupyter-aou.yml
@@ -15,8 +15,18 @@ on:
     paths:
     - 'terra-jupyter-aou/**'
     - '.github/workflows/test-terra-jupyter-aou.yml'
-  # Note: secrets are not passed to pull requests from forks, so the dev team will need to use the manual workflow
-  # dispatch trigger when receiving community contributions.
+
+  push:
+    # Note: GitHub secrets are not passed to pull requests from forks. For community contributions from
+    # regular contributors, it's a good idea for the contributor to configure the GitHub Actions to run correctly
+    # in their fork as described above.
+    #
+    # For occasional contributors, the dev team will merge the PR fork branch to a branch in upstream named
+    # test-community-contribution- to run all the GitHub Action smoke tests.
+    branches: [ 'test-community-contribution*' ]
+    paths:
+    - 'terra-jupyter-aou/**'
+    - '.github/workflows/test-terra-jupyter-aou.yml'
 
   workflow_dispatch:
     # Allows manually triggering of workflow on a selected branch via the GitHub Actions tab.
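Editor's note: for reference, the BigQuery work-around described in the commit message
above amounts to the following. This is a minimal sketch, not part of the patch; it
queries the same public table the smoke tests use, and to our understanding
`create_bqstorage_client=False` is the client-library analog of the notebook magic's
`--use_rest_api` flag (in a notebook cell, the equivalent is `%%bigquery --use_rest_api`
followed by the query, as shown in the `smoke_test.ipynb` changes below):

```python
from google.cloud import bigquery

# Assumes application-default credentials and a default project are already
# configured, as they are on a Terra Cloud Runtime.
client = bigquery.Client()

sql = """
SELECT country_name, alpha_2_code
FROM `bigquery-public-data.utility_us.country_code_iso`
LIMIT 5
"""

# create_bqstorage_client=False keeps the download on the REST API code path,
# avoiding the 'bigquery.readsessions.create' permission error that the
# BigQuery Storage client triggers on Terra Cloud Runtimes.
df = client.query(sql).result().to_dataframe(create_bqstorage_client=False)
print(df)
```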
diff --git a/.github/workflows/test-terra-jupyter-gatk.yml b/.github/workflows/test-terra-jupyter-gatk.yml
new file mode 100644
index 00000000..8f41590d
--- /dev/null
+++ b/.github/workflows/test-terra-jupyter-gatk.yml
@@ -0,0 +1,117 @@
+name: Test terra-jupyter-gatk
+# Perform smoke tests on the terra-jupyter-gatk Docker image to have some amount of confidence that
+# Python package versions are compatible.
+#
+# To configure the minimal auth needed for these tests to be able to read public data from Google Cloud Platform:
+# Step 1: Create a service account per these instructions:
+#   https://github.com/google-github-actions/setup-gcloud/blob/master/setup-gcloud/README.md
+# Step 2: Give the service account the following permissions within the project: BigQuery User
+# Step 3: Store its key and project ID as GitHub repository secrets GCP_SA_KEY and GCP_PROJECT_ID.
+#   https://docs.github.com/en/free-pro-team@latest/actions/reference/encrypted-secrets#creating-encrypted-secrets-for-a-repository

+on:
+  pull_request:
+    branches: [ master ]
+    paths:
+    - 'terra-jupyter-gatk/**'
+    - '.github/workflows/test-terra-jupyter-gatk.yml'
+
+  push:
+    # Note: GitHub secrets are not passed to pull requests from forks. For community contributions from
+    # regular contributors, it's a good idea for the contributor to configure the GitHub Actions to run correctly
+    # in their fork as described above.
+    #
+    # For occasional contributors, the dev team will merge the PR fork branch to a branch in upstream named
+    # test-community-contribution- to run all the GitHub Action smoke tests.
+    # TODO(deflaux) remove the last three 'push' branches below after testing is complete.
+    branches: [ 'test-community-contribution*', add-more-smoke-tests, update-python-versions, bump-aou-versions ]
+    paths:
+    - 'terra-jupyter-gatk/**'
+    - '.github/workflows/test-terra-jupyter-gatk.yml'
+
+  workflow_dispatch:
+    # Allows manual triggering of the workflow on a selected branch via the GitHub Actions tab.
+    # GitHub blog demo: https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/.
+
+env:
+  GOOGLE_PROJECT: ${{ secrets.GCP_PROJECT_ID }}
+
+jobs:
+
+  test_docker_image:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
+
+    - name: Set up Cloud SDK
+      uses: google-github-actions/setup-gcloud@master
+      with:
+        project_id: ${{ secrets.GCP_PROJECT_ID }}
+        service_account_key: ${{ secrets.GCP_SA_KEY }}
+        export_default_credentials: true
+
+    - name: Build Docker image and base images too, if needed
+      run: |
+        gcloud auth configure-docker
+        ./build_smoke_test_image.sh terra-jupyter-gatk
+
+    - name: Run Python code specific to notebooks with nbconvert
+      # Run all notebooks from start to finish, regardless of errors, so that we can capture the
+      # result as a workflow artifact.
+      # See also https://github.com/marketplace/actions/run-notebook if a more complicated
+      # workflow for notebooks is needed in the future.
+      run: |
+        chmod a+w -R $GITHUB_WORKSPACE
+        docker run \
+          --env GOOGLE_PROJECT \
+          --volume "${{ env.GOOGLE_APPLICATION_CREDENTIALS }}:/tmp/credentials.json:ro" \
+          --env GOOGLE_APPLICATION_CREDENTIALS="/tmp/credentials.json" \
+          --volume $GITHUB_WORKSPACE:/tests \
+          --workdir=/tests \
+          --entrypoint="" \
+          terra-jupyter-gatk:smoke-test \
+          /bin/bash -c 'for nb in {terra-jupyter-python/tests,terra-jupyter-gatk/tests}/*ipynb ; do jupyter nbconvert --to html --ExecutePreprocessor.allow_errors=True --execute "${nb}" ; done'
+
+    - name: Upload workflow artifacts
+      uses: actions/upload-artifact@v2
+      with:
+        name: notebook-execution-results
+        path: |
+          terra-jupyter-python/tests/*.html
+          terra-jupyter-gatk/tests/*.html
+        retention-days: 30
+
+    - name: Test Python code with pytest
+      run: |
+        docker run \
+          --env GOOGLE_PROJECT \
+          --volume "${{ env.GOOGLE_APPLICATION_CREDENTIALS }}:/tmp/credentials.json:ro" \
+          --env GOOGLE_APPLICATION_CREDENTIALS="/tmp/credentials.json" \
+          --volume $GITHUB_WORKSPACE:/tests \
+          --workdir=/tests \
+          --entrypoint="" \
+          terra-jupyter-gatk:smoke-test \
+          /bin/bash -c 'pip3 install pytest ; pytest terra-jupyter-python/tests/ terra-jupyter-gatk/tests/'
+
+    - name: Test Python code specific to notebooks with nbconvert
+      # Simply `Cell -> Run All` these notebooks and expect no errors in the case of a successful run of the test suite.
+      # If the tests throw any exceptions, execution of the notebooks will halt at that point. Look at the workflow
+      # artifacts to understand if there are more failures than just the one that caused this task to halt.
+      run: |
+        docker run \
+          --env GOOGLE_PROJECT \
+          --volume "${{ env.GOOGLE_APPLICATION_CREDENTIALS }}:/tmp/credentials.json:ro" \
+          --env GOOGLE_APPLICATION_CREDENTIALS="/tmp/credentials.json" \
+          --volume $GITHUB_WORKSPACE:/tests \
+          --workdir=/tests \
+          --entrypoint="" \
+          terra-jupyter-gatk:smoke-test \
+          /bin/bash -c 'for nb in {terra-jupyter-python/tests,terra-jupyter-gatk/tests}/*ipynb ; do jupyter nbconvert --to html --execute "${nb}" ; done'
+
diff --git a/.github/workflows/test-terra-jupyter-hail.yml b/.github/workflows/test-terra-jupyter-hail.yml
new file mode 100644
index 00000000..b10b0744
--- /dev/null
+++ b/.github/workflows/test-terra-jupyter-hail.yml
@@ -0,0 +1,117 @@
+name: Test terra-jupyter-hail
+# Perform smoke tests on the terra-jupyter-hail Docker image to have some amount of confidence that
+# Python package versions are compatible.
+#
+# To configure the minimal auth needed for these tests to be able to read public data from Google Cloud Platform:
+# Step 1: Create a service account per these instructions:
+#   https://github.com/google-github-actions/setup-gcloud/blob/master/setup-gcloud/README.md
+# Step 2: Give the service account the following permissions within the project: BigQuery User
+# Step 3: Store its key and project ID as GitHub repository secrets GCP_SA_KEY and GCP_PROJECT_ID.
+#   https://docs.github.com/en/free-pro-team@latest/actions/reference/encrypted-secrets#creating-encrypted-secrets-for-a-repository
+
+on:
+  pull_request:
+    branches: [ master ]
+    paths:
+    - 'terra-jupyter-hail/**'
+    - '.github/workflows/test-terra-jupyter-hail.yml'
+
+  push:
+    # Note: GitHub secrets are not passed to pull requests from forks. For community contributions from
+    # regular contributors, it's a good idea for the contributor to configure the GitHub Actions to run correctly
+    # in their fork as described above.
+    #
+    # For occasional contributors, the dev team will merge the PR fork branch to a branch in upstream named
+    # test-community-contribution- to run all the GitHub Action smoke tests.
+    # TODO(deflaux) remove the last three 'push' branches below after testing is complete.
+    branches: [ 'test-community-contribution*', add-more-smoke-tests, update-python-versions, bump-aou-versions ]
+    paths:
+    - 'terra-jupyter-hail/**'
+    - '.github/workflows/test-terra-jupyter-hail.yml'
+
+  workflow_dispatch:
+    # Allows manual triggering of the workflow on a selected branch via the GitHub Actions tab.
+    # GitHub blog demo: https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/.
+
+env:
+  GOOGLE_PROJECT: ${{ secrets.GCP_PROJECT_ID }}
+
+jobs:
+
+  test_docker_image:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
+
+    - name: Set up Cloud SDK
+      uses: google-github-actions/setup-gcloud@master
+      with:
+        project_id: ${{ secrets.GCP_PROJECT_ID }}
+        service_account_key: ${{ secrets.GCP_SA_KEY }}
+        export_default_credentials: true
+
+    - name: Build Docker image and base images too, if needed
+      run: |
+        gcloud auth configure-docker
+        ./build_smoke_test_image.sh terra-jupyter-hail
+
+    - name: Run Python code specific to notebooks with nbconvert
+      # Run all notebooks from start to finish, regardless of errors, so that we can capture the
+      # result as a workflow artifact.
+      # See also https://github.com/marketplace/actions/run-notebook if a more complicated
+      # workflow for notebooks is needed in the future.
+      run: |
+        chmod a+w -R $GITHUB_WORKSPACE
+        docker run \
+          --env GOOGLE_PROJECT \
+          --volume "${{ env.GOOGLE_APPLICATION_CREDENTIALS }}:/tmp/credentials.json:ro" \
+          --env GOOGLE_APPLICATION_CREDENTIALS="/tmp/credentials.json" \
+          --volume $GITHUB_WORKSPACE:/tests \
+          --workdir=/tests \
+          --entrypoint="" \
+          terra-jupyter-hail:smoke-test \
+          /bin/bash -c 'for nb in {terra-jupyter-python/tests,terra-jupyter-hail/tests}/*ipynb ; do jupyter nbconvert --to html --ExecutePreprocessor.allow_errors=True --execute "${nb}" ; done'
+
+    - name: Upload workflow artifacts
+      uses: actions/upload-artifact@v2
+      with:
+        name: notebook-execution-results
+        path: |
+          terra-jupyter-python/tests/*.html
+          terra-jupyter-hail/tests/*.html
+        retention-days: 30
+
+    - name: Test Python code with pytest
+      run: |
+        docker run \
+          --env GOOGLE_PROJECT \
+          --volume "${{ env.GOOGLE_APPLICATION_CREDENTIALS }}:/tmp/credentials.json:ro" \
+          --env GOOGLE_APPLICATION_CREDENTIALS="/tmp/credentials.json" \
+          --volume $GITHUB_WORKSPACE:/tests \
+          --workdir=/tests \
+          --entrypoint="" \
+          terra-jupyter-hail:smoke-test \
+          /bin/bash -c 'pip3 install pytest ; pytest terra-jupyter-python/tests/ terra-jupyter-hail/tests/'
+
+    - name: Test Python code specific to notebooks with nbconvert
+      # Simply `Cell -> Run All` these notebooks and expect no errors in the case of a successful run of the test suite.
+      # If the tests throw any exceptions, execution of the notebooks will halt at that point. Look at the workflow
+      # artifacts to understand if there are more failures than just the one that caused this task to halt.
+      run: |
+        docker run \
+          --env GOOGLE_PROJECT \
+          --volume "${{ env.GOOGLE_APPLICATION_CREDENTIALS }}:/tmp/credentials.json:ro" \
+          --env GOOGLE_APPLICATION_CREDENTIALS="/tmp/credentials.json" \
+          --volume $GITHUB_WORKSPACE:/tests \
+          --workdir=/tests \
+          --entrypoint="" \
+          terra-jupyter-hail:smoke-test \
+          /bin/bash -c 'for nb in {terra-jupyter-python/tests,terra-jupyter-hail/tests}/*ipynb ; do jupyter nbconvert --to html --execute "${nb}" ; done'
+
diff --git a/.github/workflows/test-terra-jupyter-python.yml b/.github/workflows/test-terra-jupyter-python.yml
index 62f0cf29..9bf14dbc 100644
--- a/.github/workflows/test-terra-jupyter-python.yml
+++ b/.github/workflows/test-terra-jupyter-python.yml
@@ -15,8 +15,18 @@ on:
     paths:
     - 'terra-jupyter-python/**'
    - '.github/workflows/test-terra-jupyter-python.yml'
-  # Note: secrets are not passed to pull requests from forks, so the dev team will need to use the manual workflow
-  # dispatch trigger when receiving community contributions.
+
+  push:
+    # Note: GitHub secrets are not passed to pull requests from forks. For community contributions from
+    # regular contributors, it's a good idea for the contributor to configure the GitHub Actions to run correctly
+    # in their fork as described above.
+    #
+    # For occasional contributors, the dev team will merge the PR fork branch to a branch in upstream named
+    # test-community-contribution- to run all the GitHub Action smoke tests.
+    branches: [ 'test-community-contribution*' ]
+    paths:
+    - 'terra-jupyter-python/**'
+    - '.github/workflows/test-terra-jupyter-python.yml'
 
   workflow_dispatch:
     # Allows manually triggering of workflow on a selected branch via the GitHub Actions tab.
diff --git a/terra-jupyter-aou/Dockerfile b/terra-jupyter-aou/Dockerfile
index bad83323..d10302fe 100644
--- a/terra-jupyter-aou/Dockerfile
+++ b/terra-jupyter-aou/Dockerfile
@@ -1,4 +1,4 @@
-FROM us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:0.0.23 AS python
+FROM us.gcr.io/broad-dsp-gcr-public/terra-jupyter-python:0.0.24 AS python
 
 FROM us.gcr.io/broad-dsp-gcr-public/terra-jupyter-r:1.0.13
 
@@ -85,9 +85,4 @@ ENV USER jupyter-user
 USER $USER
 
 RUN pip3 install --upgrade \
-      pandas-profiling==2.10.1 \
-      plotnine==0.7.1 \
-      # Parent image pins tensorflow to an old alpha version. Override here for now.
-      tensorflow==2.3.0 \
-      numpy==1.18.5 \
       "git+git://github.com/all-of-us/workbench-snippets.git#egg=terra_widgets&subdirectory=py"
diff --git a/terra-jupyter-gatk/tests/gatk_smoke_test.ipynb b/terra-jupyter-gatk/tests/gatk_smoke_test.ipynb
new file mode 100644
index 00000000..282e143f
--- /dev/null
+++ b/terra-jupyter-gatk/tests/gatk_smoke_test.ipynb
@@ -0,0 +1,95 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test cases requiring or benefiting from the context of a notebook\n",
+    "\n",
+    "If the notebook runs successfully from start to finish, the test is successful!\n",
+    "\n",
+    "TODO(all): Add additional tests and/or tests with particular assertions, as we encounter Python package version incompatibilities not currently detected by these tests.\n",
+    "\n",
+    "In general, only add test cases here that require the context of a notebook. This is because this notebook, as currently written, will abort at the **first** failure. Compare this to a proper test suite where all cases are run, giving much more information about the full extent of any problems encountered."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Package versions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 freeze" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 show plotnine pandas google-cloud-storage google-cloud-bigquery google-resumable-media" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test cases requiring the context of a notebook " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test cases benefiting from the context of a notebook " + ] + } + ], + "metadata": { + "environment": { + "name": "r-cpu.3-6.m56", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/r-cpu.3-6:m56" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/terra-jupyter-hail/tests/hail_smoke_test.ipynb b/terra-jupyter-hail/tests/hail_smoke_test.ipynb new file mode 100644 index 00000000..282e143f --- /dev/null +++ b/terra-jupyter-hail/tests/hail_smoke_test.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test cases requiring or benefiting from the context of a notebook\n", + "\n", + "If the notebook runs successfully from start to finish, the test is successful!\n", + "\n", + "TODO(all): Add additional tests and/or tests with particular assertions, as we encounter Python package version incompatibilities not currently detected by these tests.\n", + "\n", + "In general, only add test cases here that require the context of a notebook. This is because this notebook, as currently written, will abort at the **first** failure. Compare this to a proper test suite where all cases are run, giving much more information about the full extent of any problems encountered." 
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Package versions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip3 freeze"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip3 show plotnine pandas google-cloud-storage google-cloud-bigquery google-resumable-media"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test cases requiring the context of a notebook "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test cases benefiting from the context of a notebook "
+   ]
+  }
+ ],
+ "metadata": {
+  "environment": {
+   "name": "r-cpu.3-6.m56",
+   "type": "gcloud",
+   "uri": "gcr.io/deeplearning-platform-release/r-cpu.3-6:m56"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.8"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": true
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/terra-jupyter-python/Dockerfile b/terra-jupyter-python/Dockerfile
index cbbef31a..8963a937 100644
--- a/terra-jupyter-python/Dockerfile
+++ b/terra-jupyter-python/Dockerfile
@@ -1,6 +1,6 @@
 FROM us.gcr.io/broad-dsp-gcr-public/terra-jupyter-base:0.0.19
 
 USER root
-#this makes it so pip runs as root, not the user
+# This makes it so pip runs as root, not the user.
 ENV PIP_USER=false
 
 RUN apt-get update && apt-get install -yq --no-install-recommends \
@@ -20,69 +20,82 @@ RUN apt-get update && apt-get install -yq --no-install-recommends \
 
 ENV HTSLIB_CONFIGURE_OPTIONS="--enable-gcs"
 
+# Dev note: in general, do not pin Python packages to any particular version.
+# Depend on the smoke tests to help us identify any package incompatibilities.
+#
+# If we find that we do need to pin a package version, be sure to:
+#   1) Add a comment saying what needs to be true for us to remove the pin.
+#      (e.g. link to an issue and put the details there)
+#   2) If the smoke tests did not show the problem, add a new test case to improve
+#      test coverage for the identified problem.
RUN pip3 -V \ && pip3 install --upgrade pip \ - && pip3 install numpy==1.15.2 \ - && pip3 install py4j==0.10.7 \ - && python3 -mpip install matplotlib==3.0.0 \ - && pip3 install pandas==0.25.3 \ - && pip3 install pandas-gbq==0.12.0 \ - && pip3 install pandas-profiling==2.4.0 \ - && pip3 install seaborn==0.9.0 \ - && pip3 install python-lzo==1.12 \ - && pip3 install google-cloud-bigquery==1.23.1 \ - && pip3 install google-api-core==1.6.0 \ - && pip3 install google-cloud-bigquery-datatransfer==0.4.1 \ - && pip3 install google-cloud-datastore==1.10.0 \ - && pip3 install google-cloud-resource-manager==0.30.0 \ - && pip3 install google-cloud-storage==1.23.0 \ - && pip3 install scikit-learn==0.20.0 \ - && pip3 install statsmodels==0.9.0 \ - && pip3 install ggplot==0.11.5 \ - && sed -i 's/pandas.lib/pandas/g' /usr/local/lib/python3.7/dist-packages/ggplot/stats/smoothers.py \ - # the next few `sed` lines are workaround for a ggplot bug. See https://github.com/yhat/ggpy/issues/662 - && sed -i 's/pandas.tslib.Timestamp/pandas.Timestamp/g' /usr/local/lib/python3.7/dist-packages/ggplot/stats/smoothers.py \ - && sed -i 's/pd.tslib.Timestamp/pd.Timestamp/g' /usr/local/lib/python3.7/dist-packages/ggplot/stats/smoothers.py \ - && sed -i 's/pd.tslib.Timestamp/pd.Timestamp/g' /usr/local/lib/python3.7/dist-packages/ggplot/utils.py \ - && pip3 install bokeh==1.0.0 \ - && pip3 install pyfasta==0.5.2 \ - && pip3 install markdown==2.4.1 \ - && pip3 install pdoc3==0.7.2 \ - && pip3 install biopython==1.72 \ - && pip3 install bx-python==0.8.2 \ - && pip3 install fastinterval==0.1.1 \ - && pip3 install matplotlib-venn==0.11.5 \ - && pip3 install bleach==1.5.0 \ - && pip3 install cycler==0.10.0 \ - && pip3 install h5py==2.7.1 \ - && pip3 install html5lib==0.9999999 \ - && pip3 install joblib==0.11 \ - && pip3 install keras==2.1.6 \ - && pip3 install patsy==0.4.1 \ - && pip3 install protobuf==3.7.1 \ - && pip3 install pymc3==3.10.0 \ - && pip3 install pyparsing==2.2.0 \ + && pip3 install numpy \ + && pip3 install py4j \ + && python3 -mpip install matplotlib \ + && pip3 install pandas \ + && pip3 install pandas-gbq \ + && pip3 install pandas-profiling \ + && pip3 install seaborn \ + && pip3 install python-lzo \ + && pip3 install google-cloud-bigquery \ + && pip3 install google-api-core \ + && pip3 install google-cloud-bigquery-datatransfer \ + && pip3 install google-cloud-datastore \ + && pip3 install google-cloud-resource-manager \ + && pip3 install google-cloud-storage \ + && pip3 install scikit-learn \ + && pip3 install statsmodels \ + && pip3 install ggplot \ + && pip3 install bokeh \ + && pip3 install pyfasta \ + && pip3 install markdown \ + && pip3 install pdoc3 \ + && pip3 install biopython \ + && pip3 install bx-python \ + && pip3 install fastinterval \ + && pip3 install matplotlib-venn \ + && pip3 install bleach \ + && pip3 install cycler \ + && pip3 install h5py \ + && pip3 install html5lib \ + && pip3 install joblib \ + && pip3 install keras \ + && pip3 install patsy \ + && pip3 install protobuf \ + && pip3 install pymc3 \ + && pip3 install pyparsing \ && pip3 install Cython \ - && pip3 install pysam==0.15.4 --no-binary pysam \ - && pip3 install python-dateutil==2.6.1 \ - && pip3 install pytz==2017.3 \ - && pip3 install pyvcf==0.6.8 \ - && pip3 install pyyaml==5.3.1 \ - && pip3 install scipy==1.2 \ - && pip3 install tensorflow==2.0.0a0 \ - && pip3 install theano==0.9.0 \ - && pip3 install tqdm==4.19.4 \ - && pip3 install werkzeug==0.12.2 \ - && pip3 install certifi==2017.4.17 \ - && pip3 install 
intel-openmp==2018.0.0 \
-    && pip3 install mkl==2018.0.3 \
-    && pip3 install readline==6.2 \
-    && pip3 install setuptools==42.0.2 \
-    && pip3 install wheel
+    && pip3 install pysam --no-binary pysam \
+    && pip3 install python-dateutil \
+    && pip3 install pytz \
+    && pip3 install pyvcf \
+    && pip3 install pyyaml \
+    && pip3 install scipy \
+    # Use the CPU version of TensorFlow to eliminate the warnings about absent GPUs on the Cloud Runtime.
+    && pip3 install tensorflow_cpu \
+    && pip3 install theano \
+    && pip3 install tqdm \
+    && pip3 install werkzeug \
+    && pip3 install certifi \
+    && pip3 install intel-openmp \
+    && pip3 install mkl \
+    && pip3 install readline \
+    && pip3 install setuptools \
+    && pip3 install wheel \
+    && pip3 install plotnine \
+    && pip3 install google-resumable-media \
+    # Remove this after https://broadworkbench.atlassian.net/browse/CA-1179
+    # As of release [google-cloud-bigquery 1.26.0 (2020-07-20)](https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#1260-2020-07-20)
+    # the BigQuery Python client uses the BigQuery Storage client by default.
+    # This currently causes an error on Terra Cloud Runtimes `the user does not have 'bigquery.readsessions.create' permission
+    # for ''`. To work around this, uninstall the dependency so that the `--use_rest_api` flag can be used
+    # with `%%bigquery` to use the older, slower mechanism for data transfer.
+    && pip3 uninstall -y google-cloud-bigquery-storage
 
 ENV USER jupyter-user
 USER $USER
 
-#we want pip to install into the user's dir when the notebook is running
+# We want pip to install into the user's dir when the notebook is running.
 ENV PIP_USER=true
 
 # Note: this entrypoint is provided for running Jupyter independently of Leonardo.
diff --git a/terra-jupyter-python/tests/smoke_test.ipynb b/terra-jupyter-python/tests/smoke_test.ipynb
index 026b1333..29d07eab 100644
--- a/terra-jupyter-python/tests/smoke_test.ipynb
+++ b/terra-jupyter-python/tests/smoke_test.ipynb
@@ -60,9 +60,11 @@
    "source": [
     "## Test BigQuery magic\n",
     "\n",
-    "TODO(deflaux) after we update the BigQuery Python client package, be sure to explicitly use flag `--use_rest_api` with `%%bigquery`\n",
     "* As of release [google-cloud-bigquery 1.26.0 (2020-07-20)](https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#1260-2020-07-20) the BigQuery Python client uses the BigQuery Storage client by default.\n",
-    "* This currently causes an error on Terra Cloud Runtimes `the user does not have 'bigquery.readsessions.create' permission for ''`."
+    "* This currently causes an error on Terra Cloud Runtimes `the user does not have 'bigquery.readsessions.create' permission for ''`.\n",
+    "* To work around this, we do two things:\n",
+    "  1. remove the dependency `google-cloud-bigquery-storage` from the `terra-jupyter-python` image\n",
+    "  1. use flag `--use_rest_api` with `%%bigquery`"
   ]
  },
 {
@@ -80,7 +82,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%bigquery\n",
+    "%%bigquery --use_rest_api\n",
     "\n",
     "SELECT country_name, alpha_2_code\n",
     "FROM `bigquery-public-data.utility_us.country_code_iso`\n",
@@ -92,14 +94,14 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Test pandas profiling\n",
-    "\n",
-    "TODO(deflaux) its a known issue that pandas-profiler is broken in the current image. Enable this test after we update the package version."
+ "## Test pandas profiling" ] }, { - "cell_type": "raw", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -162,14 +164,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Test plotnine\n", - "\n", - "TODO(deflaux) enable this as part of https://github.com/DataBiosphere/terra-docker/issues/126" + "## Test plotnine" ] }, { - "cell_type": "raw", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap\n", "from plotnine.data import mtcars\n", diff --git a/terra-jupyter-python/tests/smoke_test.py b/terra-jupyter-python/tests/smoke_test.py index dbccf15c..9039b12a 100644 --- a/terra-jupyter-python/tests/smoke_test.py +++ b/terra-jupyter-python/tests/smoke_test.py @@ -16,13 +16,12 @@ def test_pandas(): pd.DataFrame( { -# TODO(deflaux) uncomment "A" and "F" after the pandas version upgrade. -# "A": 1.0, + "A": 1.0, "B": pd.Timestamp("20130102"), "C": pd.Series(1, index=list(range(4)), dtype="float32"), "D": np.array([3] * 4, dtype="int32"), "E": pd.Categorical(["test", "train", "test", "train"]), -# "F": "foo", + "F": "foo", } )