diff --git a/.github/actions/breeze/action.yml b/.github/actions/breeze/action.yml index 69ebcc7c66e6e..d7eaa1b088bee 100644 --- a/.github/actions/breeze/action.yml +++ b/.github/actions/breeze/action.yml @@ -22,6 +22,9 @@ inputs: python-version: description: 'Python version to use' default: "3.9" + use-uv: + description: 'Whether to use uv tool' + required: true outputs: host-python-version: description: Python version used in host @@ -33,13 +36,11 @@ runs: uses: actions/setup-python@v5 with: python-version: ${{ inputs.python-version }} - cache: 'pip' - cache-dependency-path: ./dev/breeze/pyproject.toml + # NOTE! Installing Breeze without using cache is FASTER than when using cache - uv is so fast and has + # so low overhead, that just running upload cache/restore cache is slower than installing it from scratch - name: "Install Breeze" shell: bash run: ./scripts/ci/install_breeze.sh - env: - PYTHON_VERSION: ${{ inputs.python-version }} - name: "Free space" shell: bash run: breeze ci free-space diff --git a/.github/actions/checkout_target_commit/action.yml b/.github/actions/checkout_target_commit/action.yml deleted file mode 100644 index e95e8b86254a0..0000000000000 --- a/.github/actions/checkout_target_commit/action.yml +++ /dev/null @@ -1,81 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ---- -name: 'Checkout target commit' -description: > - Checks out target commit with the exception of .github scripts directories that come from the target branch -inputs: - target-commit-sha: - description: 'SHA of the target commit to checkout' - required: true - pull-request-target: - description: 'Whether the workflow is a pull request target workflow' - required: true - is-committer-build: - description: 'Whether the build is done by a committer' - required: true -runs: - using: "composite" - steps: - - name: "Checkout target commit" - uses: actions/checkout@v4 - with: - ref: ${{ inputs.target-commit-sha }} - persist-credentials: false - #################################################################################################### - # BE VERY CAREFUL HERE! THIS LINE AND THE END OF THE WARNING. IN PULL REQUEST TARGET WORKFLOW - # WE CHECK OUT THE TARGET COMMIT ABOVE TO BE ABLE TO BUILD THE IMAGE FROM SOURCES FROM THE - # INCOMING PR, RATHER THAN FROM TARGET BRANCH. THIS IS A SECURITY RISK, BECAUSE THE PR - # CAN CONTAIN ANY CODE AND WE EXECUTE IT HERE. THEREFORE, WE NEED TO BE VERY CAREFUL WHAT WE - # DO HERE. WE SHOULD NOT EXECUTE ANY CODE THAT COMES FROM THE PR. WE SHOULD NOT RUN ANY BREEZE - # COMMAND NOR SCRIPTS NOR COMPOSITE ACTIONS. WE SHOULD ONLY RUN CODE THAT IS EMBEDDED DIRECTLY IN - # THIS WORKFLOW - BECAUSE THIS IS THE ONLY CODE THAT WE CAN TRUST. 
- #################################################################################################### - - name: Checkout target branch to 'target-airflow' folder to use ci/scripts and breeze from there. - uses: actions/checkout@v4 - with: - path: "target-airflow" - ref: ${{ github.base_ref }} - persist-credentials: false - if: inputs.pull-request-target == 'true' && inputs.is-committer-build != 'true' - - name: > - Replace "scripts/ci", "dev", ".github/actions" and ".github/workflows" with the target branch - so that the those directories are not coming from the PR - shell: bash - run: | - echo - echo -e "\033[33m Replace scripts, dev, actions with target branch for non-committer builds!\033[0m" - echo - rm -rfv "scripts/ci" - rm -rfv "dev" - rm -rfv ".github/actions" - rm -rfv ".github/workflows" - rm -v ".dockerignore" || true - mv -v "target-airflow/scripts/ci" "scripts" - mv -v "target-airflow/dev" "." - mv -v "target-airflow/.github/actions" "target-airflow/.github/workflows" ".github" - mv -v "target-airflow/.dockerignore" ".dockerignore" || true - if: inputs.pull-request-target == 'true' && inputs.is-committer-build != 'true' - #################################################################################################### - # AFTER IT'S SAFE. THE `dev`, `scripts/ci` AND `.github/actions` and `.dockerignore` ARE NOW COMING - # FROM THE BASE_REF - WHICH IS THE TARGET BRANCH OF THE PR. WE CAN TRUST THAT THOSE SCRIPTS ARE - # SAFE TO RUN AND CODE AVAILABLE IN THE DOCKER BUILD PHASE IS CONTROLLED BY THE `.dockerignore`. - # ALL THE REST OF THE CODE COMES FROM THE PR, AND FOR EXAMPLE THE CODE IN THE `Dockerfile.ci` CAN - # BE RUN SAFELY AS PART OF DOCKER BUILD. BECAUSE IT RUNS INSIDE THE DOCKER CONTAINER AND IT IS - # ISOLATED FROM THE RUNNER. 
- #################################################################################################### diff --git a/.github/actions/install-pre-commit/action.yml b/.github/actions/install-pre-commit/action.yml index 8c704e5367ba2..92147d9ee0234 100644 --- a/.github/actions/install-pre-commit/action.yml +++ b/.github/actions/install-pre-commit/action.yml @@ -21,29 +21,58 @@ description: 'Installs pre-commit and related packages' inputs: python-version: description: 'Python version to use' - default: 3.9 + default: "3.9" uv-version: description: 'uv version to use' - default: 0.5.5 + default: "0.5.14" # Keep this comment to allow automatic replacement of uv version pre-commit-version: description: 'pre-commit version to use' - default: 4.0.1 + default: "4.0.1" # Keep this comment to allow automatic replacement of pre-commit version pre-commit-uv-version: description: 'pre-commit-uv version to use' - default: 4.1.4 + default: "4.1.4" # Keep this comment to allow automatic replacement of pre-commit-uv version runs: using: "composite" steps: - name: Install pre-commit, uv, and pre-commit-uv shell: bash + env: + UV_VERSION: ${{inputs.uv-version}} + PRE_COMMIT_VERSION: ${{inputs.pre-commit-version}} + PRE_COMMIT_UV_VERSION: ${{inputs.pre-commit-uv-version}} run: | - pip install uv==${{inputs.uv-version}} || true - uv tool install pre-commit==${{inputs.pre-commit-version}} --with uv==${{inputs.uv-version}} \ - --with pre-commit-uv==${{inputs.pre-commit-uv-version}} - - name: Cache pre-commit envs - uses: actions/cache@v4 + pip install uv==${UV_VERSION} || true + uv tool install pre-commit==${PRE_COMMIT_VERSION} --with uv==${UV_VERSION} \ + --with pre-commit-uv==${PRE_COMMIT_UV_VERSION} + working-directory: ${{ github.workspace }} + # We need to use tar file with archive to restore all the permissions and symlinks + - name: "Delete ~.cache" + run: | + du ~/ --max-depth=2 + echo + echo Deleting ~/.cache + echo + rm -rf ~/.cache + echo + shell: bash + - name: "Restore pre-commit cache" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c with: - path: ~/.cache/pre-commit - key: "pre-commit-${{inputs.python-version}}-${{ hashFiles('.pre-commit-config.yaml') }}" - restore-keys: | - pre-commit-${{inputs.python-version}}- + key: cache-pre-commit-v4-${{ inputs.python-version }}-${{ hashFiles('.pre-commit-config.yaml') }} + path: /tmp/ + id: restore-pre-commit-cache + - name: "Restore .cache from the tar file" + run: tar -C ~ -xzf /tmp/cache-pre-commit.tar.gz + shell: bash + if: steps.restore-pre-commit-cache.outputs.stash-hit == 'true' + - name: "Show restored files" + run: | + echo "Restored files" + du ~/ --max-depth=2 + echo + shell: bash + if: steps.restore-pre-commit-cache.outputs.stash-hit == 'true' + - name: Install pre-commit hooks + shell: bash + run: pre-commit install-hooks || (cat ~/.cache/pre-commit/pre-commit.log && exit 1) + working-directory: ${{ github.workspace }} diff --git a/.github/actions/post_tests_success/action.yml b/.github/actions/post_tests_success/action.yml index 37b51154d3e13..b7b00a6fc0df3 100644 --- a/.github/actions/post_tests_success/action.yml +++ b/.github/actions/post_tests_success/action.yml @@ -33,7 +33,7 @@ runs: - name: "Upload artifact for warnings" uses: actions/upload-artifact@v4 with: - name: test-warnings-${{env.JOB_ID}} + name: test-warnings-${{ env.JOB_ID }} path: ./files/warnings-*.txt retention-days: 7 if-no-files-found: ignore @@ -50,5 +50,5 @@ runs: if: env.ENABLE_COVERAGE == 'true' && env.TEST_TYPES != 'Helm' && 
      inputs.python-version != '3.12'
     with:
       name: coverage-${{env.JOB_ID}}
-      flags: python-${{env.PYTHON_MAJOR_MINOR_VERSION}},${{env.BACKEND}}-${{env.BACKEND_VERSION}}
+      flags: python-${{ env.PYTHON_MAJOR_MINOR_VERSION }},${{ env.BACKEND }}-${{ env.BACKEND_VERSION }}
       directory: "./files/coverage-reports/"
diff --git a/.github/actions/prepare_all_ci_images/action.yml b/.github/actions/prepare_all_ci_images/action.yml
new file mode 100644
index 0000000000000..d156818b9b283
--- /dev/null
+++ b/.github/actions/prepare_all_ci_images/action.yml
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+name: 'Prepare all CI images'
+description: 'Recreates current python CI images from artifacts for all python versions'
+inputs:
+  python-versions-list-as-string:
+    description: 'Stringified array of all Python versions to test - separated by spaces.'
+    required: true
+  platform:
+    description: 'Platform for the build - linux/amd64 or linux/arm64'
+    required: true
+runs:
+  using: "composite"
+  steps:
+    - name: "Cleanup docker"
+      run: ./scripts/ci/cleanup_docker.sh
+      shell: bash
+    # TODO: Currently we cannot loop through the list of python versions and have a dynamic list of
+    # tasks. Instead we hardcode all possible python versions and skip the ones not in the list - but
+    # this should be implemented in stash action as list of keys to download.
+ # That includes 3.8 - 3.12 as we are backporting it to v2-10-test branch + # This is captured in https://github.com/apache/airflow/issues/45268 + - name: "Restore CI docker image ${{ inputs.platform }}:3.8" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.8" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.9" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.9" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.10" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.10" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.11" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.11" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Restore CI docker image ${{ inputs.platform }}:3.12" + uses: ./.github/actions/prepare_single_ci_image + with: + platform: ${{ inputs.platform }} + python: "3.12" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} diff --git a/.github/actions/prepare_breeze_and_image/action.yml b/.github/actions/prepare_breeze_and_image/action.yml index 41aa17092d589..e6755444b2f4f 100644 --- a/.github/actions/prepare_breeze_and_image/action.yml +++ b/.github/actions/prepare_breeze_and_image/action.yml @@ -16,12 +16,21 @@ # under the License. # --- -name: 'Prepare breeze && current python image' -description: 'Installs breeze and pulls current python image' +name: 'Prepare breeze && current image (CI or PROD)' +description: 'Installs breeze and recreates current python image from artifact' inputs: - pull-image-type: - description: 'Which image to pull' - default: CI + python: + description: 'Python version for image to prepare' + required: true + image-type: + description: 'Which image type to prepare (ci/prod)' + default: "ci" + platform: + description: 'Platform for the build - linux/amd64 or linux/arm64' + required: true + use-uv: + description: 'Whether to use uv' + required: true outputs: host-python-version: description: Python version used in host @@ -29,17 +38,25 @@ outputs: runs: using: "composite" steps: + - name: "Cleanup docker" + run: ./scripts/ci/cleanup_docker.sh + shell: bash - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} id: breeze - - name: Login to ghcr.io - shell: bash - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG }} - shell: bash - run: breeze ci-image pull --tag-as-latest - if: inputs.pull-image-type == 'CI' - - name: Pull PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG }} + - name: "Restore ${{ inputs.image-type }} docker image ${{ inputs.platform }}:${{ inputs.python }}" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: ${{ inputs.image-type }}-image-save-${{ inputs.platform }}-${{ inputs.python }} + path: "/tmp/" + - name: "Load ${{ inputs.image-type }} image ${{ inputs.platform }}:${{ inputs.python }}" + env: + PLATFORM: ${{ inputs.platform }} + PYTHON: 
${{ inputs.python }} + IMAGE_TYPE: ${{ inputs.image-type }} + run: > + breeze ${IMAGE_TYPE}-image load + --platform ${PLATFORM} --python ${PYTHON} shell: bash - run: breeze prod-image pull --tag-as-latest - if: inputs.pull-image-type == 'PROD' diff --git a/.github/actions/prepare_single_ci_image/action.yml b/.github/actions/prepare_single_ci_image/action.yml new file mode 100644 index 0000000000000..3dde30033aa15 --- /dev/null +++ b/.github/actions/prepare_single_ci_image/action.yml @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: 'Prepare single CI image' +description: > + Recreates current python image from artifacts (needed for the hard-coded actions calling all + possible Python versions in "prepare_all_ci_images" action. Hopefully we can get rid of it when + the https://github.com/apache/airflow/issues/45268 is resolved and we contribute capability of + downloading multiple keys to the stash action. +inputs: + python: + description: 'Python version for image to prepare' + required: true + python-versions-list-as-string: + description: 'Stringified array of all Python versions to prepare - separated by spaces.' + required: true + platform: + description: 'Platform for the build - linux/amd64 or linux/arm64' + required: true +runs: + using: "composite" + steps: + - name: "Restore CI docker images ${{ inputs.platform }}:${{ inputs.python }}" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: ci-image-save-${{ inputs.platform }}-${{ inputs.python }} + path: "/tmp/" + if: contains(inputs.python-versions-list-as-string, inputs.python) + - name: "Load CI image ${{ inputs.platform }}:${{ inputs.python }}" + env: + PLATFORM: ${{ inputs.platform }} + PYTHON: ${{ inputs.python }} + run: breeze ci-image load --platform "${PLATFORM}" --python "${PYTHON}" + shell: bash + if: contains(inputs.python-versions-list-as-string, inputs.python) diff --git a/.github/workflows/additional-ci-image-checks.yml b/.github/workflows/additional-ci-image-checks.yml index 8a3b46e70d37d..56cee1697620c 100644 --- a/.github/workflows/additional-ci-image-checks.yml +++ b/.github/workflows/additional-ci-image-checks.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining self-hosted runners." required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "The list of python versions (stringified JSON array) to run the tests on." required: true @@ -103,8 +99,6 @@ jobs: contents: read # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs # from forks. 
This is to prevent malicious PRs from creating images in the "apache/airflow" repo. - # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the - # "in-workflow-build" condition packages: write secrets: inherit with: @@ -117,7 +111,7 @@ jobs: python-versions: ${{ inputs.python-versions }} branch: ${{ inputs.branch }} constraints-branch: ${{ inputs.constraints-branch }} - use-uv: ${{ inputs.use-uv}} + use-uv: ${{ inputs.use-uv }} include-success-outputs: ${{ inputs.include-success-outputs }} docker-cache: ${{ inputs.docker-cache }} disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} @@ -149,8 +143,13 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} - name: "Login to ghcr.io" - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin + env: + actor: ${{ github.actor }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: echo "$GITHUB_TOKEN" | docker login ghcr.io -u "$actor" --password-stdin - name: "Check that image builds quickly" run: breeze shell --max-time 600 --platform "linux/amd64" @@ -159,22 +158,23 @@ jobs: # # There is no point in running this one in "canary" run, because the above step is doing the # # same build anyway. # build-ci-arm-images: -# name: Build CI ARM images (in-workflow) +# name: Build CI ARM images # uses: ./.github/workflows/ci-image-build.yml # permissions: # contents: read # packages: write # secrets: inherit # with: +# platform: "linux/arm64" # push-image: "false" +# upload-image-artifact: "true" +# upload-mount-cache-artifact: ${{ inputs.canary-run }} # runs-on-as-json-public: ${{ inputs.runs-on-as-json-public }} # runs-on-as-json-self-hosted: ${{ inputs.runs-on-as-json-self-hosted }} -# image-tag: ${{ inputs.image-tag }} # python-versions: ${{ inputs.python-versions }} -# platform: "linux/arm64" # branch: ${{ inputs.branch }} # constraints-branch: ${{ inputs.constraints-branch }} -# use-uv: ${{ inputs.use-uv}} +# use-uv: ${{ inputs.use-uv }} # upgrade-to-newer-dependencies: ${{ inputs.upgrade-to-newer-dependencies }} # docker-cache: ${{ inputs.docker-cache }} # disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} diff --git a/.github/workflows/additional-prod-image-tests.yml b/.github/workflows/additional-prod-image-tests.yml index 5ffd2001e0e26..bca5e3a592713 100644 --- a/.github/workflows/additional-prod-image-tests.yml +++ b/.github/workflows/additional-prod-image-tests.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "Branch used to construct constraints URL from." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string upgrade-to-newer-dependencies: description: "Whether to upgrade to newer dependencies (true/false)" required: true @@ -60,6 +56,10 @@ on: # yamllint disable-line rule:truthy description: "Which version of python should be used by default" required: true type: string + use-uv: + description: "Whether to use uv" + required: true + type: string jobs: prod-image-extra-checks-main: name: PROD image extra checks (main) @@ -70,7 +70,6 @@ jobs: default-python-version: ${{ inputs.default-python-version }} branch: ${{ inputs.default-branch }} use-uv: "false" - image-tag: ${{ inputs.image-tag }} build-provider-packages: ${{ inputs.default-branch == 'main' }} upgrade-to-newer-dependencies: ${{ inputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ inputs.chicken-egg-providers }} @@ -88,7 +87,6 @@ jobs: default-python-version: ${{ inputs.default-python-version }} branch: ${{ inputs.default-branch }} use-uv: "false" - image-tag: ${{ inputs.image-tag }} build-provider-packages: ${{ inputs.default-branch == 'main' }} upgrade-to-newer-dependencies: ${{ inputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ inputs.chicken-egg-providers }} @@ -117,36 +115,30 @@ jobs: persist-credentials: false - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - - name: "Install Breeze" - uses: ./.github/actions/breeze - - name: Login to ghcr.io - shell: bash - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: Pull PROD image ${{ inputs.default-python-version}}:${{ inputs.image-tag }} - run: breeze prod-image pull --tag-as-latest - env: - PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" - IMAGE_TAG: "${{ inputs.image-tag }}" - - name: "Setup python" - uses: actions/setup-python@v5 + - name: "Prepare breeze & PROD image: ${{ inputs.default-python-version }}" + uses: ./.github/actions/prepare_breeze_and_image with: - python-version: ${{ inputs.default-python-version }} - cache: 'pip' - cache-dependency-path: ./dev/requirements.txt + platform: "linux/amd64" + image-type: "prod" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Test examples of PROD image building" + env: + GITHUB_REPOSITORY: ${{ github.repository }} + DEFAULT_BRANCH: ${{ inputs.default-branch }} + DEFAULT_PYTHON_VERSION: ${{ inputs.default-python-version }} run: " cd ./docker_tests && \ python -m pip install -r requirements.txt && \ - TEST_IMAGE=\"ghcr.io/${{ github.repository }}/${{ inputs.default-branch }}\ - /prod/python${{ inputs.default-python-version }}:${{ inputs.image-tag }}\" \ + TEST_IMAGE=\"ghcr.io/$GITHUB_REPOSITORY/$DEFAULT_BRANCH\ + /prod/python$DEFAULT_PYTHON_VERSION\" \ python -m pytest test_examples_of_prod_image_building.py -n auto --color=yes" test-docker-compose-quick-start: timeout-minutes: 60 - name: "Docker-compose quick start with PROD image verifying" + name: "Docker Compose quick start with PROD image verifying" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -161,14 +153,13 @@ jobs: with: fetch-depth: 2 persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Install Breeze" - uses: ./.github/actions/breeze - - name: Login to 
ghcr.io - shell: bash - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "Pull image ${{ inputs.default-python-version}}:${{ inputs.image-tag }}" - run: breeze prod-image pull --tag-as-latest + - name: "Prepare breeze & PROD image: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + image-type: "prod" + python: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} + use-uv: ${{ inputs.use-uv }} + id: breeze - name: "Test docker-compose quick start" run: breeze testing docker-compose-tests diff --git a/.github/workflows/backport-cli.yml b/.github/workflows/backport-cli.yml index 0a091cc7dbcef..3706cd65bb01e 100644 --- a/.github/workflows/backport-cli.yml +++ b/.github/workflows/backport-cli.yml @@ -64,24 +64,28 @@ jobs: id: execute-backport env: GH_AUTH: ${{ secrets.GITHUB_TOKEN }} + TARGET_BRANCH: ${{ inputs.target-branch }} + COMMIT_SHA: ${{ inputs.commit-sha }} run: | git config --global user.email "name@example.com" git config --global user.name "Your Name" set +e { echo 'cherry_picker_output<> "${GITHUB_OUTPUT}" continue-on-error: true - name: Parse backport output id: parse-backport-output + env: + CHERRY_PICKER_OUTPUT: ${{ steps.execute-backport.outputs.cherry_picker_output }} run: | set +e - echo "${{ steps.execute-backport.outputs.cherry_picker_output }}" + echo "${CHERRY_PICKER_OUTPUT}" - url=$(echo "${{ steps.execute-backport.outputs.cherry_picker_output }}" | \ + url=$(echo "${CHERRY_PICKER_OUTPUT}" | \ grep -o 'Backport PR created at https://[^ ]*' | \ awk '{print $5}') @@ -99,17 +103,20 @@ jobs: GH_TOKEN: ${{ github.token }} REPOSITORY: ${{ github.repository }} RUN_ID: ${{ github.run_id }} + COMMIT_SHA: ${{ inputs.commit-sha }} + TARGET_BRANCH: ${{ inputs.target-branch }} + BACKPORT_URL: ${{ steps.parse-backport-output.outputs.backport-url }} run: | - COMMIT_INFO_URL="https://api.github.com/repos/${{ github.repository }}/commits/" - COMMIT_INFO_URL="${COMMIT_INFO_URL}${{ inputs.commit-sha }}/pulls" + COMMIT_INFO_URL="https://api.github.com/repos/$REPOSITORY/commits/" + COMMIT_INFO_URL="${COMMIT_INFO_URL}$COMMIT_SHA/pulls" PR_NUMBER=$(gh api \ -H "Accept: application/vnd.github+json" \ -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/commits/${{ inputs.commit-sha }}/pulls \ + /repos/$REPOSITORY/commits/$COMMIT_SHA/pulls \ --jq '.[0].number') python ./dev/backport/update_backport_status.py \ - ${{ steps.parse-backport-output.outputs.backport-url }} \ - ${{ inputs.commit-sha }} ${{ inputs.target-branch }} \ + $BACKPORT_URL \ + $COMMIT_SHA $TARGET_BRANCH \ "$PR_NUMBER" diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index c8ba85969f5e3..353f65d9a6c9c 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -56,6 +56,10 @@ on: # yamllint disable-line rule:truthy description: "Whether to run only latest version checks (true/false)" required: true type: string + use-uv: + description: "Whether to use uv in the image" + required: true + type: string jobs: run-breeze-tests: timeout-minutes: 10 @@ -72,16 +76,12 @@ jobs: persist-credentials: false - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - - uses: actions/setup-python@v5 + - name: "Install Breeze" + uses: ./.github/actions/breeze with: - python-version: "${{ inputs.default-python-version }}" - cache: 'pip' - cache-dependency-path: ./dev/breeze/pyproject.toml - - run: pip install --editable ./dev/breeze/ - - 
run: python -m pytest -n auto --color=yes + use-uv: ${{ inputs.use-uv }} + - run: uv tool run --from apache-airflow-breeze pytest -n auto --color=yes working-directory: ./dev/breeze/ - - tests-ui: timeout-minutes: 10 name: React UI tests @@ -108,15 +108,24 @@ jobs: node-version: 21 cache: 'pnpm' cache-dependency-path: 'airflow/ui/pnpm-lock.yaml' - - name: "Cache eslint" - uses: actions/cache@v4 + - name: "Restore eslint cache (ui)" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c with: - path: 'airflow/ui/node_modules' - key: ${{ runner.os }}-ui-node-modules-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }} + path: airflow/ui/node_modules/ + key: cache-ui-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }} + id: restore-eslint-cache - run: cd airflow/ui && pnpm install --frozen-lockfile - run: cd airflow/ui && pnpm test env: FORCE_COLOR: 2 + - name: "Save eslint cache (ui)" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + path: airflow/ui/node_modules/ + key: cache-ui-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/ui/**/pnpm-lock.yaml') }} + if-no-files-found: 'error' + retention-days: '2' + if: steps.restore-eslint-cache.outputs.stash-hit != 'true' tests-www: timeout-minutes: 10 @@ -137,15 +146,50 @@ jobs: uses: actions/setup-node@v4 with: node-version: 21 - - name: "Cache eslint" - uses: actions/cache@v4 + - name: "Restore eslint cache (www)" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c with: - path: 'airflow/www/node_modules' - key: ${{ runner.os }}-www-node-modules-${{ hashFiles('airflow/www/**/yarn.lock') }} + path: airflow/www/node_modules/ + key: cache-www-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/www/**/yarn.lock') }} + id: restore-eslint-cache - run: yarn --cwd airflow/www/ install --frozen-lockfile --non-interactive - run: yarn --cwd airflow/www/ run test env: FORCE_COLOR: 2 + - name: "Save eslint cache (www)" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + path: airflow/www/node_modules/ + key: cache-www-node-modules-v1-${{ runner.os }}-${{ hashFiles('airflow/www/**/yarn.lock') }} + if-no-files-found: 'error' + retention-days: '2' + if: steps.restore-eslint-cache.outputs.stash-hit != 'true' + + install-pre-commit: + timeout-minutes: 5 + name: "Install pre-commit for cache" + runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} + env: + PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + if: inputs.basic-checks-only == 'true' + steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + - name: "Install Breeze" + uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} + id: breeze + - name: "Install pre-commit" + uses: ./.github/actions/install-pre-commit + id: pre-commit + with: + python-version: ${{steps.breeze.outputs.host-python-version}} # Those checks are run if no image needs to be built for checks. 
This is for simple changes that # Do not touch any of the python code or any of the important files that might require building @@ -154,6 +198,7 @@ jobs: timeout-minutes: 30 name: "Static checks: basic checks only" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} + needs: install-pre-commit if: inputs.basic-checks-only == 'true' steps: - name: "Cleanup repo" @@ -165,20 +210,10 @@ jobs: persist-credentials: false - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - - name: "Setup python" - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.default-python-version }} - cache: 'pip' - cache-dependency-path: ./dev/breeze/pyproject.toml - - name: "Setup python" - uses: actions/setup-python@v5 - with: - python-version: "${{ inputs.default-python-version }}" - cache: 'pip' - cache-dependency-path: ./dev/breeze/pyproject.toml - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} id: breeze - name: "Install pre-commit" uses: ./.github/actions/install-pre-commit @@ -216,6 +251,7 @@ jobs: timeout-minutes: 45 name: "Upgrade checks" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} + needs: install-pre-commit env: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" if: inputs.canary-run == 'true' && inputs.latest-versions-only != 'true' @@ -229,12 +265,16 @@ jobs: persist-credentials: false - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - # Install python from scratch. No cache used. We always want to have fresh version of everything - - uses: actions/setup-python@v5 + - name: "Install Breeze" + uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} + id: breeze + - name: "Install pre-commit" + uses: ./.github/actions/install-pre-commit + id: pre-commit with: - python-version: "${{ inputs.default-python-version }}" - - name: "Install latest pre-commit" - run: pip install pre-commit + python-version: ${{steps.breeze.outputs.host-python-version}} - name: "Autoupdate all pre-commits" run: pre-commit autoupdate - name: "Run automated upgrade for black" @@ -267,11 +307,12 @@ jobs: run: > pre-commit run --all-files --show-diff-on-failure --color always --verbose - --hook-stage manual update-installers || true + --hook-stage manual update-installers-and-pre-commit || true if: always() env: UPGRADE_UV: "true" UPGRADE_PIP: "false" + UPGRADE_PRE_COMMIT: "true" - name: "Run automated upgrade for pip" run: > pre-commit run @@ -288,7 +329,6 @@ jobs: runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} env: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -306,6 +346,8 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} - name: "Cleanup dist files" run: rm -fv ./dist/* - name: Setup git for tagging diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml deleted file mode 100644 index 9135dcb9d9e94..0000000000000 --- a/.github/workflows/build-images.yml +++ /dev/null @@ -1,264 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ---- -name: Build Images -run-name: > - Build images for ${{ github.event.pull_request.title }} ${{ github.event.pull_request._links.html.href }} -on: # yamllint disable-line rule:truthy - pull_request_target: - branches: - - main - - v2-10-stable - - v2-10-test - - providers-[a-z]+-?[a-z]*/v[0-9]+-[0-9]+ -permissions: - # all other permissions are set to none - contents: read - pull-requests: read - packages: read -env: - ANSWER: "yes" - # You can override CONSTRAINTS_GITHUB_REPOSITORY by setting secret in your repo but by default the - # Airflow one is going to be used - CONSTRAINTS_GITHUB_REPOSITORY: >- - ${{ secrets.CONSTRAINTS_GITHUB_REPOSITORY != '' && - secrets.CONSTRAINTS_GITHUB_REPOSITORY || 'apache/airflow' }} - # This token is WRITE one - pull_request_target type of events always have the WRITE token - DB_RESET: "true" - GITHUB_REPOSITORY: ${{ github.repository }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ github.event.pull_request.head.sha || github.sha }}" - INCLUDE_SUCCESS_OUTPUTS: "true" - USE_SUDO: "true" - VERBOSE: "true" - -concurrency: - group: build-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - build-info: - timeout-minutes: 10 - name: Build Info - # At build-info stage we do not yet have outputs so we need to hard-code the runs-on to public runners - runs-on: ["ubuntu-22.04"] - env: - TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} - outputs: - image-tag: ${{ github.event.pull_request.head.sha || github.sha }} - python-versions: ${{ steps.selective-checks.outputs.python-versions }} - python-versions-list-as-string: ${{ steps.selective-checks.outputs.python-versions-list-as-string }} - default-python-version: ${{ steps.selective-checks.outputs.default-python-version }} - upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} - run-tests: ${{ steps.selective-checks.outputs.run-tests }} - run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} - ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }} - prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} - docker-cache: ${{ steps.selective-checks.outputs.docker-cache }} - default-branch: ${{ steps.selective-checks.outputs.default-branch }} - disable-airflow-repo-cache: ${{ steps.selective-checks.outputs.disable-airflow-repo-cache }} - force-pip: ${{ steps.selective-checks.outputs.force-pip }} - constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} - runs-on-as-json-default: ${{ steps.selective-checks.outputs.runs-on-as-json-default }} - runs-on-as-json-public: ${{ steps.selective-checks.outputs.runs-on-as-json-public }} - runs-on-as-json-self-hosted: ${{ steps.selective-checks.outputs.runs-on-as-json-self-hosted }} - is-self-hosted-runner: ${{ steps.selective-checks.outputs.is-self-hosted-runner }} - 
is-committer-build: ${{ steps.selective-checks.outputs.is-committer-build }} - is-airflow-runner: ${{ steps.selective-checks.outputs.is-airflow-runner }} - is-amd-runner: ${{ steps.selective-checks.outputs.is-amd-runner }} - is-arm-runner: ${{ steps.selective-checks.outputs.is-arm-runner }} - is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} - is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }} - chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} - target-commit-sha: "${{steps.discover-pr-merge-commit.outputs.target-commit-sha || - github.event.pull_request.head.sha || - github.sha - }}" - if: github.repository == 'apache/airflow' - steps: - - name: Cleanup repo - shell: bash - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - - name: Discover PR merge commit - id: discover-pr-merge-commit - run: | - # Sometimes target-commit-sha cannot be - TARGET_COMMIT_SHA="$(gh api '${{ github.event.pull_request.url }}' --jq .merge_commit_sha)" - if [[ ${TARGET_COMMIT_SHA} == "" ]]; then - # Sometimes retrieving the merge commit SHA from PR fails. We retry it once. Otherwise we - # fall-back to github.event.pull_request.head.sha - echo - echo "Could not retrieve merge commit SHA from PR, waiting for 3 seconds and retrying." - echo - sleep 3 - TARGET_COMMIT_SHA="$(gh api '${{ github.event.pull_request.url }}' --jq .merge_commit_sha)" - if [[ ${TARGET_COMMIT_SHA} == "" ]]; then - echo - echo "Could not retrieve merge commit SHA from PR, falling back to PR head SHA." - echo - TARGET_COMMIT_SHA="${{ github.event.pull_request.head.sha }}" - fi - fi - echo "TARGET_COMMIT_SHA=${TARGET_COMMIT_SHA}" - echo "TARGET_COMMIT_SHA=${TARGET_COMMIT_SHA}" >> ${GITHUB_ENV} - echo "target-commit-sha=${TARGET_COMMIT_SHA}" >> ${GITHUB_OUTPUT} - if: github.event_name == 'pull_request_target' - # The labels in the event aren't updated when re-triggering the job, So lets hit the API to get - # up-to-date values - - name: Get latest PR labels - id: get-latest-pr-labels - run: | - echo -n "pull-request-labels=" >> ${GITHUB_OUTPUT} - gh api graphql --paginate -F node_id=${{github.event.pull_request.node_id}} -f query=' - query($node_id: ID!, $endCursor: String) { - node(id:$node_id) { - ... on PullRequest { - labels(first: 100, after: $endCursor) { - nodes { name } - pageInfo { hasNextPage endCursor } - } - } - } - }' --jq '.data.node.labels.nodes[]' | jq --slurp -c '[.[].name]' >> ${GITHUB_OUTPUT} - if: github.event_name == 'pull_request_target' - - uses: actions/checkout@v4 - with: - ref: ${{ env.TARGET_COMMIT_SHA }} - persist-credentials: false - fetch-depth: 2 - #################################################################################################### - # WE ONLY DO THAT CHECKOUT ABOVE TO RETRIEVE THE TARGET COMMIT AND IT'S PARENT. DO NOT RUN ANY CODE - # RIGHT AFTER THAT AS WE ARE GOING TO RESTORE THE TARGET BRANCH CODE IN THE NEXT STEP. - #################################################################################################### - - name: Checkout target branch to use ci/scripts and breeze from there. - uses: actions/checkout@v4 - with: - ref: ${{ github.base_ref }} - persist-credentials: false - #################################################################################################### - # HERE EVERYTHING IS PERFECTLY SAFE TO RUN. AT THIS POINT WE HAVE THE TARGET BRANCH CHECKED OUT - # AND WE CAN RUN ANY CODE FROM IT. 
WE CAN RUN BREEZE COMMANDS, WE CAN RUN SCRIPTS, WE CAN RUN - # COMPOSITE ACTIONS. WE CAN RUN ANYTHING THAT IS IN THE TARGET BRANCH AND THERE IS NO RISK THAT - # CODE WILL BE RUN FROM THE PR. - #################################################################################################### - - name: Cleanup docker - run: ./scripts/ci/cleanup_docker.sh - - name: Setup python - uses: actions/setup-python@v5 - with: - python-version: "3.9" - - name: Install Breeze - uses: ./.github/actions/breeze - #################################################################################################### - # WE RUN SELECTIVE CHECKS HERE USING THE TARGET COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM - # AND SEE WHAT HAS CHANGED IN THE PR. THE CODE IS STILL RUN FROM THE TARGET BRANCH, SO IT IS SAFE - # TO RUN IT, WE ONLY PASS TARGET_COMMIT_SHA SO THAT SELECTIVE CHECKS CAN SEE WHAT'S COMING IN THE PR - #################################################################################################### - - name: Selective checks - id: selective-checks - env: - PR_LABELS: "${{ steps.get-latest-pr-labels.outputs.pull-request-labels }}" - COMMIT_REF: "${{ env.TARGET_COMMIT_SHA }}" - VERBOSE: "false" - AIRFLOW_SOURCES_ROOT: "${{ github.workspace }}" - run: breeze ci selective-check 2>> ${GITHUB_OUTPUT} - - name: env - run: printenv - env: - PR_LABELS: ${{ steps.get-latest-pr-labels.outputs.pull-request-labels }} - GITHUB_CONTEXT: ${{ toJson(github) }} - - - build-ci-images: - name: Build CI images - permissions: - contents: read - packages: write - secrets: inherit - needs: [build-info] - uses: ./.github/workflows/ci-image-build.yml - # Only run this it if the PR comes from fork, otherwise build will be done "in-PR-workflow" - if: | - needs.build-info.outputs.ci-image-build == 'true' && - github.event.pull_request.head.repo.full_name != 'apache/airflow' - with: - runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - do-build: ${{ needs.build-info.outputs.ci-image-build }} - target-commit-sha: ${{ needs.build-info.outputs.target-commit-sha }} - pull-request-target: "true" - is-committer-build: ${{ needs.build-info.outputs.is-committer-build }} - push-image: "true" - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} - image-tag: ${{ needs.build-info.outputs.image-tag }} - platform: "linux/amd64" - python-versions: ${{ needs.build-info.outputs.python-versions }} - branch: ${{ needs.build-info.outputs.default-branch }} - constraints-branch: ${{ needs.build-info.outputs.constraints-branch }} - upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} - docker-cache: ${{ needs.build-info.outputs.docker-cache }} - disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} - - - generate-constraints: - name: Generate constraints - needs: [build-info, build-ci-images] - uses: ./.github/workflows/generate-constraints.yml - with: - runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} - # For regular PRs we do not need "no providers" constraints - they are only needed in canary builds - generate-no-providers-constraints: "false" - image-tag: ${{ needs.build-info.outputs.image-tag }} - chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} - debug-resources: ${{ 
needs.build-info.outputs.debug-resources }} - - build-prod-images: - name: Build PROD images - permissions: - contents: read - packages: write - secrets: inherit - needs: [build-info, generate-constraints] - uses: ./.github/workflows/prod-image-build.yml - # Only run this it if the PR comes from fork, otherwise build will be done "in-PR-workflow" - if: | - needs.build-info.outputs.prod-image-build == 'true' && - github.event.pull_request.head.repo.full_name != 'apache/airflow' - with: - runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - build-type: "Regular" - do-build: ${{ needs.build-info.outputs.ci-image-build }} - upload-package-artifact: "true" - target-commit-sha: ${{ needs.build-info.outputs.target-commit-sha }} - pull-request-target: "true" - is-committer-build: ${{ needs.build-info.outputs.is-committer-build }} - push-image: "true" - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} - image-tag: ${{ needs.build-info.outputs.image-tag }} - platform: linux/amd64 - python-versions: ${{ needs.build-info.outputs.python-versions }} - default-python-version: ${{ needs.build-info.outputs.default-python-version }} - branch: ${{ needs.build-info.outputs.default-branch }} - constraints-branch: ${{ needs.build-info.outputs.constraints-branch }} - build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} - upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} - chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} - docker-cache: ${{ needs.build-info.outputs.docker-cache }} - disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} diff --git a/.github/workflows/ci-image-build.yml b/.github/workflows/ci-image-build.yml index b8e2feac1755f..d15c297d82a00 100644 --- a/.github/workflows/ci-image-build.yml +++ b/.github/workflows/ci-image-build.yml @@ -28,13 +28,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining self-hosted runners." required: true type: string - do-build: - description: > - Whether to actually do the build (true/false). If set to false, the build is done - already in pull-request-target workflow, so we skip it here. 
- required: false - default: "true" - type: string target-commit-sha: description: "The commit SHA to checkout for the build" required: false @@ -59,6 +52,14 @@ on: # yamllint disable-line rule:truthy required: false default: "true" type: string + upload-image-artifact: + description: "Whether to upload docker image artifact" + required: true + type: string + upload-mount-cache-artifact: + description: "Whether to upload mount-cache artifact" + required: true + type: string debian-version: description: "Base Debian distribution to use for the build (bookworm)" type: string @@ -71,10 +72,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to use uv to build the image (true/false)" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "JSON-formatted array of Python versions to build images from" required: true @@ -104,20 +101,9 @@ jobs: strategy: fail-fast: true matrix: - # yamllint disable-line rule:line-length - python-version: ${{ inputs.do-build == 'true' && fromJSON(inputs.python-versions) || fromJSON('[""]') }} + python-version: ${{ fromJSON(inputs.python-versions) || fromJSON('[""]') }} timeout-minutes: 110 - name: "\ -${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} \ -CI ${{ inputs.platform }} image\ -${{ matrix.python-version }}${{ inputs.do-build == 'true' && ':' || '' }}\ -${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" - # The ARM images need to be built using self-hosted runners as ARM macos public runners - # do not yet allow us to run docker effectively and fast. - # https://github.com/actions/runner-images/issues/9254#issuecomment-1917916016 - # https://github.com/abiosoft/colima/issues/970 - # https://github.com/actions/runner/issues/1456 - # See https://github.com/apache/airflow/pull/38640 + name: "Build CI ${{ inputs.platform }} image ${{ matrix.python-version }}" # NOTE!!!!! This has to be put in one line for runs-on to recognize the "fromJSON" properly !!!! # adding space before (with >) apparently turns the `runs-on` processed line into a string "Array" # instead of an array of strings. 
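For orientation, the reusable ci-image-build.yml workflow is now driven by the artifact-related inputs declared above (upload-image-artifact, upload-mount-cache-artifact) instead of an image tag. Below is a minimal sketch of a caller job, not part of the patch: the job name and the concrete values are illustrative assumptions, while the input names mirror the declarations in this diff and the (commented-out) ARM build invocation elsewhere in it.

# Hypothetical caller job - values are examples only.
jobs:
  build-ci-images:
    uses: ./.github/workflows/ci-image-build.yml
    permissions:
      contents: read
      packages: write
    secrets: inherit
    with:
      platform: "linux/amd64"
      push-image: "false"
      upload-image-artifact: "true"           # export the built image and stash it for later jobs
      upload-mount-cache-artifact: "false"    # typically only canary runs upload the mount cache
      runs-on-as-json-public: '["ubuntu-22.04"]'
      runs-on-as-json-self-hosted: '["self-hosted"]'
      python-versions: '["3.9", "3.10", "3.11", "3.12"]'
      branch: "main"
      constraints-branch: "constraints-main"
      use-uv: "true"
      upgrade-to-newer-dependencies: "false"
      docker-cache: "registry"
      disable-airflow-repo-cache: "false"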
@@ -125,54 +111,51 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" runs-on: ${{ (inputs.platform == 'linux/amd64') && fromJSON(inputs.runs-on-as-json-public) || fromJSON(inputs.runs-on-as-json-self-hosted) }} env: BACKEND: sqlite + PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} DEFAULT_BRANCH: ${{ inputs.branch }} DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} VERSION_SUFFIX_FOR_PYPI: "dev0" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - USE_UV: ${{ inputs.use-uv }} VERBOSE: "true" steps: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: inputs.do-build == 'true' - name: "Checkout target branch" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Checkout target commit" - uses: ./.github/actions/checkout_target_commit - if: inputs.do-build == 'true' - with: - target-commit-sha: ${{ inputs.target-commit-sha }} - pull-request-target: ${{ inputs.pull-request-target }} - is-committer-build: ${{ inputs.is-committer-build }} - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - if: inputs.do-build == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze - if: inputs.do-build == 'true' - - name: "Regenerate dependencies in case they were modified manually so that we can build an image" - shell: bash - run: | - pip install rich>=12.4.4 pyyaml - python scripts/ci/pre_commit/update_providers_dependencies.py - if: inputs.do-build == 'true' && inputs.upgrade-to-newer-dependencies != 'false' - - name: "Start ARM instance" - run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: inputs.do-build == 'true' && inputs.platform == 'linux/arm64' - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - if: inputs.do-build == 'true' + with: + use-uv: ${{ inputs.use-uv }} + - name: "Restore ci-cache mount image ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "ci-cache-mount-save-v2-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + path: "/tmp/" + id: restore-cache-mount + - name: "Import mount-cache ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + env: + PYTHON_MAJOR_MINOR_VERSION: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} + run: > + breeze ci-image import-mount-cache + --cache-file /tmp/ci-cache-mount-save-v2-${PYTHON_MAJOR_MINOR_VERSION}.tar.gz + if: steps.restore-cache-mount.outputs.stash-hit == 'true' + - name: "Login to ghcr.io" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ACTOR: ${{ github.actor }} + run: echo "${GITHUB_TOKEN}" | docker login ghcr.io -u ${ACTOR} --password-stdin - name: > Build ${{ inputs.push-image == 'true' && ' & push ' || '' }} - ${{ inputs.platform }}:${{ matrix.python-version }}:${{ inputs.image-tag }} + ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }} image run: > - breeze ci-image build --builder airflow_cache --tag-as-latest --image-tag "${{ inputs.image-tag }}" - --python "${{ matrix.python-version }}" --platform "${{ inputs.platform }}" + breeze ci-image build --platform "${PLATFORM}" env: DOCKER_CACHE: ${{ inputs.docker-cache }} DISABLE_AIRFLOW_REPO_CACHE: ${{ inputs.disable-airflow-repo-cache }} @@ -189,7 +172,32 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" 
GITHUB_USERNAME: ${{ github.actor }} PUSH: ${{ inputs.push-image }} VERBOSE: "true" - if: inputs.do-build == 'true' - - name: "Stop ARM instance" - run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && inputs.do-build == 'true' && inputs.platform == 'linux/arm64' + PLATFORM: ${{ inputs.platform }} + - name: "Export CI docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + env: + PLATFORM: ${{ inputs.platform }} + run: breeze ci-image save --platform "${PLATFORM}" + if: inputs.upload-image-artifact == 'true' + - name: "Stash CI docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: ci-image-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }} + path: "/tmp/ci-image-save-*-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar" + if-no-files-found: 'error' + retention-days: '2' + if: inputs.upload-image-artifact == 'true' + - name: "Export mount cache ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + env: + PYTHON_MAJOR_MINOR_VERSION: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} + run: > + breeze ci-image export-mount-cache + --cache-file /tmp/ci-cache-mount-save-v2-${PYTHON_MAJOR_MINOR_VERSION}.tar.gz + if: inputs.upload-mount-cache-artifact == 'true' + - name: "Stash cache mount ${{ inputs.platform }}:${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: "ci-cache-mount-save-v2-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + path: "/tmp/ci-cache-mount-save-v2-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar.gz" + if-no-files-found: 'error' + retention-days: 2 + if: inputs.upload-mount-cache-artifact == 'true' diff --git a/.github/workflows/ci-image-checks.yml b/.github/workflows/ci-image-checks.yml index 63598755c32d0..21c857e7bd710 100644 --- a/.github/workflows/ci-image-checks.yml +++ b/.github/workflows/ci-image-checks.yml @@ -28,10 +28,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining the labels used for docs build." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string needs-mypy: description: "Whether to run mypy checks (true/false)" required: true @@ -108,16 +104,64 @@ on: # yamllint disable-line rule:truthy description: "Whether to run coverage or not (true/false)" required: true type: string + use-uv: + description: "Whether to use uv to build the image (true/false)" + required: true + type: string jobs: + install-pre-commit: + timeout-minutes: 5 + name: "Install pre-commit for cache (only canary runs)" + runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} + env: + PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + if: inputs.basic-checks-only == 'false' + steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + if: inputs.canary-run == 'true' + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + if: inputs.canary-run == 'true' + - name: "Install Breeze" + uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} + id: breeze + if: inputs.canary-run == 'true' + - name: "Install pre-commit" + uses: ./.github/actions/install-pre-commit + id: pre-commit + with: + python-version: ${{steps.breeze.outputs.host-python-version}} + if: inputs.canary-run == 'true' + - name: "Prepare .tar file from pre-commit cache" + run: | + tar -C ~ -czf /tmp/cache-pre-commit.tar.gz .cache/pre-commit .cache/uv + shell: bash + if: inputs.canary-run == 'true' + - name: "Save pre-commit cache" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + # yamllint disable rule:line-length + key: cache-pre-commit-v4-${{ steps.breeze.outputs.host-python-version }}-${{ hashFiles('.pre-commit-config.yaml') }} + path: /tmp/cache-pre-commit.tar.gz + if-no-files-found: 'error' + retention-days: '2' + if: inputs.canary-run == 'true' + static-checks: timeout-minutes: 45 name: "Static checks" runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} + needs: install-pre-commit env: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" UPGRADE_TO_NEWER_DEPENDENCIES: "${{ inputs.upgrade-to-newer-dependencies }}" - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} if: inputs.basic-checks-only == 'false' && inputs.latest-versions-only != 'true' steps: @@ -128,16 +172,12 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Setup python" - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.default-python-version }} - cache: 'pip' - cache-dependency-path: ./dev/breeze/pyproject.toml - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version}}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} id: breeze - name: "Install pre-commit" uses: ./.github/actions/install-pre-commit @@ -158,6 +198,7 @@ jobs: timeout-minutes: 45 name: "MyPy checks" runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} + needs: install-pre-commit if: inputs.needs-mypy == 'true' strategy: fail-fast: false @@ -165,7 +206,6 @@ jobs: mypy-check: ${{ fromJSON(inputs.mypy-checks) }} env: PYTHON_MAJOR_MINOR_VERSION: 
"${{inputs.default-python-version}}" - IMAGE_TAG: "${{ inputs.image-tag }}" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: "Cleanup repo" @@ -175,10 +215,12 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} id: breeze - name: "Install pre-commit" uses: ./.github/actions/install-pre-commit @@ -186,7 +228,7 @@ jobs: with: python-version: ${{steps.breeze.outputs.host-python-version}} - name: "MyPy checks for ${{ matrix.mypy-check }}" - run: pre-commit run --color always --verbose --hook-stage manual ${{matrix.mypy-check}} --all-files + run: pre-commit run --color always --verbose --hook-stage manual "$MYPY_CHECK" --all-files env: VERBOSE: "false" COLUMNS: "250" @@ -194,6 +236,7 @@ jobs: DEFAULT_BRANCH: ${{ inputs.branch }} RUFF_FORMAT: "github" INCLUDE_MYPY_VOLUME: "false" + MYPY_CHECK: ${{ matrix.mypy-check }} build-docs: timeout-minutes: 150 @@ -208,7 +251,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -221,28 +263,39 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image - - uses: actions/cache@v4 - id: cache-doc-inventories + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} + - name: "Restore docs inventory cache" + uses: apache/infrastructure-actions/stash/restore@c94b890bbedc2fc61466d28e6bd9966bc6c6643c with: path: ./docs/_inventory_cache/ - key: docs-inventory-${{ hashFiles('pyproject.toml;') }} - restore-keys: | - docs-inventory-${{ hashFiles('pyproject.toml;') }} - docs-inventory- + # TODO(potiuk): do better with determining the key + key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }} + id: restore-docs-inventory-cache - name: "Building docs with ${{ matrix.flag }} flag" + env: + DOCS_LIST_AS_STRING: ${{ inputs.docs-list-as-string }} run: > - breeze build-docs ${{ inputs.docs-list-as-string }} ${{ matrix.flag }} + breeze build-docs ${DOCS_LIST_AS_STRING} ${{ matrix.flag }} + - name: "Save docs inventory cache" + uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + path: ./docs/_inventory_cache/ + key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }} + if-no-files-found: 'error' + retention-days: '2' + if: steps.restore-docs-inventory-cache != 'true' - name: "Upload build docs" uses: actions/upload-artifact@v4 with: name: airflow-docs path: './docs/_build' - retention-days: 7 - if-no-files-found: error + retention-days: '7' + if-no-files-found: 'error' if: matrix.flag == '--docs-only' publish-docs: @@ -254,7 +307,6 @@ jobs: GITHUB_REPOSITORY: ${{ 
github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -283,12 +335,18 @@ jobs: run: > git clone https://github.com/apache/airflow-site.git /mnt/airflow-site/airflow-site && echo "AIRFLOW_SITE_DIRECTORY=/mnt/airflow-site/airflow-site" >> "$GITHUB_ENV" - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Publish docs" + env: + DOCS_LIST_AS_STRING: ${{ inputs.docs-list-as-string }} run: > breeze release-management publish-docs --override-versioned --run-in-parallel - ${{ inputs.docs-list-as-string }} + ${DOCS_LIST_AS_STRING} - name: Check disk space available run: df -h - name: "Generate back references for providers" @@ -331,7 +389,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" JOB_ID: "python-api-client-tests" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERBOSE: "true" @@ -353,8 +410,12 @@ jobs: fetch-depth: 1 persist-credentials: false path: ./airflow-client-python - - name: "Prepare breeze & CI image: ${{inputs.default-python-version}}:${{inputs.image-tag}}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Generate airflow python client" run: > breeze release-management prepare-python-client --package-format both diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09cc3328dd8a7..d820bd4c6ec47 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,19 +32,12 @@ on: # yamllint disable-line rule:truthy - providers-[a-z]+-?[a-z]*/v[0-9]+-[0-9]+ workflow_dispatch: permissions: - # All other permissions are set to none + # All other permissions are set to none by default contents: read - # Technically read access while waiting for images should be more than enough. However, - # there is a bug in GitHub Actions/Packages and in case private repositories are used, you get a permission - # denied error when attempting to just pull private image, changing the token permission to write solves the - # issue. This is not dangerous, because if it is for "apache/airflow", only maintainers can push ci.yml - # changes. If it is for a fork, then the token is read-only anyway. 
- packages: write env: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ github.event.pull_request.head.sha || github.sha }}" SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} VERBOSE: "true" @@ -64,7 +57,6 @@ jobs: all-python-versions-list-as-string: >- ${{ steps.selective-checks.outputs.all-python-versions-list-as-string }} basic-checks-only: ${{ steps.selective-checks.outputs.basic-checks-only }} - build-job-description: ${{ steps.source-run-info.outputs.build-job-description }} canary-run: ${{ steps.source-run-info.outputs.canary-run }} chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }} @@ -88,8 +80,6 @@ jobs: full-tests-needed: ${{ steps.selective-checks.outputs.full-tests-needed }} has-migrations: ${{ steps.selective-checks.outputs.has-migrations }} helm-test-packages: ${{ steps.selective-checks.outputs.helm-test-packages }} - image-tag: ${{ github.event.pull_request.head.sha || github.sha }} - in-workflow-build: ${{ steps.source-run-info.outputs.in-workflow-build }} include-success-outputs: ${{ steps.selective-checks.outputs.include-success-outputs }} individual-providers-test-types-list-as-string: >- ${{ steps.selective-checks.outputs.individual-providers-test-types-list-as-string }} @@ -99,6 +89,7 @@ jobs: is-k8s-runner: ${{ steps.selective-checks.outputs.is-k8s-runner }} is-self-hosted-runner: ${{ steps.selective-checks.outputs.is-self-hosted-runner }} is-vm-runner: ${{ steps.selective-checks.outputs.is-vm-runner }} + kubernetes-combos: ${{ steps.selective-checks.outputs.kubernetes-combos }} kubernetes-combos-list-as-string: >- ${{ steps.selective-checks.outputs.kubernetes-combos-list-as-string }} kubernetes-versions-list-as-string: >- @@ -144,6 +135,7 @@ jobs: test-groups: ${{ steps.selective-checks.outputs.test-groups }} testable-core-integrations: ${{ steps.selective-checks.outputs.testable-core-integrations }} testable-providers-integrations: ${{ steps.selective-checks.outputs.testable-providers-integrations }} + use-uv: ${{ steps.selective-checks.outputs.force-pip == 'true' && 'false' || 'true' }} upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} steps: - name: "Cleanup repo" @@ -163,6 +155,9 @@ jobs: persist-credentials: false - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} + id: breeze - name: "Get information about the Workflow" id: source-run-info run: breeze ci get-workflow-info 2>> ${GITHUB_OUTPUT} @@ -174,7 +169,6 @@ jobs: PR_LABELS: "${{ steps.source-run-info.outputs.pr-labels }}" COMMIT_REF: "${{ github.sha }}" VERBOSE: "false" - run: breeze ci selective-check 2>> ${GITHUB_OUTPUT} - name: env run: printenv @@ -196,82 +190,46 @@ jobs: skip-pre-commits: ${{needs.build-info.outputs.skip-pre-commits}} canary-run: ${{needs.build-info.outputs.canary-run}} latest-versions-only: ${{needs.build-info.outputs.latest-versions-only}} + use-uv: ${{needs.build-info.outputs.use-uv}} + build-ci-images: - name: > - ${{ needs.build-info.outputs.in-workflow-build == 'true' && 'Build' || 'Skip building' }} - CI images in-workflow + name: Build CI images needs: [build-info] uses: ./.github/workflows/ci-image-build.yml permissions: contents: read # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs # from forks. 
This is to prevent malicious PRs from creating images in the "apache/airflow" repo. - # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the - # "in-workflow-build" condition packages: write secrets: inherit with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - do-build: ${{ needs.build-info.outputs.in-workflow-build }} - image-tag: ${{ needs.build-info.outputs.image-tag }} platform: "linux/amd64" + push-image: "false" + upload-image-artifact: "true" + upload-mount-cache-artifact: ${{ needs.build-info.outputs.canary-run }} python-versions: ${{ needs.build-info.outputs.python-versions }} branch: ${{ needs.build-info.outputs.default-branch }} - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} + use-uv: ${{ needs.build-info.outputs.use-uv }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} - - wait-for-ci-images: - timeout-minutes: 120 - name: "Wait for CI images" - runs-on: ${{ fromJSON(needs.build-info.outputs.runs-on-as-json-public) }} - needs: [build-info, build-ci-images] if: needs.build-info.outputs.ci-image-build == 'true' - env: - BACKEND: sqlite - # Force more parallelism for pull even on public images - PARALLELISM: 6 - INCLUDE_SUCCESS_OUTPUTS: "${{needs.build-info.outputs.include-success-outputs}}" - steps: - - name: "Cleanup repo" - shell: bash - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 - with: - persist-credentials: false - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Install Breeze" - uses: ./.github/actions/breeze - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Wait for CI images ${{ env.PYTHON_VERSIONS }}:${{ needs.build-info.outputs.image-tag }} - id: wait-for-images - run: breeze ci-image pull --run-in-parallel --wait-for-image --tag-as-latest - env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} - DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}} - if: needs.build-info.outputs.in-workflow-build == 'false' additional-ci-image-checks: name: "Additional CI image checks" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/additional-ci-image-checks.yml + permissions: + contents: read + packages: write if: needs.build-info.outputs.canary-run == 'true' with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ 
needs.build-info.outputs.python-versions }} branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} @@ -284,35 +242,31 @@ jobs: latest-versions-only: ${{ needs.build-info.outputs.latest-versions-only }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} - + use-uv: ${{ needs.build-info.outputs.use-uv }} generate-constraints: name: "Generate constraints" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/generate-constraints.yml - if: > - needs.build-info.outputs.ci-image-build == 'true' && - needs.build-info.outputs.only-new-ui-files != 'true' + if: needs.build-info.outputs.ci-image-build == 'true' with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} # generate no providers constraints only in canary builds - they take quite some time to generate # they are not needed for regular builds, they are only needed to update constraints in canaries generate-no-providers-constraints: ${{ needs.build-info.outputs.canary-run }} - image-tag: ${{ needs.build-info.outputs.image-tag }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.use-uv }} ci-image-checks: name: "CI image checks" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/ci-image-checks.yml secrets: inherit with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} runs-on-as-json-docs-build: ${{ needs.build-info.outputs.runs-on-as-json-docs-build }} - image-tag: ${{ needs.build-info.outputs.image-tag }} needs-mypy: ${{ needs.build-info.outputs.needs-mypy }} mypy-checks: ${{ needs.build-info.outputs.mypy-checks }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} @@ -332,11 +286,12 @@ jobs: needs-api-codegen: ${{ needs.build-info.outputs.needs-api-codegen }} default-postgres-version: ${{ needs.build-info.outputs.default-postgres-version }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} + use-uv: ${{ needs.build-info.outputs.use-uv }} providers: name: "Provider packages tests" uses: ./.github/workflows/test-provider-packages.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -346,7 +301,6 @@ jobs: needs.build-info.outputs.latest-versions-only != 'true' with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} canary-run: ${{ needs.build-info.outputs.canary-run }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} @@ -356,11 +310,12 @@ jobs: skip-providers-tests: ${{ needs.build-info.outputs.skip-providers-tests }} python-versions: ${{ needs.build-info.outputs.python-versions }} providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} + use-uv: ${{ needs.build-info.outputs.use-uv }} tests-helm: name: "Helm tests" 
uses: ./.github/workflows/helm-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -369,8 +324,8 @@ jobs: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} helm-test-packages: ${{ needs.build-info.outputs.helm-test-packages }} - image-tag: ${{ needs.build-info.outputs.image-tag }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} + use-uv: ${{ needs.build-info.outputs.use-uv }} if: > needs.build-info.outputs.needs-helm-tests == 'true' && needs.build-info.outputs.default-branch == 'main' && @@ -379,7 +334,7 @@ jobs: tests-postgres: name: "Postgres tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -390,7 +345,6 @@ jobs: test-name: "Postgres" test-scope: "DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: ${{ needs.build-info.outputs.postgres-versions }} excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} @@ -401,12 +355,13 @@ jobs: run-migration-tests: "true" run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.use-uv }} if: needs.build-info.outputs.run-tests == 'true' tests-mysql: name: "MySQL tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -417,7 +372,6 @@ jobs: test-name: "MySQL" test-scope: "DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: ${{ needs.build-info.outputs.mysql-versions }} excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} @@ -428,12 +382,13 @@ jobs: run-coverage: ${{ needs.build-info.outputs.run-coverage }} run-migration-tests: "true" debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.use-uv }} if: needs.build-info.outputs.run-tests == 'true' tests-sqlite: name: "Sqlite tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -445,7 +400,6 @@ jobs: test-name-separator: "" test-scope: "DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} # No versions for sqlite backend-versions: "['']" @@ -457,12 +411,13 @@ jobs: run-coverage: ${{ needs.build-info.outputs.run-coverage }} run-migration-tests: "true" debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.use-uv }} if: needs.build-info.outputs.run-tests == 'true' tests-non-db: name: "Non-DB tests" uses: ./.github/workflows/run-unit-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -474,7 +429,6 @@ jobs: test-name-separator: "" test-scope: 
"Non-DB" test-groups: ${{ needs.build-info.outputs.test-groups }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} # No versions for non-db backend-versions: "['']" @@ -485,12 +439,13 @@ jobs: include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.use-uv }} if: needs.build-info.outputs.run-tests == 'true' tests-special: name: "Special tests" uses: ./.github/workflows/special-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read @@ -504,7 +459,6 @@ jobs: test-groups: ${{ needs.build-info.outputs.test-groups }} default-branch: ${{ needs.build-info.outputs.default-branch }} runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} core-test-types-list-as-string: ${{ needs.build-info.outputs.core-test-types-list-as-string }} providers-test-types-list-as-string: ${{ needs.build-info.outputs.providers-test-types-list-as-string }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} @@ -516,10 +470,11 @@ jobs: upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.use-uv }} tests-integration-system: name: Integration and System Tests - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/integration-system-tests.yml permissions: contents: read @@ -527,7 +482,6 @@ jobs: secrets: inherit with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} - image-tag: ${{ needs.build-info.outputs.image-tag }} testable-core-integrations: ${{ needs.build-info.outputs.testable-core-integrations }} testable-providers-integrations: ${{ needs.build-info.outputs.testable-providers-integrations }} run-system-tests: ${{ needs.build-info.outputs.run-tests }} @@ -537,11 +491,12 @@ jobs: skip-providers-tests: ${{ needs.build-info.outputs.skip-providers-tests }} run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + use-uv: ${{ needs.build-info.outputs.use-uv }} if: needs.build-info.outputs.run-tests == 'true' tests-with-lowest-direct-resolution: name: "Lowest direct dependency providers tests" - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] uses: ./.github/workflows/run-unit-tests.yml permissions: contents: read @@ -556,7 +511,6 @@ jobs: test-scope: "All" test-groups: ${{ needs.build-info.outputs.test-groups }} backend: "postgres" - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} backend-versions: "['${{ needs.build-info.outputs.default-postgres-version }}']" excluded-providers-as-string: ${{ needs.build-info.outputs.excluded-providers-as-string }} @@ -568,117 +522,70 @@ jobs: run-coverage: ${{ needs.build-info.outputs.run-coverage }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} monitor-delay-time-in-seconds: 120 + use-uv: ${{ needs.build-info.outputs.use-uv }} build-prod-images: - name: > - ${{ 
needs.build-info.outputs.in-workflow-build == 'true' && 'Build' || 'Skip building' }} - PROD images in-workflow + name: Build PROD images needs: [build-info, build-ci-images, generate-constraints] uses: ./.github/workflows/prod-image-build.yml permissions: contents: read # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs # from forks. This is to prevent malicious PRs from creating images in the "apache/airflow" repo. - # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the - # "in-workflow-build" condition packages: write secrets: inherit with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} build-type: "Regular" - do-build: ${{ needs.build-info.outputs.in-workflow-build }} - upload-package-artifact: "true" - image-tag: ${{ needs.build-info.outputs.image-tag }} platform: "linux/amd64" + push-image: "false" + upload-image-artifact: "true" + upload-package-artifact: "true" python-versions: ${{ needs.build-info.outputs.python-versions }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} branch: ${{ needs.build-info.outputs.default-branch }} - push-image: "true" - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} + use-uv: ${{ needs.build-info.outputs.use-uv }} build-provider-packages: ${{ needs.build-info.outputs.default-branch == 'main' }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} - - wait-for-prod-images: - timeout-minutes: 80 - name: "Wait for PROD images" - runs-on: ${{ fromJSON(needs.build-info.outputs.runs-on-as-json-public) }} - needs: [build-info, wait-for-ci-images, build-prod-images] - if: needs.build-info.outputs.prod-image-build == 'true' - env: - BACKEND: sqlite - PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}" - # Force more parallelism for pull on public images - PARALLELISM: 6 - INCLUDE_SUCCESS_OUTPUTS: "${{needs.build-info.outputs.include-success-outputs}}" - IMAGE_TAG: ${{ needs.build-info.outputs.image-tag }} - steps: - - name: "Cleanup repo" - shell: bash - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 - with: - persist-credentials: false - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: "Install Breeze" - uses: ./.github/actions/breeze - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - if: needs.build-info.outputs.in-workflow-build == 'false' - - name: Wait for PROD images ${{ env.PYTHON_VERSIONS }}:${{ needs.build-info.outputs.image-tag }} - # We wait for the images to be available either from "build-images.yml' run as pull_request_target - # or from build-prod-images (or build-prod-images-release-branch) above. 
- # We are utilising single job to wait for all images because this job merely waits - # For the images to be available. - run: breeze prod-image pull --wait-for-image --run-in-parallel - env: - PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }} - DEBUG_RESOURCES: ${{ needs.build-info.outputs.debug-resources }} - if: needs.build-info.outputs.in-workflow-build == 'false' + prod-image-build: ${{ needs.build-info.outputs.prod-image-build }} additional-prod-image-tests: name: "Additional PROD image tests" - needs: [build-info, wait-for-prod-images, generate-constraints] + needs: [build-info, build-prod-images, generate-constraints] uses: ./.github/workflows/additional-prod-image-tests.yml with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} default-branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} - image-tag: ${{ needs.build-info.outputs.image-tag }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} canary-run: ${{ needs.build-info.outputs.canary-run }} + use-uv: ${{ needs.build-info.outputs.use-uv }} if: needs.build-info.outputs.prod-image-build == 'true' tests-kubernetes: name: "Kubernetes tests" uses: ./.github/workflows/k8s-tests.yml - needs: [build-info, wait-for-prod-images] + needs: [build-info, build-prod-images] permissions: contents: read packages: read secrets: inherit with: + platform: "linux/amd64" runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} - kubernetes-versions-list-as-string: ${{ needs.build-info.outputs.kubernetes-versions-list-as-string }} - kubernetes-combos-list-as-string: ${{ needs.build-info.outputs.kubernetes-combos-list-as-string }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} + use-uv: ${{ needs.build-info.outputs.use-uv }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} + kubernetes-combos: ${{ needs.build-info.outputs.kubernetes-combos }} if: > ( needs.build-info.outputs.run-kubernetes-tests == 'true' || needs.build-info.outputs.needs-helm-tests == 'true') @@ -686,17 +593,18 @@ jobs: tests-task-sdk: name: "Task SDK tests" uses: ./.github/workflows/task-sdk-tests.yml - needs: [build-info, wait-for-ci-images] + needs: [build-info, build-ci-images] permissions: contents: read packages: read secrets: inherit with: runs-on-as-json-default: ${{ needs.build-info.outputs.runs-on-as-json-default }} - image-tag: ${{ needs.build-info.outputs.image-tag }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} python-versions: ${{ needs.build-info.outputs.python-versions }} run-task-sdk-tests: ${{ needs.build-info.outputs.run-task-sdk-tests }} + use-uv: ${{ needs.build-info.outputs.use-uv }} + canary-run: ${{ needs.build-info.outputs.canary-run }} if: > ( needs.build-info.outputs.run-task-sdk-tests == 'true' || needs.build-info.outputs.run-tests == 'true' && 
@@ -711,30 +619,29 @@ jobs: needs: - build-info - generate-constraints - - wait-for-ci-images - - wait-for-prod-images - ci-image-checks - tests-sqlite - tests-mysql - tests-postgres - tests-non-db - tests-integration-system + - build-prod-images uses: ./.github/workflows/finalize-tests.yml with: runs-on-as-json-public: ${{ needs.build-info.outputs.runs-on-as-json-public }} runs-on-as-json-self-hosted: ${{ needs.build-info.outputs.runs-on-as-json-self-hosted }} - image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} - in-workflow-build: ${{ needs.build-info.outputs.in-workflow-build }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} canary-run: ${{ needs.build-info.outputs.canary-run }} + use-uv: ${{ needs.build-info.outputs.use-uv }} + debug-resources: ${{ needs.build-info.outputs.debug-resources }} notify-slack-failure: name: "Notify Slack on Failure" diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index ca54b398202c6..3f4d7221fc61e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -49,6 +49,8 @@ jobs: persist-credentials: false - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: "true" - name: "Get information about the Workflow" id: source-run-info run: breeze ci get-workflow-info 2>> ${GITHUB_OUTPUT} diff --git a/.github/workflows/finalize-tests.yml b/.github/workflows/finalize-tests.yml index 6f9bc74168b42..1d0ac8a600c1d 100644 --- a/.github/workflows/finalize-tests.yml +++ b/.github/workflows/finalize-tests.yml @@ -28,10 +28,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining self-hosted runners." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "JSON-formatted array of Python versions to test" required: true @@ -52,10 +48,6 @@ on: # yamllint disable-line rule:truthy description: "Which version of python should be used by default" required: true type: string - in-workflow-build: - description: "Whether the build is executed as part of the workflow (true/false)" - required: true - type: string upgrade-to-newer-dependencies: description: "Whether to upgrade to newer dependencies (true/false)" required: true @@ -76,6 +68,14 @@ on: # yamllint disable-line rule:truthy description: "Whether this is a canary run (true/false)" required: true type: string + use-uv: + description: "Whether to use uv to build the image (true/false)" + required: true + type: string + debug-resources: + description: "Whether to debug resources or not (true/false)" + required: true + type: string jobs: update-constraints: runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} @@ -87,7 +87,6 @@ jobs: env: DEBUG_RESOURCES: ${{ inputs.debug-resources}} PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -149,7 +148,7 @@ jobs: python-versions: ${{ inputs.python-versions }} branch: ${{ inputs.branch }} constraints-branch: ${{ inputs.constraints-branch }} - use-uv: ${{ needs.build-info.outputs.force-pip == 'true' && 'false' || 'true' }} + use-uv: ${{ inputs.use-uv }} include-success-outputs: ${{ inputs.include-success-outputs }} docker-cache: ${{ inputs.docker-cache }} disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} @@ -192,10 +191,14 @@ jobs: persist-credentials: false - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - - name: "Download all artifacts from the current build" + - name: "Free up disk space" + shell: bash + run: ./scripts/tools/free_up_disk_space.sh + - name: "Download all test warning artifacts from the current build" uses: actions/download-artifact@v4 with: path: ./artifacts + pattern: test-warnings-* - name: "Setup python" uses: actions/setup-python@v5 with: diff --git a/.github/workflows/generate-constraints.yml b/.github/workflows/generate-constraints.yml index d6e536dfd091a..740310e1cc09b 100644 --- a/.github/workflows/generate-constraints.yml +++ b/.github/workflows/generate-constraints.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to generate constraints without providers (true/false)" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string chicken-egg-providers: description: "Space-separated list of providers that should be installed from context files" required: true @@ -44,6 +40,10 @@ on: # yamllint disable-line rule:truthy description: "Whether to run in debug mode (true/false)" required: true type: string + use-uv: + description: "Whether to use uv (true/false)" + required: true + type: string jobs: generate-constraints: permissions: @@ -57,7 +57,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} INCLUDE_SUCCESS_OUTPUTS: "true" - IMAGE_TAG: ${{ inputs.image-tag }} PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} VERBOSE: "true" VERSION_SUFFIX_FOR_PYPI: "dev0" @@ -69,21 +68,17 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false -
- name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "\ - Pull CI images \ - ${{ inputs.python-versions-list-as-string }}:\ - ${{ inputs.image-tag }}" - run: breeze ci-image pull --run-in-parallel --tag-as-latest - - name: " - Verify CI images \ - ${{ inputs.python-versions-list-as-string }}:\ - ${{ inputs.image-tag }}" + with: + use-uv: ${{ inputs.use-uv }} + id: breeze + - name: "Prepare all CI images: ${{ inputs.python-versions-list-as-string}}" + uses: ./.github/actions/prepare_all_ci_images + with: + platform: "linux/amd64" + python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} + - name: "Verify all CI images ${{ inputs.python-versions-list-as-string }}" run: breeze ci-image verify --run-in-parallel - name: "Source constraints" shell: bash @@ -104,22 +99,28 @@ jobs: # from the source code, not from the PyPI because they have apache-airflow>=X.Y.Z dependency # And when we prepare them from sources they will have apache-airflow>=X.Y.Z.dev0 shell: bash + env: + CHICKEN_EGG_PROVIDERS: ${{ inputs.chicken-egg-providers }} run: > breeze release-management prepare-provider-packages --include-not-ready-providers --package-format wheel --version-suffix-for-pypi dev0 - ${{ inputs.chicken-egg-providers }} + ${CHICKEN_EGG_PROVIDERS} if: inputs.chicken-egg-providers != '' - name: "PyPI constraints" shell: bash timeout-minutes: 25 + env: + CHICKEN_EGG_PROVIDERS: ${{ inputs.chicken-egg-providers }} run: > breeze release-management generate-constraints --run-in-parallel --airflow-constraints-mode constraints --answer yes - --chicken-egg-providers "${{ inputs.chicken-egg-providers }}" --parallelism 3 + --chicken-egg-providers "${CHICKEN_EGG_PROVIDERS}" --parallelism 3 - name: "Dependency upgrade summary" shell: bash + env: + PYTHON_VERSIONS: ${{ env.PYTHON_VERSIONS }} run: | - for PYTHON_VERSION in ${{ env.PYTHON_VERSIONS }}; do + for PYTHON_VERSION in $PYTHON_VERSIONS; do echo "Summarizing Python $PYTHON_VERSION" cat "files/constraints-${PYTHON_VERSION}"/*.md >> $GITHUB_STEP_SUMMARY || true done diff --git a/.github/workflows/helm-tests.yml b/.github/workflows/helm-tests.yml index 4c1ec1023fc90..9dc300c61c0a1 100644 --- a/.github/workflows/helm-tests.yml +++ b/.github/workflows/helm-tests.yml @@ -32,14 +32,14 @@ on: # yamllint disable-line rule:truthy description: "Stringified JSON array of helm test packages to test" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string default-python-version: description: "Which version of python should be used by default" required: true type: string + use-uv: + description: "Whether to use uvloop (true/false)" + required: true + type: string jobs: tests-helm: timeout-minutes: 80 @@ -57,7 +57,6 @@ jobs: DB_RESET: "false" JOB_ID: "helm-tests" USE_XDIST: "true" - IMAGE_TAG: "${{ inputs.image-tag }}" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -70,12 +69,16 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{inputs.default-python-version}}:${{inputs.image-tag}}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image 
+ with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Helm Unit Tests: ${{ matrix.helm-test-package }}" - run: breeze testing helm-tests --test-type "${{ matrix.helm-test-package }}" + env: + HELM_TEST_PACKAGE: "${{ matrix.helm-test-package }}" + run: breeze testing helm-tests --test-type "${HELM_TEST_PACKAGE}" tests-helm-release: timeout-minutes: 80 @@ -95,6 +98,8 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} - name: Setup git for tagging run: | git config --global user.email "name@example.com" diff --git a/.github/workflows/integration-system-tests.yml b/.github/workflows/integration-system-tests.yml index 7fde2ae968363..f992b726e30df 100644 --- a/.github/workflows/integration-system-tests.yml +++ b/.github/workflows/integration-system-tests.yml @@ -24,10 +24,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining public runners." required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string testable-core-integrations: description: "The list of testable core integrations as JSON array." required: true @@ -64,6 +60,10 @@ on: # yamllint disable-line rule:truthy description: "Debug resources (true/false)" required: true type: string + use-uv: + description: "Whether to use uv" + required: true + type: string jobs: tests-core-integration: timeout-minutes: 130 @@ -75,7 +75,6 @@ jobs: matrix: integration: ${{ fromJSON(inputs.testable-core-integrations) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" BACKEND: "postgres" BACKEND_VERSION: ${{ inputs.default-postgres-version }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -95,13 +94,17 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Integration: core ${{ matrix.integration }}" + env: + INTEGRATION: "${{ matrix.integration }}" # yamllint disable rule:line-length - run: ./scripts/ci/testing/run_integration_tests_with_retry.sh core "${{ matrix.integration }}" + run: ./scripts/ci/testing/run_integration_tests_with_retry.sh core "${INTEGRATION}" - name: "Post Tests success" uses: ./.github/actions/post_tests_success with: @@ -121,7 +124,6 @@ jobs: matrix: integration: ${{ fromJSON(inputs.testable-providers-integrations) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" BACKEND: "postgres" BACKEND_VERSION: ${{ inputs.default-postgres-version }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -141,12 +143,16 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Integration: providers ${{ matrix.integration }}" - 
run: ./scripts/ci/testing/run_integration_tests_with_retry.sh providers "${{ matrix.integration }}" + env: + INTEGRATION: "${{ matrix.integration }}" + run: ./scripts/ci/testing/run_integration_tests_with_retry.sh providers "${INTEGRATION}" - name: "Post Tests success" uses: ./.github/actions/post_tests_success with: @@ -162,7 +168,6 @@ jobs: name: "System Tests" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} env: - IMAGE_TAG: "${{ inputs.image-tag }}" BACKEND: "postgres" BACKEND_VERSION: ${{ inputs.default-postgres-version }}" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" @@ -182,10 +187,12 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "System Tests" run: > ./scripts/ci/testing/run_system_tests.sh diff --git a/.github/workflows/k8s-tests.yml b/.github/workflows/k8s-tests.yml index 3b3e067038db9..6f867af65e9cd 100644 --- a/.github/workflows/k8s-tests.yml +++ b/.github/workflows/k8s-tests.yml @@ -20,24 +20,20 @@ name: K8s tests on: # yamllint disable-line rule:truthy workflow_call: inputs: - runs-on-as-json-default: - description: "The array of labels (in json form) determining default runner used for the build." + platform: + description: "Platform for the build - 'linux/amd64' or 'linux/arm64'" required: true type: string - image-tag: - description: "Tag to set for the image" + runs-on-as-json-default: + description: "The array of labels (in json form) determining default runner used for the build." 
required: true type: string python-versions-list-as-string: description: "List of Python versions to test: space separated string" required: true type: string - kubernetes-versions-list-as-string: - description: "List of Kubernetes versions to test" - required: true - type: string - kubernetes-combos-list-as-string: - description: "List of combinations of Kubernetes and Python versions to test: space separated string" + kubernetes-combos: + description: "Array of combinations of Kubernetes and Python versions to test" required: true type: string include-success-outputs: @@ -54,20 +50,18 @@ on: # yamllint disable-line rule:truthy type: string jobs: tests-kubernetes: - timeout-minutes: 240 - name: "\ - K8S System:${{ matrix.executor }} - ${{ matrix.use-standard-naming }} - \ - ${{ inputs.kubernetes-versions-list-as-string }}" + timeout-minutes: 60 + name: "K8S System:${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-${{ matrix.use-standard-naming }}" runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} strategy: matrix: executor: [KubernetesExecutor, CeleryExecutor, LocalExecutor] use-standard-naming: [true, false] + kubernetes-combo: ${{ fromJSON(inputs.kubernetes-combos) }} fail-fast: false env: DEBUG_RESOURCES: ${{ inputs.debug-resources }} INCLUDE_SUCCESS_OUTPUTS: ${{ inputs.include-success-outputs }} - IMAGE_TAG: ${{ inputs.image-tag }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -76,55 +70,58 @@ jobs: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Prepare PYTHON_MAJOR_MINOR_VERSION and KUBERNETES_VERSION" + id: prepare-versions + env: + KUBERNETES_COMBO: ${{ matrix.kubernetes-combo }} + run: | + echo "PYTHON_MAJOR_MINOR_VERSION=${KUBERNETES_COMBO}" | sed 's/-.*//' >> $GITHUB_ENV + echo "KUBERNETES_VERSION=${KUBERNETES_COMBO}" | sed 's/=[^-]*-/=/' >> $GITHUB_ENV - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Install Breeze" - uses: ./.github/actions/breeze - id: breeze - - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: Pull PROD images ${{ inputs.python-versions-list-as-string }}:${{ inputs.image-tag }} - run: breeze prod-image pull --run-in-parallel --tag-as-latest - env: - PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} - # Force more parallelism for pull even on public images - PARALLELISM: 6 - - name: "Cache bin folder with tools for kubernetes testing" - uses: actions/cache@v4 + # env.PYTHON_MAJOR_MINOR_VERSION, env.KUBERNETES_VERSION are set in the previous + # step id: prepare-versions + - name: "Prepare breeze & PROD image: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + uses: ./.github/actions/prepare_breeze_and_image with: - path: ".build/.k8s-env" - key: "\ - k8s-env-${{ steps.breeze.outputs.host-python-version }}-\ - ${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','hatch_build.py') }}" - - name: "Switch breeze to use uv" - run: breeze setup config --use-uv - if: inputs.use-uv == 'true' - - name: Run complete K8S tests ${{ inputs.kubernetes-combos-list-as-string }} - run: breeze k8s run-complete-tests --run-in-parallel --upgrade --no-copy-local-sources + platform: ${{ inputs.platform }} + image-type: "prod" + python: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} 
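+          # Example (illustrative values only): a kubernetes-combo of "3.9-v1.30.0" is split by the
+          # prepare-versions step above into PYTHON_MAJOR_MINOR_VERSION=3.9 and KUBERNETES_VERSION=v1.30.0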
+ use-uv: ${{ inputs.use-uv }} + id: breeze + # preparing k8s environment with uv takes < 15 seconds with `uv` - there is no point in caching it. + - name: "\ + Run complete K8S tests ${{ matrix.executor }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }}-\ + ${{env.KUBERNETES_VERSION}}-${{ matrix.use-standard-naming }}" + run: breeze k8s run-complete-tests --upgrade --no-copy-local-sources env: - PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} - KUBERNETES_VERSIONS: ${{ inputs.kubernetes-versions-list-as-string }} EXECUTOR: ${{ matrix.executor }} USE_STANDARD_NAMING: ${{ matrix.use-standard-naming }} VERBOSE: "false" - - name: Upload KinD logs on failure ${{ inputs.kubernetes-combos-list-as-string }} + - name: "\ + Upload KinD logs on failure ${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-\ + ${{ matrix.use-standard-naming }}" uses: actions/upload-artifact@v4 if: failure() || cancelled() with: - name: kind-logs-${{ matrix.executor }}-${{ matrix.use-standard-naming }} + name: "\ + kind-logs-${{ matrix.kubernetes-combo }}-${{ matrix.executor }}-\ + ${{ matrix.use-standard-naming }}" path: /tmp/kind_logs_* - retention-days: 7 - - name: Upload test resource logs on failure ${{ inputs.kubernetes-combos-list-as-string }} + retention-days: '7' + - name: "\ + Upload test resource logs on failure ${{ matrix.executor }}-${{ matrix.kubernetes-combo }}-\ + ${{ matrix.use-standard-naming }}" uses: actions/upload-artifact@v4 if: failure() || cancelled() with: - name: k8s-test-resources-${{ matrix.executor }}-${{ matrix.use-standard-naming }} + name: "\ + k8s-test-resources-${{ matrix.kubernetes-combo }}-${{ matrix.executor }}-\ + ${{ matrix.use-standard-naming }}" path: /tmp/k8s_test_resources_* - retention-days: 7 + retention-days: '7' - name: "Delete clusters just in case they are left" run: breeze k8s delete-cluster --all if: always() diff --git a/.github/workflows/news-fragment.yml b/.github/workflows/news-fragment.yml index 4bcf95e2bba88..73e58a0193711 100644 --- a/.github/workflows/news-fragment.yml +++ b/.github/workflows/news-fragment.yml @@ -37,12 +37,14 @@ jobs: fetch-depth: 0 - name: Check news fragment existence + env: + BASE_REF: ${{ github.base_ref }} run: > python -m pip install --upgrade uv && uv tool run towncrier check --dir . --config newsfragments/config.toml - --compare-with origin/${{ github.base_ref }} + --compare-with origin/${BASE_REF} || { printf "\033[1;33mMissing significant newsfragment for PR labeled with @@ -54,6 +56,8 @@ jobs: ; } - name: Check news fragment contains change types + env: + BASE_REF: ${{ github.base_ref }} run: > change_types=( 'DAG changes' @@ -64,7 +68,7 @@ jobs: 'Plugin changes' 'Dependency change' ) - news_fragment_content=`git diff origin/${{ github.base_ref }} newsfragments/*.significant.rst` + news_fragment_content=`git diff origin/${BASE_REF} newsfragments/*.significant.rst` for type in "${change_types[@]}"; do if [[ $news_fragment_content != *"$type"* ]]; then diff --git a/.github/workflows/prod-image-build.yml b/.github/workflows/prod-image-build.yml index df4f24981ff30..d90d1910f9336 100644 --- a/.github/workflows/prod-image-build.yml +++ b/.github/workflows/prod-image-build.yml @@ -30,13 +30,6 @@ on: # yamllint disable-line rule:truthy variations. required: true type: string - do-build: - description: > - Whether to actually do the build (true/false). If set to false, the build is done - already in pull-request-target workflow, so we skip it here. 
- required: false - default: "true" - type: string upload-package-artifact: description: > Whether to upload package artifacts (true/false). If false, the job will rely on artifacts prepared @@ -62,6 +55,11 @@ on: # yamllint disable-line rule:truthy description: "Whether to push image to the registry (true/false)" required: true type: string + upload-image-artifact: + description: "Whether to upload docker image artifact" + required: false + default: "false" + type: string debian-version: description: "Base Debian distribution to use for the build (bookworm)" type: string @@ -74,10 +72,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to use uv to build the image (true/false)" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "JSON-formatted array of Python versions to build images from" required: true @@ -118,12 +112,17 @@ on: # yamllint disable-line rule:truthy description: "Disable airflow repo cache read from main." required: true type: string + prod-image-build: + description: "Whether this is a prod-image build (true/false)" + required: true + type: string jobs: build-prod-packages: - name: "${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} Airflow and provider packages" + name: "Build Airflow and provider packages" timeout-minutes: 10 runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} + if: inputs.prod-image-build == 'true' env: PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERSION_SUFFIX_FOR_PYPI: ${{ inputs.branch == 'main' && 'dev0' || '' }} @@ -131,32 +130,23 @@ jobs: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Checkout target branch" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Checkout target commit" - uses: ./.github/actions/checkout_target_commit - with: - target-commit-sha: ${{ inputs.target-commit-sha }} - pull-request-target: ${{ inputs.pull-request-target }} - is-committer-build: ${{ inputs.is-committer-build }} - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' - - uses: actions/setup-python@v5 - with: - python-version: "${{ inputs.default-python-version }}" - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Cleanup dist and context file" shell: bash run: rm -fv ./dist/* ./docker-context-files/* - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + with: + use-uv: ${{ inputs.use-uv }} + if: inputs.upload-package-artifact == 'true' - name: "Prepare providers packages" shell: bash run: > @@ -164,28 +154,28 @@ jobs: --package-list-file ./prod_image_installed_providers.txt --package-format wheel if: > - inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' && inputs.build-provider-packages == 'true' - name: "Prepare chicken-eggs provider packages" shell: bash + env: + CHICKEN_EGG_PROVIDERS: ${{ inputs.chicken-egg-providers }} run: > breeze 
release-management prepare-provider-packages - --package-format wheel ${{ inputs.chicken-egg-providers }} + --package-format wheel ${CHICKEN_EGG_PROVIDERS} if: > - inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' && inputs.chicken-egg-providers != '' - name: "Prepare airflow package" shell: bash run: > breeze release-management prepare-airflow-package --package-format wheel - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Prepare task-sdk package" shell: bash run: > breeze release-management prepare-task-sdk-package --package-format wheel - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' - name: "Upload prepared packages as artifacts" uses: actions/upload-artifact@v4 with: @@ -193,25 +183,21 @@ jobs: path: ./dist retention-days: 7 if-no-files-found: error - if: inputs.do-build == 'true' && inputs.upload-package-artifact == 'true' + if: inputs.upload-package-artifact == 'true' build-prod-images: strategy: fail-fast: false matrix: - # yamllint disable-line rule:line-length - python-version: ${{ inputs.do-build == 'true' && fromJSON(inputs.python-versions) || fromJSON('[""]') }} + python-version: ${{ fromJSON(inputs.python-versions) || fromJSON('[""]') }} timeout-minutes: 80 - name: "\ -${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} \ -PROD ${{ inputs.build-type }} image\ -${{ matrix.python-version }}${{ inputs.do-build == 'true' && ':' || '' }}\ -${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" + name: "Build PROD ${{ inputs.build-type }} image ${{ matrix.python-version }}" runs-on: ${{ fromJSON(inputs.runs-on-as-json-public) }} needs: - build-prod-packages env: BACKEND: sqlite + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}" DEFAULT_BRANCH: ${{ inputs.branch }} DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} VERSION_SUFFIX_FOR_PYPI: ${{ inputs.branch == 'main' && 'dev0' || '' }} @@ -225,63 +211,48 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - USE_UV: ${{ inputs.use-uv }} VERBOSE: "true" steps: - name: "Cleanup repo" shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: inputs.do-build == 'true' - name: "Checkout target branch" uses: actions/checkout@v4 with: persist-credentials: false - - name: "Checkout target commit" - uses: ./.github/actions/checkout_target_commit - with: - target-commit-sha: ${{ inputs.target-commit-sha }} - pull-request-target: ${{ inputs.pull-request-target }} - is-committer-build: ${{ inputs.is-committer-build }} - if: inputs.do-build == 'true' - name: "Cleanup docker" run: ./scripts/ci/cleanup_docker.sh - if: inputs.do-build == 'true' - name: "Install Breeze" uses: ./.github/actions/breeze - if: inputs.do-build == 'true' - - name: "Regenerate dependencies in case they was modified manually so that we can build an image" - shell: bash - run: | - pip install rich>=12.4.4 pyyaml - python scripts/ci/pre_commit/update_providers_dependencies.py - if: inputs.do-build == 'true' && inputs.upgrade-to-newer-dependencies != 'false' + with: + use-uv: ${{ inputs.use-uv }} - name: "Cleanup dist and context file" shell: bash run: rm -fv ./dist/* ./docker-context-files/* - if: inputs.do-build == 'true' - name: "Download packages prepared as artifacts" uses: 
actions/download-artifact@v4 with: name: prod-packages path: ./docker-context-files - if: inputs.do-build == 'true' - name: "Download constraints" uses: actions/download-artifact@v4 with: name: constraints path: ./docker-context-files - if: inputs.do-build == 'true' - - name: Login to ghcr.io - shell: bash - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - if: inputs.do-build == 'true' - - name: "Build PROD images w/ source providers ${{ matrix.python-version }}:${{ inputs.image-tag }}" + - name: "Login to ghcr.io" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ACTOR: ${{ github.actor }} + run: echo "${GITHUB_TOKEN}" | docker login ghcr.io -u ${ACTOR} --password-stdin + - name: "Build PROD images w/ source providers ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" shell: bash run: > - breeze prod-image build --tag-as-latest --image-tag "${{ inputs.image-tag }}" + breeze prod-image build + --builder airflow_cache --commit-sha "${{ github.sha }}" - --install-packages-from-context --airflow-constraints-mode constraints-source-providers - --use-constraints-for-context-packages --python "${{ matrix.python-version }}" + --install-packages-from-context + --airflow-constraints-mode constraints-source-providers + --use-constraints-for-context-packages env: PUSH: ${{ inputs.push-image }} DOCKER_CACHE: ${{ inputs.docker-cache }} @@ -290,14 +261,16 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} INCLUDE_NOT_READY_PROVIDERS: "true" - if: inputs.do-build == 'true' && inputs.build-provider-packages == 'true' - - name: "Build PROD images with PyPi providers ${{ matrix.python-version }}:${{ inputs.image-tag }}" + if: inputs.build-provider-packages == 'true' + - name: "Build PROD images with PyPi providers ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" shell: bash run: > - breeze prod-image build --builder airflow_cache --tag-as-latest - --image-tag "${{ inputs.image-tag }}" --commit-sha "${{ github.sha }}" - --install-packages-from-context --airflow-constraints-mode constraints - --use-constraints-for-context-packages --python "${{ matrix.python-version }}" + breeze prod-image build + --builder airflow_cache + --commit-sha "${{ github.sha }}" + --install-packages-from-context + --airflow-constraints-mode constraints + --use-constraints-for-context-packages env: PUSH: ${{ inputs.push-image }} DOCKER_CACHE: ${{ inputs.docker-cache }} @@ -306,9 +279,20 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} INCLUDE_NOT_READY_PROVIDERS: "true" - if: inputs.do-build == 'true' && inputs.build-provider-packages != 'true' - - name: Verify PROD image ${{ matrix.python-version }}:${{ inputs.image-tag }} + if: inputs.build-provider-packages != 'true' + - name: "Verify PROD image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + run: breeze prod-image verify + - name: "Export PROD docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + env: + PLATFORM: ${{ inputs.platform }} run: > - breeze prod-image verify --image-tag "${{ inputs.image-tag }}" - --python "${{ matrix.python-version }}" - if: inputs.do-build == 'true' + breeze prod-image save --platform "${PLATFORM}" + if: inputs.upload-image-artifact == 'true' + - name: "Stash PROD docker image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}" + 
uses: apache/infrastructure-actions/stash/save@c94b890bbedc2fc61466d28e6bd9966bc6c6643c + with: + key: prod-image-save-${{ inputs.platform }}-${{ env.PYTHON_MAJOR_MINOR_VERSION }} + path: "/tmp/prod-image-save-*-${{ env.PYTHON_MAJOR_MINOR_VERSION }}.tar" + if-no-files-found: 'error' + retention-days: '2' + if: inputs.upload-image-artifact == 'true' diff --git a/.github/workflows/prod-image-extra-checks.yml b/.github/workflows/prod-image-extra-checks.yml index bb63faef7b243..f5a4b771436a7 100644 --- a/.github/workflows/prod-image-extra-checks.yml +++ b/.github/workflows/prod-image-extra-checks.yml @@ -40,9 +40,6 @@ on: # yamllint disable-line rule:truthy description: "Whether to use uv to build the image (true/false)" required: true type: string - image-tag: - required: true - type: string build-provider-packages: description: "Whether to build provider packages (true/false). If false providers are from PyPI" required: true @@ -73,8 +70,8 @@ jobs: with: runs-on-as-json-public: ${{ inputs.runs-on-as-json-public }} build-type: "MySQL Client" + upload-image-artifact: "false" upload-package-artifact: "false" - image-tag: mysql-${{ inputs.image-tag }} install-mysql-client-type: "mysql" python-versions: ${{ inputs.python-versions }} default-python-version: ${{ inputs.default-python-version }} @@ -89,6 +86,7 @@ jobs: constraints-branch: ${{ inputs.constraints-branch }} docker-cache: ${{ inputs.docker-cache }} disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} + prod-image-build: "true" pip-image: uses: ./.github/workflows/prod-image-build.yml @@ -97,8 +95,8 @@ jobs: with: runs-on-as-json-public: ${{ inputs.runs-on-as-json-public }} build-type: "pip" + upload-image-artifact: "false" upload-package-artifact: "false" - image-tag: mysql-${{ inputs.image-tag }} install-mysql-client-type: "mysql" python-versions: ${{ inputs.python-versions }} default-python-version: ${{ inputs.default-python-version }} @@ -113,3 +111,4 @@ jobs: constraints-branch: ${{ inputs.constraints-branch }} docker-cache: ${{ inputs.docker-cache }} disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} + prod-image-build: "true" diff --git a/.github/workflows/push-image-cache.yml b/.github/workflows/push-image-cache.yml index 10a33275ad3f3..b1c9d12754206 100644 --- a/.github/workflows/push-image-cache.yml +++ b/.github/workflows/push-image-cache.yml @@ -110,7 +110,7 @@ jobs: GITHUB_USERNAME: ${{ github.actor }} INCLUDE_SUCCESS_OUTPUTS: "${{ inputs.include-success-outputs }}" INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} - USE_UV: ${{ inputs.use-uv }} + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python }}" UPGRADE_TO_NEWER_DEPENDENCIES: "false" VERBOSE: "true" VERSION_SUFFIX_FOR_PYPI: "dev0" @@ -126,23 +126,33 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze - - name: "Start ARM instance" - run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: inputs.platform == 'linux/arm64' + with: + use-uv: ${{ inputs.use-uv }} - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "Push CI ${{ inputs.cache-type }} cache: ${{ matrix.python }} ${{ inputs.platform }}" - run: > - breeze ci-image build --builder airflow_cache --prepare-buildx-cache - --platform "${{ inputs.platform }}" --python ${{ matrix.python }} - - name: "Stop ARM instance" - run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && inputs.platform == 'linux/arm64' - 
- name: "Push CI latest images: ${{ matrix.python }} (linux/amd64 only)" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ACTOR: ${{ github.actor }} + run: echo "${GITHUB_TOKEN}" | docker login ghcr.io -u ${ACTOR} --password-stdin + - name: "Push CI latest images: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} (linux/amd64 only)" + env: + PLATFORM: ${{ inputs.platform }} run: > - breeze ci-image build --builder airflow_cache --push - --python "${{ matrix.python }}" --platform "${{ inputs.platform }}" + breeze + ci-image build + --builder airflow_cache + --platform "${PLATFORM}" + --push if: inputs.push-latest-images == 'true' && inputs.platform == 'linux/amd64' + # yamllint disable-line rule:line-length + - name: "Push CI ${{ inputs.cache-type }} cache:${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ inputs.platform }}" + env: + PLATFORM: ${{ inputs.platform }} + run: > + breeze ci-image build + --builder airflow_cache + --prepare-buildx-cache + --platform "${PLATFORM}" + --push push-prod-image-cache: name: "Push PROD ${{ inputs.cache-type }}:${{ matrix.python }} image cache" @@ -172,8 +182,8 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python }}" UPGRADE_TO_NEWER_DEPENDENCIES: "false" - USE_UV: ${{ inputs.branch == 'main' && inputs.use-uv || 'false' }} VERBOSE: "true" VERSION_SUFFIX_FOR_PYPI: "dev0" if: inputs.include-prod-images == 'true' @@ -189,6 +199,8 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: ${{ inputs.use-uv }} - name: "Cleanup dist and context file" run: rm -fv ./dist/* ./docker-context-files/* - name: "Download packages prepared as artifacts" @@ -196,25 +208,33 @@ jobs: with: name: prod-packages path: ./docker-context-files - - name: "Start ARM instance" - run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: inputs.platform == 'linux/arm64' - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "Push PROD ${{ inputs.cache-type }} cache: ${{ matrix.python-version }} ${{ inputs.platform }}" - run: > - breeze prod-image build --builder airflow_cache - --prepare-buildx-cache --platform "${{ inputs.platform }}" - --install-packages-from-context --airflow-constraints-mode constraints-source-providers - --python ${{ matrix.python }} - - name: "Stop ARM instance" - run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && inputs.platform == 'linux/arm64' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ACTOR: ${{ github.actor }} + run: echo "${GITHUB_TOKEN}" | docker login ghcr.io -u ${ACTOR} --password-stdin # We only push "AMD" images as it is really only needed for any kind of automated builds in CI # and currently there is not an easy way to make multi-platform image from two separate builds # and we can do it after we stopped the ARM instance as it is not needed anymore - - name: "Push PROD latest image: ${{ matrix.python }} (linux/amd64 ONLY)" + - name: "Push PROD latest image: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} (linux/amd64 ONLY)" + env: + PLATFORM: ${{ inputs.platform }} run: > - breeze prod-image build --builder airflow_cache --install-packages-from-context - --push --platform "${{ inputs.platform }}" + breeze prod-image build + --builder airflow_cache + --install-packages-from-context + --platform "${PLATFORM}" + --airflow-constraints-mode 
constraints-source-providers if: inputs.push-latest-images == 'true' && inputs.platform == 'linux/amd64' + # yamllint disable-line rule:line-length + - name: "Push PROD ${{ inputs.cache-type }} cache: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} ${{ inputs.platform }}" + env: + PLATFORM: ${{ inputs.platform }} + run: > + breeze prod-image build + --builder airflow_cache + --prepare-buildx-cache + --install-packages-from-context + --platform "${PLATFORM}" + --airflow-constraints-mode constraints-source-providers + --push diff --git a/.github/workflows/release_dockerhub_image.yml b/.github/workflows/release_dockerhub_image.yml index 5ce1585131f76..b8758146cc1b1 100644 --- a/.github/workflows/release_dockerhub_image.yml +++ b/.github/workflows/release_dockerhub_image.yml @@ -63,11 +63,14 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: "false" - name: Selective checks id: selective-checks env: VERBOSE: "false" run: breeze ci selective-check 2>> ${GITHUB_OUTPUT} + release-images: timeout-minutes: 120 name: "Release images: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }}" @@ -99,6 +102,8 @@ jobs: run: ./scripts/ci/cleanup_docker.sh - name: "Install Breeze" uses: ./.github/actions/breeze + with: + use-uv: "false" - name: Free space run: breeze ci free-space --answer yes - name: "Cleanup dist and context file" @@ -108,7 +113,10 @@ jobs: echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login --password-stdin --username ${{ secrets.DOCKERHUB_USER }} - name: Login to ghcr.io - run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ACTOR: ${{ github.actor }} + run: echo "${GITHUB_TOKEN}" | docker login ghcr.io -u ${ACTOR} --password-stdin - name: "Install buildx plugin" # yamllint disable rule:line-length run: | @@ -141,10 +149,12 @@ jobs: # from the source code, not from the PyPI because they have apache-airflow>=X.Y.Z dependency # And when we prepare them from sources they will have apache-airflow>=X.Y.Z.dev0 shell: bash + env: + CHICKEN_EGG_PROVIDERS: ${{ needs.build-info.outputs.chicken-egg-providers }} run: > breeze release-management prepare-provider-packages --package-format wheel - --version-suffix-for-pypi dev0 ${{ needs.build-info.outputs.chicken-egg-providers }} + --version-suffix-for-pypi dev0 ${CHICKEN_EGG_PROVIDERS} if: needs.build-info.outputs.chicken-egg-providers != '' - name: "Copy dist packages to docker-context files" shell: bash @@ -152,42 +162,61 @@ jobs: if: needs.build-info.outputs.chicken-egg-providers != '' - name: > Release regular images: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} - run: > - breeze release-management release-prod-images - --dockerhub-repo ${{ github.repository }} - --airflow-version ${{ github.event.inputs.airflowVersion }} - ${{ needs.build-info.outputs.skipLatest }} - ${{ needs.build-info.outputs.limitPlatform }} - --limit-python ${{ matrix.python-version }} - --chicken-egg-providers "${{ needs.build-info.outputs.chicken-egg-providers }}" env: COMMIT_SHA: ${{ github.sha }} - - name: > - Release slim images: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} + REPOSITORY: ${{ github.repository }} + PYTHON_VERSION: ${{ matrix.python-version }} + AIRFLOW_VERSION: ${{ github.event.inputs.airflowVersion }} + SKIP_LATEST: ${{ needs.build-info.outputs.skipLatest }} + LIMIT_PLATFORM: ${{ needs.build-info.outputs.limitPlatform }} + 
CHICKEN_EGG_PROVIDERS: ${{ needs.build-info.outputs.chicken-egg-providers }} run: > breeze release-management release-prod-images - --dockerhub-repo ${{ github.repository }} - --airflow-version ${{ github.event.inputs.airflowVersion }} - ${{ needs.build-info.outputs.skipLatest }} - ${{ needs.build-info.outputs.limitPlatform }} - --limit-python ${{ matrix.python-version }} --slim-images + --dockerhub-repo "${REPOSITORY}" + --airflow-version "${AIRFLOW_VERSION}" + ${SKIP_LATEST} + ${LIMIT_PLATFORM} + --limit-python ${PYTHON_VERSION} + --chicken-egg-providers ${CHICKEN_EGG_PROVIDERS} + - name: > + Release slim images: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} env: COMMIT_SHA: ${{ github.sha }} + REPOSITORY: ${{ github.repository }} + PYTHON_VERSION: ${{ matrix.python-version }} + AIRFLOW_VERSION: ${{ github.event.inputs.airflowVersion }} + SKIP_LATEST: ${{ needs.build-info.outputs.skipLatest }} + LIMIT_PLATFORM: ${{ needs.build-info.outputs.limitPlatform }} + run: > + breeze release-management release-prod-images + --dockerhub-repo "${REPOSITORY}" + --airflow-version "${AIRFLOW_VERSION}" + ${SKIP_LATEST} + ${LIMIT_PLATFORM} + --limit-python ${PYTHON_VERSION} --slim-images - name: > Verify regular AMD64 image: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} + env: + PYTHON_VERSION: ${{ matrix.python-version }} + AIRFLOW_VERSION: ${{ github.event.inputs.airflowVersion }} + REPOSITORY: ${{ github.repository }} run: > breeze prod-image verify --pull --image-name - ${{github.repository}}:${{github.event.inputs.airflowVersion}}-python${{matrix.python-version}} + ${REPOSITORY}:${AIRFLOW_VERSION}-python${PYTHON_VERSION} - name: > Verify slim AMD64 image: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} + env: + PYTHON_VERSION: ${{ matrix.python-version }} + AIRFLOW_VERSION: ${{ github.event.inputs.airflowVersion }} + REPOSITORY: ${{ github.repository }} run: > breeze prod-image verify --pull --slim-image --image-name - ${{github.repository}}:slim-${{github.event.inputs.airflowVersion}}-python${{matrix.python-version}} + ${REPOSITORY}:slim-${AIRFLOW_VERSION}-python${PYTHON_VERSION} - name: "Docker logout" run: docker logout if: always() diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index 6b491f6bff4ab..1c24e659d0979 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -45,10 +45,6 @@ on: # yamllint disable-line rule:truthy required: false default: ":" type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string python-versions: description: "The list of python versions (stringified JSON array) to run the tests on." 
required: true @@ -116,6 +112,10 @@ on: # yamllint disable-line rule:truthy required: false default: 20 type: number + use-uv: + description: "Whether to use uv" + required: true + type: string jobs: tests: timeout-minutes: 120 @@ -144,7 +144,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_SUCCESS_OUTPUTS: ${{ inputs.include-success-outputs }} # yamllint disable rule:line-length JOB_ID: "${{ matrix.test-group }}-${{ inputs.test-scope }}-${{ inputs.test-name }}-${{inputs.backend}}-${{ matrix.backend-version }}-${{ matrix.python-version }}" @@ -163,10 +162,12 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{matrix.python-version}}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ matrix.python-version }} + use-uv: ${{ inputs.use-uv }} - name: > Migration Tests: ${{ matrix.python-version }}:${{ env.PARALLEL_TEST_TYPES }} uses: ./.github/actions/migration_tests @@ -174,7 +175,10 @@ jobs: - name: > ${{ matrix.test-group}}:${{ inputs.test-scope }} Tests ${{ inputs.test-name }} ${{ matrix.backend-version }} Py${{ matrix.python-version }}:${{ env.PARALLEL_TEST_TYPES }} - run: ./scripts/ci/testing/run_unit_tests.sh "${{ matrix.test-group }}" "${{ inputs.test-scope }}" + env: + TEST_GROUP: "${{ matrix.test-group }}" + TEST_SCOPE: "${{ inputs.test-scope }}" + run: ./scripts/ci/testing/run_unit_tests.sh "${TEST_GROUP}" "${TEST_SCOPE}" - name: "Post Tests success" uses: ./.github/actions/post_tests_success with: diff --git a/.github/workflows/special-tests.yml b/.github/workflows/special-tests.yml index decc7271b728b..36ccbf871cca9 100644 --- a/.github/workflows/special-tests.yml +++ b/.github/workflows/special-tests.yml @@ -32,10 +32,6 @@ on: # yamllint disable-line rule:truthy description: "The json representing list of test test groups to run" required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string core-test-types-list-as-string: description: "The list of core test types to run separated by spaces" required: true @@ -80,6 +76,10 @@ on: # yamllint disable-line rule:truthy description: "Whether to debug resources or not (true/false)" required: true type: string + use-uv: + description: "Whether to use uv or not (true/false)" + required: true + type: string jobs: tests-min-sqlalchemy: @@ -96,7 +96,6 @@ jobs: test-scope: "DB" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -105,6 +104,7 @@ jobs: providers-test-types-list-as-string: ${{ inputs.providers-test-types-list-as-string }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} + use-uv: ${{ inputs.use-uv }} tests-boto: name: "Latest Boto test" @@ -120,7 +120,6 @@ jobs: test-scope: "All" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" 
excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -130,6 +129,7 @@ jobs: include-success-outputs: ${{ inputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} + use-uv: ${{ inputs.use-uv }} tests-pendulum-2: name: "Pendulum2 test" @@ -145,7 +145,6 @@ jobs: test-scope: "All" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -155,6 +154,7 @@ jobs: include-success-outputs: ${{ inputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} + use-uv: ${{ inputs.use-uv }} tests-quarantined: name: "Quarantined test" @@ -169,7 +169,6 @@ jobs: test-scope: "Quarantined" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -179,6 +178,7 @@ jobs: include-success-outputs: ${{ inputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} + use-uv: ${{ inputs.use-uv }} tests-arm-collection: name: "ARM Collection test" @@ -193,7 +193,6 @@ jobs: test-scope: "ARM collection" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -203,8 +202,10 @@ jobs: include-success-outputs: ${{ inputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} + use-uv: ${{ inputs.use-uv }} if: ${{ inputs.default-branch == 'main' }} + # matrix.test-group comes from run-unit-tests.yml tests-system: name: "System test: ${{ matrix.test-group }}" uses: ./.github/workflows/run-unit-tests.yml @@ -218,7 +219,6 @@ jobs: test-scope: "System" test-groups: ${{ inputs.test-groups }} backend: "postgres" - image-tag: ${{ inputs.image-tag }} python-versions: "['${{ inputs.default-python-version }}']" backend-versions: "['${{ inputs.default-postgres-version }}']" excluded-providers-as-string: ${{ inputs.excluded-providers-as-string }} @@ -228,3 +228,4 @@ jobs: include-success-outputs: ${{ inputs.include-success-outputs }} run-coverage: ${{ inputs.run-coverage }} debug-resources: ${{ inputs.debug-resources }} + use-uv: ${{ inputs.use-uv }} diff --git a/.github/workflows/task-sdk-tests.yml b/.github/workflows/task-sdk-tests.yml index acc9872e6ed96..501e880fd3be0 100644 --- a/.github/workflows/task-sdk-tests.yml +++ b/.github/workflows/task-sdk-tests.yml @@ -24,10 +24,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining default runner used for the build." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string default-python-version: description: "Which version of python should be used by default" required: true @@ -40,6 +36,15 @@ on: # yamllint disable-line rule:truthy description: "Whether to run Task SDK tests or not (true/false)" required: true type: string + use-uv: + description: "Whether to use uv to build the image (true/false)" + required: true + type: string + canary-run: + description: "Whether this is a canary run (true/false)" + required: true + type: string + jobs: task-sdk-tests: timeout-minutes: 80 @@ -53,7 +58,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERBOSE: "true" @@ -66,10 +70,12 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ matrix.python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ matrix.python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Cleanup dist files" run: rm -fv ./dist/* - name: "Prepare Task SDK packages: wheel" @@ -81,5 +87,7 @@ jobs: uv tool install twine && twine check dist/*.whl - name: > Run unit tests for Airflow Task SDK:Python ${{ matrix.python-version }} + env: + PYTHON_VERSION: "${{ matrix.python-version }}" run: > - breeze testing task-sdk-tests --python "${{ matrix.python-version }}" + breeze testing task-sdk-tests --python "${PYTHON_VERSION}" diff --git a/.github/workflows/test-provider-packages.yml b/.github/workflows/test-provider-packages.yml index 08715af6b58ba..877ff1f1b23c9 100644 --- a/.github/workflows/test-provider-packages.yml +++ b/.github/workflows/test-provider-packages.yml @@ -24,10 +24,6 @@ on: # yamllint disable-line rule:truthy description: "The array of labels (in json form) determining default runner used for the build." 
required: true type: string - image-tag: - description: "Tag to set for the image" - required: true - type: string canary-run: description: "Whether this is a canary run" required: true @@ -62,6 +58,10 @@ on: # yamllint disable-line rule:truthy description: "JSON-formatted array of Python versions to build images from" required: true type: string + use-uv: + description: "Whether to use uv" + required: true + type: string jobs: prepare-install-verify-provider-packages: timeout-minutes: 80 @@ -75,7 +75,6 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" VERBOSE: "true" @@ -87,11 +86,12 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: > - Prepare breeze & CI image: ${{ inputs.default-python-version }}:${{ inputs.image-tag }} + - name: "Prepare breeze & CI image: ${{ inputs.default-python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ inputs.default-python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Cleanup dist files" run: rm -fv ./dist/* - name: "Prepare provider documentation" @@ -126,29 +126,35 @@ jobs: breeze release-management generate-constraints --airflow-constraints-mode constraints-source-providers --answer yes - name: "Install and verify wheel provider packages" + env: + PACKAGE_FORMAT: ${{ matrix.package-format }} + PYTHON_MAJOR_MINOR_VERSION: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} + AIRFLOW_SKIP_CONSTRAINTS: "${{ inputs.upgrade-to-newer-dependencies }}" run: > breeze release-management verify-provider-packages --use-packages-from-dist - --package-format ${{ matrix.package-format }} - --use-airflow-version ${{ matrix.package-format }} + --package-format "${PACKAGE_FORMAT}" + --use-airflow-version "${PACKAGE_FORMAT}" --airflow-constraints-reference default --providers-constraints-location - /files/constraints-${{env.PYTHON_MAJOR_MINOR_VERSION}}/constraints-source-providers-${{env.PYTHON_MAJOR_MINOR_VERSION}}.txt - env: - AIRFLOW_SKIP_CONSTRAINTS: "${{ inputs.upgrade-to-newer-dependencies }}" + /files/constraints-${PYTHON_MAJOR_MINOR_VERSION}/constraints-source-providers-${PYTHON_MAJOR_MINOR_VERSION}.txt if: matrix.package-format == 'wheel' - name: "Install all sdist provider packages and airflow" + env: + PACKAGE_FORMAT: ${{ matrix.package-format }} + PYTHON_MAJOR_MINOR_VERSION: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} run: > breeze release-management install-provider-packages --use-packages-from-dist - --package-format ${{ matrix.package-format }} - --use-airflow-version ${{ matrix.package-format }} + --package-format "${PACKAGE_FORMAT}" + --use-airflow-version ${PACKAGE_FORMAT} --airflow-constraints-reference default --providers-constraints-location - /files/constraints-${{env.PYTHON_MAJOR_MINOR_VERSION}}/constraints-source-providers-${{env.PYTHON_MAJOR_MINOR_VERSION}}.txt + /files/constraints-${PYTHON_MAJOR_MINOR_VERSION}/constraints-source-providers-${PYTHON_MAJOR_MINOR_VERSION}.txt --run-in-parallel if: matrix.package-format == 'sdist' + # All matrix parameters are passed as JSON string in the input variable providers-compatibility-tests-matrix providers-compatibility-tests-matrix: timeout-minutes: 80 name: Compat ${{ matrix.airflow-version }}:P${{ matrix.python-version }} providers test @@ -161,9 +167,8
@@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} - IMAGE_TAG: "${{ inputs.image-tag }}" INCLUDE_NOT_READY_PROVIDERS: "true" - PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + PYTHON_MAJOR_MINOR_VERSION: "${{ matrix.python-version }}" VERSION_SUFFIX_FOR_PYPI: "dev0" VERBOSE: "true" CLEAN_AIRFLOW_INSTALLATION: "${{ inputs.canary-run }}" @@ -176,10 +181,12 @@ jobs: uses: actions/checkout@v4 with: persist-credentials: false - - name: "Cleanup docker" - run: ./scripts/ci/cleanup_docker.sh - - name: "Prepare breeze & CI image: ${{ matrix.python-version }}:${{ inputs.image-tag }}" + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" uses: ./.github/actions/prepare_breeze_and_image + with: + platform: "linux/amd64" + python: ${{ matrix.python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Cleanup dist files" run: rm -fv ./dist/* - name: "Prepare provider packages: wheel" @@ -189,8 +196,10 @@ jobs: - name: > Remove incompatible Airflow ${{ matrix.airflow-version }}:Python ${{ matrix.python-version }} provider packages + env: + REMOVE_PROVIDERS: ${{ matrix.remove-providers }} run: | - for provider in ${{ matrix.remove-providers }}; do + for provider in ${REMOVE_PROVIDERS}; do echo "Removing incompatible provider: ${provider}" rm -vf dist/apache_airflow_providers_${provider/./_}* done @@ -204,25 +213,34 @@ jobs: # We do not need to run import check if we run tests, the tests should cover all the import checks # automatically if: matrix.run-tests != 'true' + env: + AIRFLOW_VERSION: "${{ matrix.airflow-version }}" run: > breeze release-management verify-provider-packages --use-packages-from-dist --package-format wheel --use-airflow-version wheel - --airflow-constraints-reference constraints-${{matrix.airflow-version}} + --airflow-constraints-reference constraints-${AIRFLOW_VERSION} --providers-skip-constraints --install-airflow-with-constraints + - name: Check amount of disk space available + run: df -H + shell: bash - name: > Run provider unit tests on Airflow ${{ matrix.airflow-version }}:Python ${{ matrix.python-version }} if: matrix.run-tests == 'true' + env: + PROVIDERS_TEST_TYPES: "${{ inputs.providers-test-types-list-as-string }}" + AIRFLOW_VERSION: "${{ matrix.airflow-version }}" + REMOVE_PROVIDERS: "${{ matrix.remove-providers }}" run: > breeze testing providers-tests --run-in-parallel - --parallel-test-types "${{ inputs.providers-test-types-list-as-string }}" + --parallel-test-types "${PROVIDERS_TEST_TYPES}" --use-packages-from-dist --package-format wheel - --use-airflow-version "${{ matrix.airflow-version }}" - --airflow-constraints-reference constraints-${{matrix.airflow-version}} + --use-airflow-version "${AIRFLOW_VERSION}" + --airflow-constraints-reference constraints-${AIRFLOW_VERSION} --install-airflow-with-constraints --providers-skip-constraints - --skip-providers "${{ matrix.remove-providers }}" + --skip-providers "${REMOVE_PROVIDERS}" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a769a5e109bde..13886b1e2fb67 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,6 +20,7 @@ default_language_version: python: python3 node: 22.2.0 minimum_pre_commit_version: '3.2.0' +exclude: ^.*/.*_vendor/ repos: - repo: meta hooks: @@ -35,7 +36,6 @@ repos: name: Add TOC for Markdown and RST files files: ^README\.md$|^UPDATING.*\.md$|^chart/UPDATING.*\.md$|^dev/.*\.md$|^dev/.*\.rst$|^.github/.*\.md|^tests/system/README.md$ - exclude: 
^.*/.*_vendor/ args: - "--maxlevel" - "2" @@ -47,8 +47,7 @@ repos: files: \.sql$ exclude: | (?x) - ^\.github/| - ^.*/.*_vendor/ + ^\.github/ args: - --comment-style - "/*||*/" @@ -57,7 +56,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all RST files - exclude: ^\.github/.*$|^.*/.*_vendor/|newsfragments/.*\.rst$ + exclude: ^\.github/.*$|newsfragments/.*\.rst$ args: - --comment-style - "||" @@ -68,7 +67,7 @@ repos: - id: insert-license name: Add license for CSS/JS/JSX/PUML/TS/TSX files: \.(css|jsx?|puml|tsx?)$ - exclude: ^\.github/.*$|^.*/.*_vendor/|^airflow/www/static/js/types/api-generated.ts$|ui/openapi-gen/ + exclude: ^\.github/.*$|^airflow/www/static/js/types/api-generated.ts$|ui/openapi-gen/ args: - --comment-style - "/*!| *| */" @@ -78,7 +77,7 @@ repos: - id: insert-license name: Add license for all JINJA template files files: ^airflow/www/templates/.*\.html$ - exclude: ^\.github/.*$|^.*/.*_vendor/ + exclude: ^\.github/.*$ args: - --comment-style - "{#||#}" @@ -87,7 +86,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all Shell files - exclude: ^\.github/.*$|^.*/.*_vendor/|^dev/breeze/autocomplete/.*$ + exclude: ^\.github/.*$|^dev/breeze/autocomplete/.*$ files: \.bash$|\.sh$ args: - --comment-style @@ -97,7 +96,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all toml files - exclude: ^\.github/.*$|^.*/.*_vendor/|^dev/breeze/autocomplete/.*$ + exclude: ^\.github/.*$|^dev/breeze/autocomplete/.*$ files: \.toml$ args: - --comment-style @@ -107,7 +106,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all Python files - exclude: ^\.github/.*$|^.*/.*_vendor/ + exclude: ^\.github/.*$ files: \.py$|\.pyi$ args: - --comment-style @@ -117,7 +116,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all XML files - exclude: ^\.github/.*$|^.*/.*_vendor/ + exclude: ^\.github/.*$ files: \.xml$ args: - --comment-style @@ -136,7 +135,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all YAML files except Helm templates - exclude: ^\.github/.*$|^.*/.*_vendor/|^chart/templates/.*|.*/reproducible_build.yaml$|^airflow/api_fastapi/core_api/openapi/v1-generated.yaml$|^.*/pnpm-lock.yaml$ + exclude: ^\.github/.*$|^chart/templates/.*|.*/reproducible_build.yaml$|^airflow/api_fastapi/core_api/openapi/v1-generated.yaml$|^.*/pnpm-lock.yaml$ types: [yaml] files: \.ya?ml$ args: @@ -148,7 +147,7 @@ repos: - id: insert-license name: Add license for all Markdown files files: \.md$ - exclude: PROVIDER_CHANGES.*\.md$|^.*/.*_vendor/ + exclude: PROVIDER_CHANGES.*\.md$ args: - --comment-style - "" @@ -157,7 +156,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all other files - exclude: ^\.github/.*$|^.*/.*_vendor/ + exclude: ^\.github/.*$ args: - --comment-style - "|#|" @@ -198,21 +197,12 @@ repos: additional_dependencies: ['pyyaml'] pass_filenames: false require_serial: true - - id: update-build-dependencies - name: Update build-dependencies to latest (manual) - entry: ./scripts/ci/pre_commit/update_build_dependencies.py + - id: update-installers-and-pre-commit + name: Update installers and pre-commit to latest (manual) + entry: ./scripts/ci/pre_commit/update_installers_and_pre_commit.py stages: ['manual'] language: python - files: ^.pre-commit-config.yaml$|^scripts/ci/pre_commit/update_build_dependencies.py$ - pass_filenames: false - require_serial: true - 
additional_dependencies: ['rich>=12.4.4'] - - id: update-installers - name: Update installers to latest (manual) - entry: ./scripts/ci/pre_commit/update_installers.py - stages: ['manual'] - language: python - files: ^.pre-commit-config.yaml$|^scripts/ci/pre_commit/update_installers.py$ + files: ^.pre-commit-config.yaml$|^scripts/ci/pre_commit/update_installers_and_pre_commit.py$ pass_filenames: false require_serial: true additional_dependencies: ['pyyaml', 'rich>=12.4.4', 'requests'] @@ -262,28 +252,23 @@ repos: name: Detect accidentally committed debug statements - id: check-builtin-literals name: Require literal syntax when initializing builtins - exclude: ^.*/.*_vendor/ - id: detect-private-key name: Detect if private key is added to the repository exclude: ^docs/apache-airflow-providers-ssh/connections/ssh.rst$ - id: end-of-file-fixer name: Make sure that there is an empty line at the end - exclude: ^.*/.*_vendor/|^docs/apache-airflow/img/.*\.dot|^docs/apache-airflow/img/.*\.sha256 + exclude: ^docs/apache-airflow/img/.*\.dot|^docs/apache-airflow/img/.*\.sha256 - id: mixed-line-ending name: Detect if mixed line ending is used (\r vs. \r\n) - exclude: ^.*/.*_vendor/ - id: check-executables-have-shebangs name: Check that executables have shebang - exclude: ^.*/.*_vendor/ - id: check-xml name: Check XML files with xmllint - exclude: ^.*/.*_vendor/ - id: trailing-whitespace name: Remove trailing whitespace at end of line - exclude: ^.*/.*_vendor/|^docs/apache-airflow/img/.*\.dot|^dev/breeze/doc/images/output.*$ + exclude: ^docs/apache-airflow/img/.*\.dot|^dev/breeze/doc/images/output.*$ - id: fix-encoding-pragma name: Remove encoding header from Python files - exclude: ^.*/.*_vendor/ args: - --remove - id: pretty-format-json @@ -300,10 +285,8 @@ repos: hooks: - id: rst-backticks name: Check if RST files use double backticks for code - exclude: ^.*/.*_vendor/ - id: python-no-log-warn name: Check if there are no deprecate log warn - exclude: ^.*/.*_vendor/ - repo: https://github.com/adrienverge/yamllint rev: v1.35.1 hooks: @@ -311,15 +294,12 @@ repos: name: Check YAML files with yamllint entry: yamllint -c yamllint-config.yml --strict types: [yaml] - exclude: ^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^.*/.*_vendor/|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$|^.*/reproducible_build.yaml$|^.*pnpm-lock\.yaml$ + exclude: ^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$|^.*/reproducible_build.yaml$|^.*pnpm-lock\.yaml$ - repo: https://github.com/ikamensh/flynt rev: '1.0.1' hooks: - id: flynt name: Run flynt string format converter for Python - exclude: | - (?x) - ^.*/.*_vendor/ args: # If flynt detects too long text it ignores it. So we set a very large limit to make it easy # to split the text by hand. Too long lines are detected by flake8 (below), @@ -337,11 +317,20 @@ repos: The word(s) should be in lowercase." 
&& exec codespell "$@"' -- language: python types: [text] - exclude: ^.*/.*_vendor/|material-icons\.css$|^images/.*$|^RELEASE_NOTES\.txt$|^.*package-lock\.json$|^.*/kinglear\.txt$|^.*pnpm-lock\.yaml$ + exclude: material-icons\.css$|^images/.*$|^RELEASE_NOTES\.txt$|^.*package-lock\.json$|^.*/kinglear\.txt$|^.*pnpm-lock\.yaml$ args: - --ignore-words=docs/spelling_wordlist.txt - --skip=providers/src/airflow/providers/*/*.rst,airflow/www/*.log,docs/*/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md - --exclude-file=.codespellignorelines + - repo: https://github.com/woodruffw/zizmor-pre-commit + rev: v1.0.0 + hooks: + - id: zizmor + name: Run zizmor to check for github workflow syntax errors + types: [yaml] + files: \.github/workflows/.*$|\.github/actions/.*$ + require_serial: true + entry: zizmor - repo: local # Note that this is the 2nd "local" repo group in the .pre-commit-config.yaml file. This is because # we try to minimise the number of passes that must happen in order to apply some of the changes @@ -376,7 +365,7 @@ repos: args: [--fix] require_serial: true additional_dependencies: ["ruff==0.8.1"] - exclude: ^.*/.*_vendor/|^tests/dags/test_imports.py|^performance/tests/test_.*.py + exclude: ^tests/dags/test_imports.py|^performance/tests/test_.*.py - id: ruff-format name: Run 'ruff format' description: "Run 'ruff format' for extremely fast Python formatting" @@ -386,13 +375,13 @@ repos: args: [] require_serial: true additional_dependencies: ["ruff==0.8.1"] - exclude: ^.*/.*_vendor/|^tests/dags/test_imports.py$ + exclude: ^tests/dags/test_imports.py$ - id: replace-bad-characters name: Replace bad characters entry: ./scripts/ci/pre_commit/replace_bad_characters.py language: python types: [file, text] - exclude: ^.*/.*_vendor/|^clients/gen/go\.sh$|^\.gitmodules$ + exclude: ^clients/gen/go\.sh$|^\.gitmodules$ additional_dependencies: ['rich>=12.4.4'] - id: lint-openapi name: Lint OpenAPI using spectral @@ -658,7 +647,6 @@ repos: ^airflow/www/static/js/types/api-generated.ts$| ^airflow/www/templates/appbuilder/flash.html$| ^chart/values.schema.json$| - ^.*/.*_vendor/| ^dev/| ^docs/README.rst$| ^docs/apache-airflow-providers-amazon/secrets-backends/aws-ssm-parameter-store.rst$| @@ -740,7 +728,7 @@ repos: files: > (?x) ^providers/src/airflow/providers/.*\.py$ - exclude: ^.*/.*_vendor/|providers/src/airflow/providers/standard/operators/bash.py|providers/src/airflow/providers/standard/operators/python.py|providers/src/airflow/providers/standard/sensors/external_task.py + exclude: providers/src/airflow/providers/standard/operators/bash.py|providers/src/airflow/providers/standard/operators/python.py|providers/src/airflow/providers/standard/sensors/external_task.py - id: check-get-lineage-collector-providers language: python name: Check providers import hook lineage code from compat @@ -769,28 +757,24 @@ repos: description: NEW_SESSION, provide_session, and create_session should be imported from airflow.utils.session to avoid import cycles. 
entry: "from airflow\\.utils\\.db import.* (NEW_SESSION|provide_session|create_session)" files: \.py$ - exclude: ^.*/.*_vendor/ pass_filenames: true - id: check-incorrect-use-of-LoggingMixin language: pygrep name: Make sure LoggingMixin is not used alone entry: "LoggingMixin\\(\\)" files: \.py$ - exclude: ^.*/.*_vendor/ pass_filenames: true - id: check-daysago-import-from-utils language: pygrep name: days_ago imported from airflow.utils.dates entry: "(airflow\\.){0,1}utils\\.dates\\.days_ago" files: \.py$ - exclude: ^.*/.*_vendor/ pass_filenames: true - id: check-start-date-not-used-in-defaults language: pygrep name: start_date not in default_args entry: "default_args\\s*=\\s*{\\s*(\"|')start_date(\"|')|(\"|')start_date(\"|'):" files: \.*example_dags.*\.py$ - exclude: ^.*/.*_vendor/ pass_filenames: true - id: check-apache-license-rat name: Check if licenses are OK for Apache @@ -995,7 +979,6 @@ repos: language: python pass_filenames: true files: .*\.schema\.json$ - exclude: ^.*/.*_vendor/ require_serial: true additional_dependencies: ['jsonschema>=3.2.0,<5.0', 'PyYAML==6.0.2', 'requests==2.32.3'] - id: lint-json-schema @@ -1080,7 +1063,6 @@ repos: language: python pass_filenames: true files: \.py$ - exclude: ^.*/.*_vendor/ additional_dependencies: ['rich>=12.4.4'] - id: check-code-deprecations name: Check deprecations categories in decorators @@ -1088,7 +1070,6 @@ repos: language: python pass_filenames: true files: ^airflow/.*\.py$ - exclude: ^.*/.*_vendor/ additional_dependencies: ["rich>=12.4.4", "python-dateutil"] - id: lint-chart-schema name: Lint chart/values.schema.json file @@ -1160,7 +1141,6 @@ repos: language: python pass_filenames: true files: \.py$ - exclude: ^.*/.*_vendor/ additional_dependencies: ['rich>=12.4.4', 'astor'] - id: create-missing-init-py-files-tests name: Create missing init.py files in tests @@ -1321,7 +1301,6 @@ repos: files: \.py$ exclude: | (?x)^( - .*/.*_vendor/| airflow/migrations| clients/python/test_.*\.py| dev| @@ -1349,7 +1328,6 @@ repos: language: python entry: ./scripts/ci/pre_commit/mypy.py --namespace-packages files: ^providers/src/airflow/providers/.*\.py$|^providers/tests//.*\.py$ - exclude: ^.*/.*_vendor/ require_serial: true additional_dependencies: ['rich>=12.4.4'] - id: mypy-providers @@ -1383,7 +1361,6 @@ repos: language: python entry: ./scripts/ci/pre_commit/mypy.py --namespace-packages files: ^task_sdk/src/airflow/sdk/.*\.py$|^task_sdk/tests//.*\.py$ - exclude: ^.*/.*_vendor/ require_serial: true additional_dependencies: ['rich>=12.4.4'] - id: mypy-task-sdk diff --git a/Dockerfile b/Dockerfile index fe49db186479d..06cdf1600e3a1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,7 +55,7 @@ ARG PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` ARG AIRFLOW_PIP_VERSION=24.3.1 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_UV_VERSION=0.5.11 +ARG AIRFLOW_UV_VERSION=0.5.14 ARG AIRFLOW_USE_UV="false" ARG UV_HTTP_TIMEOUT="300" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" @@ -422,85 +422,6 @@ common::show_packaging_tool_version_and_location common::install_packaging_tools EOF -# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh -COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh -#!/usr/bin/env bash - -. 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_REPO:?Should be set}" -: "${AIRFLOW_BRANCH:?Should be set}" -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" - -function install_airflow_dependencies_from_branch_tip() { - echo - echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. It is used to cache dependencies${COLOR_RESET}" - echo - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - local TEMP_AIRFLOW_DIR - TEMP_AIRFLOW_DIR=$(mktemp -d) - # Install latest set of dependencies - without constraints. This is to download a "base" set of - # dependencies that we can cache and reuse when installing airflow using constraints and latest - # pyproject.toml in the next step (when we install regular airflow). - set -x - curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ - tar xz -C "${TEMP_AIRFLOW_DIR}" --strip 1 - # Make sure editable dependencies are calculated when devel-ci dependencies are installed - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" - set +x - common::install_packaging_tools - set -x - echo "${COLOR_BLUE}Uninstalling providers. Dependencies remain${COLOR_RESET}" - # Uninstall airflow and providers to keep only the dependencies. In the future when - # planned https://github.com/pypa/pip/issues/11440 is implemented in pip we might be able to use this - # flag and skip the remove step. - pip freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} || true - set +x - echo - echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow - rm -rf "${TEMP_AIRFLOW_DIR}" - set -x - # If you want to make sure dependency is removed from cache in your PR when you removed it from - # pyproject.toml - please add your dependency here as a list of strings - # for example: - # DEPENDENCIES_TO_REMOVE=("package_a" "package_b") - # Once your PR is merged, you should make a follow-up PR to remove it from this list - # and increase the AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci to make sure your cache is rebuilt. - local DEPENDENCIES_TO_REMOVE - # IMPORTANT!! 
Make sure to increase AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci when you remove a dependency from that list - DEPENDENCIES_TO_REMOVE=() - if [[ "${DEPENDENCIES_TO_REMOVE[*]}" != "" ]]; then - echo - echo "${COLOR_BLUE}Uninstalling just removed dependencies (temporary until cache refreshes)${COLOR_RESET}" - echo "${COLOR_BLUE}Dependencies to uninstall: ${DEPENDENCIES_TO_REMOVE[*]}${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall "${DEPENDENCIES_TO_REMOVE[@]}" || true - set -x - # make sure that the dependency is not needed by something else - pip check - fi -} - -common::get_colors -common::get_packaging_tool -common::get_airflow_version_specification -common::get_constraints_location -common::show_packaging_tool_version_and_location - -install_airflow_dependencies_from_branch_tip -EOF - # The content below is automatically copied from scripts/docker/common.sh COPY <<"EOF" /common.sh #!/usr/bin/env bash @@ -524,8 +445,6 @@ function common::get_packaging_tool() { ## IMPORTANT: IF YOU MODIFY THIS FUNCTION YOU SHOULD ALSO MODIFY CORRESPONDING FUNCTION IN ## `scripts/in_container/_in_container_utils.sh` - local PYTHON_BIN - PYTHON_BIN=$(which python) if [[ ${AIRFLOW_USE_UV} == "true" ]]; then echo echo "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}" @@ -533,8 +452,8 @@ function common::get_packaging_tool() { export PACKAGING_TOOL="uv" export PACKAGING_TOOL_CMD="uv pip" if [[ -z ${VIRTUAL_ENV=} ]]; then - export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" - export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" + export EXTRA_INSTALL_FLAGS="--system" + export EXTRA_UNINSTALL_FLAGS="--system" else export EXTRA_INSTALL_FLAGS="" export EXTRA_UNINSTALL_FLAGS="" @@ -900,18 +819,12 @@ function install_airflow() { # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method local installation_command_flags if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." 
]]; then - # We need _a_ file in there otherwise the editable install doesn't include anything in the .pth file - mkdir -p ./providers/src/airflow/providers/ - touch ./providers/src/airflow/providers/__init__.py - - # Similarly we need _a_ file for task_sdk too - mkdir -p ./task_sdk/src/airflow/sdk/ - echo '__version__ = "0.0.0dev0"' > ./task_sdk/src/airflow/sdk/__init__.py - - trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT - # When installing from sources - we always use `--editable` mode - installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./providers --editable ./task_sdk" + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./task_sdk" + while IFS= read -r -d '' pyproject_toml_file; do + project_folder=$(dirname ${pyproject_toml_file}) + installation_command_flags="${installation_command_flags} --editable ${project_folder}" + done < <(find "providers" -name "pyproject.toml" -print0) elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then @@ -1407,7 +1320,8 @@ ARG PYTHON_BASE_IMAGE ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \ - PIP_CACHE_DIR=/tmp/.cache/pip + PIP_CACHE_DIR=/tmp/.cache/pip \ + UV_CACHE_DIR=/tmp/.cache/uv ARG DEV_APT_DEPS="" ARG ADDITIONAL_DEV_APT_DEPS="" @@ -1473,9 +1387,6 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # By default PIP has progress bar but you can disable it. ARG PIP_PROGRESS_BAR -# By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up -# builds in case pyproject.toml changed. This is pure optimisation of CI/Breeze builds. -ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" # This is airflow version that is put in the label of the image build ARG AIRFLOW_VERSION # By default latest released version of airflow is installed (when empty) but this value can be overridden @@ -1513,7 +1424,6 @@ ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT} \ AIRFLOW_USE_UV=${AIRFLOW_USE_UV} \ - AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \ AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \ @@ -1538,8 +1448,7 @@ ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ # Copy all scripts required for installation - changing any of those should lead to # rebuilding from here -COPY --from=scripts common.sh install_packaging_tools.sh \ - install_airflow_dependencies_from_branch_tip.sh create_prod_venv.sh /scripts/docker/ +COPY --from=scripts common.sh install_packaging_tools.sh create_prod_venv.sh /scripts/docker/ # We can set this value to true in case we want to install .whl/.tar.gz packages placed in the # docker-context-files folder. 
This can be done for both additional packages you want to install @@ -1569,13 +1478,7 @@ ENV AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} # By default PIP installs everything to ~/.local and it's also treated as VIRTUALENV ENV VIRTUAL_ENV="${AIRFLOW_USER_HOME_DIR}/.local" -RUN bash /scripts/docker/install_packaging_tools.sh; \ - bash /scripts/docker/create_prod_venv.sh; \ - if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \ - ${INSTALL_PACKAGES_FROM_CONTEXT} == "false" && \ - ${UPGRADE_INVALIDATION_STRING} == "" ]]; then \ - bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ - fi +RUN bash /scripts/docker/install_packaging_tools.sh; bash /scripts/docker/create_prod_venv.sh COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO} @@ -1599,10 +1502,10 @@ COPY --from=scripts install_from_docker_context_files.sh install_airflow.sh \ # an incorrect architecture. ARG TARGETARCH # Value to be able to easily change cache id and therefore use a bare new cache -ARG PIP_CACHE_EPOCH="9" +ARG DEPENDENCY_CACHE_EPOCH="9" # hadolint ignore=SC2086, SC2010, DL3042 -RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ +RUN --mount=type=cache,id=prod-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/tmp/.cache/,uid=${AIRFLOW_UID} \ if [[ ${INSTALL_PACKAGES_FROM_CONTEXT} == "true" ]]; then \ bash /scripts/docker/install_from_docker_context_files.sh; \ fi; \ @@ -1622,7 +1525,7 @@ RUN --mount=type=cache,id=$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$P # during the build additionally to whatever has been installed so far. It is recommended that # the requirements.txt contains only dependencies with == version specification # hadolint ignore=DL3042 -RUN --mount=type=cache,id=additional-requirements-$PYTHON_BASE_IMAGE-$AIRFLOW_PIP_VERSION-$TARGETARCH-$PIP_CACHE_EPOCH,target=/tmp/.cache/pip,uid=${AIRFLOW_UID} \ +RUN --mount=type=cache,id=prod-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/tmp/.cache/,uid=${AIRFLOW_UID} \ if [[ -f /docker-context-files/requirements.txt ]]; then \ pip install -r /docker-context-files/requirements.txt; \ fi @@ -1650,7 +1553,9 @@ ARG PYTHON_BASE_IMAGE ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ # Make sure noninteractive debian install is used and language variables set DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ - LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 LD_LIBRARY_PATH=/usr/local/lib + LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 LD_LIBRARY_PATH=/usr/local/lib \ + PIP_CACHE_DIR=/tmp/.cache/pip \ + UV_CACHE_DIR=/tmp/.cache/uv ARG RUNTIME_APT_DEPS="" ARG ADDITIONAL_RUNTIME_APT_DEPS="" diff --git a/Dockerfile.ci b/Dockerfile.ci index 6d160952b4964..1d8b3944bc03e 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -363,85 +363,6 @@ common::show_packaging_tool_version_and_location common::install_packaging_tools EOF -# The content below is automatically copied from scripts/docker/install_airflow_dependencies_from_branch_tip.sh -COPY <<"EOF" /install_airflow_dependencies_from_branch_tip.sh -#!/usr/bin/env bash - -. "$( dirname "${BASH_SOURCE[0]}" )/common.sh" - -: "${AIRFLOW_REPO:?Should be set}" -: "${AIRFLOW_BRANCH:?Should be set}" -: "${INSTALL_MYSQL_CLIENT:?Should be true or false}" -: "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" - -function install_airflow_dependencies_from_branch_tip() { - echo - echo "${COLOR_BLUE}Installing airflow from ${AIRFLOW_BRANCH}. 
It is used to cache dependencies${COLOR_RESET}" - echo - if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} - fi - if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then - AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} - fi - local TEMP_AIRFLOW_DIR - TEMP_AIRFLOW_DIR=$(mktemp -d) - # Install latest set of dependencies - without constraints. This is to download a "base" set of - # dependencies that we can cache and reuse when installing airflow using constraints and latest - # pyproject.toml in the next step (when we install regular airflow). - set -x - curl -fsSL "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz" | \ - tar xz -C "${TEMP_AIRFLOW_DIR}" --strip 1 - # Make sure editable dependencies are calculated when devel-ci dependencies are installed - ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} \ - --editable "${TEMP_AIRFLOW_DIR}[${AIRFLOW_EXTRAS}]" - set +x - common::install_packaging_tools - set -x - echo "${COLOR_BLUE}Uninstalling providers. Dependencies remain${COLOR_RESET}" - # Uninstall airflow and providers to keep only the dependencies. In the future when - # planned https://github.com/pypa/pip/issues/11440 is implemented in pip we might be able to use this - # flag and skip the remove step. - pip freeze | grep apache-airflow-providers | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} || true - set +x - echo - echo "${COLOR_BLUE}Uninstalling just airflow. Dependencies remain. Now target airflow can be reinstalled using mostly cached dependencies${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} apache-airflow - rm -rf "${TEMP_AIRFLOW_DIR}" - set -x - # If you want to make sure dependency is removed from cache in your PR when you removed it from - # pyproject.toml - please add your dependency here as a list of strings - # for example: - # DEPENDENCIES_TO_REMOVE=("package_a" "package_b") - # Once your PR is merged, you should make a follow-up PR to remove it from this list - # and increase the AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci to make sure your cache is rebuilt. - local DEPENDENCIES_TO_REMOVE - # IMPORTANT!! 
Make sure to increase AIRFLOW_CI_BUILD_EPOCH in Dockerfile.ci when you remove a dependency from that list - DEPENDENCIES_TO_REMOVE=() - if [[ "${DEPENDENCIES_TO_REMOVE[*]}" != "" ]]; then - echo - echo "${COLOR_BLUE}Uninstalling just removed dependencies (temporary until cache refreshes)${COLOR_RESET}" - echo "${COLOR_BLUE}Dependencies to uninstall: ${DEPENDENCIES_TO_REMOVE[*]}${COLOR_RESET}" - echo - set +x - ${PACKAGING_TOOL_CMD} uninstall "${DEPENDENCIES_TO_REMOVE[@]}" || true - set -x - # make sure that the dependency is not needed by something else - pip check - fi -} - -common::get_colors -common::get_packaging_tool -common::get_airflow_version_specification -common::get_constraints_location -common::show_packaging_tool_version_and_location - -install_airflow_dependencies_from_branch_tip -EOF - # The content below is automatically copied from scripts/docker/common.sh COPY <<"EOF" /common.sh #!/usr/bin/env bash @@ -465,8 +386,6 @@ function common::get_packaging_tool() { ## IMPORTANT: IF YOU MODIFY THIS FUNCTION YOU SHOULD ALSO MODIFY CORRESPONDING FUNCTION IN ## `scripts/in_container/_in_container_utils.sh` - local PYTHON_BIN - PYTHON_BIN=$(which python) if [[ ${AIRFLOW_USE_UV} == "true" ]]; then echo echo "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}" @@ -474,8 +393,8 @@ function common::get_packaging_tool() { export PACKAGING_TOOL="uv" export PACKAGING_TOOL_CMD="uv pip" if [[ -z ${VIRTUAL_ENV=} ]]; then - export EXTRA_INSTALL_FLAGS="--python ${PYTHON_BIN}" - export EXTRA_UNINSTALL_FLAGS="--python ${PYTHON_BIN}" + export EXTRA_INSTALL_FLAGS="--system" + export EXTRA_UNINSTALL_FLAGS="--system" else export EXTRA_INSTALL_FLAGS="" export EXTRA_UNINSTALL_FLAGS="" @@ -670,18 +589,12 @@ function install_airflow() { # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method local installation_command_flags if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." 
]]; then - # We need _a_ file in there otherwise the editable install doesn't include anything in the .pth file - mkdir -p ./providers/src/airflow/providers/ - touch ./providers/src/airflow/providers/__init__.py - - # Similarly we need _a_ file for task_sdk too - mkdir -p ./task_sdk/src/airflow/sdk/ - echo '__version__ = "0.0.0dev0"' > ./task_sdk/src/airflow/sdk/__init__.py - - trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT - # When installing from sources - we always use `--editable` mode - installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./providers --editable ./task_sdk" + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./task_sdk" + while IFS= read -r -d '' pyproject_toml_file; do + project_folder=$(dirname ${pyproject_toml_file}) + installation_command_flags="${installation_command_flags} --editable ${project_folder}" + done < <(find "providers" -name "pyproject.toml" -print0) elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then @@ -980,9 +893,12 @@ function determine_airflow_to_use() { echo echo "${COLOR_BLUE}Uninstalling all packages first${COLOR_RESET}" echo - pip freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | xargs pip uninstall -y --root-user-action ignore + # shellcheck disable=SC2086 + ${PACKAGING_TOOL_CMD} freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | \ + xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} # Now install rich ad click first to use the installation script - uv pip install rich rich-click click --python "/usr/local/bin/python" \ + # shellcheck disable=SC2086 + ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} rich rich-click click --python "/usr/local/bin/python" \ --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt fi python "${IN_CONTAINER_DIR}/install_airflow_and_providers.py" @@ -992,7 +908,8 @@ function determine_airflow_to_use() { python "${IN_CONTAINER_DIR}/install_devel_deps.py" \ --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt # Some packages might leave legacy typing module which causes test issues - pip uninstall -y typing || true + # shellcheck disable=SC2086 + ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} typing || true if [[ ${LINK_PROVIDERS_TO_AIRFLOW_PACKAGE=} == "true" ]]; then echo echo "${COLOR_BLUE}Linking providers to airflow package as we are using them from mounted sources.${COLOR_RESET}" @@ -1202,7 +1119,10 @@ ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} \ DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER} \ INSTALL_MYSQL_CLIENT="true" \ INSTALL_MSSQL_CLIENT="true" \ - INSTALL_POSTGRES_CLIENT="true" + INSTALL_POSTGRES_CLIENT="true" \ + PIP_CACHE_DIR=/root/.cache/pip \ + UV_CACHE_DIR=/root/.cache/uv + RUN echo "Base image version: ${PYTHON_BASE_IMAGE}" @@ -1254,7 +1174,7 @@ RUN bash /scripts/docker/install_mysql.sh prod \ && chmod 0440 /etc/sudoers.d/airflow # Install Helm -ARG HELM_VERSION="v3.15.3" +ARG HELM_VERSION="v3.16.4" RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \ && PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) \ @@ -1282,12 +1202,7 @@ ARG 
DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # By changing the epoch we can force reinstalling Airflow and pip all dependencies # It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable. ARG AIRFLOW_CI_BUILD_EPOCH="10" -ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" # Setup PIP -# By default PIP install run without cache to make image smaller -ARG PIP_NO_CACHE_DIR="true" -# By default UV install run without cache to make image smaller -ARG UV_NO_CACHE="true" ARG UV_HTTP_TIMEOUT="300" # By default PIP has progress bar but you can disable it. ARG PIP_PROGRESS_BAR="on" @@ -1315,7 +1230,6 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \ DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \ - AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ @@ -1327,9 +1241,7 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ INSTALL_POSTGRES_CLIENT="true" \ AIRFLOW_INSTALLATION_METHOD="." \ AIRFLOW_VERSION_SPECIFICATION="" \ - PIP_NO_CACHE_DIR=${PIP_NO_CACHE_DIR} \ PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \ - UV_NO_CACHE=${UV_NO_CACHE} \ ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \ CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY} \ CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON} @@ -1338,25 +1250,10 @@ RUN echo "Airflow version: ${AIRFLOW_VERSION}" # Copy all scripts required for installation - changing any of those should lead to # rebuilding from here -COPY --from=scripts install_packaging_tools.sh install_airflow_dependencies_from_branch_tip.sh \ - common.sh /scripts/docker/ +COPY --from=scripts common.sh install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/ # We are first creating a venv where all python packages and .so binaries needed by those are # installed. -# In case of CI builds we want to pre-install main version of airflow dependencies so that -# We do not have to always reinstall it from the scratch. -# And is automatically reinstalled from the scratch every time patch release of python gets released -# The Airflow and providers are uninstalled, only dependencies remain. -# the cache is only used when "upgrade to newer dependencies" is not set to automatically -# account for removed dependencies (we do not install them in the first place) -# -# We are installing from branch tip without fixing UV or PIP version - in order to avoid rebuilding the -# base cache layer every time the UV or PIP version changes. 
-RUN bash /scripts/docker/install_packaging_tools.sh; \ - if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \ - bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \ - fi - # Here we fix the versions so all subsequent commands will use the versions # from the sources @@ -1365,38 +1262,39 @@ RUN bash /scripts/docker/install_packaging_tools.sh; \ # Also use `force pip` label on your PR to swap all places we use `uv` to `pip` ARG AIRFLOW_PIP_VERSION=24.3.1 # ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main" -ARG AIRFLOW_UV_VERSION=0.5.11 +ARG AIRFLOW_UV_VERSION=0.5.14 # TODO(potiuk): automate with upgrade check (possibly) ARG AIRFLOW_PRE_COMMIT_VERSION="4.0.1" ARG AIRFLOW_PRE_COMMIT_UV_VERSION="4.1.4" ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \ + # This is needed since we are using cache mounted from the host + UV_LINK_MODE=copy \ AIRFLOW_PRE_COMMIT_VERSION=${AIRFLOW_PRE_COMMIT_VERSION} # The PATH is needed for PIPX to find the tools installed ENV PATH="/root/.local/bin:${PATH}" +# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from +# an incorrect architecture. +ARG TARGETARCH +# Value to be able to easily change cache id and therefore use a bare new cache +ARG DEPENDENCY_CACHE_EPOCH="0" + # Install useful command line tools in their own virtualenv so that they do not clash with # dependencies installed in Airflow also reinstall PIP and UV to make sure they are installed # in the version specified above RUN bash /scripts/docker/install_packaging_tools.sh -# Airflow sources change frequently but dependency configuration won't change that often -# We copy pyproject.toml and other files needed to perform setup of dependencies -# So in case pyproject.toml changes we can install latest dependencies required. -COPY pyproject.toml ${AIRFLOW_SOURCES}/pyproject.toml -COPY providers/pyproject.toml ${AIRFLOW_SOURCES}/providers/pyproject.toml -COPY task_sdk/pyproject.toml ${AIRFLOW_SOURCES}/task_sdk/pyproject.toml -COPY task_sdk/README.md ${AIRFLOW_SOURCES}/task_sdk/README.md -COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/ -COPY tests_common/ ${AIRFLOW_SOURCES}/tests_common/ -COPY generated/* ${AIRFLOW_SOURCES}/generated/ -COPY constraints/* ${AIRFLOW_SOURCES}/constraints/ -COPY LICENSE ${AIRFLOW_SOURCES}/LICENSE -COPY hatch_build.py ${AIRFLOW_SOURCES}/ COPY --from=scripts install_airflow.sh /scripts/docker/ +# We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not +# copying over stuff that is accidentally generated or that we do not need (such as egg-info) +# if you want to add something that is missing and you expect to see it in the image you can +# add it with ! in .dockerignore next to the airflow, test etc. directories there +COPY . ${AIRFLOW_SOURCES}/ + # Those are additional constraints that are needed for some extras but we do not want to # force them on the main Airflow package. Currently we need no extra limits as PIP 23.1+ has much better # dependency resolution and we do not need to limit the versions of the dependencies @@ -1415,36 +1313,30 @@ ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENT # Usually we will install versions based on the dependencies in pyproject.toml and upgraded only if needed. 
# But in cron job we will install latest versions matching pyproject.toml to see if there is no breaking change # and push the constraints if everything is successful -RUN bash /scripts/docker/install_airflow.sh - -COPY --from=scripts entrypoint_ci.sh /entrypoint -COPY --from=scripts entrypoint_exec.sh /entrypoint-exec -RUN chmod a+x /entrypoint /entrypoint-exec +RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ bash /scripts/docker/install_airflow.sh COPY --from=scripts install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/ -# Additional python deps to install ARG ADDITIONAL_PYTHON_DEPS="" -RUN bash /scripts/docker/install_packaging_tools.sh; \ +ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} + +RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ \ + bash /scripts/docker/install_packaging_tools.sh; \ if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \ bash /scripts/docker/install_additional_dependencies.sh; \ fi -# Install autocomplete for airflow -RUN if command -v airflow; then \ - register-python-argcomplete airflow >> ~/.bashrc ; \ - fi - -# Install autocomplete for Kubectl -RUN echo "source /etc/bash_completion" >> ~/.bashrc +COPY --from=scripts entrypoint_ci.sh /entrypoint +COPY --from=scripts entrypoint_exec.sh /entrypoint-exec +RUN chmod a+x /entrypoint /entrypoint-exec -# We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not -# copying over stuff that is accidentally generated or that we do not need (such as egg-info) -# if you want to add something that is missing and you expect to see it in the image you can -# add it with ! in .dockerignore next to the airflow, test etc. directories there -COPY . ${AIRFLOW_SOURCES}/ +# Install autocomplete for airflow and kubectl +RUN if command -v airflow; then \ + register-python-argcomplete airflow >> ~/.bashrc ; \ + fi; \ + echo "source /etc/bash_completion" >> ~/.bashrc WORKDIR ${AIRFLOW_SOURCES} @@ -1455,7 +1347,13 @@ ARG AIRFLOW_IMAGE_DATE_CREATED ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \ GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \ BUILD_ID=${BUILD_ID} \ - COMMIT_SHA=${COMMIT_SHA} + COMMIT_SHA=${COMMIT_SHA} \ + # When we enter the image, the /root/.cache is not mounted from temporary mount cache. + # We do not want to share the cache from host to avoid all kinds of problems where cache + # is different with different platforms / python versions. We want to have a clean cache + # in the image - and in this case /root/.cache is on the same filesystem as the installed packages. + # so we can go back to the default link mode being hardlink. + UV_LINK_MODE=hardlink # Link dumb-init for backwards compatibility (so that older images also work) RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init diff --git a/NOTICE b/NOTICE index f6040a224c826..957cbef4e9947 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache Airflow -Copyright 2016-2024 The Apache Software Foundation +Copyright 2016-2025 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
diff --git a/airflow/api_fastapi/core_api/datamodels/connections.py b/airflow/api_fastapi/core_api/datamodels/connections.py index 98ac5389e5dc5..04f47841e60a3 100644 --- a/airflow/api_fastapi/core_api/datamodels/connections.py +++ b/airflow/api_fastapi/core_api/datamodels/connections.py @@ -94,3 +94,4 @@ class ConnectionBulkBody(BaseModel): """Connections Serializer for requests body.""" connections: list[ConnectionBody] + overwrite: bool | None = Field(default=False) diff --git a/airflow/api_fastapi/core_api/datamodels/pools.py b/airflow/api_fastapi/core_api/datamodels/pools.py index 807627c7fefe7..28a44fc56a265 100644 --- a/airflow/api_fastapi/core_api/datamodels/pools.py +++ b/airflow/api_fastapi/core_api/datamodels/pools.py @@ -83,3 +83,4 @@ class PoolPostBulkBody(BaseModel): """Pools serializer for post bodies.""" pools: list[PoolPostBody] + overwrite: bool | None = Field(default=False) diff --git a/airflow/api_fastapi/core_api/datamodels/variables.py b/airflow/api_fastapi/core_api/datamodels/variables.py index ab40415ac3c2b..8307809bc5f5b 100644 --- a/airflow/api_fastapi/core_api/datamodels/variables.py +++ b/airflow/api_fastapi/core_api/datamodels/variables.py @@ -33,7 +33,7 @@ class VariableResponse(BaseModel): model_config = ConfigDict(populate_by_name=True, from_attributes=True) key: str - val: str | None = Field(alias="value") + val: str = Field(alias="value") description: str | None is_encrypted: bool @@ -56,7 +56,7 @@ class VariableBody(BaseModel): """Variable serializer for bodies.""" key: str = Field(max_length=ID_LEN) - value: str | None = Field(serialization_alias="val") + value: str = Field(serialization_alias="val") description: str | None = Field(default=None) @@ -65,3 +65,11 @@ class VariableCollectionResponse(BaseModel): variables: list[VariableResponse] total_entries: int + + +class VariablesImportResponse(BaseModel): + """Import Variables serializer for responses.""" + + created_variable_keys: list[str] + import_count: int + created_count: int diff --git a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml index aee6f8bacf1bd..30169f0b5c73a 100644 --- a/airflow/api_fastapi/core_api/openapi/v1-generated.yaml +++ b/airflow/api_fastapi/core_api/openapi/v1-generated.yaml @@ -1738,12 +1738,12 @@ paths: schema: $ref: '#/components/schemas/HTTPValidationError' /public/connections/bulk: - post: + put: tags: - Connection - summary: Post Connections + summary: Put Connections description: Create connection entry. - operationId: post_connections + operationId: put_connections requestBody: content: application/json: @@ -1751,8 +1751,8 @@ paths: $ref: '#/components/schemas/ConnectionBulkBody' required: true responses: - '201': - description: Successful Response + '200': + description: Created with overwrite content: application/json: schema: @@ -1775,6 +1775,12 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPExceptionResponse' + '201': + description: Created + content: + application/json: + schema: + $ref: '#/components/schemas/ConnectionCollectionResponse' '422': description: Validation Error content: @@ -3993,12 +3999,12 @@ paths: schema: $ref: '#/components/schemas/HTTPValidationError' /public/pools/bulk: - post: + put: tags: - Pool - summary: Post Pools + summary: Put Pools description: Create multiple pools. 
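The bulk connections endpoint now answers PUT with an optional overwrite flag: when it is false or omitted the call behaves like the old POST (201 Created, 409 on duplicates), and when it is true existing conn_ids are updated in place (200). A hedged client-side sketch, assuming a locally running API at http://localhost:8080 and the requests library (both assumptions, not part of the patch); the field names follow the ConnectionBody model:

    import requests  # assumed HTTP client; any client works

    body = {
        "overwrite": True,  # update existing conn_ids instead of failing with 409
        "connections": [
            {"connection_id": "my_postgres", "conn_type": "postgres", "host": "db.example.com", "port": 5432},
        ],
    }
    resp = requests.put("http://localhost:8080/public/connections/bulk", json=body)
    print(resp.status_code)  # 200 with overwrite=True, 201 when overwrite is false/omitted
    print(resp.json()["total_entries"])
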
- operationId: post_pools + operationId: put_pools requestBody: content: application/json: @@ -4007,7 +4013,7 @@ paths: required: true responses: '201': - description: Successful Response + description: Created content: application/json: schema: @@ -4030,6 +4036,12 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPExceptionResponse' + '200': + description: Created with overwriting + content: + application/json: + schema: + $ref: '#/components/schemas/PoolCollectionResponse' '422': description: Validation Error content: @@ -4415,6 +4427,12 @@ paths: schema: $ref: '#/components/schemas/HTTPExceptionResponse' description: Not Found + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Conflict '422': description: Validation Error content: @@ -5015,6 +5033,12 @@ paths: schema: $ref: '#/components/schemas/HTTPExceptionResponse' description: Not Found + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Conflict '422': description: Validation Error content: @@ -5857,6 +5881,68 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /public/variables/import: + post: + tags: + - Variable + summary: Import Variables + description: Import variables from a JSON file. + operationId: import_variables + parameters: + - name: action_if_exists + in: query + required: false + schema: + enum: + - overwrite + - fail + - skip + type: string + default: fail + title: Action If Exists + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/Body_import_variables' + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/VariablesImportResponse' + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Conflict + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unprocessable Entity /public/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/logs/{try_number}: get: tags: @@ -6435,6 +6521,16 @@ components: - status title: BaseInfoResponse description: Base info serializer for responses. 
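The new /public/variables/import route takes a multipart JSON file plus an action_if_exists query parameter (fail, skip, or overwrite). A sketch of calling it, again assuming a local API URL and the requests library:

    import json
    import requests  # assumed HTTP client

    variables = {"s3_bucket": "my-bucket", "retries": "3"}
    resp = requests.post(
        "http://localhost:8080/public/variables/import",
        params={"action_if_exists": "skip"},  # or "overwrite" / "fail" (the default)
        files={"file": ("variables.json", json.dumps(variables), "application/json")},
    )
    resp.raise_for_status()
    print(resp.json())  # e.g. {"created_variable_keys": [...], "import_count": 2, "created_count": 2}
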
+ Body_import_variables: + properties: + file: + type: string + format: binary + title: File + type: object + required: + - file + title: Body_import_variables ClearTaskInstancesBody: properties: dry_run: @@ -6689,6 +6785,12 @@ components: $ref: '#/components/schemas/ConnectionBody' type: array title: Connections + overwrite: + anyOf: + - type: boolean + - type: 'null' + title: Overwrite + default: false type: object required: - connections @@ -8561,6 +8663,12 @@ components: $ref: '#/components/schemas/PoolPostBody' type: array title: Pools + overwrite: + anyOf: + - type: boolean + - type: 'null' + title: Overwrite + default: false type: object required: - pools @@ -9652,9 +9760,7 @@ components: maxLength: 250 title: Key value: - anyOf: - - type: string - - type: 'null' + type: string title: Value description: anyOf: @@ -9689,9 +9795,7 @@ components: type: string title: Key value: - anyOf: - - type: string - - type: 'null' + type: string title: Value description: anyOf: @@ -9709,6 +9813,26 @@ components: - is_encrypted title: VariableResponse description: Variable serializer for responses. + VariablesImportResponse: + properties: + created_variable_keys: + items: + type: string + type: array + title: Created Variable Keys + import_count: + type: integer + title: Import Count + created_count: + type: integer + title: Created Count + type: object + required: + - created_variable_keys + - import_count + - created_count + title: VariablesImportResponse + description: Import Variables serializer for responses. VersionInfo: properties: version: diff --git a/airflow/api_fastapi/core_api/routes/public/connections.py b/airflow/api_fastapi/core_api/routes/public/connections.py index 61fc76832c61d..081fe7b0dd5a7 100644 --- a/airflow/api_fastapi/core_api/routes/public/connections.py +++ b/airflow/api_fastapi/core_api/routes/public/connections.py @@ -19,7 +19,7 @@ import os from typing import Annotated, cast -from fastapi import Depends, HTTPException, Query, status +from fastapi import Depends, HTTPException, Query, Response, status from fastapi.exceptions import RequestValidationError from pydantic import ValidationError from sqlalchemy import select @@ -135,18 +135,48 @@ def post_connection( return connection -@connections_router.post( +@connections_router.put( "/bulk", - status_code=status.HTTP_201_CREATED, - responses=create_openapi_http_exception_doc([status.HTTP_409_CONFLICT]), + responses={ + **create_openapi_http_exception_doc([status.HTTP_409_CONFLICT]), + status.HTTP_201_CREATED: { + "description": "Created", + "model": ConnectionCollectionResponse, + }, + status.HTTP_200_OK: { + "description": "Created with overwrite", + "model": ConnectionCollectionResponse, + }, + }, ) -def post_connections( +def put_connections( + response: Response, post_body: ConnectionBulkBody, session: SessionDep, ) -> ConnectionCollectionResponse: """Create connection entry.""" - connections = [Connection(**body.model_dump(by_alias=True)) for body in post_body.connections] - session.add_all(connections) + response.status_code = status.HTTP_201_CREATED if not post_body.overwrite else status.HTTP_200_OK + connections: list[Connection] + if not post_body.overwrite: + connections = [Connection(**body.model_dump(by_alias=True)) for body in post_body.connections] + session.add_all(connections) + else: + connection_ids = [conn.connection_id for conn in post_body.connections] + existed_connections = session.execute( + select(Connection).filter(Connection.conn_id.in_(connection_ids)) + ).scalars() + existed_connections_dict 
= {conn.conn_id: conn for conn in existed_connections} + connections = [] + # if conn_id exists, update the corresponding connection, else add a new connection + for body in post_body.connections: + if body.connection_id in existed_connections_dict: + connection = existed_connections_dict[body.connection_id] + for key, val in body.model_dump(by_alias=True).items(): + setattr(connection, key, val) + connections.append(connection) + else: + connections.append(Connection(**body.model_dump(by_alias=True))) + session.add_all(connections) return ConnectionCollectionResponse( connections=cast(list[ConnectionResponse], connections), total_entries=len(connections), diff --git a/airflow/api_fastapi/core_api/routes/public/pools.py b/airflow/api_fastapi/core_api/routes/public/pools.py index 0638edb8daa12..4c4ff35e9bbe1 100644 --- a/airflow/api_fastapi/core_api/routes/public/pools.py +++ b/airflow/api_fastapi/core_api/routes/public/pools.py @@ -18,7 +18,7 @@ from typing import Annotated, cast -from fastapi import Depends, HTTPException, Query, status +from fastapi import Depends, HTTPException, Query, Response, status from fastapi.exceptions import RequestValidationError from pydantic import ValidationError from sqlalchemy import delete, select @@ -176,21 +176,49 @@ def post_pool( return pool -@pools_router.post( +@pools_router.put( "/bulk", status_code=status.HTTP_201_CREATED, - responses=create_openapi_http_exception_doc( - [ - status.HTTP_409_CONFLICT, # handled by global exception handler - ] - ), + responses={ + **create_openapi_http_exception_doc( + [ + status.HTTP_409_CONFLICT, # handled by global exception handler + ] + ), + status.HTTP_201_CREATED: { + "description": "Created", + "model": PoolCollectionResponse, + }, + status.HTTP_200_OK: { + "description": "Created with overwriting", + "model": PoolCollectionResponse, + }, + }, ) -def post_pools( - body: PoolPostBulkBody, +def put_pools( + response: Response, + put_body: PoolPostBulkBody, session: SessionDep, ) -> PoolCollectionResponse: """Create multiple pools.""" - pools = [Pool(**body.model_dump()) for body in body.pools] + response.status_code = status.HTTP_201_CREATED if not put_body.overwrite else status.HTTP_200_OK + pools: list[Pool] + if not put_body.overwrite: + pools = [Pool(**body.model_dump()) for body in put_body.pools] + else: + pool_names = [pool.pool for pool in put_body.pools] + existed_pools = session.execute(select(Pool).filter(Pool.pool.in_(pool_names))).scalars() + existed_pools_dict = {pool.pool: pool for pool in existed_pools} + pools = [] + # if pool already exists, update the corresponding pool, else add a new pool + for body in put_body.pools: + if body.pool in existed_pools_dict: + pool = existed_pools_dict[body.pool] + for key, val in body.model_dump().items(): + setattr(pool, key, val) + pools.append(pool) + else: + pools.append(Pool(**body.model_dump())) session.add_all(pools) return PoolCollectionResponse( pools=cast(list[PoolResponse], pools), diff --git a/airflow/api_fastapi/core_api/routes/public/task_instances.py b/airflow/api_fastapi/core_api/routes/public/task_instances.py index c1556996b1d55..9eaf191374746 100644 --- a/airflow/api_fastapi/core_api/routes/public/task_instances.py +++ b/airflow/api_fastapi/core_api/routes/public/task_instances.py @@ -629,11 +629,15 @@ def post_clear_task_instances( @task_instances_router.patch( task_instances_prefix + "/{task_id}", - responses=create_openapi_http_exception_doc([status.HTTP_404_NOT_FOUND, status.HTTP_400_BAD_REQUEST]), + 
responses=create_openapi_http_exception_doc( + [status.HTTP_404_NOT_FOUND, status.HTTP_400_BAD_REQUEST, status.HTTP_409_CONFLICT], + ), ) @task_instances_router.patch( task_instances_prefix + "/{task_id}/{map_index}", - responses=create_openapi_http_exception_doc([status.HTTP_404_NOT_FOUND, status.HTTP_400_BAD_REQUEST]), + responses=create_openapi_http_exception_doc( + [status.HTTP_404_NOT_FOUND, status.HTTP_400_BAD_REQUEST, status.HTTP_409_CONFLICT], + ), ) def patch_task_instance( dag_id: str, @@ -702,8 +706,10 @@ def patch_task_instance( commit=True, session=session, ) - if not ti: - raise HTTPException(status.HTTP_404_NOT_FOUND, err_msg_404) + if not tis: + raise HTTPException( + status.HTTP_409_CONFLICT, f"Task id {task_id} is already in {data['new_state']} state" + ) ti = tis[0] if isinstance(tis, list) else tis elif key == "note": if update_mask or body.note is not None: diff --git a/airflow/api_fastapi/core_api/routes/public/variables.py b/airflow/api_fastapi/core_api/routes/public/variables.py index ccc8ee7dc2265..0f02dc14e03a4 100644 --- a/airflow/api_fastapi/core_api/routes/public/variables.py +++ b/airflow/api_fastapi/core_api/routes/public/variables.py @@ -16,9 +16,10 @@ # under the License. from __future__ import annotations -from typing import Annotated +import json +from typing import Annotated, Literal -from fastapi import Depends, HTTPException, Query, status +from fastapi import Depends, HTTPException, Query, UploadFile, status from fastapi.exceptions import RequestValidationError from pydantic import ValidationError from sqlalchemy import select @@ -35,6 +36,7 @@ VariableBody, VariableCollectionResponse, VariableResponse, + VariablesImportResponse, ) from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc from airflow.models.variable import Variable @@ -180,3 +182,56 @@ def post_variable( variable = session.scalar(select(Variable).where(Variable.key == post_body.key).limit(1)) return variable + + +@variables_router.post( + "/import", + status_code=status.HTTP_200_OK, + responses=create_openapi_http_exception_doc( + [status.HTTP_400_BAD_REQUEST, status.HTTP_409_CONFLICT, status.HTTP_422_UNPROCESSABLE_ENTITY] + ), +) +def import_variables( + file: UploadFile, + session: SessionDep, + action_if_exists: Literal["overwrite", "fail", "skip"] = "fail", +) -> VariablesImportResponse: + """Import variables from a JSON file.""" + try: + file_content = file.file.read().decode("utf-8") + variables = json.loads(file_content) + + if not isinstance(variables, dict): + raise ValueError("Uploaded JSON must contain key-value pairs.") + except (json.JSONDecodeError, ValueError) as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid JSON format: {e}") + + if not variables: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="No variables found in the provided JSON.", + ) + + existing_keys = {variable for variable in session.execute(select(Variable.key)).scalars()} + import_keys = set(variables.keys()) + + matched_keys = existing_keys & import_keys + + if action_if_exists == "fail" and matched_keys: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"The variables with these keys: {matched_keys} already exists.", + ) + elif action_if_exists == "skip": + create_keys = import_keys - matched_keys + else: + create_keys = import_keys + + for key in create_keys: + Variable.set(key=key, value=variables[key], session=session) + + return VariablesImportResponse( + 
created_count=len(create_keys), + import_count=len(import_keys), + created_variable_keys=list(create_keys), + ) diff --git a/airflow/api_fastapi/execution_api/routes/task_instances.py b/airflow/api_fastapi/execution_api/routes/task_instances.py index 016f5222c79d8..4956466ca707a 100644 --- a/airflow/api_fastapi/execution_api/routes/task_instances.py +++ b/airflow/api_fastapi/execution_api/routes/task_instances.py @@ -44,7 +44,7 @@ from airflow.models.taskreschedule import TaskReschedule from airflow.models.trigger import Trigger from airflow.utils import timezone -from airflow.utils.state import State +from airflow.utils.state import State, TerminalTIState # TODO: Add dependency on JWT token router = AirflowRouter() @@ -185,9 +185,13 @@ def ti_update_state( # We only use UUID above for validation purposes ti_id_str = str(task_instance_id) - old = select(TI.state).where(TI.id == ti_id_str).with_for_update() + old = select(TI.state, TI.try_number, TI.max_tries).where(TI.id == ti_id_str).with_for_update() try: - (previous_state,) = session.execute(old).one() + ( + previous_state, + try_number, + max_tries, + ) = session.execute(old).one() except NoResultFound: log.error("Task Instance %s not found", ti_id_str) raise HTTPException( @@ -205,11 +209,17 @@ def ti_update_state( if isinstance(ti_patch_payload, TITerminalStatePayload): query = TI.duration_expression_update(ti_patch_payload.end_date, query, session.bind) - query = query.values(state=ti_patch_payload.state) - if ti_patch_payload.state == State.FAILED: - # clear the next_method and next_kwargs - query = query.values(next_method=None, next_kwargs=None) + updated_state = ti_patch_payload.state + # if we get failed, we should attempt to retry, as it is a more + # normal state. Tasks with retries are more frequent than without retries. 
+ if ti_patch_payload.state == TerminalTIState.FAIL_WITHOUT_RETRY: updated_state = State.FAILED + elif ti_patch_payload.state == State.FAILED: + if _is_eligible_to_retry(previous_state, try_number, max_tries): + updated_state = State.UP_FOR_RETRY + else: + updated_state = State.FAILED + query = query.values(state=updated_state) elif isinstance(ti_patch_payload, TIDeferredStatePayload): # Calculate timeout if it was passed timeout = None @@ -359,3 +369,15 @@ def ti_put_rtif( _update_rtif(task_instance, put_rtif_payload, session) return {"message": "Rendered task instance fields successfully set"} + + +def _is_eligible_to_retry(state: str, try_number: int, max_tries: int) -> bool: + """Is task instance is eligible for retry.""" + if state == State.RESTARTING: + # If a task is cleared when running, it goes into RESTARTING state and is always + # eligible for retry + return True + + # max_tries is initialised with the retries defined at task level, we do not need to explicitly ask for + # retries from the task SDK now, we can handle using max_tries + return max_tries != 0 and try_number <= max_tries diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index 93f6aa7b5103a..cb847d30db03c 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -588,14 +588,6 @@ def string_lower_type(val): type=int, default=60, ) -ARG_DB_RESERIALIZE_DAGS = Arg( - ("--no-reserialize-dags",), - # Not intended for user, so dont show in help - help=argparse.SUPPRESS, - action="store_false", - default=True, - dest="reserialize_dags", -) ARG_DB_VERSION__UPGRADE = Arg( ("-n", "--to-version"), help=( @@ -1473,7 +1465,6 @@ class GroupCommand(NamedTuple): ARG_DB_SQL_ONLY, ARG_DB_FROM_REVISION, ARG_DB_FROM_VERSION, - ARG_DB_RESERIALIZE_DAGS, ARG_VERBOSE, ), ), diff --git a/airflow/cli/commands/local_commands/db_command.py b/airflow/cli/commands/local_commands/db_command.py index d6a5f8c260725..5a4f70cc60472 100644 --- a/airflow/cli/commands/local_commands/db_command.py +++ b/airflow/cli/commands/local_commands/db_command.py @@ -75,7 +75,7 @@ def _get_version_revision( return _get_version_revision(new_version, recursion_limit) -def run_db_migrate_command(args, command, revision_heads_map: dict[str, str], reserialize_dags: bool = True): +def run_db_migrate_command(args, command, revision_heads_map: dict[str, str]): """ Run the db migrate command. 
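The terminal-state handling above folds retry eligibility into the state transition: an explicit FAIL_WITHOUT_RETRY always becomes FAILED, while a plain FAILED becomes UP_FOR_RETRY when the task is restarting or retries remain. A pure-Python sketch of that decision, with string placeholders standing in for the State/TerminalTIState enums (the function name is illustrative):

    def resolve_terminal_state(requested: str, previous: str, try_number: int, max_tries: int) -> str:
        # Mirrors ti_update_state: only a plain "failed" needs the retry check.
        if requested == "fail_without_retry":
            return "failed"
        if requested == "failed":
            # Restarting tasks are always retried; otherwise retries must remain.
            eligible = previous == "restarting" or (max_tries != 0 and try_number <= max_tries)
            return "up_for_retry" if eligible else "failed"
        return requested

    assert resolve_terminal_state("failed", "running", try_number=1, max_tries=3) == "up_for_retry"
    assert resolve_terminal_state("failed", "running", try_number=4, max_tries=3) == "failed"
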
@@ -122,19 +122,11 @@ def run_db_migrate_command(args, command, revision_heads_map: dict[str, str], re print(f"Performing upgrade to the metadata database {settings.engine.url!r}") else: print("Generating sql for upgrade -- upgrade commands will *not* be submitted.") - if reserialize_dags: - command( - to_revision=to_revision, - from_revision=from_revision, - show_sql_only=args.show_sql_only, - reserialize_dags=True, - ) - else: - command( - to_revision=to_revision, - from_revision=from_revision, - show_sql_only=args.show_sql_only, - ) + command( + to_revision=to_revision, + from_revision=from_revision, + show_sql_only=args.show_sql_only, + ) if not args.show_sql_only: print("Database migrating done!") @@ -202,7 +194,7 @@ def migratedb(args): raise SystemExit(f"Invalid version {args.from_version!r} supplied as `--from-version`.") if parsed_version < parse_version("2.0.0"): raise SystemExit("--from-version must be greater or equal to 2.0.0") - run_db_migrate_command(args, db.upgradedb, _REVISION_HEADS_MAP, reserialize_dags=True) + run_db_migrate_command(args, db.upgradedb, _REVISION_HEADS_MAP) @cli_utils.action_cli(check_db=False) diff --git a/airflow/cli/commands/remote_commands/config_command.py b/airflow/cli/commands/remote_commands/config_command.py index 17e7b62321761..35b3e5d46bd3f 100644 --- a/airflow/cli/commands/remote_commands/config_command.py +++ b/airflow/cli/commands/remote_commands/config_command.py @@ -110,6 +110,7 @@ def message(self) -> str: CONFIGS_CHANGES = [ + # admin ConfigChange( config=ConfigParameter("admin", "hide_sensitive_variable_fields"), renamed_to=ConfigParameter("core", "hide_sensitive_var_conn_fields"), @@ -118,6 +119,7 @@ def message(self) -> str: config=ConfigParameter("admin", "sensitive_variable_fields"), renamed_to=ConfigParameter("core", "sensitive_var_conn_names"), ), + # core ConfigChange( config=ConfigParameter("core", "check_slas"), suggestion="The SLA feature is removed in Airflow 3.0, to be replaced with Airflow Alerts in " @@ -188,6 +190,9 @@ def message(self) -> str: config=ConfigParameter("core", "max_db_retries"), renamed_to=ConfigParameter("database", "max_db_retries"), ), + ConfigChange(config=ConfigParameter("core", "task_runner")), + ConfigChange(config=ConfigParameter("core", "enable_xcom_pickling")), + # api ConfigChange( config=ConfigParameter("api", "access_control_allow_origin"), renamed_to=ConfigParameter("api", "access_control_allow_origins"), @@ -196,11 +201,13 @@ def message(self) -> str: config=ConfigParameter("api", "auth_backend"), renamed_to=ConfigParameter("api", "auth_backends"), ), + # logging ConfigChange( config=ConfigParameter("logging", "enable_task_context_logger"), suggestion="Remove TaskContextLogger: Replaced by the Log table for better handling of task log " "messages outside the execution context.", ), + # metrics ConfigChange( config=ConfigParameter("metrics", "metrics_use_pattern_match"), ), @@ -218,12 +225,15 @@ def message(self) -> str: config=ConfigParameter("metrics", "statsd_block_list"), renamed_to=ConfigParameter("metrics", "metrics_block_list"), ), + # traces ConfigChange( config=ConfigParameter("traces", "otel_task_log_event"), ), + # operators ConfigChange( config=ConfigParameter("operators", "allow_illegal_arguments"), ), + # webserver ConfigChange( config=ConfigParameter("webserver", "allow_raw_html_descriptions"), ), @@ -247,10 +257,12 @@ def message(self) -> str: config=ConfigParameter("webserver", "force_log_out_after"), renamed_to=ConfigParameter("webserver", "session_lifetime_minutes"), ), + # 
policy ConfigChange( config=ConfigParameter("policy", "airflow_local_settings"), renamed_to=ConfigParameter("policy", "task_policy"), ), + # scheduler ConfigChange( config=ConfigParameter("scheduler", "dependency_detector"), ), @@ -305,6 +317,7 @@ def message(self) -> str: config=ConfigParameter("scheduler", "statsd_custom_client_path"), renamed_to=ConfigParameter("metrics", "statsd_custom_client_path"), ), + # celery ConfigChange( config=ConfigParameter("celery", "stalled_task_timeout"), renamed_to=ConfigParameter("scheduler", "task_queued_timeout"), @@ -317,6 +330,7 @@ def message(self) -> str: config=ConfigParameter("celery", "task_adoption_timeout"), renamed_to=ConfigParameter("scheduler", "task_queued_timeout"), ), + # kubernetes_executor ConfigChange( config=ConfigParameter("kubernetes_executor", "worker_pods_pending_timeout"), renamed_to=ConfigParameter("scheduler", "task_queued_timeout"), @@ -325,6 +339,7 @@ def message(self) -> str: config=ConfigParameter("kubernetes_executor", "worker_pods_pending_timeout_check_interval"), renamed_to=ConfigParameter("scheduler", "task_queued_timeout_check_interval"), ), + # smtp ConfigChange( config=ConfigParameter("smtp", "smtp_user"), suggestion="Please use the SMTP connection (`smtp_default`).", diff --git a/airflow/cli/commands/remote_commands/dag_command.py b/airflow/cli/commands/remote_commands/dag_command.py index 669a075a6db2c..a405e8fba2214 100644 --- a/airflow/cli/commands/remote_commands/dag_command.py +++ b/airflow/cli/commands/remote_commands/dag_command.py @@ -537,5 +537,7 @@ def dag_test(args, dag: DAG | None = None, session: Session = NEW_SESSION) -> No @provide_session def dag_reserialize(args, session: Session = NEW_SESSION) -> None: """Serialize a DAG instance.""" - dagbag = DagBag(process_subdir(args.subdir)) - dagbag.sync_to_db(session=session) + # TODO: AIP-66 bundle centric reserialize + raise NotImplementedError( + "AIP-66: This command is not implemented yet - use `dag-processor --num-runs 1` in the meantime." + ) diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 1a2d664955ddd..5890e43de6c87 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -2676,30 +2676,38 @@ dag_bundles: description: | Configuration for the DAG bundles. This allows Airflow to load DAGs from different sources. - Airflow will consume all options added to this section. Below you will see only the default, - ``dags_folder``. The option name is the bundle name and the value is a json object with the following - keys: - - * classpath: The classpath of the bundle class - * kwargs: The keyword arguments to pass to the bundle class - * refresh_interval: The interval in seconds to refresh the bundle from its source. + options: + backends: + description: | + List of backend configs. Must supply name, classpath, and kwargs for each backend. - For example, to add a new bundle named ``hello`` to my Airflow instance, add the following to your - airflow.cfg (this is just an example, the classpath and kwargs are not real): + By default, ``refresh_interval`` is set to ``[scheduler] dag_dir_list_interval``, but that can + also be overridden in kwargs if desired. - .. code-block:: ini + The default is the dags folder dag bundle. - [dag_bundles] - hello: {classpath: "airflow.some.classpath", kwargs: {"hello": "world"}, refresh_interval: 60} - options: - dags_folder: - description: | - This is the default DAG bundle that loads DAGs from the traditional ``[core] dags_folder``. 
- By default, ``refresh_interval`` is set to ``[scheduler] dag_dir_list_interval``, but that can be - overridden here if desired. - Parsing DAGs from the DAG folder can be disabled by setting this option to an empty string. - version_added: ~ + Note: As shown below, you can split your json config over multiple lines by indenting. + See configparser documentation for an example: + https://docs.python.org/3/library/configparser.html#supported-ini-file-structure. + version_added: 3.0.0 type: string - example: ~ - default: '{{"classpath": "airflow.dag_processing.bundles.dagfolder.DagsFolderDagBundle", - "kwargs": {{}}}}' + example: > + [ + { + "name": "my-git-repo", + "classpath": "airflow.dag_processing.bundles.git.GitDagBundle", + "kwargs": { + "subdir": "dags", + "repo_url": "git@github.com:example.com/my-dags.git", + "tracking_ref": "main", + "refresh_interval": 0 + } + ] + default: > + [ + {{ + "name": "dags-folder", + "classpath": "airflow.dag_processing.bundles.dagfolder.DagsFolderDagBundle", + "kwargs": {{}} + }} + ] diff --git a/airflow/configuration.py b/airflow/configuration.py index f5b2f8f7d5328..1ea8948483175 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -515,6 +515,8 @@ def _write_option_header( if example is not None and include_examples: if extra_spacing: file.write("#\n") + example_lines = example.splitlines() + example = "\n# ".join(example_lines) file.write(f"# Example: {option} = {example}\n") needs_separation = True if include_sources and sources_dict: @@ -553,6 +555,8 @@ def _write_value( file.write(f"# {option} = \n") else: if comment_out_everything: + value_lines = value.splitlines() + value = "\n# ".join(value_lines) file.write(f"# {option} = {value}\n") else: file.write(f"{option} = {value}\n") diff --git a/airflow/dag_processing/bundles/manager.py b/airflow/dag_processing/bundles/manager.py index 4f8b59b956e18..2eaba73148571 100644 --- a/airflow/dag_processing/bundles/manager.py +++ b/airflow/dag_processing/bundles/manager.py @@ -26,6 +26,8 @@ from airflow.utils.session import NEW_SESSION, provide_session if TYPE_CHECKING: + from collections.abc import Iterable + from sqlalchemy.orm import Session from airflow.dag_processing.bundles.base import BaseDagBundle @@ -34,52 +36,57 @@ class DagBundlesManager(LoggingMixin): """Manager for DAG bundles.""" - @property - def bundle_configs(self) -> dict[str, dict]: - """Get all DAG bundle configurations.""" - configured_bundles = conf.getsection("dag_bundles") + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._bundle_config = {} + self.parse_config() - if not configured_bundles: - return {} + def parse_config(self) -> None: + """ + Get all DAG bundle configurations and store in instance variable. - # If dags_folder is empty string, we remove it. This allows the default dags_folder bundle to be disabled. - if not configured_bundles["dags_folder"]: - del configured_bundles["dags_folder"] + If a bundle class for a given name has already been imported, it will not be imported again. 
- dict_bundles: dict[str, dict] = {} - for key in configured_bundles.keys(): - config = conf.getjson("dag_bundles", key) - if not isinstance(config, dict): - raise AirflowConfigException(f"Bundle config for {key} is not a dict: {config}") - dict_bundles[key] = config + todo (AIP-66): proper validation of the bundle configuration so we have better error messages - return dict_bundles + :meta private: + """ + if self._bundle_config: + return + + backends = conf.getjson("dag_bundles", "backends") + + if not backends: + return + + if not isinstance(backends, list): + raise AirflowConfigException( + "Bundle config is not a list. Check config value" + " for section `dag_bundles` and key `backends`." + ) + seen = set() + for cfg in backends: + name = cfg["name"] + if name in seen: + raise ValueError(f"Dag bundle {name} is configured twice.") + seen.add(name) + class_ = import_string(cfg["classpath"]) + kwargs = cfg["kwargs"] + self._bundle_config[name] = (class_, kwargs) @provide_session def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: - known_bundles = {b.name: b for b in session.query(DagBundleModel).all()} - - for name in self.bundle_configs.keys(): - if bundle := known_bundles.get(name): + stored = {b.name: b for b in session.query(DagBundleModel).all()} + for name in self._bundle_config.keys(): + if bundle := stored.pop(name, None): bundle.active = True else: session.add(DagBundleModel(name=name)) self.log.info("Added new DAG bundle %s to the database", name) - for name, bundle in known_bundles.items(): - if name not in self.bundle_configs: - bundle.active = False - self.log.warning("DAG bundle %s is no longer found in config and has been disabled", name) - - def get_all_dag_bundles(self) -> list[BaseDagBundle]: - """ - Get all DAG bundles. - - :param session: A database session. - - :return: list of DAG bundles. - """ - return [self.get_bundle(name, version=None) for name in self.bundle_configs.keys()] + for name, bundle in stored.items(): + bundle.active = False + self.log.warning("DAG bundle %s is no longer found in config and has been disabled", name) def get_bundle(self, name: str, version: str | None = None) -> BaseDagBundle: """ @@ -90,7 +97,17 @@ def get_bundle(self, name: str, version: str | None = None) -> BaseDagBundle: :return: The DAG bundle. """ - # TODO: proper validation of the bundle configuration so we have better error messages - bundle_config = self.bundle_configs[name] - bundle_class = import_string(bundle_config["classpath"]) - return bundle_class(name=name, version=version, **bundle_config["kwargs"]) + cfg_tuple = self._bundle_config.get(name) + if not cfg_tuple: + raise ValueError(f"Requested bundle '{name}' is not configured.") + class_, kwargs = cfg_tuple + return class_(name=name, version=version, **kwargs) + + def get_all_dag_bundles(self) -> Iterable[BaseDagBundle]: + """ + Get all DAG bundles. + + :return: list of DAG bundles. 
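DagBundlesManager now reads a single [dag_bundles] backends option holding a JSON list instead of one option per bundle. A minimal sketch of the parsing and duplicate-name validation it performs, using a hard-coded JSON string instead of conf.getjson and skipping the import_string step (both simplifications):

    import json

    backends_json = """
    [
      {"name": "dags-folder",
       "classpath": "airflow.dag_processing.bundles.dagfolder.DagsFolderDagBundle",
       "kwargs": {}}
    ]
    """

    def parse_backends(raw: str) -> dict[str, tuple[str, dict]]:
        backends = json.loads(raw)
        if not isinstance(backends, list):
            raise ValueError("Bundle config is not a list.")
        config: dict[str, tuple[str, dict]] = {}
        for cfg in backends:
            name = cfg["name"]
            if name in config:
                raise ValueError(f"Dag bundle {name} is configured twice.")
            # The real manager resolves the classpath with import_string here.
            config[name] = (cfg["classpath"], cfg["kwargs"])
        return config

    print(parse_backends(backends_json))
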
+ """ + for name, (class_, kwargs) in self._bundle_config.items(): + yield class_(name=name, version=None, **kwargs) diff --git a/airflow/decorators/base.py b/airflow/decorators/base.py index 9b303e1a703e7..64081ef4070b0 100644 --- a/airflow/decorators/base.py +++ b/airflow/decorators/base.py @@ -578,7 +578,7 @@ def __attrs_post_init__(self): XComArg.apply_upstream_relationship(self, self.op_kwargs_expand_input.value) def _expand_mapped_kwargs( - self, context: Context, session: Session, *, include_xcom: bool + self, context: Mapping[str, Any], session: Session, *, include_xcom: bool ) -> tuple[Mapping[str, Any], set[int]]: # We only use op_kwargs_expand_input so this must always be empty. if self.expand_input is not EXPAND_INPUT_EMPTY: diff --git a/airflow/decorators/task_group.py b/airflow/decorators/task_group.py index 591ba39018e1d..2fabd29157ddc 100644 --- a/airflow/decorators/task_group.py +++ b/airflow/decorators/task_group.py @@ -189,6 +189,7 @@ def task_group( ui_color: str = "CornflowerBlue", ui_fgcolor: str = "#000", add_suffix_on_collision: bool = False, + group_display_name: str = "", ) -> Callable[[Callable[FParams, FReturn]], _TaskGroupFactory[FParams, FReturn]]: ... diff --git a/airflow/macros/__init__.py b/airflow/macros/__init__.py index be4554818acf2..26b08c8a6b383 100644 --- a/airflow/macros/__init__.py +++ b/airflow/macros/__init__.py @@ -17,11 +17,7 @@ # under the License. from __future__ import annotations -import json # noqa: F401 -import time # noqa: F401 -import uuid # noqa: F401 -from datetime import datetime, timedelta -from random import random # noqa: F401 +from datetime import datetime from typing import TYPE_CHECKING, Any import dateutil # noqa: F401 @@ -29,47 +25,12 @@ from babel.dates import LC_TIME, format_datetime import airflow.utils.yaml as yaml # noqa: F401 +from airflow.sdk.definitions.macros import ds_add, ds_format, json, time, uuid # noqa: F401 if TYPE_CHECKING: from pendulum import DateTime -def ds_add(ds: str, days: int) -> str: - """ - Add or subtract days from a YYYY-MM-DD. - - :param ds: anchor date in ``YYYY-MM-DD`` format to add to - :param days: number of days to add to the ds, you can use negative values - - >>> ds_add("2015-01-01", 5) - '2015-01-06' - >>> ds_add("2015-01-06", -5) - '2015-01-01' - """ - if not days: - return str(ds) - dt = datetime.strptime(str(ds), "%Y-%m-%d") + timedelta(days=days) - return dt.strftime("%Y-%m-%d") - - -def ds_format(ds: str, input_format: str, output_format: str) -> str: - """ - Output datetime string in a given format. - - :param ds: Input string which contains a date. - :param input_format: Input string format (e.g., '%Y-%m-%d'). - :param output_format: Output string format (e.g., '%Y-%m-%d'). - - >>> ds_format("2015-01-01", "%Y-%m-%d", "%m-%d-%y") - '01-01-15' - >>> ds_format("1/5/2015", "%m/%d/%Y", "%Y-%m-%d") - '2015-01-05' - >>> ds_format("12/07/2024", "%d/%m/%Y", "%A %d %B %Y", "en_US") - 'Friday 12 July 2024' - """ - return datetime.strptime(str(ds), input_format).strftime(output_format) - - def ds_format_locale( ds: str, input_format: str, output_format: str, locale: Locale | str | None = None ) -> str: @@ -99,6 +60,7 @@ def ds_format_locale( ) +# TODO: Task SDK: Move this to the Task SDK once we evaluate "pendulum"'s dependency def datetime_diff_for_humans(dt: Any, since: DateTime | None = None) -> str: """ Return a human-readable/approximate difference between datetimes. 
diff --git a/airflow/models/abstractoperator.py b/airflow/models/abstractoperator.py index aa23bf33e131a..f87b6e06b1c07 100644 --- a/airflow/models/abstractoperator.py +++ b/airflow/models/abstractoperator.py @@ -19,7 +19,7 @@ import datetime import inspect -from collections.abc import Iterable, Iterator, Sequence +from collections.abc import Iterable, Iterator, Mapping, Sequence from functools import cached_property from typing import TYPE_CHECKING, Any, Callable @@ -30,10 +30,9 @@ from airflow.exceptions import AirflowException from airflow.models.expandinput import NotFullyPopulated from airflow.sdk.definitions.abstractoperator import AbstractOperator as TaskSDKAbstractOperator -from airflow.template.templater import Templater from airflow.utils.context import Context from airflow.utils.db import exists_query -from airflow.utils.log.secrets_masker import redact +from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.setup_teardown import SetupTeardownContext from airflow.utils.sqlalchemy import with_row_locks from airflow.utils.state import State, TaskInstanceState @@ -42,8 +41,6 @@ from airflow.utils.weight_rule import WeightRule, db_safe_priority if TYPE_CHECKING: - from collections.abc import Mapping - import jinja2 # Slow import. from sqlalchemy.orm import Session @@ -52,7 +49,6 @@ from airflow.models.mappedoperator import MappedOperator from airflow.models.taskinstance import TaskInstance from airflow.sdk.definitions.baseoperator import BaseOperator - from airflow.sdk.definitions.dag import DAG from airflow.sdk.definitions.node import DAGNode from airflow.task.priority_strategy import PriorityWeightStrategy from airflow.triggers.base import StartTriggerArgs @@ -88,7 +84,7 @@ class NotMapped(Exception): """Raise if a task is neither mapped nor has any parent mapped groups.""" -class AbstractOperator(Templater, TaskSDKAbstractOperator): +class AbstractOperator(LoggingMixin, TaskSDKAbstractOperator): """ Common implementation for operators, including unmapped and mapped. @@ -128,72 +124,6 @@ def on_failure_fail_dagrun(self, value): ) self._on_failure_fail_dagrun = value - def get_template_env(self, dag: DAG | None = None) -> jinja2.Environment: - """Get the template environment for rendering templates.""" - if dag is None: - dag = self.get_dag() - return super().get_template_env(dag=dag) - - def _render(self, template, context, dag: DAG | None = None): - if dag is None: - dag = self.get_dag() - return super()._render(template, context, dag=dag) - - def _do_render_template_fields( - self, - parent: Any, - template_fields: Iterable[str], - context: Mapping[str, Any], - jinja_env: jinja2.Environment, - seen_oids: set[int], - ) -> None: - """Override the base to use custom error logging.""" - for attr_name in template_fields: - try: - value = getattr(parent, attr_name) - except AttributeError: - raise AttributeError( - f"{attr_name!r} is configured as a template field " - f"but {parent.task_type} does not have this attribute." 
- ) - try: - if not value: - continue - except Exception: - # This may happen if the templated field points to a class which does not support `__bool__`, - # such as Pandas DataFrames: - # https://github.com/pandas-dev/pandas/blob/9135c3aaf12d26f857fcc787a5b64d521c51e379/pandas/core/generic.py#L1465 - self.log.info( - "Unable to check if the value of type '%s' is False for task '%s', field '%s'.", - type(value).__name__, - self.task_id, - attr_name, - ) - # We may still want to render custom classes which do not support __bool__ - pass - - try: - if callable(value): - rendered_content = value(context=context, jinja_env=jinja_env) - else: - rendered_content = self.render_template( - value, - context, - jinja_env, - seen_oids, - ) - except Exception: - value_masked = redact(name=attr_name, value=value) - self.log.exception( - "Exception rendering Jinja template for task '%s', field '%s'. Template: %r", - self.task_id, - attr_name, - value_masked, - ) - raise - else: - setattr(parent, attr_name, rendered_content) - def _iter_all_mapped_downstreams(self) -> Iterator[MappedOperator | MappedTaskGroup]: """ Return mapped nodes that are direct dependencies of the current task. @@ -582,7 +512,7 @@ def expand_mapped_task(self, run_id: str, *, session: Session) -> tuple[Sequence def render_template_fields( self, - context: Context, + context: Mapping[str, Any], jinja_env: jinja2.Environment | None = None, ) -> None: """ diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 08839cc0bf720..f28e05584fdf8 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -100,7 +100,6 @@ if TYPE_CHECKING: from types import ClassMethodDescriptorType - import jinja2 # Slow import. from sqlalchemy.orm import Session from airflow.models.abstractoperator import TaskStateChangeCallback @@ -738,23 +737,6 @@ def post_execute(self, context: Any, result: Any = None): logger=self.log, ).run(context, result) - def render_template_fields( - self, - context: Context, - jinja_env: jinja2.Environment | None = None, - ) -> None: - """ - Template all attributes listed in *self.template_fields*. - - This mutates the attributes in-place and is irreversible. - - :param context: Context dict with values to apply on content. - :param jinja_env: Jinja's environment to use for rendering. 
- """ - if not jinja_env: - jinja_env = self.get_template_env() - self._do_render_template_fields(self, self.template_fields, context, jinja_env, set()) - @provide_session def clear( self, diff --git a/airflow/models/expandinput.py b/airflow/models/expandinput.py index 8d86ec193eb4d..bf3c6e9505600 100644 --- a/airflow/models/expandinput.py +++ b/airflow/models/expandinput.py @@ -25,7 +25,7 @@ import attr -from airflow.utils.mixins import ResolveMixin +from airflow.sdk.definitions.mixins import ResolveMixin from airflow.utils.session import NEW_SESSION, provide_session if TYPE_CHECKING: @@ -35,7 +35,6 @@ from airflow.models.xcom_arg import XComArg from airflow.serialization.serialized_objects import _ExpandInputRef from airflow.typing_compat import TypeGuard - from airflow.utils.context import Context ExpandInput = Union["DictOfListsExpandInput", "ListOfDictsExpandInput"] @@ -69,7 +68,9 @@ def iter_references(self) -> Iterable[tuple[Operator, str]]: yield from self._input.iter_references() @provide_session - def resolve(self, context: Context, *, include_xcom: bool = True, session: Session = NEW_SESSION) -> Any: + def resolve( + self, context: Mapping[str, Any], *, include_xcom: bool = True, session: Session = NEW_SESSION + ) -> Any: data, _ = self._input.resolve(context, session=session, include_xcom=include_xcom) return data[self._key] @@ -166,7 +167,7 @@ def get_total_map_length(self, run_id: str, *, session: Session) -> int: return functools.reduce(operator.mul, (lengths[name] for name in self.value), 1) def _expand_mapped_field( - self, key: str, value: Any, context: Context, *, session: Session, include_xcom: bool + self, key: str, value: Any, context: Mapping[str, Any], *, session: Session, include_xcom: bool ) -> Any: if _needs_run_time_resolution(value): value = ( @@ -210,7 +211,7 @@ def iter_references(self) -> Iterable[tuple[Operator, str]]: yield from x.iter_references() def resolve( - self, context: Context, session: Session, *, include_xcom: bool = True + self, context: Mapping[str, Any], session: Session, *, include_xcom: bool = True ) -> tuple[Mapping[str, Any], set[int]]: data = { k: self._expand_mapped_field(k, v, context, session=session, include_xcom=include_xcom) @@ -260,7 +261,7 @@ def iter_references(self) -> Iterable[tuple[Operator, str]]: yield from x.iter_references() def resolve( - self, context: Context, session: Session, *, include_xcom: bool = True + self, context: Mapping[str, Any], session: Session, *, include_xcom: bool = True ) -> tuple[Mapping[str, Any], set[int]]: map_index = context["ti"].map_index if map_index < 0: diff --git a/airflow/models/mappedoperator.py b/airflow/models/mappedoperator.py index 524415b848f62..4d362714794e3 100644 --- a/airflow/models/mappedoperator.py +++ b/airflow/models/mappedoperator.py @@ -671,7 +671,7 @@ def serialize_for_task_group(self) -> tuple[DagAttributeTypes, Any]: return DagAttributeTypes.OP, self.task_id def _expand_mapped_kwargs( - self, context: Context, session: Session, *, include_xcom: bool + self, context: Mapping[str, Any], session: Session, *, include_xcom: bool ) -> tuple[Mapping[str, Any], set[int]]: """ Get the kwargs to create the unmapped operator. 
@@ -869,7 +869,7 @@ def get_mapped_ti_count(self, run_id: str, *, session: Session) -> int: def render_template_fields( self, - context: Context, + context: Mapping[str, Any], jinja_env: jinja2.Environment | None = None, ) -> None: """ diff --git a/airflow/models/param.py b/airflow/models/param.py index ab7d2facd7e3b..416d9cfb8b9b4 100644 --- a/airflow/models/param.py +++ b/airflow/models/param.py @@ -20,18 +20,17 @@ import copy import json import logging -from collections.abc import ItemsView, Iterable, MutableMapping, ValuesView +from collections.abc import ItemsView, Iterable, Mapping, MutableMapping, ValuesView from typing import TYPE_CHECKING, Any, ClassVar from airflow.exceptions import AirflowException, ParamValidationError -from airflow.utils.mixins import ResolveMixin +from airflow.sdk.definitions.mixins import ResolveMixin from airflow.utils.types import NOTSET, ArgNotSet if TYPE_CHECKING: from airflow.models.dagrun import DagRun from airflow.models.operator import Operator from airflow.sdk.definitions.dag import DAG - from airflow.utils.context import Context logger = logging.getLogger(__name__) @@ -295,7 +294,7 @@ def __init__(self, current_dag: DAG, name: str, default: Any = NOTSET): def iter_references(self) -> Iterable[tuple[Operator, str]]: return () - def resolve(self, context: Context, *, include_xcom: bool = True) -> Any: + def resolve(self, context: Mapping[str, Any], *, include_xcom: bool = True) -> Any: """Pull DagParam value from DagRun context. This method is run during ``op.execute()``.""" with contextlib.suppress(KeyError): return context["dag_run"].conf[self._name] diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index a5e50cb0d2cdb..27293fa2d022e 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -107,10 +107,10 @@ from airflow.models.xcom import LazyXComSelectSequence, XCom from airflow.plugins_manager import integrate_macros_plugins from airflow.sdk.definitions.asset import Asset, AssetAlias, AssetNameRef, AssetUniqueKey, AssetUriRef +from airflow.sdk.definitions.templater import SandboxedEnvironment from airflow.sentry import Sentry from airflow.settings import task_instance_mutation_hook from airflow.stats import Stats -from airflow.templates import SandboxedEnvironment from airflow.ti_deps.dep_context import DepContext from airflow.ti_deps.dependencies_deps import REQUEUEABLE_DEPS, RUNNING_DEPS from airflow.traces.tracer import Trace diff --git a/airflow/models/xcom_arg.py b/airflow/models/xcom_arg.py index 103ddc663323c..cf4147dcbfcdf 100644 --- a/airflow/models/xcom_arg.py +++ b/airflow/models/xcom_arg.py @@ -29,9 +29,9 @@ from airflow.models import MappedOperator, TaskInstance from airflow.models.abstractoperator import AbstractOperator from airflow.models.taskmixin import DependencyMixin +from airflow.sdk.definitions.mixins import ResolveMixin from airflow.sdk.types import NOTSET, ArgNotSet from airflow.utils.db import exists_query -from airflow.utils.mixins import ResolveMixin from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.setup_teardown import SetupTeardownContext from airflow.utils.state import State @@ -44,7 +44,6 @@ from airflow.models.operator import Operator from airflow.sdk.definitions.baseoperator import BaseOperator from airflow.sdk.definitions.dag import DAG - from airflow.utils.context import Context from airflow.utils.edgemodifier import EdgeModifier # Callable objects contained by MapXComArg. 
We only accept callables from @@ -206,8 +205,9 @@ def get_task_map_length(self, run_id: str, *, session: Session) -> int | None: """ raise NotImplementedError() - @provide_session - def resolve(self, context: Context, session: Session = NEW_SESSION, *, include_xcom: bool = True) -> Any: + def resolve( + self, context: Mapping[str, Any], session: Session | None = None, *, include_xcom: bool = True + ) -> Any: """ Pull XCom value. @@ -420,8 +420,11 @@ def get_task_map_length(self, run_id: str, *, session: Session) -> int | None: ) return session.scalar(query) + # TODO: Task-SDK: Remove session argument once everything is ported over to Task SDK @provide_session - def resolve(self, context: Context, session: Session = NEW_SESSION, *, include_xcom: bool = True) -> Any: + def resolve( + self, context: Mapping[str, Any], session: Session = NEW_SESSION, *, include_xcom: bool = True + ) -> Any: ti = context["ti"] if TYPE_CHECKING: assert isinstance(ti, TaskInstance) @@ -431,12 +434,12 @@ def resolve(self, context: Context, session: Session = NEW_SESSION, *, include_x context["expanded_ti_count"], session=session, ) + result = ti.xcom_pull( task_ids=task_id, map_indexes=map_indexes, key=self.key, default=NOTSET, - session=session, ) if not isinstance(result, ArgNotSet): return result @@ -535,7 +538,9 @@ def get_task_map_length(self, run_id: str, *, session: Session) -> int | None: return self.arg.get_task_map_length(run_id, session=session) @provide_session - def resolve(self, context: Context, session: Session = NEW_SESSION, *, include_xcom: bool = True) -> Any: + def resolve( + self, context: Mapping[str, Any], session: Session = NEW_SESSION, *, include_xcom: bool = True + ) -> Any: value = self.arg.resolve(context, session=session, include_xcom=include_xcom) if not isinstance(value, (Sequence, dict)): raise ValueError(f"XCom map expects sequence or dict, not {type(value).__name__}") @@ -616,7 +621,9 @@ def get_task_map_length(self, run_id: str, *, session: Session) -> int | None: return max(ready_lengths) @provide_session - def resolve(self, context: Context, session: Session = NEW_SESSION, *, include_xcom: bool = True) -> Any: + def resolve( + self, context: Mapping[str, Any], session: Session = NEW_SESSION, *, include_xcom: bool = True + ) -> Any: values = [arg.resolve(context, session=session, include_xcom=include_xcom) for arg in self.args] for value in values: if not isinstance(value, (Sequence, dict)): @@ -691,7 +698,9 @@ def get_task_map_length(self, run_id: str, *, session: Session) -> int | None: return sum(ready_lengths) @provide_session - def resolve(self, context: Context, session: Session = NEW_SESSION, *, include_xcom: bool = True) -> Any: + def resolve( + self, context: Mapping[str, Any], session: Session = NEW_SESSION, *, include_xcom: bool = True + ) -> Any: values = [arg.resolve(context, session=session, include_xcom=include_xcom) for arg in self.args] for value in values: if not isinstance(value, (Sequence, dict)): diff --git a/airflow/notifications/basenotifier.py b/airflow/notifications/basenotifier.py index eaac6d11df36d..398d95cbb8d0a 100644 --- a/airflow/notifications/basenotifier.py +++ b/airflow/notifications/basenotifier.py @@ -21,8 +21,9 @@ from collections.abc import Sequence from typing import TYPE_CHECKING -from airflow.template.templater import Templater +from airflow.sdk.definitions.templater import Templater from airflow.utils.context import context_merge +from airflow.utils.log.logging_mixin import LoggingMixin if TYPE_CHECKING: import jinja2 @@ -31,7 +32,7 @@ 
from airflow.utils.context import Context -class BaseNotifier(Templater): +class BaseNotifier(LoggingMixin, Templater): """BaseNotifier class for sending notifications.""" template_fields: Sequence[str] = () diff --git a/airflow/sensors/base.py b/airflow/sensors/base.py index d363c8cd23780..39172ce64afd7 100644 --- a/airflow/sensors/base.py +++ b/airflow/sensors/base.py @@ -46,11 +46,9 @@ from airflow.models.taskreschedule import TaskReschedule from airflow.ti_deps.deps.ready_to_reschedule import ReadyToRescheduleDep from airflow.utils import timezone -from airflow.utils.session import NEW_SESSION, create_session, provide_session +from airflow.utils.session import create_session if TYPE_CHECKING: - from sqlalchemy.orm.session import Session - from airflow.typing_compat import Self from airflow.utils.context import Context @@ -84,30 +82,6 @@ def __bool__(self) -> bool: return self.is_done -@provide_session -def _orig_start_date( - dag_id: str, task_id: str, run_id: str, map_index: int, try_number: int, session: Session = NEW_SESSION -): - """ - Get the original start_date for a rescheduled task. - - :meta private: - """ - return session.scalar( - select(TaskReschedule) - .where( - TaskReschedule.dag_id == dag_id, - TaskReschedule.task_id == task_id, - TaskReschedule.run_id == run_id, - TaskReschedule.map_index == map_index, - TaskReschedule.try_number == try_number, - ) - .order_by(TaskReschedule.id.asc()) - .with_only_columns(TaskReschedule.start_date) - .limit(1) - ) - - class BaseSensorOperator(BaseOperator, SkipMixin): """ Sensor operators are derived from this class and inherit these attributes. @@ -246,8 +220,12 @@ def execute(self, context: Context) -> Any: ti = context["ti"] max_tries: int = ti.max_tries or 0 retries: int = self.retries or 0 + # If reschedule, use the start date of the first try (first try can be either the very - # first execution of the task, or the first execution after the task was cleared.) + # first execution of the task, or the first execution after the task was cleared). 
+ # If the first try's record was not saved due to the Exception occurred and the following + # transaction rollback, the next available attempt should be taken + # to prevent falling in the endless rescheduling first_try_number = max_tries - retries + 1 with create_session() as session: start_date = session.scalar( @@ -257,7 +235,7 @@ def execute(self, context: Context) -> Any: TaskReschedule.task_id == ti.task_id, TaskReschedule.run_id == ti.run_id, TaskReschedule.map_index == ti.map_index, - TaskReschedule.try_number == first_try_number, + TaskReschedule.try_number >= first_try_number, ) .order_by(TaskReschedule.id.asc()) .with_only_columns(TaskReschedule.start_date) diff --git a/airflow/serialization/schema.json b/airflow/serialization/schema.json index 1cc9c42db0737..4eaae95ba2b01 100644 --- a/airflow/serialization/schema.json +++ b/airflow/serialization/schema.json @@ -310,6 +310,7 @@ "type": "object", "required": [ "_group_id", + "group_display_name", "prefix_group_id", "children", "tooltip", @@ -322,6 +323,7 @@ ], "properties": { "_group_id": {"anyOf": [{"type": "null"}, { "type": "string" }]}, + "group_display_name": {"type": "string" }, "is_mapped": { "type": "boolean" }, "prefix_group_id": { "type": "boolean" }, "children": { "$ref": "#/definitions/dict" }, diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index 1f43f7865d13d..a0e5da74145cf 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -41,7 +41,7 @@ from airflow.exceptions import AirflowException, SerializationError, TaskDeferred from airflow.models.baseoperator import BaseOperator from airflow.models.connection import Connection -from airflow.models.dag import DAG +from airflow.models.dag import DAG, _get_model_data_interval from airflow.models.expandinput import ( EXPAND_INPUT_EMPTY, create_expand_input, @@ -95,13 +95,14 @@ if TYPE_CHECKING: from inspect import Parameter + from airflow.models import DagRun from airflow.models.baseoperatorlink import BaseOperatorLink from airflow.models.expandinput import ExpandInput from airflow.models.operator import Operator from airflow.sdk.definitions.node import DAGNode from airflow.serialization.json_schema import Validator from airflow.ti_deps.deps.base_ti_dep import BaseTIDep - from airflow.timetables.base import DagRunInfo, Timetable + from airflow.timetables.base import DagRunInfo, DataInterval, Timetable HAS_KUBERNETES: bool try: @@ -1786,6 +1787,7 @@ def serialize_task_group(cls, task_group: TaskGroup) -> dict[str, Any] | None: # When calling json.dumps(self.data, sort_keys=True) to generate dag_hash, misjudgment will occur encoded = { "_group_id": task_group._group_id, + "group_display_name": task_group.group_display_name, "prefix_group_id": task_group.prefix_group_id, "tooltip": task_group.tooltip, "ui_color": task_group.ui_color, @@ -1821,7 +1823,7 @@ def deserialize_task_group( group_id = cls.deserialize(encoded_group["_group_id"]) kwargs = { key: cls.deserialize(encoded_group[key]) - for key in ["prefix_group_id", "tooltip", "ui_color", "ui_fgcolor"] + for key in ["prefix_group_id", "tooltip", "ui_color", "ui_fgcolor", "group_display_name"] } if not encoded_group.get("is_mapped"): @@ -1960,6 +1962,19 @@ def get_task_assets( if isinstance(obj, of_type): yield task["task_id"], obj + def get_run_data_interval(self, run: DagRun) -> DataInterval: + """Get the data interval of this run.""" + if run.dag_id is not None and run.dag_id != self.dag_id: + raise 
ValueError(f"Arguments refer to different DAGs: {self.dag_id} != {run.dag_id}") + + data_interval = _get_model_data_interval(run, "data_interval_start", "data_interval_end") + # the older implementation has call to infer_automated_data_interval if data_interval is None, do we want to keep that or raise + # an exception? + if data_interval is None: + raise ValueError(f"Cannot calculate data interval for run {run}") + + return data_interval + if TYPE_CHECKING: access_control: Mapping[str, Mapping[str, Collection[str]] | Collection[str]] | None = pydantic.Field( init=False, default=None diff --git a/airflow/templates.py b/airflow/templates.py deleted file mode 100644 index 95851253a7d22..0000000000000 --- a/airflow/templates.py +++ /dev/null @@ -1,94 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from typing import TYPE_CHECKING - -import jinja2.nativetypes -import jinja2.sandbox - -if TYPE_CHECKING: - import datetime - - -class _AirflowEnvironmentMixin: - def __init__(self, **kwargs): - super().__init__(**kwargs) - - self.filters.update(FILTERS) - - def is_safe_attribute(self, obj, attr, value): - """ - Allow access to ``_`` prefix vars (but not ``__``). - - Unlike the stock SandboxedEnvironment, we allow access to "private" attributes (ones starting with - ``_``) whilst still blocking internal or truly private attributes (``__`` prefixed ones). 
- """ - return not jinja2.sandbox.is_internal_attribute(obj, attr) - - -class NativeEnvironment(_AirflowEnvironmentMixin, jinja2.nativetypes.NativeEnvironment): - """NativeEnvironment for Airflow task templates.""" - - -class SandboxedEnvironment(_AirflowEnvironmentMixin, jinja2.sandbox.SandboxedEnvironment): - """SandboxedEnvironment for Airflow task templates.""" - - -def ds_filter(value: datetime.date | datetime.time | None) -> str | None: - """Date filter.""" - if value is None: - return None - return value.strftime("%Y-%m-%d") - - -def ds_nodash_filter(value: datetime.date | datetime.time | None) -> str | None: - """Date filter without dashes.""" - if value is None: - return None - return value.strftime("%Y%m%d") - - -def ts_filter(value: datetime.date | datetime.time | None) -> str | None: - """Timestamp filter.""" - if value is None: - return None - return value.isoformat() - - -def ts_nodash_filter(value: datetime.date | datetime.time | None) -> str | None: - """Timestamp filter without dashes.""" - if value is None: - return None - return value.strftime("%Y%m%dT%H%M%S") - - -def ts_nodash_with_tz_filter(value: datetime.date | datetime.time | None) -> str | None: - """Timestamp filter with timezone.""" - if value is None: - return None - return value.isoformat().replace("-", "").replace(":", "") - - -FILTERS = { - "ds": ds_filter, - "ds_nodash": ds_nodash_filter, - "ts": ts_filter, - "ts_nodash": ts_nodash_filter, - "ts_nodash_with_tz": ts_nodash_with_tz_filter, -} diff --git a/airflow/ui/.prettierrc b/airflow/ui/.prettierrc index 93ba8a38a47fe..45854fe346dcc 100644 --- a/airflow/ui/.prettierrc +++ b/airflow/ui/.prettierrc @@ -5,7 +5,7 @@ "importOrderSeparation": true, "jsxSingleQuote": false, "plugins": ["@trivago/prettier-plugin-sort-imports"], - "printWidth": 80, + "printWidth": 110, "singleQuote": false, "tabWidth": 2, "trailingComma": "all", diff --git a/airflow/ui/dev/index.html b/airflow/ui/dev/index.html index 27842e5bd60aa..a719206af3fe6 100644 --- a/airflow/ui/dev/index.html +++ b/airflow/ui/dev/index.html @@ -3,11 +3,7 @@ - +