# test-iasworld-data #36
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: test-iasworld-data

# This workflow is not configured to run on PRs, because PRs have their own CI
# process defined by the `build-and-test-dbt` workflow. Instead, the Data
# Department's Spark data extraction process dispatches it via API once per
# day after finishing the daily ingest of iasWorld data. For more detail, see:
#
# https://github.com/ccao-data/service-spark-iasworld
on:
  workflow_dispatch:
    inputs:
      select:
        description: >
          Optional space-separated list of tests to run (defaults to all
          iasWorld data tests)
        required: false
        type: string
      selector:
        description: >
          Optional dbt selector representing tests to run (takes precedence
          over the above list of tests if both are present)
        required: false
        type: string
      upload_test_results:
        description: Upload test results to S3
        required: false
        default: false
        type: boolean

env:
  # Both values are quoted so they are unambiguously strings, which is how
  # environment variables are delivered to the steps anyway
  PYTHONUNBUFFERED: "1"
  UV_SYSTEM_PYTHON: "1"

jobs:
  test-iasworld-data:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Translates the `select` / `selector` workflow inputs into a single
      # CLI fragment exposed as the `select-option` step output. The inputs
      # are passed through `env` rather than interpolated into the script so
      # that their contents are never evaluated as shell code.
      - name: Validate and parse input variables
        id: parse-inputs
        run: |
          # Default to no select option, which will fall back to the default
          # behavior of the underlying Python script
          SELECT_OPTION=""
          if [[ -n "$SELECTOR" ]]; then
            SELECT_OPTION="--selector $SELECTOR"
          elif [[ -n "$SELECT" ]]; then
            SELECT_OPTION="--select $SELECT"
          fi
          echo "Setting select option to '$SELECT_OPTION'"
          echo "select-option=$SELECT_OPTION" >> "$GITHUB_OUTPUT"
        shell: bash
        env:
          SELECT: ${{ inputs.select }}
          SELECTOR: ${{ inputs.selector }}

      # NOTE(review): PROJECT_DIR, STATE_DIR, CACHE_KEY and TARGET are used by
      # later steps but are not defined in this file; presumably this local
      # action exports them -- confirm against .github/actions/setup_dbt
      - name: Setup dbt
        uses: ./.github/actions/setup_dbt
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}

      - name: Restore dbt state cache
        id: cache
        uses: ./.github/actions/restore_dbt_cache
        with:
          path: ${{ env.PROJECT_DIR }}/${{ env.STATE_DIR }}
          key: ${{ env.CACHE_KEY }}

      - name: Install Python dependencies
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        run: uv pip install ".[dbt_tests]"

      - name: Run tests
        run: |
          # Only defer to the cached state when the restore step reported a
          # cache hit
          DEFER_OPTION=""
          if [[ "$CACHE_HIT" == 'true' ]]; then
            DEFER_OPTION="--defer --state $STATE_DIR"
          fi
          # The option variables below are intentionally unquoted so that
          # each one word-splits into separate CLI arguments (or into nothing
          # at all when empty)
          # shellcheck disable=SC2086
          python scripts/run_iasworld_data_tests.py \
            --target "$TARGET" \
            --output-dir ./qc_test_results/ \
            $SELECT_OPTION \
            $SKIP_ARTIFACTS_OPTION \
            $DEFER_OPTION
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        env:
          USER: ${{ github.triggering_actor }}
          GIT_SHA: ${{ github.sha }}
          GIT_REF: ${{ github.ref_name }}
          # NOTE(review): this workflow only runs on `workflow_dispatch`,
          # whose event payload is not expected to include `commits`, so this
          # likely evaluates to an empty string -- confirm whether the test
          # script relies on it
          GIT_AUTHOR: ${{ github.event.commits[0].author.name }}
          CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
          SELECT_OPTION: ${{ steps.parse-inputs.outputs.select-option }}
          SKIP_ARTIFACTS_OPTION: ${{ inputs.upload_test_results && '--no-skip-artifacts' || '--skip-artifacts' }}

      - name: Save test results to S3
        if: inputs.upload_test_results
        run: |
          s3_prefix="s3://ccao-data-warehouse-us-east-1/qc"
          local_prefix="qc_test_results/metadata"
          # Sync each metadata directory that exists locally; missing
          # directories are skipped rather than treated as errors
          for dir in "test_run" "test_run_result" "test_run_failing_row"; do
            dirpath="${local_prefix}/${dir}"
            if [ -e "$dirpath" ]; then
              echo "Copying ${dirpath} metadata to S3"
              aws s3 sync "$dirpath" "${s3_prefix}/${dir}"
            fi
          done
          crawler_name="ccao-data-warehouse-qc-crawler"
          aws glue start-crawler --name "$crawler_name"
          echo "Triggered Glue crawler $crawler_name"
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      # Records a UTC timestamp in the job environment for use in the failure
      # notification body below
      - name: Get current time
        if: failure()
        run: echo "TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S")" >> "$GITHUB_ENV"
        shell: bash

      # Only triggered when the failing run was dispatched by the
      # `sqoop-bot[bot]` actor (presumably the automated dispatch described at
      # the top of this file). Otherwise, whoever dispatched the workflow is
      # notified via GitHub (instead of SNS)
      - name: Send failure notification
        if: github.event_name == 'workflow_dispatch' && github.triggering_actor == 'sqoop-bot[bot]' && failure()
        uses: ./.github/actions/publish_sns_topic
        with:
          sns_topic_arn: ${{ secrets.AWS_SNS_NOTIFICATION_TOPIC_ARN }}
          subject: "iasWorld tests errored for workflow run: ${{ github.run_id }}"
          body: |
            iasWorld tests raised an error for workflow ${{ github.run_id }}, run on ${{ env.TIMESTAMP }} UTC
            Link to failing workflow:
            https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}