# chore: Factoring out reusable presets logic - Part 4 (#332) #1
#
# chore: Factoring out reusable presets logic - Part 4 (#332)

# Workflow header: name, run de-duplication, triggers, shared environment
# variables and token permissions for every job in this workflow.
name: Build and Push Preset Models

# One active run per workflow + PR head branch. Events without a head ref
# (push, manual dispatch) fall back to the unique run id, so they never
# cancel one another.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

on:
  # PRs targeting main that touch the inference presets or the model list.
  pull_request:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  # Pushes to main that touch the same paths.
  push:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  # Manual trigger; the input is surfaced to the jobs as FORCE_RUN_ALL.
  workflow_dispatch:
    inputs:
      force-run-all:
        type: boolean
        default: false
        description: "Run all models for build"

env:
  # Quoted so YAML keeps the string "1.20" instead of the float 1.2.
  # NOTE(review): not referenced by the jobs visible in this file - confirm
  # whether it is still needed.
  GO_VERSION: "1.20"
  # PR head branch on pull_request events, otherwise the pushed/dispatched ref.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

permissions:
  # Lets jobs request an OIDC token (the Azure login step passes only
  # client-id/tenant-id, no client secret).
  id-token: write
  contents: read
jobs:
  # Job 1: works out which preset models need an image build and publishes
  # the result as a JSON matrix plus an "is it empty" flag.
  determine-models:
    runs-on: ubuntu-latest
    environment: preset-env
    outputs:
      matrix: ${{ steps.affected_models.outputs.matrix }}
      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 0

      # The expression renders to the literal text "true" or "false".
      - name: Set FORCE_RUN_ALL Flag
        run: |
          echo "FORCE_RUN_ALL=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}" >> "$GITHUB_ENV"

      # This script should output a JSON array of model names.
      # Values are handed over through `env:` instead of being pasted into
      # the shell line: the branch name is attacker-controlled on pull
      # requests and must never be parsed as shell syntax.
      - name: Determine Affected Models
        id: affected_models
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          FORCE_RUN_ALL: ${{ env.FORCE_RUN_ALL }}
        run: python3 .github/workflows/kind-cluster/determine_models.py

      - name: Print Determined Models
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          echo "Output from affected_models: $MATRIX"

      # The matrix is JSON and therefore full of double quotes; comparing it
      # via an environment variable keeps the shell quoting intact.
      - name: Check if Matrix is Empty
        id: check_matrix_empty
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          if [ -z "$MATRIX" ] || [ "$MATRIX" = "[]" ]; then
            echo "is_empty=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_empty=false" >> "$GITHUB_OUTPUT"
          fi

  # Job 2: one matrix entry per model. Skips models whose image tag already
  # exists in the registry, otherwise launches the build script and cleans
  # up the Kubernetes jobs it created.
  build-models:
    needs: determine-models
    if: needs.determine-models.outputs.is_matrix_empty == 'false'
    runs-on: [self-hosted, 'hostname:model-server']
    environment: preset-env
    strategy:
      fail-fast: false
      matrix:
        model: ${{ fromJson(needs.determine-models.outputs.matrix) }}
      max-parallel: 3
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 0

      - name: Check Available Disk Space
        run: df -h

      - name: Install Azure CLI latest
        run: |
          if ! command -v az > /dev/null; then
            echo "Azure CLI not found. Installing..."
            curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
          else
            echo "Azure CLI already installed."
          fi

      # NOTE(review): the original action reference was mangled by e-mail
      # obfuscation ("azure/[email protected]"). `v1` is the major-version tag
      # of azure/login; re-pin to the exact release the project used.
      - name: 'Az CLI login'
        uses: azure/login@v1
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          allow-no-subscriptions: true

      - name: 'Set subscription'
        env:
          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
        run: az account set --subscription "$AZURE_SUBSCRIPTION_ID"

      # Publishes the registry name and user for later steps. Both come from
      # the same secret, as in the original. The password is deliberately
      # NOT written to step outputs; the build step reads it from `secrets`.
      - name: 'Get ACR Info'
        id: acr_info
        env:
          ACR_AMRT_USERNAME: ${{ secrets.ACR_AMRT_USERNAME }}
        run: |
          echo "ACR_NAME=$ACR_AMRT_USERNAME" >> "$GITHUB_OUTPUT"
          echo "ACR_USERNAME=$ACR_AMRT_USERNAME" >> "$GITHUB_OUTPUT"

      - name: 'Check if Image exists in Test ACR'
        id: check_test_image
        env:
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_USERNAME }}
          IMAGE_NAME: ${{ matrix.model.name }}
          TAG: ${{ matrix.model.tag }}
        run: |
          # Use '|| true' to prevent the script from exiting with an error if the repository is not found
          TAGS=$(az acr repository show-tags -n "$ACR_NAME" --repository "$IMAGE_NAME" --output tsv || true)
          if [[ -z "$TAGS" ]]; then
            echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
          # -x -F: whole-line, fixed-string match, so dots in a tag such as
          # "0.0.1" are not treated as regex wildcards.
          elif grep -qxF -- "$TAG" <<< "$TAGS"; then
            echo "IMAGE_EXISTS=true" >> "$GITHUB_OUTPUT"
          else
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
            echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
          fi

      # All inputs reach the script as environment variables, so values that
      # contain shell metacharacters (branch names, URLs, the password) are
      # passed through verbatim.
      - name: Launch Python Script to Kickoff Build Jobs
        if: steps.check_test_image.outputs.IMAGE_EXISTS == 'false'
        id: launch_script
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_NAME }}
          ACR_USERNAME: ${{ steps.acr_info.outputs.ACR_USERNAME }}
          ACR_PASSWORD: ${{ secrets.ACR_AMRT_PASSWORD }}
          MODEL_NAME: ${{ matrix.model.name }}
          MODEL_TYPE: ${{ matrix.model.type }}
          MODEL_VERSION: ${{ matrix.model.version }}
          MODEL_RUNTIME: ${{ matrix.model.runtime }}
          MODEL_TAG: ${{ matrix.model.tag }}
        run: python3 .github/workflows/kind-cluster/main.py

      # Check the exit status of the Python script.
      # Runs even after a failure so the job result reflects the build
      # script; anything other than "image existed" or "script succeeded"
      # fails the job.
      - name: Check Python Script Status
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          LAUNCH_OUTCOME: ${{ steps.launch_script.outcome }}
        run: |
          if [[ "$IMAGE_EXISTS" == "true" ]]; then
            echo "Image already exists; skipping the status step."
          elif [[ "$LAUNCH_OUTCOME" != "success" ]]; then
            echo "Python script failed to execute successfully."
            exit 1 # Fail the job due to script failure
          else
            echo "Python script executed successfully."
          fi

      # Cleanup Resources: delete the docker-build jobs created for this
      # model. `xargs -r` makes this a no-op when nothing matches.
      - name: Cleanup
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          MODEL_NAME: ${{ matrix.model.name }}
        run: |
          if [[ "$IMAGE_EXISTS" == "false" ]]; then
            kubectl get job --no-headers -o custom-columns=":metadata.name" \
              | grep "^docker-build-job-${MODEL_NAME}-[0-9]" \
              | xargs -r kubectl delete job
          fi