# .github/workflows/e2e-preset-test.yml

name: E2E Preset Test

# Triggers: automatically whenever the "Build and Push Preset Models" workflow
# completes (the jobs check its conclusion), or manually with the options below.
on:
  workflow_run:
    workflows:
      - "Build and Push Preset Models"
    types:
      - completed
  workflow_dispatch:
    inputs:
      force-run-all:
        description: "Test all models for E2E"
        type: boolean
        default: false
      force-run-all-phi-models:
        description: "Test all Phi models for E2E"
        type: boolean
        default: false
      test-on-vllm:
        description: "Test on VLLM runtime"
        type: boolean
        default: false

# Workflow-wide environment, visible to every step of every job.
env:
  GO_VERSION: "1.22"
  # Head ref when the event provides one, otherwise the name of the triggering ref.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # The force-run flags can only be 'true' on a manual (workflow_dispatch) run.
  FORCE_RUN_ALL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}
  FORCE_RUN_ALL_PHI: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all-phi-models == 'true' }}
  # 'vllm' only when requested on a manual run; every other run uses 'hf'.
  RUNTIME: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.test-on-vllm == 'true') && 'vllm' || 'hf' }}

# Token permissions granted to the jobs of this workflow.
permissions:
  contents: read
  # Allows requesting an OIDC token; presumably needed by the Azure login
  # step, which is configured without a client secret.
  id-token: write

jobs:
  # Computes the list of models to test and publishes it as job outputs.
  determine-models:
    # Manual runs always proceed; workflow_run-triggered runs only proceed
    # when the triggering workflow run concluded successfully.
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    environment: preset-env
    outputs:
      # Raw output of the "Determine Affected Models" step.
      matrix: ${{ steps.affected_models.outputs.matrix }}
      # 'true' / 'false' string produced by the "Check if Matrix is Empty" step.
      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
      # Affected models merged with their entries from e2e-preset-configs.json.
      full_matrix: ${{ steps.images.outputs.full_matrix }}
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # Fetch the complete history (not just the tip commit) and check out
          # submodules as well.
          fetch-depth: 0
          submodules: true

      # This script should output a JSON array of model names.
      # Its inputs are handed over through the step environment instead of being
      # interpolated into the script text, so a branch name that contains shell
      # metacharacters cannot change the command that is executed.
      - name: Determine Affected Models
        id: affected_models
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          FORCE_RUN_ALL: ${{ env.FORCE_RUN_ALL }}
          FORCE_RUN_ALL_PHI: ${{ env.FORCE_RUN_ALL_PHI }}
        run: python3 .github/workflows/kind-cluster/determine_models.py

      - name: Print Determined Models
        env:
          # Read from the environment: interpolating the JSON inside a
          # double-quoted shell string would let the shell strip its quotes.
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          echo "Output from determine_models: $MATRIX"
    
      # Sets is_empty=true when the matrix output is missing or the empty
      # JSON array "[]", and is_empty=false otherwise.
      - name: Check if Matrix is Empty
        id: check_matrix_empty
        env:
          # Compared as an environment variable so quotes and spaces inside the
          # JSON cannot split the test expression into several shell words.
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          if [ -z "$MATRIX" ] || [ "$MATRIX" = "[]" ]; then
              echo "is_empty=true" >> "$GITHUB_OUTPUT"
          else
              echo "is_empty=false" >> "$GITHUB_OUTPUT"
          fi
        
      # Merges every affected model with its configuration entry and publishes
      # the result as the full_matrix output.
      - name: Add Config info for Testing
        if: steps.check_matrix_empty.outputs.is_empty == 'false'
        id: images
        env:
          # Passed through the environment so that quotes inside the JSON
          # cannot terminate a shell string in the script below.
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          # Read the additional configurations from e2e-preset-configs.json.
          # jq reads the file directly, so a missing file fails the step
          # instead of silently producing an empty CONFIGS.
          CONFIGS=$(jq -c '.matrix.image' .github/e2e-preset-configs.json)

          echo "CONFIGS:"
          echo "$CONFIGS"

          # Pseudocode for combining matrices (fields of the config entry
          # override same-named fields of the model):
          # COMBINED_MATRIX = []
          # for model in MATRIX:
          #     for config in CONFIGS:
          #         if config['name'] == model['name']:
          #             COMBINED_MATRIX.append({**model, **config})
          COMBINED_MATRIX=$(jq -c --argjson configs "$CONFIGS" '
              map(. as $model | $configs[] | select(.name == $model.name) | $model + .)
          ' <<< "$MATRIX")

          echo "full_matrix=$COMBINED_MATRIX" >> "$GITHUB_OUTPUT"
      
      - name: Print Combined Matrix
        if: steps.check_matrix_empty.outputs.is_empty == 'false'
        env:
          # Read from the environment: a single quote inside the JSON would
          # otherwise end the quoted shell string it was interpolated into.
          FULL_MATRIX: ${{ steps.images.outputs.full_matrix }}
        run: |
          echo "Combined Matrix:"
          echo "$FULL_MATRIX"

  # Runs the end-to-end test once for every element of the combined matrix.
  e2e-preset-tests:
    needs: determine-models
    # Requires a non-empty matrix, and either a manual run or a successful
    # triggering workflow run.
    if: >-
      needs.determine-models.outputs.is_matrix_empty == 'false' &&
      (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success')
    runs-on: ubuntu-latest
    environment: preset-env
    strategy:
      # A failing model does not cancel the jobs of the other models.
      fail-fast: false
      matrix:
        # Ex matrix element:
        # {"name":"falcon-40b","type":"text-generation","version":"#",
        # "runtime":"tfs","tag":"0.0.1","node-count":1,
        # "node-vm-size":"Standard_NC96ads_A100_v4", "node-osdisk-size":400}
        model: ${{ fromJson(needs.determine-models.outputs.full_matrix) }}
    steps:
      - name: Checkout
        # Pinned to the commit SHA already used by the checkout step of the
        # determine-models job; a tag can be moved, a commit SHA cannot.
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # Fetch the complete history and check out submodules as well.
          fetch-depth: 0
          submodules: true

      # Exposes the matrix element's OSS field to all later steps as MODEL_IS_OSS.
      - name: Set OSS Flag
        run: |
          echo "MODEL_IS_OSS=${{ matrix.model.OSS }}" >> "$GITHUB_ENV"

      # Logs in to Azure with the client/tenant IDs stored as secrets.
      - name: Az CLI login
        uses: azure/login@v2.2.0
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          # No subscription is selected here; the following step sets it.
          allow-no-subscriptions: true

      # Selects the subscription used by the subsequent az commands.
      - name: Set ACR Subscription
        run: |
          az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION_ID }}

      # Sets IMAGE_EXISTS=true when the repository named after the model holds
      # the tag under test, and IMAGE_EXISTS=false otherwise.
      - name: Check if Image exists in ACR
        id: check_image
        env:
          ACR_NAME: ${{ secrets.ACR_AMRT_USERNAME }}
          IMAGE_NAME: ${{ matrix.model.name }}
          TAG: ${{ matrix.model.tag }}
        run: |
          # Use '|| true' to prevent script from exiting with an error if the repository is not found
          TAGS=$(az acr repository show-tags -n "$ACR_NAME" --repository "$IMAGE_NAME" --output tsv || true)

          if [[ -z "$TAGS" ]]; then
              echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
              echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
          # -F -x: compare the tag literally against whole lines. As a regex,
          # the dots of a tag such as 0.0.1 would match any character.
          elif grep -Fxq -- "$TAG" <<< "$TAGS"; then
              echo "IMAGE_EXISTS=true" >> "$GITHUB_OUTPUT"
          else
              echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
              echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
          fi
    
      # Fails the job early when the previous step did not find the image.
      - name: Exit if Image for testing does not exist
        if: ${{ steps.check_image.outputs.IMAGE_EXISTS == 'false' }}
        run: |
          echo "Image doesn't exist in ACR, remember to build image for testing first (preset-image-build)"
          exit 1
    
      # Fetches credentials for the GitRunner AKS cluster so that the kubectl
      # commands in the following steps talk to it.
      - name: Set up kubectl context
        run: |
          az aks get-credentials \
              --name GitRunner \
              --resource-group llm-test
    
      # Derives the nodepool name from the model name: every '-' and '.' is
      # removed and the result is cut to at most 12 characters.
      - name: Get Nodepool Name
        id: get_nodepool_name
        run: |
          MODEL_NAME=${{ matrix.model.name }}
          COMPACT_NAME=${MODEL_NAME//[-.]/}
          # A substring of length 12 leaves shorter names untouched.
          POOL_NAME=${COMPACT_NAME:0:12}

          echo "Nodepool Name: $POOL_NAME"
          echo "NODEPOOL_NAME=$POOL_NAME" >> $GITHUB_OUTPUT

      # Creates the tainted nodepool for this model unless it already exists;
      # an existing nodepool is only accepted in the Succeeded state.
      - name: Create Nodepool
        run: |
          POOL=${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}

          # A failing lookup (nodepool not found) yields an empty name.
          NODEPOOL_EXIST=$(az aks nodepool show \
                          --name "$POOL" \
                          --cluster-name GitRunner \
                          --resource-group llm-test \
                          --query 'name' -o tsv || echo "")
          echo "NODEPOOL_EXIST: $NODEPOOL_EXIST"

          if [ -n "$NODEPOOL_EXIST" ]; then
              NODEPOOL_STATE=$(az aks nodepool show \
                              --name "$POOL" \
                              --cluster-name GitRunner \
                              --resource-group llm-test \
                              --query 'provisioningState' -o tsv)
              echo "NODEPOOL_STATE: $NODEPOOL_STATE"
              if [ "$NODEPOOL_STATE" = "Succeeded" ]; then
                  echo "Nodepool already exists and is in a running state."
              else
                  echo "Nodepool exists but is not in a Succeeded state. Please check manually."
                  exit 1
              fi
          else
              # Size and VM type come from the matrix element of this model.
              az aks nodepool add \
                  --name "$POOL" \
                  --cluster-name GitRunner \
                  --resource-group llm-test \
                  --node-count ${{ matrix.model.node-count }} \
                  --node-vm-size ${{ matrix.model.node-vm-size }} \
                  --node-osdisk-size ${{ matrix.model.node-osdisk-size }} \
                  --labels pool="$POOL" \
                  --node-taints sku=gpu:NoSchedule \
                  --aks-custom-headers UseGPUDedicatedVHD=true
          fi

      # Publishes the workload name (the matrix element's "workload" field,
      # falling back to the model name) and the path prefix of its manifests.
      - name: Get testing workload
        id: workload
        run: |
          NAME=${{ matrix.model.workload || matrix.model.name }}
          {
              echo "WORKLOAD_NAME=$NAME"
              echo "WORKLOAD_FILE_PREFIX=presets/workspace/test/manifests/$NAME/$NAME"
          } >> $GITHUB_OUTPUT

      # Applies the "<prefix>-service.yaml" manifest of the workload.
      - name: Create Service
        run: |
          SERVICE_MANIFEST=${{ steps.workload.outputs.WORKLOAD_FILE_PREFIX }}-service.yaml
          kubectl apply -f $SERVICE_MANIFEST
      
      # Note: despite the step name, the value read is the service's
      # .spec.clusterIP; it is published as the SERVICE_IP output.
      - name: Retrieve External Service IP
        id: get_ip
        run: |
          SERVICE_NAME=${{ steps.workload.outputs.WORKLOAD_NAME }}
          SERVICE_IP=$(kubectl get svc $SERVICE_NAME -o=jsonpath='{.spec.clusterIP}')
          echo "Service IP is $SERVICE_IP"
          echo "SERVICE_IP=$SERVICE_IP" >> $GITHUB_OUTPUT

      # Models whose name contains "llama" are tested as a statefulset, all
      # other models as a deployment.
      - name: Get Resource Type
        id: resource
        run: |
          if echo "${{ matrix.model.name }}" | grep -q "llama"; then
              RESOURCE_TYPE="statefulset"
          else
              RESOURCE_TYPE="deployment"
          fi
          echo "RESOURCE_TYPE=$RESOURCE_TYPE" >> $GITHUB_OUTPUT
      
      # Fills the placeholders of the workload manifest in place and applies it.
      - name: Replace IP and Deploy Resource to K8s
        run: |
          # Manifests of "llama" models carry no runtime suffix; every other
          # model uses "<prefix>_<runtime>.yaml".
          if echo "${{ matrix.model.name }}" | grep -q "llama"; then
              POSTFIX=""
          else
              POSTFIX="_${{ env.RUNTIME }}"
          fi
          WORKLOAD_FILE=${{ steps.workload.outputs.WORKLOAD_FILE_PREFIX }}$POSTFIX.yaml

          # The three substitutions are applied in this order on every line.
          sed -i \
              -e "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" \
              -e "s/TAG_HERE/${{ matrix.model.tag }}/g" \
              -e "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" \
              $WORKLOAD_FILE
          kubectl apply -f $WORKLOAD_FILE

      # Blocks until the rollout has finished, giving up after 1800 seconds.
      - name: Wait for Resource to be ready
        run: |
          TARGET=${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }}
          kubectl rollout status $TARGET --timeout=1800s
    
      # Only for models flagged with loads_adapter: the log of the first pod
      # labelled app=<workload> must contain an "Adapter added:" line that
      # mentions the expected adapter.
      - name: Check Adapter Loading from Logs
        if: matrix.model.loads_adapter == true
        run: |
          POD_NAME=$(kubectl get pods -l app=${{ steps.workload.outputs.WORKLOAD_NAME }} -o jsonpath="{.items[0].metadata.name}")
          if ! kubectl logs $POD_NAME | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}"; then
              echo "Adapter not loaded or incorrect adapter loaded"
              exit 1
          fi

      # Installs curl inside the workload container; the endpoint tests that
      # follow run curl from within it.
      - name: Install testing commands
        run: |
          TARGET=${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }}
          kubectl exec $TARGET -- apt-get update
          kubectl exec $TARGET -- apt-get install -y curl

      # Queries the health endpoint from inside the workload container.
      - name: Test healthz endpoint
        run: |
          # --fail makes curl exit non-zero on an HTTP error status (>= 400), so
          # an unhealthy server fails this step; without it curl only reported
          # connection errors. -S keeps the error message visible despite -s.
          kubectl exec ${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }} -- \
              curl -sS --fail http://localhost:5000/health

      # Sends one sample request to the inference endpoint from inside the
      # workload container. The endpoint and payload depend on the model name
      # and on the runtime:
      #   llama "-chat" models -> /chat
      #   other llama models   -> /generate
      #   vllm runtime         -> /v1/chat/completions
      #   everything else      -> /chat
      - name: Test inference endpoint
        run: |
          TARGET=${{ steps.resource.outputs.RESOURCE_TYPE }}/${{ steps.workload.outputs.WORKLOAD_NAME }}

          # Every request uses --fail so that an HTTP error status (>= 400)
          # fails the step instead of being reported as success; -S keeps
          # curl's error message visible despite -s.
          echo "Testing inference for ${{ matrix.model.name }}"
          if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
              kubectl exec "$TARGET" -- \
              curl -sS --fail -X POST \
              -H "Content-Type: application/json" \
              -d '{
                  "input_data": {
                      "input_string": [
                          [
                              {
                                  "role": "system",
                                  "content": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe."
                              },
                              {
                                  "role": "user",
                                  "content": "Write a brief birthday message to John"
                              }
                          ]
                      ]
                  }
              }' \
              http://localhost:5000/chat
          elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
              kubectl exec "$TARGET" -- \
              curl -sS --fail -X POST \
              -H "Content-Type: application/json" \
              -d '{
                  "prompts": [
                      "I believe the meaning of life is",
                      "Simply put, the theory of relativity states that ",
                      "A brief message congratulating the team on the launch: Hi everyone, I just ",
                      "Translate English to French: sea otter => loutre de mer, peppermint => menthe poivrée, plush girafe => girafe peluche, cheese =>"
                  ],
                  "parameters": {
                      "max_gen_len": 128
                  }
              }' \
              http://localhost:5000/generate
          elif [[ "${{ env.RUNTIME }}" == *"vllm"* ]]; then
              kubectl exec "$TARGET" -- \
              curl -sS --fail -X POST \
              -H "accept: application/json" \
              -H "Content-Type: application/json" \
              -d '{
                  "model": "test",
                  "messages": [
                  {
                      "role": "system",
                      "content": "You are a helpful assistant."
                  },
                  {
                      "role": "user",
                      "content": "Hello!"
                  }
                  ]
                  }' \
              http://localhost:5000/v1/chat/completions
          else
              kubectl exec "$TARGET" -- \
              curl -sS --fail -X POST \
              -H "accept: application/json" \
              -H "Content-Type: application/json" \
              -d '{
                  "prompt":"Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
                  "return_full_text": false,
                  "clean_up_tokenization_spaces": false,
                  "prefix": null,
                  "handle_long_generation": null,
                  "generate_kwargs": {
                          "max_length":200,
                          "min_length":0,
                          "do_sample":true,
                          "early_stopping":false,
                          "num_beams":1,
                          "num_beam_groups":1,
                          "diversity_penalty":0.0,
                          "temperature":1.0,
                          "top_k":10,
                          "top_p":1,
                          "typical_p":1,
                          "repetition_penalty":1,
                          "length_penalty":1,
                          "no_repeat_ngram_size":0,
                          "encoder_no_repeat_ngram_size":0,
                          "bad_words_ids":null,
                          "num_return_sequences":1,
                          "output_scores":false,
                          "return_dict_in_generate":false,
                          "forced_bos_token_id":null,
                          "forced_eos_token_id":null,
                          "remove_invalid_values":null
                      }
                  }' \
              http://localhost:5000/chat
          fi
      
      # Removes everything the job created: the workload (after dumping its
      # logs), its service and the nodepool. Runs even when earlier steps failed.
      - name: Cleanup
        if: always()
        env:
          # Empty when the producing step did not run.
          RESOURCE_TYPE: ${{ steps.resource.outputs.RESOURCE_TYPE }}
          WORKLOAD_NAME: ${{ steps.workload.outputs.WORKLOAD_NAME }}
          NODEPOOL_NAME: ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}
        run: |
          # Every stage is best-effort: a failure is recorded in STATUS and
          # reported at the end, so it can never prevent the nodepool from
          # being deleted.
          STATUS=0

          # Without a workload name, "kubectl get svc" would list all services
          # and the following delete would fail, so skip the Kubernetes
          # cleanup entirely in that case.
          if [ -n "$WORKLOAD_NAME" ]; then
              # Only proceed if RESOURCE_TYPE is set (else resource wasn't created)
              # Check and Delete K8s Resource (Deployment or StatefulSet)
              if [ -n "$RESOURCE_TYPE" ] && kubectl get "$RESOURCE_TYPE" "$WORKLOAD_NAME" > /dev/null 2>&1; then
                  # Logs are diagnostic only; they may be unavailable, e.g. when
                  # the container never started.
                  kubectl logs "$RESOURCE_TYPE/$WORKLOAD_NAME" || echo "Could not fetch logs of $RESOURCE_TYPE/$WORKLOAD_NAME"
                  kubectl delete "$RESOURCE_TYPE" "$WORKLOAD_NAME" || STATUS=1
              fi

              # Check and Delete K8s Service if it exists
              if kubectl get svc "$WORKLOAD_NAME" > /dev/null 2>&1; then
                  kubectl delete svc "$WORKLOAD_NAME" || STATUS=1
              fi
          fi

          # Check and Delete AKS Nodepool if it exists
          if [ -n "$NODEPOOL_NAME" ]; then
              NODEPOOL_EXIST=$(az aks nodepool show \
                              --name "$NODEPOOL_NAME" \
                              --cluster-name GitRunner \
                              --resource-group llm-test \
                              --query 'name' -o tsv || echo "")

              if [ -n "$NODEPOOL_EXIST" ]; then
                  az aks nodepool delete \
                      --name "$NODEPOOL_NAME" \
                      --cluster-name GitRunner \
                      --resource-group llm-test || STATUS=1
              fi
          fi

          exit $STATUS