# E2E Preset Test #5
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: E2E Preset Test

# Triggers:
#   - workflow_run: fires when the "Build and Push Preset Models" workflow
#     completes (the jobs additionally check that it concluded successfully).
#   - workflow_dispatch: manual run with the boolean switches below.
on:
  workflow_run:
    workflows: ["Build and Push Preset Models"]
    types:
      - completed
  workflow_dispatch:
    inputs:
      # Surfaced to the jobs as the FORCE_RUN_ALL environment variable.
      force-run-all:
        type: boolean
        default: false
        description: "Test all models for E2E"
      # Surfaced to the jobs as the FORCE_RUN_ALL_PHI environment variable.
      force-run-all-phi-models:
        type: boolean
        default: false
        description: "Test all Phi models for E2E"
      # Selects the 'vllm' runtime instead of the default 'hf' (see RUNTIME).
      test-on-vllm:
        type: boolean
        default: false
        description: "Test on VLLM runtime"
# Workflow-level environment, visible to every step of every job.
env:
  GO_VERSION: "1.22"
  # Branch handed to the model-detection script.
  # NOTE(review): for workflow_run events GitHub resolves the ref to the
  # repository's default branch, so this may not be the branch of the run that
  # triggered us - confirm whether github.event.workflow_run.head_branch is
  # what is actually wanted here.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # The two FORCE_* flags are only ever true on a manual dispatch with the
  # corresponding input ticked.
  FORCE_RUN_ALL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}
  FORCE_RUN_ALL_PHI: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all-phi-models == 'true' }}
  # 'vllm' when manually dispatched with test-on-vllm ticked, otherwise 'hf'.
  RUNTIME: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.test-on-vllm == 'true') && 'vllm' || 'hf' }}
# Token permissions granted to the jobs in this workflow.
permissions:
  # Lets a job request an OIDC token; presumably needed by the 'Az CLI login'
  # step, which passes only a client id and tenant id - confirm.
  id-token: write
  # Read-only repository access (used by the checkout steps).
  contents: read
jobs:
  # Works out which models have to be tested and publishes the result as job
  # outputs: the raw list (matrix), whether it is empty (is_matrix_empty) and
  # the list merged with per-model test configuration (full_matrix).
  determine-models:
    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest
    environment: preset-env
    outputs:
      matrix: ${{ steps.affected_models.outputs.matrix }}
      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
      full_matrix: ${{ steps.images.outputs.full_matrix }}
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          submodules: true
          fetch-depth: 0

      # This script should output a JSON array of model names
      - name: Determine Affected Models
        id: affected_models
        env:
          # Passed through the environment rather than spliced into the shell
          # text, so an unusual branch name cannot alter the command line.
          # FORCE_RUN_ALL and FORCE_RUN_ALL_PHI are inherited from the
          # workflow-level env block.
          PR_BRANCH: ${{ env.BRANCH_NAME }}
        run: |
          python3 .github/workflows/kind-cluster/determine_models.py

      - name: Print Determined Models
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          echo "Output from determine_models: $MATRIX"

      - name: Check if Matrix is Empty
        id: check_matrix_empty
        env:
          # The matrix is JSON and contains double quotes; reading it from a
          # variable keeps the comparison below a single, well-quoted word.
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          if [ "$MATRIX" = "[]" ] || [ -z "$MATRIX" ]; then
            echo "is_empty=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_empty=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Add Config info for Testing
        if: steps.check_matrix_empty.outputs.is_empty == 'false'
        id: images
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          # Read the additional configurations from e2e-preset-configs.json
          CONFIGS=$(jq -c '.matrix.image' .github/e2e-preset-configs.json)
          echo "CONFIGS:"
          echo "$CONFIGS"
          # Pseudocode for combining matrices
          # COMBINED_MATRIX = []
          # for model in MATRIX:
          #     for config in CONFIGS:
          #         if config['name'] == model['name']:
          #             combined = {**model, **config}
          #             COMBINED_MATRIX.append(combined)
          #             break
          COMBINED_MATRIX=$(echo "$MATRIX" | jq --argjson configs "$CONFIGS" -c '
            map(. as $model | $configs[] | select(.name == $model.name) | $model + .)
          ')
          echo "full_matrix=$COMBINED_MATRIX" >> "$GITHUB_OUTPUT"

      - name: Print Combined Matrix
        if: steps.check_matrix_empty.outputs.is_empty == 'false'
        env:
          FULL_MATRIX: ${{ steps.images.outputs.full_matrix }}
        run: |
          echo "Combined Matrix:"
          echo "$FULL_MATRIX"

  # Runs once per entry of full_matrix: verifies the image exists in ACR,
  # provisions an AKS nodepool, deploys the model workload, exercises its HTTP
  # endpoints from inside the pod and finally tears everything down again.
  e2e-preset-tests:
    needs: determine-models
    if: needs.determine-models.outputs.is_matrix_empty == 'false' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success')
    runs-on: ubuntu-latest
    environment: preset-env
    strategy:
      fail-fast: false
      matrix:
        # Ex matrix element:
        # {"name":"falcon-40b","type":"text-generation","version":"#",
        # "runtime":"tfs","tag":"0.0.1","node-count":1,
        # "node-vm-size":"Standard_NC96ads_A100_v4", "node-osdisk-size":400}
        model: ${{fromJson(needs.determine-models.outputs.full_matrix)}}
    steps:
      - name: Checkout
        # Same pinned commit as the checkout in determine-models.
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          submodules: true
          fetch-depth: 0

      - name: Set OSS Flag
        run: echo "MODEL_IS_OSS=${{ matrix.model.OSS }}" >> $GITHUB_ENV

      - name: 'Az CLI login'
        # NOTE(review): the version tag of this action was unreadable in the
        # text this file was recovered from; 'v2' is the major tag - confirm
        # the intended release and pin it like the checkout action above.
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          allow-no-subscriptions: true

      - name: 'Set ACR Subscription'
        run: az account set --subscription ${{secrets.AZURE_SUBSCRIPTION_ID}}

      - name: Check if Image exists in ACR
        id: check_image
        run: |
          ACR_NAME=${{ secrets.ACR_AMRT_USERNAME }}
          IMAGE_NAME=${{ matrix.model.name }}
          TAG=${{ matrix.model.tag }}
          # Use '|| true' to prevent script from exiting with an error if the repository is not found
          TAGS=$(az acr repository show-tags -n $ACR_NAME --repository $IMAGE_NAME --output tsv || true)
          if [[ -z "$TAGS" ]]; then
            echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
            echo "IMAGE_EXISTS=false" >> $GITHUB_OUTPUT
          else
            # -F -x: compare the tag as a literal whole line, so the dots in a
            # version such as 0.0.1 are not treated as regex wildcards.
            if echo "$TAGS" | grep -Fxq -- "$TAG"; then
              echo "IMAGE_EXISTS=true" >> $GITHUB_OUTPUT
            else
              echo "IMAGE_EXISTS=false" >> $GITHUB_OUTPUT
              echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
            fi
          fi

      - name: Exit if Image for testing does not exist
        if: steps.check_image.outputs.IMAGE_EXISTS == 'false'
        run: |
          echo "Image doesn't exist in ACR, remember to build image for testing first (preset-image-build)"
          exit 1

      - name: Set up kubectl context
        run: |
          az aks get-credentials --resource-group llm-test --name GitRunner

      - name: Get Nodepool Name
        id: get_nodepool_name
        run: |
          NAME_SUFFIX=${{ matrix.model.name }}
          NAME_SUFFIX_WITHOUT_DASHES=${NAME_SUFFIX//-/} # Removing all '-' symbols
          NAME_SUFFIX_WITHOUT_DASHES=${NAME_SUFFIX_WITHOUT_DASHES//./} # Removing all '.' symbols
          if [ ${#NAME_SUFFIX_WITHOUT_DASHES} -gt 12 ]; then
            TRUNCATED_NAME_SUFFIX=${NAME_SUFFIX_WITHOUT_DASHES:0:12}
          else
            TRUNCATED_NAME_SUFFIX=$NAME_SUFFIX_WITHOUT_DASHES
          fi
          echo "Nodepool Name: $TRUNCATED_NAME_SUFFIX"
          echo "NODEPOOL_NAME=$TRUNCATED_NAME_SUFFIX" >> $GITHUB_OUTPUT

      - name: Create Nodepool
        run: |
          NODEPOOL_EXIST=$(az aks nodepool show \
            --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
            --cluster-name GitRunner \
            --resource-group llm-test \
            --query 'name' -o tsv || echo "")
          echo "NODEPOOL_EXIST: $NODEPOOL_EXIST"
          if [ -z "$NODEPOOL_EXIST" ]; then
            az aks nodepool add \
              --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
              --cluster-name GitRunner \
              --resource-group llm-test \
              --node-count ${{ matrix.model.node-count }} \
              --node-vm-size ${{ matrix.model.node-vm-size }} \
              --node-osdisk-size ${{ matrix.model.node-osdisk-size }} \
              --labels pool=${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
              --node-taints sku=gpu:NoSchedule \
              --aks-custom-headers UseGPUDedicatedVHD=true
          else
            NODEPOOL_STATE=$(az aks nodepool show \
              --name ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }} \
              --cluster-name GitRunner \
              --resource-group llm-test \
              --query 'provisioningState' -o tsv)
            echo "NODEPOOL_STATE: $NODEPOOL_STATE"
            if [ "$NODEPOOL_STATE" != "Succeeded" ]; then
              echo "Nodepool exists but is not in a Succeeded state. Please check manually."
              exit 1
            else
              echo "Nodepool already exists and is in a running state."
            fi
          fi

      - name: Get testing workload
        id: workload
        run: |
          WORKLOAD_NAME=${{ matrix.model.workload || matrix.model.name }}
          echo "WORKLOAD_NAME=$WORKLOAD_NAME" >> $GITHUB_OUTPUT
          echo "WORKLOAD_FILE_PREFIX=presets/workspace/test/manifests/$WORKLOAD_NAME/$WORKLOAD_NAME" >> $GITHUB_OUTPUT

      - name: Create Service
        run: |
          kubectl apply -f ${{steps.workload.outputs.WORKLOAD_FILE_PREFIX}}-service.yaml

      # Despite the step name, this reads the Service's .spec.clusterIP.
      - name: Retrieve External Service IP
        id: get_ip
        run: |
          SERVICE_IP=$(kubectl get svc ${{steps.workload.outputs.WORKLOAD_NAME}} -o=jsonpath='{.spec.clusterIP}')
          echo "Service IP is $SERVICE_IP"
          echo "SERVICE_IP=$SERVICE_IP" >> $GITHUB_OUTPUT

      # Models whose name contains "llama" are handled as StatefulSets, all
      # others as Deployments.
      - name: Get Resource Type
        id: resource
        run: |
          RESOURCE_TYPE=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "statefulset" || echo "deployment")
          echo "RESOURCE_TYPE=$RESOURCE_TYPE" >> $GITHUB_OUTPUT

      - name: Replace IP and Deploy Resource to K8s
        run: |
          # Non-llama manifests carry a runtime suffix (_hf / _vllm); llama ones do not.
          POSTFIX=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "" || echo "_${{ env.RUNTIME }}")
          WORKLOAD_FILE=${{steps.workload.outputs.WORKLOAD_FILE_PREFIX}}$POSTFIX.yaml
          sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" $WORKLOAD_FILE
          sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" $WORKLOAD_FILE
          sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" $WORKLOAD_FILE
          kubectl apply -f $WORKLOAD_FILE

      - name: Wait for Resource to be ready
        run: |
          kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} --timeout=1800s

      - name: Check Adapter Loading from Logs
        if: matrix.model.loads_adapter == true
        run: |
          POD_NAME=$(kubectl get pods -l app=${{steps.workload.outputs.WORKLOAD_NAME}} -o jsonpath="{.items[0].metadata.name}")
          kubectl logs $POD_NAME | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)

      # curl is installed inside the workload container because the endpoint
      # tests below are executed there via kubectl exec.
      - name: Install testing commands
        run: |
          kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- apt-get update
          kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- apt-get install -y curl

      - name: Test healthz endpoint
        run: |
          kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
          curl -s http://localhost:5000/health

      # Request shape depends on the model: llama chat, llama completion,
      # the vllm runtime's /v1/chat/completions, or the default /chat payload.
      - name: Test inference endpoint
        run: |
          echo "Testing inference for ${{ matrix.model.name }}"
          if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
            kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
            curl -s -X POST \
            -H "Content-Type: application/json" \
            -d '{
              "input_data": {
                "input_string": [
                  [
                    {
                      "role": "system",
                      "content": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe."
                    },
                    {
                      "role": "user",
                      "content": "Write a brief birthday message to John"
                    }
                  ]
                ]
              }
            }' \
            http://localhost:5000/chat
          elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
            kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
            curl -s -X POST \
            -H "Content-Type: application/json" \
            -d '{
              "prompts": [
                "I believe the meaning of life is",
                "Simply put, the theory of relativity states that ",
                "A brief message congratulating the team on the launch: Hi everyone, I just ",
                "Translate English to French: sea otter => loutre de mer, peppermint => menthe poivrée, plush girafe => girafe peluche, cheese =>"
              ],
              "parameters": {
                "max_gen_len": 128
              }
            }' \
            http://localhost:5000/generate
          elif [[ "${{ env.RUNTIME }}" == *"vllm"* ]]; then
            kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
            curl -s -X POST \
            -H "accept: application/json" \
            -H "Content-Type: application/json" \
            -d '{
              "model": "test",
              "messages": [
                {
                  "role": "system",
                  "content": "You are a helpful assistant."
                },
                {
                  "role": "user",
                  "content": "Hello!"
                }
              ]
            }' \
            http://localhost:5000/v1/chat/completions
          else
            kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
            curl -s -X POST \
            -H "accept: application/json" \
            -H "Content-Type: application/json" \
            -d '{
              "prompt":"Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
              "return_full_text": false,
              "clean_up_tokenization_spaces": false,
              "prefix": null,
              "handle_long_generation": null,
              "generate_kwargs": {
                "max_length":200,
                "min_length":0,
                "do_sample":true,
                "early_stopping":false,
                "num_beams":1,
                "num_beam_groups":1,
                "diversity_penalty":0.0,
                "temperature":1.0,
                "top_k":10,
                "top_p":1,
                "typical_p":1,
                "repetition_penalty":1,
                "length_penalty":1,
                "no_repeat_ngram_size":0,
                "encoder_no_repeat_ngram_size":0,
                "bad_words_ids":null,
                "num_return_sequences":1,
                "output_scores":false,
                "return_dict_in_generate":false,
                "forced_bos_token_id":null,
                "forced_eos_token_id":null,
                "remove_invalid_values":null
              }
            }' \
            http://localhost:5000/chat
          fi

      # Runs even when earlier steps failed. Every deletion is attempted
      # regardless of the outcome of the previous one, so a failing kubectl
      # call can no longer prevent the nodepool from being removed; the step
      # still ends with a non-zero status if any deletion failed.
      - name: Cleanup
        if: always()
        env:
          RESOURCE_TYPE: ${{ steps.resource.outputs.RESOURCE_TYPE }}
          WORKLOAD_NAME: ${{ steps.workload.outputs.WORKLOAD_NAME }}
          NODEPOOL_NAME: ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}
        run: |
          CLEANUP_FAILED=0

          # Only proceed if RESOURCE_TYPE is set (else resource wasn't created)
          if [ -n "$RESOURCE_TYPE" ] && [ -n "$WORKLOAD_NAME" ]; then
            # Check and Delete K8s Resource (Deployment or StatefulSet)
            if kubectl get "$RESOURCE_TYPE" "$WORKLOAD_NAME" > /dev/null 2>&1; then
              # Logs are diagnostic only; failing to fetch them must not stop cleanup.
              kubectl logs "$RESOURCE_TYPE/$WORKLOAD_NAME" || echo "Could not fetch logs for $RESOURCE_TYPE/$WORKLOAD_NAME"
              kubectl delete "$RESOURCE_TYPE" "$WORKLOAD_NAME" || CLEANUP_FAILED=1
            fi
          fi

          # Check and Delete K8s Service if it exists.
          # The name guard matters: with an empty name 'kubectl get svc' lists
          # every service and succeeds, and the following delete would fail.
          if [ -n "$WORKLOAD_NAME" ] && kubectl get svc "$WORKLOAD_NAME" > /dev/null 2>&1; then
            kubectl delete svc "$WORKLOAD_NAME" || CLEANUP_FAILED=1
          fi

          # Check and Delete AKS Nodepool if it exists
          if [ -n "$NODEPOOL_NAME" ]; then
            NODEPOOL_EXIST=$(az aks nodepool show \
              --name "$NODEPOOL_NAME" \
              --cluster-name GitRunner \
              --resource-group llm-test \
              --query 'name' -o tsv || echo "")
            if [ -n "$NODEPOOL_EXIST" ]; then
              az aks nodepool delete \
                --name "$NODEPOOL_NAME" \
                --cluster-name GitRunner \
                --resource-group llm-test || CLEANUP_FAILED=1
            fi
          fi

          exit "$CLEANUP_FAILED"