---
# e2e-preset-test.yml
name: E2E Preset Test

# Two entry points:
#   * workflow_run      - fires when "Build and Push Preset Models" completes
#   * workflow_dispatch - manual run with optional boolean switches
on:
  workflow_run:
    workflows:
      - "Build and Push Preset Models"
    types:
      - completed
  workflow_dispatch:
    inputs:
      force-run-all:
        description: "Test all models for E2E"
        type: boolean
        default: false
      force-run-all-phi-models:
        description: "Test all Phi models for E2E"
        type: boolean
        default: false
      test-on-vllm:
        description: "Test on VLLM runtime"
        type: boolean
        default: false
# Workflow-wide environment; every step of every job inherits these.
env:
  GO_VERSION: "1.22"
  # Source branch of a pull request when there is one, otherwise the ref name
  # of the run itself.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # The two FORCE_* flags can only become true on a manual dispatch whose
  # matching input was switched on.
  FORCE_RUN_ALL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}
  FORCE_RUN_ALL_PHI: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all-phi-models == 'true' }}
  # 'vllm' only for a manual dispatch with test-on-vllm enabled; 'hf' otherwise.
  RUNTIME: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.test-on-vllm == 'true') && 'vllm' || 'hf' }}
# Token scopes granted to every job in this workflow.
permissions:
  # Read-only access to repository contents.
  contents: read
  # Allows requesting an OIDC token - presumably what the Azure login step
  # relies on, since it is given a client id and tenant id but no secret.
  id-token: write
jobs:
  # Works out which preset models need an E2E run and joins each of them with
  # its test configuration from .github/e2e-preset-configs.json.
  #
  # Outputs:
  #   matrix          - raw output of determine_models.py
  #   is_matrix_empty - 'true' when that output is empty or "[]"
  #   full_matrix     - model entries merged with their config entries
  determine-models:
    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
    runs-on: ubuntu-latest
    environment: preset-env
    outputs:
      matrix: ${{ steps.affected_models.outputs.matrix }}
      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
      full_matrix: ${{ steps.images.outputs.full_matrix }}
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: true
          fetch-depth: 0

      # This script should output a JSON array of model names
      - name: Determine Affected Models
        id: affected_models
        run: |
          # BRANCH_NAME is read from the process environment instead of being
          # pasted into the script text, so a ref name containing shell
          # metacharacters cannot alter the command.
          # FORCE_RUN_ALL and FORCE_RUN_ALL_PHI come from the workflow-level
          # env block and are already visible to the Python process.
          PR_BRANCH="$BRANCH_NAME" \
            python3 .github/workflows/kind-cluster/determine_models.py

      - name: Print Determined Models
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          echo "Output from determine_models: $MATRIX"

      - name: Check if Matrix is Empty
        id: check_matrix_empty
        env:
          # Handed over as an environment variable: the JSON contains double
          # quotes, which would otherwise terminate the shell string early.
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          if [ -z "$MATRIX" ] || [ "$MATRIX" = "[]" ]; then
            echo "is_empty=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_empty=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Add Config info for Testing
        if: steps.check_matrix_empty.outputs.is_empty == 'false'
        id: images
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          # Read the additional configurations from e2e-preset-configs.json
          CONFIGS=$(jq -c '.matrix.image' .github/e2e-preset-configs.json)
          echo "CONFIGS:"
          echo "$CONFIGS"

          # Pseudocode for combining matrices
          # COMBINED_MATRIX = []
          # for model in MATRIX:
          #     for config in CONFIGS:
          #         if config['name'] == model['name']:
          #             combined = {**model, **config}
          #             COMBINED_MATRIX.append(combined)
          # Note: the jq filter emits one merged entry for every config whose
          # name matches; it does not stop at the first match.
          COMBINED_MATRIX=$(printf '%s\n' "$MATRIX" | jq --argjson configs "$CONFIGS" -c '
            map(. as $model | $configs[] | select(.name == $model.name) | $model + .)
          ')

          echo "full_matrix=$COMBINED_MATRIX" >> "$GITHUB_OUTPUT"

      - name: Print Combined Matrix
        if: steps.check_matrix_empty.outputs.is_empty == 'false'
        env:
          FULL_MATRIX: ${{ steps.images.outputs.full_matrix }}
        run: |
          echo "Combined Matrix:"
          echo "$FULL_MATRIX"
# Job e2e-preset-tests: for every entry of the combined matrix, deploy the
# preset image to the GitRunner AKS cluster, probe its HTTP endpoints, and
# always tear the workload, service and node pool down again.
  e2e-preset-tests:
    needs: determine-models
    if: needs.determine-models.outputs.is_matrix_empty == 'false' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success')
    runs-on: ubuntu-latest
    environment: preset-env
    strategy:
      # One failing model must not cancel the runs of the others.
      fail-fast: false
      matrix:
        # Ex matrix element:
        # {"name":"falcon-40b","type":"text-generation","version":"#",
        # "runtime":"tfs","tag":"0.0.1","node-count":1,
        # "node-vm-size":"Standard_NC96ads_A100_v4", "node-osdisk-size":400}
        model: ${{ fromJson(needs.determine-models.outputs.full_matrix) }}
    steps:
      - name: Checkout
        # Pinned to the same commit the determine-models job uses.
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: true
          fetch-depth: 0

      - name: Set OSS Flag
        run: echo "MODEL_IS_OSS=${{ matrix.model.OSS }}" >> "$GITHUB_ENV"

      - name: 'Az CLI login'
        # NOTE(review): the exact version was lost when this file was
        # extracted; v2 is the current major tag - pin to a release or commit
        # SHA once the intended version is confirmed.
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          allow-no-subscriptions: true

      - name: 'Set ACR Subscription'
        run: az account set --subscription ${{secrets.AZURE_SUBSCRIPTION_ID}}

      - name: Check if Image exists in ACR
        id: check_image
        run: |
          ACR_NAME=${{ secrets.ACR_AMRT_USERNAME }}
          IMAGE_NAME=${{ matrix.model.name }}
          TAG=${{ matrix.model.tag }}

          # Use '|| true' to prevent script from exiting with an error if the repository is not found
          TAGS=$(az acr repository show-tags -n "$ACR_NAME" --repository "$IMAGE_NAME" --output tsv || true)

          if [[ -z "$TAGS" ]]; then
            echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
          # -F: compare literally (the dots in a tag are not wildcards);
          # -x: the whole line must equal the tag.
          elif grep -Fxq -- "$TAG" <<< "$TAGS"; then
            echo "IMAGE_EXISTS=true" >> "$GITHUB_OUTPUT"
          else
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
            echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
          fi

      - name: Exit if Image for testing does not exist
        if: steps.check_image.outputs.IMAGE_EXISTS == 'false'
        run: |
          echo "Image doesn't exist in ACR, remember to build image for testing first (preset-image-build)"
          exit 1

      - name: Set up kubectl context
        run: |
          az aks get-credentials --resource-group llm-test --name GitRunner

      - name: Get Nodepool Name
        id: get_nodepool_name
        run: |
          NAME_SUFFIX=${{ matrix.model.name }}
          # Strip every '-' and '.' from the model name.
          SANITIZED_NAME=${NAME_SUFFIX//[-.]/}
          # Keep at most the first 12 characters (a shorter name is unchanged)
          # - presumably the AKS node pool name length limit; confirm.
          TRUNCATED_NAME_SUFFIX=${SANITIZED_NAME:0:12}
          echo "Nodepool Name: $TRUNCATED_NAME_SUFFIX"
          echo "NODEPOOL_NAME=$TRUNCATED_NAME_SUFFIX" >> "$GITHUB_OUTPUT"

      - name: Create Nodepool
        run: |
          NODEPOOL_NAME=${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}

          # An empty result means the pool does not exist yet ('show' failed).
          NODEPOOL_EXIST=$(az aks nodepool show \
                          --name "$NODEPOOL_NAME" \
                          --cluster-name GitRunner \
                          --resource-group llm-test \
                          --query 'name' -o tsv || echo "")
          echo "NODEPOOL_EXIST: $NODEPOOL_EXIST"
          if [ -z "$NODEPOOL_EXIST" ]; then
            az aks nodepool add \
                --name "$NODEPOOL_NAME" \
                --cluster-name GitRunner \
                --resource-group llm-test \
                --node-count ${{ matrix.model.node-count }} \
                --node-vm-size ${{ matrix.model.node-vm-size }} \
                --node-osdisk-size ${{ matrix.model.node-osdisk-size }} \
                --labels pool="$NODEPOOL_NAME" \
                --node-taints sku=gpu:NoSchedule \
                --aks-custom-headers UseGPUDedicatedVHD=true
          else
            NODEPOOL_STATE=$(az aks nodepool show \
                            --name "$NODEPOOL_NAME" \
                            --cluster-name GitRunner \
                            --resource-group llm-test \
                            --query 'provisioningState' -o tsv)
            echo "NODEPOOL_STATE: $NODEPOOL_STATE"
            if [ "$NODEPOOL_STATE" != "Succeeded" ]; then
              echo "Nodepool exists but is not in a Succeeded state. Please check manually."
              exit 1
            else
              echo "Nodepool already exists and is in a running state."
            fi
          fi

      - name: Get testing workload
        id: workload
        run: |
          # A matrix entry may name a different workload; default to the model name.
          WORKLOAD_NAME=${{ matrix.model.workload || matrix.model.name }}
          echo "WORKLOAD_NAME=$WORKLOAD_NAME" >> "$GITHUB_OUTPUT"
          echo "WORKLOAD_FILE_PREFIX=presets/workspace/test/manifests/$WORKLOAD_NAME/$WORKLOAD_NAME" >> "$GITHUB_OUTPUT"

      - name: Create Service
        run: |
          kubectl apply -f ${{steps.workload.outputs.WORKLOAD_FILE_PREFIX}}-service.yaml

      - name: Retrieve External Service IP
        id: get_ip
        run: |
          # Despite the step name, this reads the service's cluster IP.
          SERVICE_IP=$(kubectl get svc ${{steps.workload.outputs.WORKLOAD_NAME}} -o=jsonpath='{.spec.clusterIP}')
          echo "Service IP is $SERVICE_IP"
          echo "SERVICE_IP=$SERVICE_IP" >> "$GITHUB_OUTPUT"

      - name: Get Resource Type
        id: resource
        run: |
          # Model names containing "llama" map to a statefulset, all others to a deployment.
          RESOURCE_TYPE=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "statefulset" || echo "deployment")
          echo "RESOURCE_TYPE=$RESOURCE_TYPE" >> "$GITHUB_OUTPUT"

      - name: Replace IP and Deploy Resource to K8s
        run: |
          # Llama manifests carry no runtime suffix; the others are "<prefix>_<runtime>.yaml".
          POSTFIX=$(echo "${{ matrix.model.name }}" | grep -q "llama" && echo "" || echo "_${{ env.RUNTIME }}")
          WORKLOAD_FILE=${{steps.workload.outputs.WORKLOAD_FILE_PREFIX}}$POSTFIX.yaml

          # Fill in the placeholders of the checked-out manifest before applying it.
          sed -i "s/MASTER_ADDR_HERE/${{ steps.get_ip.outputs.SERVICE_IP }}/g" "$WORKLOAD_FILE"
          sed -i "s/TAG_HERE/${{ matrix.model.tag }}/g" "$WORKLOAD_FILE"
          sed -i "s/REPO_HERE/${{ secrets.ACR_AMRT_USERNAME }}/g" "$WORKLOAD_FILE"
          kubectl apply -f "$WORKLOAD_FILE"

      - name: Wait for Resource to be ready
        run: |
          kubectl rollout status ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} --timeout=1800s

      - name: Check Adapter Loading from Logs
        if: matrix.model.loads_adapter == true
        run: |
          POD_NAME=$(kubectl get pods -l app=${{steps.workload.outputs.WORKLOAD_NAME}} -o jsonpath="{.items[0].metadata.name}")
          kubectl logs "$POD_NAME" | grep "Adapter added:" | grep "${{ matrix.model.expected_adapter }}" || (echo "Adapter not loaded or incorrect adapter loaded" && exit 1)

      - name: Install testing commands
        run: |
          kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- apt-get update
          kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- apt-get install -y curl

      - name: Test healthz endpoint
        run: |
          # -f makes curl exit non-zero on an HTTP error status so the step
          # fails; -S still prints transport errors despite -s.
          kubectl exec ${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}} -- \
            curl -sSf http://localhost:5000/health

      - name: Test inference endpoint
        run: |
          TARGET="${{steps.resource.outputs.RESOURCE_TYPE}}/${{steps.workload.outputs.WORKLOAD_NAME}}"
          echo "Testing inference for ${{ matrix.model.name }}"
          # Every request uses -f so an HTTP error status fails the step
          # instead of being reported as a passing test.
          if [[ "${{ matrix.model.name }}" == *"llama"* && "${{ matrix.model.name }}" == *"-chat"* ]]; then
            # Llama chat models: dialog-style request on /chat.
            kubectl exec "$TARGET" -- \
              curl -sSf -X POST \
              -H "Content-Type: application/json" \
              -d '{
                "input_data": {
                  "input_string": [
                    [
                      {
                        "role": "system",
                        "content": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe."
                      },
                      {
                        "role": "user",
                        "content": "Write a brief birthday message to John"
                      }
                    ]
                  ]
                }
              }' \
              http://localhost:5000/chat
          elif [[ "${{ matrix.model.name }}" == *"llama"* ]]; then
            # Remaining llama models: plain completion prompts on /generate.
            kubectl exec "$TARGET" -- \
              curl -sSf -X POST \
              -H "Content-Type: application/json" \
              -d '{
                "prompts": [
                  "I believe the meaning of life is",
                  "Simply put, the theory of relativity states that ",
                  "A brief message congratulating the team on the launch: Hi everyone, I just ",
                  "Translate English to French: sea otter => loutre de mer, peppermint => menthe poivrée, plush girafe => girafe peluche, cheese =>"
                ],
                "parameters": {
                  "max_gen_len": 128
                }
              }' \
              http://localhost:5000/generate
          elif [[ "${{ env.RUNTIME }}" == *"vllm"* ]]; then
            # vLLM runtime: request on /v1/chat/completions.
            kubectl exec "$TARGET" -- \
              curl -sSf -X POST \
              -H "accept: application/json" \
              -H "Content-Type: application/json" \
              -d '{
                "model": "test",
                "messages": [
                  {
                    "role": "system",
                    "content": "You are a helpful assistant."
                  },
                  {
                    "role": "user",
                    "content": "Hello!"
                  }
                ]
              }' \
              http://localhost:5000/v1/chat/completions
          else
            # Everything else: prompt plus generate_kwargs on /chat.
            kubectl exec "$TARGET" -- \
              curl -sSf -X POST \
              -H "accept: application/json" \
              -H "Content-Type: application/json" \
              -d '{
                "prompt":"Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
                "return_full_text": false,
                "clean_up_tokenization_spaces": false,
                "prefix": null,
                "handle_long_generation": null,
                "generate_kwargs": {
                  "max_length":200,
                  "min_length":0,
                  "do_sample":true,
                  "early_stopping":false,
                  "num_beams":1,
                  "num_beam_groups":1,
                  "diversity_penalty":0.0,
                  "temperature":1.0,
                  "top_k":10,
                  "top_p":1,
                  "typical_p":1,
                  "repetition_penalty":1,
                  "length_penalty":1,
                  "no_repeat_ngram_size":0,
                  "encoder_no_repeat_ngram_size":0,
                  "bad_words_ids":null,
                  "num_return_sequences":1,
                  "output_scores":false,
                  "return_dict_in_generate":false,
                  "forced_bos_token_id":null,
                  "forced_eos_token_id":null,
                  "remove_invalid_values":null
                }
              }' \
              http://localhost:5000/chat
          fi

      - name: Cleanup
        if: always()
        env:
          RESOURCE_TYPE: ${{ steps.resource.outputs.RESOURCE_TYPE }}
          WORKLOAD_NAME: ${{ steps.workload.outputs.WORKLOAD_NAME }}
          NODEPOOL_NAME: ${{ steps.get_nodepool_name.outputs.NODEPOOL_NAME }}
        run: |
          # Each stage is attempted even when an earlier one fails, so a
          # problem with the workload can never leave the node pool behind.
          # Failed deletions are remembered and reported through the final
          # exit code.
          status=0

          # Only proceed if RESOURCE_TYPE is set (else resource wasn't created)
          if [ -n "$RESOURCE_TYPE" ] && [ -n "$WORKLOAD_NAME" ]; then
            # Check and Delete K8s Resource (Deployment or StatefulSet)
            if kubectl get "$RESOURCE_TYPE" "$WORKLOAD_NAME" > /dev/null 2>&1; then
              # Logs are diagnostic only; a pod that never started has none,
              # and that must not stop the cleanup.
              kubectl logs "$RESOURCE_TYPE/$WORKLOAD_NAME" || echo "Could not fetch logs for $RESOURCE_TYPE/$WORKLOAD_NAME"
              kubectl delete "$RESOURCE_TYPE" "$WORKLOAD_NAME" || status=1
            fi
          fi

          # Check and Delete K8s Service if it exists.
          # With an empty name 'kubectl get svc' would list every service and
          # succeed, so the name is checked first.
          if [ -n "$WORKLOAD_NAME" ] && kubectl get svc "$WORKLOAD_NAME" > /dev/null 2>&1; then
            kubectl delete svc "$WORKLOAD_NAME" || status=1
          fi

          # Check and Delete AKS Nodepool if it exists
          if [ -n "$NODEPOOL_NAME" ]; then
            NODEPOOL_EXIST=$(az aks nodepool show \
                            --name "$NODEPOOL_NAME" \
                            --cluster-name GitRunner \
                            --resource-group llm-test \
                            --query 'name' -o tsv || echo "")

            if [ -n "$NODEPOOL_EXIST" ]; then
              az aks nodepool delete \
              --name "$NODEPOOL_NAME" \
              --cluster-name GitRunner \
              --resource-group llm-test || status=1
            fi
          fi

          exit "$status"