Skip to content

Commit

Permalink
fix: preset tuning test workflow
Browse files: browse the repository at this point in the history
Signed-off-by: jerryzhuang <[email protected]>
  • Loading branch information
zhuangqh committed Nov 28, 2024
1 parent 3dbb660 commit d4b8780
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/e2e-preset-configs.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
"loads_adapter": false
},
{
"name": "tuning-example",
"name": "tuning",
"node-count": 1,
"node-vm-size": "Standard_NC6s_v3",
"node-osdisk-size": 100,
Expand Down
25 changes: 13 additions & 12 deletions .github/workflows/e2e-preset-tuning-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ permissions:

jobs:
e2e-preset-tuning-tests:
needs: determine-models
if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
runs-on: ubuntu-latest
environment: preset-env
Expand Down Expand Up @@ -44,33 +43,35 @@ jobs:
- name: Get test meta
id: get_test_meta
run: |
CONFIG=$(jq '.matrix.image[] | select(.name == "tuning-example")' .github/e2e-preset-configs.json)
CONFIG=$(jq -c '.matrix.image[] | select(.name == "tuning-example")' .github/e2e-preset-configs.json)
echo "TAG=0.0.7" >> $GITHUB_OUTPUT
echo "model=$CONFIG" >> $GITHUB_OUTPUT
for row in $(echo "${CONFIG}" | jq -r 'to_entries|map("\(.key)=\(.value|tostring)")|.[]'); do
echo "${row}" >> $GITHUB_OUTPUT
done
- name: Create Nodepool
run: |
NODEPOOL_EXIST=$(az aks nodepool show \
--name ${{ steps.get_test_meta.outputs.model.name }} \
--name ${{ steps.get_test_meta.outputs.name }} \
--cluster-name GitRunner \
--resource-group llm-test \
--query 'name' -o tsv || echo "")
echo "NODEPOOL_EXIST: $NODEPOOL_EXIST"
if [ -z "$NODEPOOL_EXIST" ]; then
az aks nodepool add \
--name ${{ steps.get_test_meta.outputs.model.name }} \
--name ${{ steps.get_test_meta.outputs.name }} \
--cluster-name GitRunner \
--resource-group llm-test \
--node-count ${{ steps.get_test_meta.outputs.model.node-count }} \
--node-vm-size ${{ steps.get_test_meta.outputs.model.node-vm-size }} \
--node-osdisk-size ${{ steps.get_test_meta.outputs.model.node-osdisk-size }} \
--labels pool=${{ steps.get_test_meta.outputs.model.name }} \
--node-count ${{ steps.get_test_meta.outputs.node-count }} \
--node-vm-size ${{ steps.get_test_meta.outputs.node-vm-size }} \
--node-osdisk-size ${{ steps.get_test_meta.outputs.node-osdisk-size }} \
--labels pool=${{ steps.get_test_meta.outputs.name }} \
--node-taints sku=gpu:NoSchedule \
--aks-custom-headers UseGPUDedicatedVHD=true
else
NODEPOOL_STATE=$(az aks nodepool show \
--name ${{ steps.get_test_meta.outputs.model.name }} \
--name ${{ steps.get_test_meta.outputs.name }} \
--cluster-name GitRunner \
--resource-group llm-test \
--query 'provisioningState' -o tsv)
Expand Down Expand Up @@ -121,15 +122,15 @@ jobs:
# Check and Delete AKS Nodepool if it exists
NODEPOOL_EXIST=$(az aks nodepool show \
--name ${{ steps.get_test_meta.outputs.model.name }} \
--name ${{ steps.get_test_meta.outputs.name }} \
--cluster-name GitRunner \
--resource-group llm-test \
--query 'name' -o tsv || echo "")
if [ -n "$NODEPOOL_EXIST" ]; then
echo "deleting nodepool"
az aks nodepool delete \
--name ${{ steps.get_test_meta.outputs.model.name }} \
--name ${{ steps.get_test_meta.outputs.name }} \
--cluster-name GitRunner \
--resource-group llm-test
fi
2 changes: 1 addition & 1 deletion presets/workspace/test/tuning/tuning-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ spec:
- emptyDir: {}
name: data-volume
nodeSelector:
pool: tuning-example
pool: tuning
---
apiVersion: v1
kind: ConfigMap
Expand Down

0 comments on commit d4b8780

Please sign in to comment.