Skip to content

Commit

Permalink
RAGEngine e2e tests
Browse files Browse the repository at this point in the history
Signed-off-by: Bangqi Zhu <[email protected]>
  • Loading branch information
Bangqi Zhu committed Jan 11, 2025
1 parent c429d81 commit 6a1cb8f
Show file tree
Hide file tree
Showing 9 changed files with 1,458 additions and 5 deletions.
284 changes: 284 additions & 0 deletions .github/workflows/ragengine-e2e-workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
name: ragengine-e2e-workflow

on:
workflow_call:
inputs:
git_sha:
type: string
required: true
node_provisioner:
type: string
required: false
default: gpuprovisioner
tag:
type: string
isRelease:
type: boolean
default: false
registry:
type: string
region:
type: string
description: "the azure location to run the e2e test in"
default: "eastus"
k8s_version:
type: string
default: "1.30.0"

jobs:
e2e-tests:
runs-on: [ "self-hosted", "hostname:kaito-e2e-github-runner" ]
name: e2e-tests-${{ inputs.node_provisioner }}
permissions:
contents: read
id-token: write # This is required for requesting the JWT
environment: e2e-test
env:
GO_VERSION: "1.22"
KARPENTER_NAMESPACE: "karpenter"
GPU_PROVISIONER_NAMESPACE: "gpu-provisioner"

steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
with:
egress-policy: audit

- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.git_sha }}

- name: Set e2e Resource and Cluster Name
run: |
rand=$(git rev-parse --short ${{ inputs.git_sha }})
if [ "$rand" = "" ]; then
rand=$RANDOM
fi
echo "VERSION=${rand}" >> $GITHUB_ENV
echo "CLUSTER_NAME=${{ inputs.node_provisioner }}${rand}" >> $GITHUB_ENV
echo "REGISTRY=${{ inputs.node_provisioner }}${rand}.azurecr.io" >> $GITHUB_ENV
echo "RUN_LLAMA_13B=false" >> $GITHUB_ENV
- name: Set Registry
if: ${{ inputs.isRelease }}
run: |
echo "REGISTRY=${{ inputs.registry }}" >> $GITHUB_ENV
echo "VERSION=$(echo ${{ inputs.tag }} | tr -d v)" >> $GITHUB_ENV
- name: Remove existing Go modules directory
run: sudo rm -rf ~/go/pkg/mod

- name: Set up Go ${{ env.GO_VERSION }}
uses: actions/[email protected]
with:
go-version: ${{ env.GO_VERSION }}

- name: Install Azure CLI latest
run: |
if ! which az > /dev/null; then
echo "Azure CLI not found. Installing..."
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
else
echo "Azure CLI already installed."
fi
- name: Azure CLI Login
run: |
az login --identity
- uses: azure/setup-helm@v4
id: install

- name: Create Resource Group
shell: bash
run: |
make create-rg
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}

- name: Create ACR
shell: bash
run: |
make create-acr
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }}

- name: Create Azure Identity
uses: azure/[email protected]
with:
inlineScript: |
az identity create --name ${{ inputs.node_provisioner }}Identity --resource-group ${{ env.CLUSTER_NAME }}
- name: Generate APIs
run: |
make generate
- name: build KAITO image
if: ${{ !inputs.isRelease }}
shell: bash
run: |
make docker-build-workspace
env:
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}

- name: build kaito RAG Engine image
if: ${{ !inputs.isRelease }}
shell: bash
run: |
make docker-build-ragengine
env:
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}



- name: create cluster
shell: bash
run: |
if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then
make create-aks-cluster
else
make create-aks-cluster-for-karpenter
fi
env:
AZURE_ACR_NAME: ${{ env.CLUSTER_NAME }}
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
AZURE_LOCATION: ${{ inputs.region }}
AKS_K8S_VERSION: ${{ inputs.k8s_version }}

- name: Create Identities and Permissions for ${{ inputs.node_provisioner }}
shell: bash
run: |
AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \
make generate-identities
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
TEST_SUITE: ${{ inputs.node_provisioner }}

- name: Install gpu-provisioner helm chart
if: ${{ inputs.node_provisioner == 'gpuprovisioner' }}
shell: bash
run: |
AZURE_TENANT_ID=$E2E_TENANT_ID \
AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \
make gpu-provisioner-helm
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}

- name: Install karpenter Azure provider helm chart
if: ${{ inputs.node_provisioner == 'azkarpenter' }}
shell: bash
run: |
AZURE_TENANT_ID=$E2E_TENANT_ID \
AZURE_SUBSCRIPTION_ID=$E2E_SUBSCRIPTION_ID \
make azure-karpenter-helm
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
KARPENTER_VERSION: ${{ vars.KARPENTER_VERSION }}
KARPENTER_NAMESPACE: ${{ env.KARPENTER_NAMESPACE }}

- name: Install KAITO Workspace helm chart
shell: bash
run: |
make az-patch-install-helm
kubectl wait --for=condition=available deploy "kaito-workspace" -n kaito-workspace --timeout=300s
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}
TEST_SUITE: ${{ inputs.node_provisioner }}

- name: Install KAITO RAG Engine helm chart
shell: bash
run: |
make az-patch-install-ragengine-helm
kubectl wait --for=condition=available deploy "kaito-ragengine" -n kaito-ragengine --timeout=300s
env:
AZURE_RESOURCE_GROUP: ${{ env.CLUSTER_NAME }}
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
REGISTRY: ${{ env.REGISTRY }}
VERSION: ${{ env.VERSION }}
TEST_SUITE: ${{ inputs.node_provisioner }}

# Retrieve E2E ACR credentials and create Kubernetes secret
- name: Set up E2E ACR Credentials and Secret
shell: bash
run: |
# Retrieve the ACR username and password
ACR_USERNAME=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "username" -o tsv)
ACR_PASSWORD=$(az acr credential show --name ${{ env.CLUSTER_NAME }} --resource-group ${{ env.CLUSTER_NAME }} --query "passwords[0].value" -o tsv)
# Ensure credentials were retrieved successfully
if [ -z "$ACR_USERNAME" ] || [ -z "$ACR_PASSWORD" ]; then
echo "Failed to retrieve ACR credentials"
exit 1
fi
# Create the Kubernetes secret with the retrieved credentials
kubectl create secret docker-registry ${{ env.CLUSTER_NAME }}-acr-secret \
--docker-server=${{ env.CLUSTER_NAME }}.azurecr.io \
--docker-username=${ACR_USERNAME} \
--docker-password=${ACR_PASSWORD}
# Add Private-Hosted ACR secret for private models like llama
- name: Add Private-Hosted ACR Secret Credentials
run: |
# Ensure E2E_AMRT_SECRET_NAME is sanitized to remove any accidental quotes
E2E_AMRT_SECRET_NAME=$(echo "$E2E_AMRT_SECRET_NAME" | sed 's/[\"'\'']//g')
if kubectl get secret "$E2E_AMRT_SECRET_NAME" >/dev/null 2>&1; then
echo "Secret $E2E_AMRT_SECRET_NAME already exists. Skipping creation."
else
kubectl create secret docker-registry "$E2E_AMRT_SECRET_NAME" \
--docker-server="$E2E_ACR_AMRT_USERNAME.azurecr.io" \
--docker-username="$E2E_ACR_AMRT_USERNAME" \
--docker-password="$E2E_ACR_AMRT_PASSWORD"
echo "Secret $E2E_AMRT_SECRET_NAME created successfully."
fi
- name: Log ${{ inputs.node_provisioner }}
run: |
if [ "${{ inputs.node_provisioner }}" == "gpuprovisioner" ]; then
kubectl logs -n "${{ env.GPU_PROVISIONER_NAMESPACE }}" -l app.kubernetes.io/name=gpu-provisioner -c controller
else
kubectl logs -n "${{ env.KARPENTER_NAMESPACE }}" -l app.kubernetes.io/name=karpenter -c controller
fi
- name: Log kaito-workspace
run: |
kubectl get pods -n kaito-workspace -o name | grep "^pod/kaito-workspace" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-workspace {}
- name: Log kaito-ragengine
run: |
kubectl get pods -n kaito-ragengine -o name | grep "^pod/kaito-ragengine" | sed 's/^pod\///' | xargs -I {} kubectl logs -n kaito-ragengine {}
- name: Run e2e test
run: |
AI_MODELS_REGISTRY=$E2E_ACR_AMRT_USERNAME.azurecr.io \
AI_MODELS_REGISTRY_SECRET=$E2E_AMRT_SECRET_NAME \
make kaito-ragengine-e2e-test
env:
AZURE_CLUSTER_NAME: ${{ env.CLUSTER_NAME }}
RUN_LLAMA_13B: ${{ env.RUN_LLAMA_13B }}
REGISTRY: ${{ env.REGISTRY }}
TEST_SUITE: ${{ inputs.node_provisioner }}
E2E_ACR_REGISTRY: ${{ env.CLUSTER_NAME }}.azurecr.io
E2E_ACR_REGISTRY_SECRET: ${{ env.CLUSTER_NAME }}-acr-secret

- name: Cleanup e2e resources
if: ${{ always() }}
uses: azure/[email protected]
with:
inlineScript: |
set +e
az group delete --name "${{ env.CLUSTER_NAME }}" --yes --no-wait || true
31 changes: 31 additions & 0 deletions .github/workflows/ragengine-e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: ragengine-e2e-test

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

on:
pull_request:
paths-ignore: ['docs/**', '**.md', '**.mdx', '**.png', '**.jpg']

env:
GO_VERSION: "1.22"

permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout

jobs:
run-e2e:
strategy:
fail-fast: false
matrix:
node-provisioner: [gpuprovisioner] # WIP: azkarpenter]
permissions:
contents: read
id-token: write
statuses: write
uses: ./.github/workflows/ragengine-e2e-workflow.yml
with:
git_sha: ${{ github.event.pull_request.head.sha }}
node_provisioner: ${{ matrix.node-provisioner }}
36 changes: 31 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Image URL to use all building/pushing image targets
REGISTRY ?= YOUR_REGISTRY
IMG_NAME ?= workspace
RAGENGINE_IMG_NAME ?= ragengine
VERSION ?= v0.4.1
GPU_PROVISIONER_VERSION ?= 0.2.1
IMG_TAG ?= $(subst v,,$(VERSION))
Expand Down Expand Up @@ -32,6 +33,7 @@ AZURE_CLUSTER_NAME ?= kaito-demo
AZURE_RESOURCE_GROUP_MC=MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION)
GPU_PROVISIONER_NAMESPACE ?= gpu-provisioner
KAITO_NAMESPACE ?= kaito-workspace
KAITO_RAGENGINE_NAMESPACE ?= kaito-ragengine
GPU_PROVISIONER_MSI_NAME ?= gpuprovisionerIdentity

## Azure Karpenter parameters
Expand Down Expand Up @@ -132,6 +134,9 @@ GINKGO_ARGS ?= -focus="$(GINKGO_FOCUS)" -skip="$(GINKGO_SKIP)" -nodes=$(GINKGO_N
$(E2E_TEST):
(cd test/e2e && go test -c . -o $(E2E_TEST))

$(RAGENGINE_E2E_TEST):
(cd test/rage2e && go test -c . -o $(RAGENGINE_E2E_TEST))

.PHONY: kaito-workspace-e2e-test
kaito-workspace-e2e-test: $(E2E_TEST) $(GINKGO)
AI_MODELS_REGISTRY_SECRET=$(AI_MODELS_REGISTRY_SECRET) RUN_LLAMA_13B=$(RUN_LLAMA_13B) \
Expand All @@ -140,6 +145,14 @@ kaito-workspace-e2e-test: $(E2E_TEST) $(GINKGO)
SUPPORTED_MODELS_YAML_PATH=$(SUPPORTED_MODELS_YAML_PATH) \
$(GINKGO) -v -trace $(GINKGO_ARGS) $(E2E_TEST)

.PHONY: kaito-ragengine-e2e-test
kaito-workspace-e2e-test: $(RAGENGINE_E2E_TEST) $(GINKGO)
AI_MODELS_REGISTRY_SECRET=$(AI_MODELS_REGISTRY_SECRET) RUN_LLAMA_13B=$(RUN_LLAMA_13B) \
AI_MODELS_REGISTRY=$(AI_MODELS_REGISTRY) GPU_PROVISIONER_NAMESPACE=$(GPU_PROVISIONER_NAMESPACE) \
KARPENTER_NAMESPACE=$(KARPENTER_NAMESPACE) KAITO_NAMESPACE=$(KAITO_NAMESPACE) TEST_SUITE=$(TEST_SUITE) \
SUPPORTED_MODELS_YAML_PATH=$(SUPPORTED_MODELS_YAML_PATH) \
$(GINKGO) -v -trace $(GINKGO_ARGS) $(RAGENGINE_E2E_TEST)

## --------------------------------------
## Azure resources
## --------------------------------------
Expand Down Expand Up @@ -234,11 +247,11 @@ docker-build-workspace: docker-buildx
.PHONY: docker-build-ragengine
docker-build-ragengine: docker-buildx
docker buildx build \
--file ./docker/ragengine/Dockerfile \
--output=$(OUTPUT_TYPE) \
--platform="linux/$(ARCH)" \
--pull \
--tag $(REGISTRY)/$(RAGENGINE_IMG_NAME):$(RAGENGINE_IMG_TAG) .
--file ./docker/ragengine/Dockerfile \
--output=$(OUTPUT_TYPE) \
--platform="linux/$(ARCH)" \
--pull \
--tag $(REGISTRY)/$(RAGENGINE_IMAGE_NAME):$(IMG_TAG) .

.PHONY: docker-build-rag-service
docker-build-ragservice: docker buildx
Expand Down Expand Up @@ -318,6 +331,19 @@ az-patch-install-helm: ## Update Azure client env vars and settings in helm valu

helm install kaito-workspace ./charts/kaito/workspace --namespace $(KAITO_NAMESPACE) --create-namespace

.PHONY: az-patch-install-ragengine-helm
az-patch-install-ragengine-helm: ## Update Azure client env vars and settings in helm values.yml
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP)

yq -i '(.image.repository) = "$(REGISTRY)/ragengine"' ./charts/kaito/workspace/values.yaml
yq -i '(.image.tag) = "$(IMG_TAG)"' ./charts/kaito/workspace/values.yaml
if [ $(TEST_SUITE) = "azkarpenter" ]; then \
yq -i '(.featureGates.Karpenter) = "true"' ./charts/kaito/workspace/values.yaml; \
fi
yq -i '(.clusterName) = "$(AZURE_CLUSTER_NAME)"' ./charts/kaito/workspace/values.yaml

helm install kaito-ragengine ./charts/kaito/ragengine --namespace $(KAITO_RAGENGINE_NAMESPACE) --create-namespace

.PHONY: aws-patch-install-helm ##install kaito on AWS cluster
aws-patch-install-helm:
yq -i '(.image.repository) = "$(REGISTRY)/workspace"' ./charts/kaito/workspace/values.yaml
Expand Down
Loading

0 comments on commit 6a1cb8f

Please sign in to comment.