From 3492355bedbc6abed9b90e9b78b7c482622b4044 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 30 Apr 2024 13:59:04 -0700 Subject: [PATCH 01/16] feat: handling image data destination --- pkg/resources/manifests.go | 72 +++++++++++++++++++++++++++++++++---- pkg/tuning/preset-tuning.go | 4 +-- pkg/utils/common-preset.go | 6 ++-- 3 files changed, 71 insertions(+), 11 deletions(-) diff --git a/pkg/resources/manifests.go b/pkg/resources/manifests.go index 68c59fc25..0982d0761 100644 --- a/pkg/resources/manifests.go +++ b/pkg/resources/manifests.go @@ -5,6 +5,7 @@ package resources import ( "context" "fmt" + batchv1 "k8s.io/api/batch/v1" "k8s.io/utils/pointer" @@ -184,6 +185,58 @@ func GenerateStatefulSetManifest(ctx context.Context, workspaceObj *kaitov1alpha return ss } +func dockerSidecarScriptPushToVolume(arg interface{}) string { + //volume, ok := arg.(*corev1.VolumeSource) + return ` +` +} +func dockerSidecarScriptPushImage(arg interface{}) string { + image, _ := arg.(string) + return fmt.Sprintf(` +# Start the Docker daemon in the background with specific options for DinD +dockerd & +# Wait for the Docker daemon to be ready +while ! docker info > /dev/null 2>&1; do + echo "Waiting for Docker daemon to start..." + sleep 1 +done +echo 'Docker daemon started' +while true; do + FILE_PATH=$(find /workspace/tfs -name 'fine_tuning_completed.txt') + if [ ! -z "$FILE_PATH" ]; then + echo "FOUND TRAINING COMPLETED FILE at $FILE_PATH" + PARENT_DIR=$(dirname "$FILE_PATH") + echo "Parent directory is $PARENT_DIR" + TEMP_CONTEXT=$(mktemp -d) + cp "$PARENT_DIR/adapter_config.json" "$TEMP_CONTEXT/adapter_config.json" + cp -r "$PARENT_DIR/adapter_model.safetensors" "$TEMP_CONTEXT/adapter_model.safetensors" + # Create a minimal Dockerfile + echo 'FROM scratch + ADD adapter_config.json / + ADD adapter_model.safetensors /' > "$TEMP_CONTEXT/Dockerfile" + docker build -t %s "$TEMP_CONTEXT" + docker push %s + # Cleanup: Remove the temporary directory + rm -rf "$TEMP_CONTEXT" + # Remove the file to prevent repeated builds, or handle as needed + # rm "$FILE_PATH" + echo "Upload complete" + exit 0 + fi + sleep 10 # Check every 10 seconds +done`, image, image) +} + +func determinePushMethod(wObj *kaitov1alpha1.Workspace) (func(arg interface{}) string, interface{}) { + if wObj.Tuning.Output.Volume != nil { + return dockerSidecarScriptPushToVolume, wObj.Tuning.Output.Volume + } + if wObj.Tuning.Output.Image != "" { + return dockerSidecarScriptPushImage, wObj.Tuning.Output.Image + } + return func(arg interface{}) string { return "" }, "" +} + func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspace, imageName string, imagePullSecretRefs []corev1.LocalObjectReference, replicas int, commands []string, containerPorts []corev1.ContainerPort, livenessProbe, readinessProbe *corev1.Probe, resourceRequirements corev1.ResourceRequirements, tolerations []corev1.Toleration, @@ -191,11 +244,7 @@ func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspac labels := map[string]string{ kaitov1alpha1.LabelWorkspaceName: wObj.Name, } - //TODO: - // Will be included in future PR, this code includes - // bash script for pushing results based on user - // data destination method - //pushMethod, pushArg := determinePushMethod(wObj) + pushMethod, pushArg := determinePushMethod(wObj) return &batchv1.Job{ TypeMeta: v1.TypeMeta{ APIVersion: "batch/v1", @@ -233,6 +282,17 @@ func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspac Ports: containerPorts, VolumeMounts: volumeMounts, }, + { + Name: wObj.Name + "dup", + Image: imageName, + Command: []string{"/bin/sh", "-c"}, + Args: []string{"sleep infinity"}, + //Resources: resourceRequirements, + //LivenessProbe: livenessProbe, + //ReadinessProbe: readinessProbe, + Ports: containerPorts, + VolumeMounts: volumeMounts, + }, { Name: "docker-sidecar", Image: "docker:dind", @@ -241,7 +301,7 @@ func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspac }, VolumeMounts: volumeMounts, Command: []string{"/bin/sh", "-c"}, - // TODO: Args: []string{pushMethod(pushArg)}, + Args: []string{pushMethod(pushArg)}, }, }, RestartPolicy: corev1.RestartPolicyNever, diff --git a/pkg/tuning/preset-tuning.go b/pkg/tuning/preset-tuning.go index 0f201ea39..a5ebb5964 100644 --- a/pkg/tuning/preset-tuning.go +++ b/pkg/tuning/preset-tuning.go @@ -184,7 +184,7 @@ func handleImageDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Work }, }) - volumes, volumeMounts := utils.ConfigDataVolume("") + volumes, volumeMounts := utils.ConfigDataVolume(nil) return initContainers, volumes, volumeMounts } @@ -216,7 +216,7 @@ func handleURLDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Worksp }, }, }) - volumes, volumeMounts := utils.ConfigDataVolume("") + volumes, volumeMounts := utils.ConfigDataVolume(nil) return initContainers, volumes, volumeMounts } diff --git a/pkg/utils/common-preset.go b/pkg/utils/common-preset.go index 87363821b..de79ff8f2 100644 --- a/pkg/utils/common-preset.go +++ b/pkg/utils/common-preset.go @@ -56,14 +56,14 @@ func ConfigCMVolume(cmName string) (corev1.Volume, corev1.VolumeMount) { return volume, volumeMount } -func ConfigDataVolume(hostPath string) ([]corev1.Volume, []corev1.VolumeMount) { +func ConfigDataVolume(hostPath *string) ([]corev1.Volume, []corev1.VolumeMount) { var volumes []corev1.Volume var volumeMounts []corev1.VolumeMount var volumeSource corev1.VolumeSource - if hostPath != "" { + if hostPath != nil { volumeSource = corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ - Path: hostPath, + Path: *hostPath, }, } } else { From bd60636cb6383b7880d035e6966183a3c087033b Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 30 Apr 2024 17:04:23 -0700 Subject: [PATCH 02/16] feat: ConfigWorkspaceVolume --- .../workspace/templates/clusterrole.yaml | 60 +++++++++---------- pkg/resources/manifests.go | 44 ++++++++------ pkg/tuning/preset-tuning.go | 8 +++ pkg/utils/common-preset.go | 21 ++++++- 4 files changed, 78 insertions(+), 55 deletions(-) diff --git a/charts/kaito/workspace/templates/clusterrole.yaml b/charts/kaito/workspace/templates/clusterrole.yaml index 137144614..3f7dafc5b 100644 --- a/charts/kaito/workspace/templates/clusterrole.yaml +++ b/charts/kaito/workspace/templates/clusterrole.yaml @@ -6,40 +6,34 @@ metadata: labels: {{- include "kaito.labels" . | nindent 4 }} rules: - - apiGroups: ["kaito.sh"] - resources: ["workspaces"] - verbs: ["update", "patch","get","list","watch"] - - apiGroups: ["kaito.sh"] - resources: ["workspaces/status"] - verbs: ["update", "patch","get","list","watch"] - - apiGroups: [""] - resources: ["nodes", "namespaces"] - verbs: ["get","list","watch","update", "patch"] - - apiGroups: [""] - resources: ["services"] - verbs: ["get","list","watch","create", "delete", "update", "patch"] + - apiGroups: [ "kaito.sh" ] + resources: [ "workspaces" ] + verbs: [ "update", "patch", "get", "list", "watch" ] + - apiGroups: [ "kaito.sh" ] + resources: [ "workspaces/status" ] + verbs: [ "update", "patch", "get", "list", "watch" ] - apiGroups: [ "" ] - resources: [ "pods"] - verbs: ["get","list","watch","create", "update", "patch" ] + resources: [ "nodes", "namespaces" ] + verbs: [ "get", "list", "watch", "update", "patch" ] + - apiGroups: [ "" ] + resources: [ "services" ] + verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] + - apiGroups: [ "" ] + resources: [ "pods" ] + verbs: [ "get", "list", "watch", "create", "update", "patch" ] - apiGroups: [ "" ] resources: [ "configmaps" ] - verbs: [ "get","list","watch","create", "delete" ] - - apiGroups: ["apps"] - resources: ["daemonsets"] - verbs: ["get","list","watch","update", "patch"] - - apiGroups: [ "apps" ] - resources: ["deployments" ] - verbs: ["get","list","watch","create", "delete","update", "patch"] + verbs: [ "get", "list", "watch", "create", "delete" ] - apiGroups: [ "apps" ] - resources: [ "statefulsets" ] - verbs: [ "get","list","watch","create", "delete","update", "patch" ] - - apiGroups: ["karpenter.sh"] - resources: ["machines", "machines/status", "nodepools", "nodepools/status", "nodeclaims", "nodeclaims/status"] - verbs: ["get","list","watch","create", "delete", "update", "patch"] - - apiGroups: ["admissionregistration.k8s.io"] - resources: ["validatingwebhookconfigurations"] - verbs: ["get","list","watch"] - - apiGroups: ["admissionregistration.k8s.io"] - resources: ["validatingwebhookconfigurations"] - verbs: ["update"] - resourceNames: ["validation.workspace.kaito.sh"] + resources: [ "daemonsets", "deployments", "statefulsets" ] + verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] + - apiGroups: [ "karpenter.sh" ] + resources: [ "machines", "machines/status", "nodepools", "nodepools/status", "nodeclaims", "nodeclaims/status" ] + verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] + - apiGroups: [ "batch" ] + resources: [ "jobs" ] + verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] + - apiGroups: [ "admissionregistration.k8s.io" ] + resources: [ "validatingwebhookconfigurations" ] + verbs: [ "get", "list", "watch", "update" ] + resourceNames: [ "validation.workspace.kaito.sh" ] \ No newline at end of file diff --git a/pkg/resources/manifests.go b/pkg/resources/manifests.go index 0982d0761..d67cab712 100644 --- a/pkg/resources/manifests.go +++ b/pkg/resources/manifests.go @@ -201,27 +201,34 @@ while ! docker info > /dev/null 2>&1; do sleep 1 done echo 'Docker daemon started' + while true; do FILE_PATH=$(find /workspace/tfs -name 'fine_tuning_completed.txt') if [ ! -z "$FILE_PATH" ]; then - echo "FOUND TRAINING COMPLETED FILE at $FILE_PATH" - PARENT_DIR=$(dirname "$FILE_PATH") - echo "Parent directory is $PARENT_DIR" - TEMP_CONTEXT=$(mktemp -d) - cp "$PARENT_DIR/adapter_config.json" "$TEMP_CONTEXT/adapter_config.json" - cp -r "$PARENT_DIR/adapter_model.safetensors" "$TEMP_CONTEXT/adapter_model.safetensors" - # Create a minimal Dockerfile - echo 'FROM scratch - ADD adapter_config.json / - ADD adapter_model.safetensors /' > "$TEMP_CONTEXT/Dockerfile" - docker build -t %s "$TEMP_CONTEXT" - docker push %s - # Cleanup: Remove the temporary directory - rm -rf "$TEMP_CONTEXT" - # Remove the file to prevent repeated builds, or handle as needed - # rm "$FILE_PATH" - echo "Upload complete" - exit 0 + echo "FOUND TRAINING COMPLETED FILE at $FILE_PATH" + + PARENT_DIR=$(dirname "$FILE_PATH") + echo "Parent directory is $PARENT_DIR" + + TEMP_CONTEXT=$(mktemp -d) + cp "$PARENT_DIR/adapter_config.json" "$TEMP_CONTEXT/adapter_config.json" + cp -r "$PARENT_DIR/adapter_model.safetensors" "$TEMP_CONTEXT/adapter_model.safetensors" + + # Create a minimal Dockerfile + echo 'FROM scratch + ADD adapter_config.json / + ADD adapter_model.safetensors /' > "$TEMP_CONTEXT/Dockerfile" + + docker build -t %s "$TEMP_CONTEXT" + docker push %s + + # Cleanup: Remove the temporary directory + rm -rf "$TEMP_CONTEXT" + + # Remove the file to prevent repeated builds, or handle as needed + # rm "$FILE_PATH" + echo "Upload complete" + exit 0 fi sleep 10 # Check every 10 seconds done`, image, image) @@ -450,5 +457,4 @@ func GenerateDeploymentManifestWithPodTemplate(ctx context.Context, workspaceObj Template: *templateCopy, }, } - } diff --git a/pkg/tuning/preset-tuning.go b/pkg/tuning/preset-tuning.go index a5ebb5964..0e0b7ee18 100644 --- a/pkg/tuning/preset-tuning.go +++ b/pkg/tuning/preset-tuning.go @@ -121,6 +121,14 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa return nil, err } + workspaceVolume, workspaceVolumeMount := utils.ConfigWorkspaceVolume() + if workspaceVolume.Name != "" { + volumes = append(volumes, workspaceVolume) + } + if workspaceVolumeMount.Name != "" { + volumeMounts = append(volumeMounts, workspaceVolumeMount) + } + shmVolume, shmVolumeMount := utils.ConfigSHMVolume(*workspaceObj.Resource.Count) if shmVolume.Name != "" { volumes = append(volumes, shmVolume) diff --git a/pkg/utils/common-preset.go b/pkg/utils/common-preset.go index de79ff8f2..d537f9020 100644 --- a/pkg/utils/common-preset.go +++ b/pkg/utils/common-preset.go @@ -7,11 +7,26 @@ import ( ) const ( - DefaultVolumeMountPath = "/dev/shm" - DefaultConfigMapMountPath = "/mnt/config" - DefaultDataVolumePath = "/mnt/data" + DefaultVolumeMountPath = "/dev/shm" + DefaultConfigMapMountPath = "/mnt/config" + DefaultDataVolumePath = "/mnt/data" + DefaultWorkspaceVolumePath = "/workspace" ) +func ConfigWorkspaceVolume() (corev1.Volume, corev1.VolumeMount) { + sharedWorkspaceVolume := corev1.Volume{ + Name: "workspace-volume", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + } + sharedVolumeMount := corev1.VolumeMount{ + Name: "workspace-volume", + MountPath: DefaultWorkspaceVolumePath, + } + return sharedWorkspaceVolume, sharedVolumeMount +} + func ConfigSHMVolume(instanceCount int) (corev1.Volume, corev1.VolumeMount) { volume := corev1.Volume{} volumeMount := corev1.VolumeMount{} From 3b12c96c80894536a01ecd6c5e637b02232d3b4d Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 30 Apr 2024 17:06:47 -0700 Subject: [PATCH 03/16] validation --- charts/kaito/workspace/templates/clusterrole.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/kaito/workspace/templates/clusterrole.yaml b/charts/kaito/workspace/templates/clusterrole.yaml index 3f7dafc5b..ef463800e 100644 --- a/charts/kaito/workspace/templates/clusterrole.yaml +++ b/charts/kaito/workspace/templates/clusterrole.yaml @@ -35,5 +35,5 @@ rules: verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] - apiGroups: [ "admissionregistration.k8s.io" ] resources: [ "validatingwebhookconfigurations" ] - verbs: [ "get", "list", "watch", "update" ] + verbs: [ "update" ] resourceNames: [ "validation.workspace.kaito.sh" ] \ No newline at end of file From 26d6c3a7a94a8d30e1dbd5728219cfeb5e72e369 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Tue, 30 Apr 2024 17:19:22 -0700 Subject: [PATCH 04/16] validation --- .../workspace/templates/clusterrole.yaml | 57 +++++++++++-------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/charts/kaito/workspace/templates/clusterrole.yaml b/charts/kaito/workspace/templates/clusterrole.yaml index ef463800e..98d9f2ad0 100644 --- a/charts/kaito/workspace/templates/clusterrole.yaml +++ b/charts/kaito/workspace/templates/clusterrole.yaml @@ -6,34 +6,43 @@ metadata: labels: {{- include "kaito.labels" . | nindent 4 }} rules: - - apiGroups: [ "kaito.sh" ] - resources: [ "workspaces" ] - verbs: [ "update", "patch", "get", "list", "watch" ] - - apiGroups: [ "kaito.sh" ] - resources: [ "workspaces/status" ] - verbs: [ "update", "patch", "get", "list", "watch" ] + - apiGroups: ["kaito.sh"] + resources: ["workspaces"] + verbs: ["update", "patch","get","list","watch"] + - apiGroups: ["kaito.sh"] + resources: ["workspaces/status"] + verbs: ["update", "patch","get","list","watch"] + - apiGroups: [""] + resources: ["nodes", "namespaces"] + verbs: ["get","list","watch","update", "patch"] + - apiGroups: [""] + resources: ["services"] + verbs: ["get","list","watch","create", "delete", "update", "patch"] - apiGroups: [ "" ] - resources: [ "nodes", "namespaces" ] - verbs: [ "get", "list", "watch", "update", "patch" ] - - apiGroups: [ "" ] - resources: [ "services" ] - verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] - - apiGroups: [ "" ] - resources: [ "pods" ] - verbs: [ "get", "list", "watch", "create", "update", "patch" ] + resources: [ "pods"] + verbs: ["get","list","watch","create", "update", "patch" ] - apiGroups: [ "" ] resources: [ "configmaps" ] - verbs: [ "get", "list", "watch", "create", "delete" ] + verbs: [ "get","list","watch","create", "delete" ] + - apiGroups: ["apps"] + resources: ["daemonsets"] + verbs: ["get","list","watch","update", "patch"] - apiGroups: [ "apps" ] - resources: [ "daemonsets", "deployments", "statefulsets" ] - verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] - - apiGroups: [ "karpenter.sh" ] - resources: [ "machines", "machines/status", "nodepools", "nodepools/status", "nodeclaims", "nodeclaims/status" ] - verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] + resources: ["deployments" ] + verbs: ["get","list","watch","create", "delete","update", "patch"] + - apiGroups: [ "apps" ] + resources: [ "statefulsets" ] + verbs: [ "get","list","watch","create", "delete","update", "patch" ] + - apiGroups: ["karpenter.sh"] + resources: ["machines", "machines/status", "nodepools", "nodepools/status", "nodeclaims", "nodeclaims/status"] + verbs: ["get","list","watch","create", "delete", "update", "patch"] + - apiGroups: ["admissionregistration.k8s.io"] + resources: ["validatingwebhookconfigurations"] + verbs: ["get","list","watch"] - apiGroups: [ "batch" ] resources: [ "jobs" ] verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] - - apiGroups: [ "admissionregistration.k8s.io" ] - resources: [ "validatingwebhookconfigurations" ] - verbs: [ "update" ] - resourceNames: [ "validation.workspace.kaito.sh" ] \ No newline at end of file + - apiGroups: ["admissionregistration.k8s.io"] + resources: ["validatingwebhookconfigurations"] + verbs: ["update"] + resourceNames: ["validation.workspace.kaito.sh"] \ No newline at end of file From 4d267a775f150bfafcfce9b1f0afc159cfef53d5 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 16:15:23 -0700 Subject: [PATCH 05/16] feat: image upload code --- pkg/resources/manifests.go | 6 ++--- pkg/tuning/preset-tuning.go | 29 +++++++++++++++-------- pkg/utils/common-preset.go | 42 +++++++++++++++++++++++++++------- presets/models/falcon/model.go | 11 +++++---- 4 files changed, 64 insertions(+), 24 deletions(-) diff --git a/pkg/resources/manifests.go b/pkg/resources/manifests.go index d67cab712..14130ca1c 100644 --- a/pkg/resources/manifests.go +++ b/pkg/resources/manifests.go @@ -203,7 +203,7 @@ done echo 'Docker daemon started' while true; do - FILE_PATH=$(find /workspace/tfs -name 'fine_tuning_completed.txt') + FILE_PATH=$(find /mnt/results -name 'fine_tuning_completed.txt') if [ ! -z "$FILE_PATH" ]; then echo "FOUND TRAINING COMPLETED FILE at $FILE_PATH" @@ -225,8 +225,8 @@ while true; do # Cleanup: Remove the temporary directory rm -rf "$TEMP_CONTEXT" - # Remove the file to prevent repeated builds, or handle as needed - # rm "$FILE_PATH" + # Remove the file to prevent repeated builds + rm "$FILE_PATH" echo "Upload complete" exit 0 fi diff --git a/pkg/tuning/preset-tuning.go b/pkg/tuning/preset-tuning.go index 0e0b7ee18..436654359 100644 --- a/pkg/tuning/preset-tuning.go +++ b/pkg/tuning/preset-tuning.go @@ -115,20 +115,14 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa if err != nil { return nil, err } + _, imagePushSecret := GetDataDestImageInfo(ctx, workspaceObj) + combinedSecretRefs := append(imagePushSecret, imagePullSecrets...) err = EnsureTuningConfigMap(ctx, workspaceObj, tuningObj, kubeClient) if err != nil { return nil, err } - workspaceVolume, workspaceVolumeMount := utils.ConfigWorkspaceVolume() - if workspaceVolume.Name != "" { - volumes = append(volumes, workspaceVolume) - } - if workspaceVolumeMount.Name != "" { - volumeMounts = append(volumeMounts, workspaceVolumeMount) - } - shmVolume, shmVolumeMount := utils.ConfigSHMVolume(*workspaceObj.Resource.Count) if shmVolume.Name != "" { volumes = append(volumes, shmVolume) @@ -141,6 +135,20 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa volumes = append(volumes, cmVolume) volumeMounts = append(volumeMounts, cmVolumeMount) + resultsVolume, resultsVolumeMount := utils.ConfigResultsVolume() + if resultsVolume.Name != "" { + volumes = append(volumes, resultsVolume) + } + if resultsVolumeMount.Name != "" { + volumeMounts = append(volumeMounts, resultsVolumeMount) + } + + if workspaceObj.Tuning.Output.Image != "" { + imagePushSecretVolume, imagePushSecretVolumeMount := utils.ConfigImagePushSecretVolume(workspaceObj.Tuning.Output.ImagePushSecret) + volumes = append(volumes, imagePushSecretVolume) + volumeMounts = append(volumeMounts, imagePushSecretVolumeMount) + } + modelCommand, err := prepareModelRunParameters(ctx, tuningObj) if err != nil { return nil, err @@ -148,7 +156,7 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa commands, resourceReq := prepareTuningParameters(ctx, workspaceObj, modelCommand, tuningObj) tuningImage := GetTuningImageInfo(ctx, workspaceObj, tuningObj) - jobObj := resources.GenerateTuningJobManifest(ctx, workspaceObj, tuningImage, imagePullSecrets, *workspaceObj.Resource.Count, commands, + jobObj := resources.GenerateTuningJobManifest(ctx, workspaceObj, tuningImage, combinedSecretRefs, *workspaceObj.Resource.Count, commands, containerPorts, nil, nil, resourceReq, tolerations, initContainers, volumes, volumeMounts) err = resources.CreateResource(ctx, jobObj, kubeClient) @@ -238,6 +246,9 @@ func prepareModelRunParameters(ctx context.Context, tuningObj *model.PresetParam // and sets the GPU resources required for tuning. // Returns the command and resource configuration. func prepareTuningParameters(ctx context.Context, wObj *kaitov1alpha1.Workspace, modelCommand string, tuningObj *model.PresetParam) ([]string, corev1.ResourceRequirements) { + if tuningObj.TorchRunParams == nil { + tuningObj.TorchRunParams = make(map[string]string) + } // Set # of processes to GPU Count numProcesses := getInstanceGPUCount(wObj.Resource.InstanceType) tuningObj.TorchRunParams["num_processes"] = fmt.Sprintf("%d", numProcesses) diff --git a/pkg/utils/common-preset.go b/pkg/utils/common-preset.go index d537f9020..872143138 100644 --- a/pkg/utils/common-preset.go +++ b/pkg/utils/common-preset.go @@ -7,26 +7,52 @@ import ( ) const ( - DefaultVolumeMountPath = "/dev/shm" - DefaultConfigMapMountPath = "/mnt/config" - DefaultDataVolumePath = "/mnt/data" - DefaultWorkspaceVolumePath = "/workspace" + DefaultVolumeMountPath = "/dev/shm" + DefaultConfigMapMountPath = "/mnt/config" + DefaultDataVolumePath = "/mnt/data" + DefaultResultsVolumePath = "/mnt/results" ) -func ConfigWorkspaceVolume() (corev1.Volume, corev1.VolumeMount) { +func ConfigResultsVolume() (corev1.Volume, corev1.VolumeMount) { sharedWorkspaceVolume := corev1.Volume{ - Name: "workspace-volume", + Name: "results-volume", VolumeSource: corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{}, }, } sharedVolumeMount := corev1.VolumeMount{ - Name: "workspace-volume", - MountPath: DefaultWorkspaceVolumePath, + Name: "results-volume", + // TODO: Override output path if specified in trainingconfig + MountPath: DefaultResultsVolumePath, } return sharedWorkspaceVolume, sharedVolumeMount } +func ConfigImagePushSecretVolume(imagePushSecret string) (corev1.Volume, corev1.VolumeMount) { + volume := corev1.Volume{ + Name: "docker-config", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: imagePushSecret, + Items: []corev1.KeyToPath{ + { + Key: ".dockerconfigjson", + Path: "config.json", + }, + }, + }, + }, + } + + volumeMount := corev1.VolumeMount{ + Name: "docker-config", + MountPath: "/root/.docker/config.json", + SubPath: "config.json", // Mount only the config.json file + } + + return volume, volumeMount +} + func ConfigSHMVolume(instanceCount int) (corev1.Volume, corev1.VolumeMount) { volume := corev1.Volume{} volumeMount := corev1.VolumeMount{} diff --git a/presets/models/falcon/model.go b/presets/models/falcon/model.go index bc7f882af..4122902db 100644 --- a/presets/models/falcon/model.go +++ b/presets/models/falcon/model.go @@ -3,6 +3,7 @@ package falcon import ( + "github.com/azure/kaito/pkg/tuning" "time" kaitov1alpha1 "github.com/azure/kaito/api/v1alpha1" @@ -39,8 +40,10 @@ var ( PresetFalconTagMap = map[string]string{ "Falcon7B": "0.0.4", "Falcon7BInstruct": "0.0.4", + "Falcon7BTuning": "0.0.2", "Falcon40B": "0.0.5", "Falcon40BInstruct": "0.0.5", + "Falcon40BTuning": "0.0.2", } baseCommandPresetFalcon = "accelerate launch" @@ -77,11 +80,11 @@ func (*falcon7b) GetTuningParameters() *model.PresetParam { GPUCountRequirement: "2", TotalGPUMemoryRequirement: "16Gi", PerGPUMemoryRequirement: "16Gi", - //TorchRunParams: tuning.DefaultAccelerateParams, // TODO + TorchRunParams: tuning.DefaultAccelerateParams, //ModelRunPrams: falconRunTuningParams, // TODO ReadinessTimeout: time.Duration(30) * time.Minute, BaseCommand: baseCommandPresetFalcon, - Tag: PresetFalconTagMap["Falcon7B"], + Tag: PresetFalconTagMap["Falcon7BTuning"], } } @@ -150,11 +153,11 @@ func (*falcon40b) GetTuningParameters() *model.PresetParam { GPUCountRequirement: "2", TotalGPUMemoryRequirement: "90Gi", PerGPUMemoryRequirement: "16Gi", - //TorchRunParams: tuning.DefaultAccelerateParams, // TODO + TorchRunParams: tuning.DefaultAccelerateParams, //ModelRunPrams: falconRunTuningParams, // TODO ReadinessTimeout: time.Duration(30) * time.Minute, BaseCommand: baseCommandPresetFalcon, - Tag: PresetFalconTagMap["Falcon40B"], + Tag: PresetFalconTagMap["Falcon40BTuning"], } } func (*falcon40b) SupportDistributedInference() bool { From d313189efc122e49f4fb04e3d9d353a58c92938e Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 16:17:43 -0700 Subject: [PATCH 06/16] comment --- pkg/resources/manifests.go | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/pkg/resources/manifests.go b/pkg/resources/manifests.go index 14130ca1c..d78ac26d8 100644 --- a/pkg/resources/manifests.go +++ b/pkg/resources/manifests.go @@ -192,6 +192,7 @@ func dockerSidecarScriptPushToVolume(arg interface{}) string { } func dockerSidecarScriptPushImage(arg interface{}) string { image, _ := arg.(string) + // TODO: Override output path if specified in trainingconfig (instead of /mnt/results) return fmt.Sprintf(` # Start the Docker daemon in the background with specific options for DinD dockerd & @@ -289,17 +290,6 @@ func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspac Ports: containerPorts, VolumeMounts: volumeMounts, }, - { - Name: wObj.Name + "dup", - Image: imageName, - Command: []string{"/bin/sh", "-c"}, - Args: []string{"sleep infinity"}, - //Resources: resourceRequirements, - //LivenessProbe: livenessProbe, - //ReadinessProbe: readinessProbe, - Ports: containerPorts, - VolumeMounts: volumeMounts, - }, { Name: "docker-sidecar", Image: "docker:dind", From f1c1e0987e9123bb8ba02957624c0d440f14fbf4 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 16:25:46 -0700 Subject: [PATCH 07/16] fix msg --- pkg/tuning/preset-tuning_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tuning/preset-tuning_test.go b/pkg/tuning/preset-tuning_test.go index cb9fb94e9..e96705626 100644 --- a/pkg/tuning/preset-tuning_test.go +++ b/pkg/tuning/preset-tuning_test.go @@ -188,7 +188,7 @@ func TestEnsureTuningConfigMap(t *testing.T) { ConfigTemplate: "config-template", }, }, - expectedError: "failed to get ConfigMap from template namespace: \"config-template\" not found", + expectedError: "failed to get release namespace: failed to determine release namespace from file /var/run/secrets/kubernetes.io/serviceaccount/namespace and env var RELEASE_NAMESPACE", }, "Config doesn't exist in template namespace": { callMocks: func(c *test.MockClient) { From 15c7fbcc520071288b47560a2443201870f07146 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 17:49:05 -0700 Subject: [PATCH 08/16] feat: clusterrole update --- charts/kaito/workspace/templates/clusterrole.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/kaito/workspace/templates/clusterrole.yaml b/charts/kaito/workspace/templates/clusterrole.yaml index 98d9f2ad0..ebd2ceba2 100644 --- a/charts/kaito/workspace/templates/clusterrole.yaml +++ b/charts/kaito/workspace/templates/clusterrole.yaml @@ -41,8 +41,8 @@ rules: verbs: ["get","list","watch"] - apiGroups: [ "batch" ] resources: [ "jobs" ] - verbs: [ "get", "list", "watch", "create", "delete", "update", "patch" ] + verbs: [ "get", "list", "watch" ] - apiGroups: ["admissionregistration.k8s.io"] resources: ["validatingwebhookconfigurations"] verbs: ["update"] - resourceNames: ["validation.workspace.kaito.sh"] \ No newline at end of file + resourceNames: ["validation.workspace.kaito.sh"] From 4bf571ef14def1d536faf3d206cc24436f5ca5a8 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 19:21:59 -0700 Subject: [PATCH 09/16] fix flaky test --- pkg/tuning/preset-tuning_test.go | 33 ++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/pkg/tuning/preset-tuning_test.go b/pkg/tuning/preset-tuning_test.go index 387195c97..4a9f0af0f 100644 --- a/pkg/tuning/preset-tuning_test.go +++ b/pkg/tuning/preset-tuning_test.go @@ -35,6 +35,24 @@ func normalize(s string) string { return strings.Join(strings.Fields(s), " ") } +// Saves state of current env, and returns function to restore to saved state +func saveEnv(key string) func() { + envVal, envExists := os.LookupEnv(key) + return func() { + if envExists { + err := os.Setenv(key, envVal) + if err != nil { + return + } + } else { + err := os.Unsetenv(key) + if err != nil { + return + } + } + } +} + func TestGetInstanceGPUCount(t *testing.T) { kaitov1alpha1.SupportedGPUConfigs = mockSupportedGPUConfigs testcases := map[string]struct { @@ -164,13 +182,16 @@ func TestGetDataSrcImageInfo(t *testing.T) { func TestEnsureTuningConfigMap(t *testing.T) { testcases := map[string]struct { + setupEnv func() callMocks func(c *test.MockClient) workspaceObj *kaitov1alpha1.Workspace expectedError string }{ "Config already exists in workspace namespace": { - callMocks: func(c *test.MockClient) { + setupEnv: func() { os.Setenv(consts.DefaultReleaseNamespaceEnvVar, "release-namespace") + }, + callMocks: func(c *test.MockClient) { c.On("Get", mock.IsType(context.Background()), mock.Anything, mock.IsType(&corev1.ConfigMap{}), mock.Anything).Return(nil) }, workspaceObj: &kaitov1alpha1.Workspace{ @@ -192,8 +213,10 @@ func TestEnsureTuningConfigMap(t *testing.T) { expectedError: "failed to get release namespace: failed to determine release namespace from file /var/run/secrets/kubernetes.io/serviceaccount/namespace and env var RELEASE_NAMESPACE", }, "Config doesn't exist in template namespace": { - callMocks: func(c *test.MockClient) { + setupEnv: func() { os.Setenv(consts.DefaultReleaseNamespaceEnvVar, "release-namespace") + }, + callMocks: func(c *test.MockClient) { c.On("Get", mock.IsType(context.Background()), mock.Anything, mock.IsType(&corev1.ConfigMap{}), mock.Anything).Return(errors.NewNotFound(schema.GroupResource{}, "config-template")) }, workspaceObj: &kaitov1alpha1.Workspace{ @@ -207,6 +230,12 @@ func TestEnsureTuningConfigMap(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { + cleanupEnv := saveEnv(consts.DefaultReleaseNamespaceEnvVar) + defer cleanupEnv() + + if tc.setupEnv != nil { + tc.setupEnv() + } mockClient := test.NewClient() tc.callMocks(mockClient) tc.workspaceObj.SetNamespace("workspace-namespace") From 8128c7220f7e47e9deb821f1b470a9d7b5011c3a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 20:48:42 -0700 Subject: [PATCH 10/16] cleanup code --- pkg/resources/manifests.go | 108 ++++--------------- pkg/tuning/preset-tuning.go | 176 ++++++++++++++++++++++++------- pkg/tuning/preset-tuning_test.go | 63 +++++------ pkg/utils/common-preset.go | 16 +-- 4 files changed, 194 insertions(+), 169 deletions(-) diff --git a/pkg/resources/manifests.go b/pkg/resources/manifests.go index d78ac26d8..48ecb854f 100644 --- a/pkg/resources/manifests.go +++ b/pkg/resources/manifests.go @@ -185,74 +185,33 @@ func GenerateStatefulSetManifest(ctx context.Context, workspaceObj *kaitov1alpha return ss } -func dockerSidecarScriptPushToVolume(arg interface{}) string { - //volume, ok := arg.(*corev1.VolumeSource) - return ` -` -} -func dockerSidecarScriptPushImage(arg interface{}) string { - image, _ := arg.(string) - // TODO: Override output path if specified in trainingconfig (instead of /mnt/results) - return fmt.Sprintf(` -# Start the Docker daemon in the background with specific options for DinD -dockerd & -# Wait for the Docker daemon to be ready -while ! docker info > /dev/null 2>&1; do - echo "Waiting for Docker daemon to start..." - sleep 1 -done -echo 'Docker daemon started' - -while true; do - FILE_PATH=$(find /mnt/results -name 'fine_tuning_completed.txt') - if [ ! -z "$FILE_PATH" ]; then - echo "FOUND TRAINING COMPLETED FILE at $FILE_PATH" - - PARENT_DIR=$(dirname "$FILE_PATH") - echo "Parent directory is $PARENT_DIR" - - TEMP_CONTEXT=$(mktemp -d) - cp "$PARENT_DIR/adapter_config.json" "$TEMP_CONTEXT/adapter_config.json" - cp -r "$PARENT_DIR/adapter_model.safetensors" "$TEMP_CONTEXT/adapter_model.safetensors" - - # Create a minimal Dockerfile - echo 'FROM scratch - ADD adapter_config.json / - ADD adapter_model.safetensors /' > "$TEMP_CONTEXT/Dockerfile" - - docker build -t %s "$TEMP_CONTEXT" - docker push %s - - # Cleanup: Remove the temporary directory - rm -rf "$TEMP_CONTEXT" - - # Remove the file to prevent repeated builds - rm "$FILE_PATH" - echo "Upload complete" - exit 0 - fi - sleep 10 # Check every 10 seconds -done`, image, image) -} - -func determinePushMethod(wObj *kaitov1alpha1.Workspace) (func(arg interface{}) string, interface{}) { - if wObj.Tuning.Output.Volume != nil { - return dockerSidecarScriptPushToVolume, wObj.Tuning.Output.Volume - } - if wObj.Tuning.Output.Image != "" { - return dockerSidecarScriptPushImage, wObj.Tuning.Output.Image - } - return func(arg interface{}) string { return "" }, "" -} - func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspace, imageName string, imagePullSecretRefs []corev1.LocalObjectReference, replicas int, commands []string, containerPorts []corev1.ContainerPort, livenessProbe, readinessProbe *corev1.Probe, resourceRequirements corev1.ResourceRequirements, tolerations []corev1.Toleration, - initContainers []corev1.Container, volumes []corev1.Volume, volumeMounts []corev1.VolumeMount) *batchv1.Job { + initContainers []corev1.Container, sidecarContainers []corev1.Container, volumes []corev1.Volume, volumeMounts []corev1.VolumeMount) *batchv1.Job { labels := map[string]string{ kaitov1alpha1.LabelWorkspaceName: wObj.Name, } - pushMethod, pushArg := determinePushMethod(wObj) + + // Add volume mounts to sidecar containers + for i := range sidecarContainers { + sidecarContainers[i].VolumeMounts = append(sidecarContainers[i].VolumeMounts, volumeMounts...) + } + + // Construct the complete list of containers (main and sidecars) + containers := append([]corev1.Container{ + { + Name: wObj.Name, + Image: imageName, + Command: commands, + Resources: resourceRequirements, + LivenessProbe: livenessProbe, + ReadinessProbe: readinessProbe, + Ports: containerPorts, + VolumeMounts: volumeMounts, + }, + }, sidecarContainers...) + return &batchv1.Job{ TypeMeta: v1.TypeMeta{ APIVersion: "batch/v1", @@ -278,29 +237,8 @@ func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspac Labels: labels, }, Spec: corev1.PodSpec{ - InitContainers: initContainers, - Containers: []corev1.Container{ - { - Name: wObj.Name, - Image: imageName, - Command: commands, - Resources: resourceRequirements, - LivenessProbe: livenessProbe, - ReadinessProbe: readinessProbe, - Ports: containerPorts, - VolumeMounts: volumeMounts, - }, - { - Name: "docker-sidecar", - Image: "docker:dind", - SecurityContext: &corev1.SecurityContext{ - Privileged: pointer.BoolPtr(true), - }, - VolumeMounts: volumeMounts, - Command: []string{"/bin/sh", "-c"}, - Args: []string{pushMethod(pushArg)}, - }, - }, + InitContainers: initContainers, + Containers: containers, RestartPolicy: corev1.RestartPolicyNever, Volumes: volumes, Tolerations: tolerations, diff --git a/pkg/tuning/preset-tuning.go b/pkg/tuning/preset-tuning.go index 436654359..f7488db7a 100644 --- a/pkg/tuning/preset-tuning.go +++ b/pkg/tuning/preset-tuning.go @@ -3,6 +3,7 @@ package tuning import ( "context" "fmt" + "k8s.io/utils/pointer" "os" "strings" @@ -67,9 +68,9 @@ func GetDataSrcImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace) (st return wObj.Tuning.Input.Image, imagePullSecretRefs } -func GetDataDestImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace) (string, []corev1.LocalObjectReference) { - imagePushSecretRefs := []corev1.LocalObjectReference{{Name: wObj.Tuning.Output.ImagePushSecret}} - return wObj.Tuning.Output.Image, imagePushSecretRefs +func GetDataDestImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace) (string, corev1.LocalObjectReference) { + imagePushSecretRef := corev1.LocalObjectReference{Name: wObj.Tuning.Output.ImagePushSecret} + return wObj.Tuning.Output.Image, imagePushSecretRef } func EnsureTuningConfigMap(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace, @@ -109,20 +110,55 @@ func EnsureTuningConfigMap(ctx context.Context, workspaceObj *kaitov1alpha1.Work return nil } -func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace, - tuningObj *model.PresetParam, kubeClient client.Client) (client.Object, error) { - initContainers, imagePullSecrets, volumes, volumeMounts, err := prepareDataSource(ctx, workspaceObj, kubeClient) - if err != nil { - return nil, err - } - _, imagePushSecret := GetDataDestImageInfo(ctx, workspaceObj) - combinedSecretRefs := append(imagePushSecret, imagePullSecrets...) +func dockerSidecarScriptPushImage(image string) string { + // TODO: Override output path if specified in trainingconfig (instead of /mnt/results) + return fmt.Sprintf(` +# Start the Docker daemon in the background with specific options for DinD +dockerd & +# Wait for the Docker daemon to be ready +while ! docker info > /dev/null 2>&1; do + echo "Waiting for Docker daemon to start..." + sleep 1 +done +echo 'Docker daemon started' + +while true; do + FILE_PATH=$(find /mnt/results -name 'fine_tuning_completed.txt') + if [ ! -z "$FILE_PATH" ]; then + echo "FOUND TRAINING COMPLETED FILE at $FILE_PATH" + + PARENT_DIR=$(dirname "$FILE_PATH") + echo "Parent directory is $PARENT_DIR" + + TEMP_CONTEXT=$(mktemp -d) + cp "$PARENT_DIR/adapter_config.json" "$TEMP_CONTEXT/adapter_config.json" + cp -r "$PARENT_DIR/adapter_model.safetensors" "$TEMP_CONTEXT/adapter_model.safetensors" + + # Create a minimal Dockerfile + echo 'FROM scratch + ADD adapter_config.json / + ADD adapter_model.safetensors /' > "$TEMP_CONTEXT/Dockerfile" + + docker build -t %s "$TEMP_CONTEXT" + docker push %s + + # Cleanup: Remove the temporary directory + rm -rf "$TEMP_CONTEXT" + + # Remove the file to prevent repeated builds + rm "$FILE_PATH" + echo "Upload complete" + exit 0 + fi + sleep 10 # Check every 10 seconds +done`, image, image) +} - err = EnsureTuningConfigMap(ctx, workspaceObj, tuningObj, kubeClient) - if err != nil { - return nil, err - } +func setupDefaultSharedVolumes(workspaceObj *kaitov1alpha1.Workspace) ([]corev1.Volume, []corev1.VolumeMount) { + var volumes []corev1.Volume + var volumeMounts []corev1.VolumeMount + // Add shared volume for shared memory (multi-node) shmVolume, shmVolumeMount := utils.ConfigSHMVolume(*workspaceObj.Resource.Count) if shmVolume.Name != "" { volumes = append(volumes, shmVolume) @@ -131,10 +167,12 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa volumeMounts = append(volumeMounts, shmVolumeMount) } + // Add shared volume for tuning parameters cmVolume, cmVolumeMount := utils.ConfigCMVolume(workspaceObj.Tuning.ConfigTemplate) volumes = append(volumes, cmVolume) volumeMounts = append(volumeMounts, cmVolumeMount) + // Add shared volume for results dir resultsVolume, resultsVolumeMount := utils.ConfigResultsVolume() if resultsVolume.Name != "" { volumes = append(volumes, resultsVolume) @@ -142,11 +180,40 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa if resultsVolumeMount.Name != "" { volumeMounts = append(volumeMounts, resultsVolumeMount) } + return volumes, volumeMounts +} + +func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace, + tuningObj *model.PresetParam, kubeClient client.Client) (client.Object, error) { + var initContainers, sidecarContainers []corev1.Container + volumes, volumeMounts := setupDefaultSharedVolumes(workspaceObj) + + initContainer, imagePullSecrets, dataSourceVolume, dataSourceVolumeMount, err := prepareDataSource(ctx, workspaceObj) + if err != nil { + return nil, err + } + volumes = append(volumes, dataSourceVolume) + volumeMounts = append(volumeMounts, dataSourceVolumeMount) + if initContainer.Name != "" { + initContainers = append(initContainers, initContainer) + } + + sidecarContainer, imagePushSecret, dataDestVolume, dataDestVolumeMount, err := prepareDataDestination(ctx, workspaceObj) + if err != nil { + return nil, err + } + volumes = append(volumes, dataDestVolume) + volumeMounts = append(volumeMounts, dataDestVolumeMount) + if sidecarContainer.Name != "" { + sidecarContainers = append(sidecarContainers, sidecarContainer) + } + if imagePushSecret.Name != "" { + imagePullSecrets = append(imagePullSecrets, imagePushSecret) + } - if workspaceObj.Tuning.Output.Image != "" { - imagePushSecretVolume, imagePushSecretVolumeMount := utils.ConfigImagePushSecretVolume(workspaceObj.Tuning.Output.ImagePushSecret) - volumes = append(volumes, imagePushSecretVolume) - volumeMounts = append(volumeMounts, imagePushSecretVolumeMount) + err = EnsureTuningConfigMap(ctx, workspaceObj, tuningObj, kubeClient) + if err != nil { + return nil, err } modelCommand, err := prepareModelRunParameters(ctx, tuningObj) @@ -156,8 +223,8 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa commands, resourceReq := prepareTuningParameters(ctx, workspaceObj, modelCommand, tuningObj) tuningImage := GetTuningImageInfo(ctx, workspaceObj, tuningObj) - jobObj := resources.GenerateTuningJobManifest(ctx, workspaceObj, tuningImage, combinedSecretRefs, *workspaceObj.Resource.Count, commands, - containerPorts, nil, nil, resourceReq, tolerations, initContainers, volumes, volumeMounts) + jobObj := resources.GenerateTuningJobManifest(ctx, workspaceObj, tuningImage, imagePullSecrets, *workspaceObj.Resource.Count, commands, + containerPorts, nil, nil, resourceReq, tolerations, initContainers, sidecarContainers, volumes, volumeMounts) err = resources.CreateResource(ctx, jobObj, kubeClient) if client.IgnoreAlreadyExists(err) != nil { @@ -166,29 +233,59 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa return jobObj, nil } +// Now there are two options for data destination 1. HostPath - 2. Image +func prepareDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.LocalObjectReference, corev1.Volume, corev1.VolumeMount, error) { + var sidecarContainer corev1.Container + var volume corev1.Volume + var volumeMount corev1.VolumeMount + var imagePushSecret corev1.LocalObjectReference + switch { + case workspaceObj.Tuning.Output.Image != "": + _, imagePushSecret = GetDataDestImageInfo(ctx, workspaceObj) + sidecarContainer, volume, volumeMount = handleImageDataDestination(ctx, workspaceObj) + // TODO: Future PR include + //case workspaceObj.Tuning.Output.Volume != nil: + } + return sidecarContainer, imagePushSecret, volume, volumeMount, nil +} + +func handleImageDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.Volume, corev1.VolumeMount) { + sidecarContainer := corev1.Container{ + Name: "docker-sidecar", + Image: "docker:dind", + SecurityContext: &corev1.SecurityContext{ + Privileged: pointer.BoolPtr(true), + }, + Command: []string{"/bin/sh", "-c"}, + Args: []string{dockerSidecarScriptPushImage(workspaceObj.Tuning.Output.Image)}, + } + + volume, volumeMount := utils.ConfigImagePushSecretVolume(workspaceObj.Tuning.Output.ImagePushSecret) + return sidecarContainer, volume, volumeMount +} + // Now there are three options for DataSource: 1. URL - 2. HostPath - 3. Image -func prepareDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace, kubeClient client.Client) ([]corev1.Container, []corev1.LocalObjectReference, []corev1.Volume, []corev1.VolumeMount, error) { - var initContainers []corev1.Container - var volumes []corev1.Volume - var volumeMounts []corev1.VolumeMount +func prepareDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, []corev1.LocalObjectReference, corev1.Volume, corev1.VolumeMount, error) { + var initContainer corev1.Container + var volume corev1.Volume + var volumeMount corev1.VolumeMount var imagePullSecrets []corev1.LocalObjectReference switch { case workspaceObj.Tuning.Input.Image != "": - initContainers, volumes, volumeMounts = handleImageDataSource(ctx, workspaceObj) + initContainer, volume, volumeMount = handleImageDataSource(ctx, workspaceObj) _, imagePullSecrets = GetDataSrcImageInfo(ctx, workspaceObj) case len(workspaceObj.Tuning.Input.URLs) > 0: - initContainers, volumes, volumeMounts = handleURLDataSource(ctx, workspaceObj) + initContainer, volume, volumeMount = handleURLDataSource(ctx, workspaceObj) // TODO: Future PR include // case workspaceObj.Tuning.Input.Volume != nil: } - return initContainers, imagePullSecrets, volumes, volumeMounts, nil + return initContainer, imagePullSecrets, volume, volumeMount, nil } -func handleImageDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) ([]corev1.Container, []corev1.Volume, []corev1.VolumeMount) { - var initContainers []corev1.Container +func handleImageDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.Volume, corev1.VolumeMount) { // Constructing a multistep command that lists, copies, and then lists the destination command := "ls -la /data && cp -r /data/* " + utils.DefaultDataVolumePath + " && ls -la " + utils.DefaultDataVolumePath - initContainers = append(initContainers, corev1.Container{ + initContainer := corev1.Container{ Name: "data-extractor", Image: workspaceObj.Tuning.Input.Image, Command: []string{"sh", "-c", command}, @@ -198,15 +295,14 @@ func handleImageDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Work MountPath: utils.DefaultDataVolumePath, }, }, - }) + } - volumes, volumeMounts := utils.ConfigDataVolume(nil) - return initContainers, volumes, volumeMounts + volume, volumeMount := utils.ConfigDataVolume(nil) + return initContainer, volume, volumeMount } -func handleURLDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) ([]corev1.Container, []corev1.Volume, []corev1.VolumeMount) { - var initContainers []corev1.Container - initContainers = append(initContainers, corev1.Container{ +func handleURLDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.Volume, corev1.VolumeMount) { + initContainer := corev1.Container{ Name: "data-downloader", Image: "curlimages/curl", Command: []string{"sh", "-c", ` @@ -231,9 +327,9 @@ func handleURLDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Worksp Value: utils.DefaultDataVolumePath, }, }, - }) - volumes, volumeMounts := utils.ConfigDataVolume(nil) - return initContainers, volumes, volumeMounts + } + volume, volumeMount := utils.ConfigDataVolume(nil) + return initContainer, volume, volumeMount } func prepareModelRunParameters(ctx context.Context, tuningObj *model.PresetParam) (string, error) { diff --git a/pkg/tuning/preset-tuning_test.go b/pkg/tuning/preset-tuning_test.go index 4a9f0af0f..9d5606b80 100644 --- a/pkg/tuning/preset-tuning_test.go +++ b/pkg/tuning/preset-tuning_test.go @@ -276,18 +276,15 @@ func TestHandleImageDataSource(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { - initContainers, volumes, volumeMounts := handleImageDataSource(context.Background(), tc.workspaceObj) + initContainer, volume, volumeMount := handleImageDataSource(context.Background(), tc.workspaceObj) - assert.Len(t, initContainers, 1) - assert.Equal(t, tc.expectedInitContainerName, initContainers[0].Name) - assert.Equal(t, tc.workspaceObj.Tuning.Input.Image, initContainers[0].Image) - assert.Contains(t, initContainers[0].Command[2], "cp -r /data/* /mnt/data") + assert.Equal(t, tc.expectedInitContainerName, initContainer.Name) + assert.Equal(t, tc.workspaceObj.Tuning.Input.Image, initContainer.Image) + assert.Contains(t, initContainer.Command[2], "cp -r /data/* /mnt/data") - assert.Len(t, volumes, 1) - assert.Equal(t, tc.expectedVolumeName, volumes[0].Name) + assert.Equal(t, tc.expectedVolumeName, volume.Name) - assert.Len(t, volumeMounts, 1) - assert.Equal(t, tc.expectedVolumeMountPath, volumeMounts[0].MountPath) + assert.Equal(t, tc.expectedVolumeMountPath, volumeMount.MountPath) }) } } @@ -319,18 +316,15 @@ func TestHandleURLDataSource(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { - initContainers, volumes, volumeMounts := handleURLDataSource(context.Background(), tc.workspaceObj) + initContainer, volume, volumeMount := handleURLDataSource(context.Background(), tc.workspaceObj) - assert.Len(t, initContainers, 1) - assert.Equal(t, tc.expectedInitContainerName, initContainers[0].Name) - assert.Equal(t, tc.expectedImage, initContainers[0].Image) - assert.Contains(t, normalize(initContainers[0].Command[2]), normalize(tc.expectedCommands)) + assert.Equal(t, tc.expectedInitContainerName, initContainer.Name) + assert.Equal(t, tc.expectedImage, initContainer.Image) + assert.Contains(t, normalize(initContainer.Command[2]), normalize(tc.expectedCommands)) - assert.Len(t, volumes, 1) - assert.Equal(t, tc.expectedVolumeName, volumes[0].Name) + assert.Equal(t, tc.expectedVolumeName, volume.Name) - assert.Len(t, volumeMounts, 1) - assert.Equal(t, tc.expectedVolumeMountPath, volumeMounts[0].MountPath) + assert.Equal(t, tc.expectedVolumeMountPath, volumeMount.MountPath) }) } } @@ -393,31 +387,28 @@ func TestPrepareDataSource_ImageSource(t *testing.T) { } // Expected outputs from mocked functions - expectedVolumes := []corev1.Volume{ - { - Name: "data-volume", - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, // Assume we expect an EmptyDir - }, + expectedVolume := corev1.Volume{ + Name: "data-volume", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, // Assume we expect an EmptyDir }, } - expectedVolumeMounts := []corev1.VolumeMount{{Name: "data-volume", MountPath: "/mnt/data"}} + + expectedVolumeMount := corev1.VolumeMount{Name: "data-volume", MountPath: "/mnt/data"} expectedImagePullSecrets := []corev1.LocalObjectReference{} - expectedInitContainers := []corev1.Container{ - { - Name: "data-extractor", - Image: "custom/data-loader-image", - Command: []string{"sh", "-c", "ls -la /data && cp -r /data/* /mnt/data && ls -la /mnt/data"}, - VolumeMounts: expectedVolumeMounts, - }, + expectedInitContainer := corev1.Container{ + Name: "data-extractor", + Image: "custom/data-loader-image", + Command: []string{"sh", "-c", "ls -la /data && cp -r /data/* /mnt/data && ls -la /mnt/data"}, + VolumeMounts: []corev1.VolumeMount{expectedVolumeMount}, } - initContainers, imagePullSecrets, volumes, volumeMounts, err := prepareDataSource(ctx, workspaceObj, nil) + initContainer, imagePullSecrets, volume, volumeMount, err := prepareDataSource(ctx, workspaceObj) // Assertions assert.NoError(t, err) - assert.Equal(t, expectedInitContainers, initContainers) - assert.Equal(t, expectedVolumes, volumes) - assert.Equal(t, expectedVolumeMounts, volumeMounts) + assert.Equal(t, expectedInitContainer, initContainer) + assert.Equal(t, expectedVolume, volume) + assert.Equal(t, expectedVolumeMount, volumeMount) assert.Equal(t, expectedImagePullSecrets, imagePullSecrets) } diff --git a/pkg/utils/common-preset.go b/pkg/utils/common-preset.go index 872143138..61295dcde 100644 --- a/pkg/utils/common-preset.go +++ b/pkg/utils/common-preset.go @@ -97,9 +97,9 @@ func ConfigCMVolume(cmName string) (corev1.Volume, corev1.VolumeMount) { return volume, volumeMount } -func ConfigDataVolume(hostPath *string) ([]corev1.Volume, []corev1.VolumeMount) { - var volumes []corev1.Volume - var volumeMounts []corev1.VolumeMount +func ConfigDataVolume(hostPath *string) (corev1.Volume, corev1.VolumeMount) { + var volume corev1.Volume + var volumeMount corev1.VolumeMount var volumeSource corev1.VolumeSource if hostPath != nil { volumeSource = corev1.VolumeSource{ @@ -112,14 +112,14 @@ func ConfigDataVolume(hostPath *string) ([]corev1.Volume, []corev1.VolumeMount) EmptyDir: &corev1.EmptyDirVolumeSource{}, } } - volumes = append(volumes, corev1.Volume{ + volume = corev1.Volume{ Name: "data-volume", VolumeSource: volumeSource, - }) + } - volumeMounts = append(volumeMounts, corev1.VolumeMount{ + volumeMount = corev1.VolumeMount{ Name: "data-volume", MountPath: DefaultDataVolumePath, - }) - return volumes, volumeMounts + } + return volume, volumeMount } From 5ae7fd386806182123fc6e14b4f6871d87e8abdf Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 21:00:28 -0700 Subject: [PATCH 11/16] need more permissions --- charts/kaito/workspace/templates/clusterrole.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/kaito/workspace/templates/clusterrole.yaml b/charts/kaito/workspace/templates/clusterrole.yaml index ebd2ceba2..f8d6a62ee 100644 --- a/charts/kaito/workspace/templates/clusterrole.yaml +++ b/charts/kaito/workspace/templates/clusterrole.yaml @@ -41,7 +41,7 @@ rules: verbs: ["get","list","watch"] - apiGroups: [ "batch" ] resources: [ "jobs" ] - verbs: [ "get", "list", "watch" ] + verbs: [ "get", "list", "watch", "create", "delete","update", "patch" ] - apiGroups: ["admissionregistration.k8s.io"] resources: ["validatingwebhookconfigurations"] verbs: ["update"] From c44955acd331d734e0f706fcb69df91f8ba37ec9 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 21:17:11 -0700 Subject: [PATCH 12/16] minor cleanup --- pkg/tuning/preset-tuning.go | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pkg/tuning/preset-tuning.go b/pkg/tuning/preset-tuning.go index f7488db7a..1968933fe 100644 --- a/pkg/tuning/preset-tuning.go +++ b/pkg/tuning/preset-tuning.go @@ -68,11 +68,6 @@ func GetDataSrcImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace) (st return wObj.Tuning.Input.Image, imagePullSecretRefs } -func GetDataDestImageInfo(ctx context.Context, wObj *kaitov1alpha1.Workspace) (string, corev1.LocalObjectReference) { - imagePushSecretRef := corev1.LocalObjectReference{Name: wObj.Tuning.Output.ImagePushSecret} - return wObj.Tuning.Output.Image, imagePushSecretRef -} - func EnsureTuningConfigMap(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace, tuningObj *model.PresetParam, kubeClient client.Client) error { // Copy Configmap from helm chart configmap into workspace @@ -241,15 +236,16 @@ func prepareDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1.Wor var imagePushSecret corev1.LocalObjectReference switch { case workspaceObj.Tuning.Output.Image != "": - _, imagePushSecret = GetDataDestImageInfo(ctx, workspaceObj) - sidecarContainer, volume, volumeMount = handleImageDataDestination(ctx, workspaceObj) + image, secret := workspaceObj.Tuning.Output.Image, workspaceObj.Tuning.Output.ImagePushSecret + imagePushSecret = corev1.LocalObjectReference{Name: secret} + sidecarContainer, volume, volumeMount = handleImageDataDestination(ctx, image, secret) // TODO: Future PR include //case workspaceObj.Tuning.Output.Volume != nil: } return sidecarContainer, imagePushSecret, volume, volumeMount, nil } -func handleImageDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.Volume, corev1.VolumeMount) { +func handleImageDataDestination(ctx context.Context, image, imagePushSecret string) (corev1.Container, corev1.Volume, corev1.VolumeMount) { sidecarContainer := corev1.Container{ Name: "docker-sidecar", Image: "docker:dind", @@ -257,10 +253,10 @@ func handleImageDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1 Privileged: pointer.BoolPtr(true), }, Command: []string{"/bin/sh", "-c"}, - Args: []string{dockerSidecarScriptPushImage(workspaceObj.Tuning.Output.Image)}, + Args: []string{dockerSidecarScriptPushImage(image)}, } - volume, volumeMount := utils.ConfigImagePushSecretVolume(workspaceObj.Tuning.Output.ImagePushSecret) + volume, volumeMount := utils.ConfigImagePushSecretVolume(imagePushSecret) return sidecarContainer, volume, volumeMount } @@ -272,8 +268,9 @@ func prepareDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspac var imagePullSecrets []corev1.LocalObjectReference switch { case workspaceObj.Tuning.Input.Image != "": - initContainer, volume, volumeMount = handleImageDataSource(ctx, workspaceObj) - _, imagePullSecrets = GetDataSrcImageInfo(ctx, workspaceObj) + var image string + image, imagePullSecrets = GetDataSrcImageInfo(ctx, workspaceObj) + initContainer, volume, volumeMount = handleImageDataSource(ctx, image) case len(workspaceObj.Tuning.Input.URLs) > 0: initContainer, volume, volumeMount = handleURLDataSource(ctx, workspaceObj) // TODO: Future PR include @@ -282,12 +279,12 @@ func prepareDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspac return initContainer, imagePullSecrets, volume, volumeMount, nil } -func handleImageDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.Volume, corev1.VolumeMount) { +func handleImageDataSource(ctx context.Context, image string) (corev1.Container, corev1.Volume, corev1.VolumeMount) { // Constructing a multistep command that lists, copies, and then lists the destination command := "ls -la /data && cp -r /data/* " + utils.DefaultDataVolumePath + " && ls -la " + utils.DefaultDataVolumePath initContainer := corev1.Container{ Name: "data-extractor", - Image: workspaceObj.Tuning.Input.Image, + Image: image, Command: []string{"sh", "-c", command}, VolumeMounts: []corev1.VolumeMount{ { From 55ea50101a41f751322c95b1198f03df6e4b4162 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 21:18:24 -0700 Subject: [PATCH 13/16] nit --- pkg/tuning/preset-tuning_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tuning/preset-tuning_test.go b/pkg/tuning/preset-tuning_test.go index 9d5606b80..b2d7d4143 100644 --- a/pkg/tuning/preset-tuning_test.go +++ b/pkg/tuning/preset-tuning_test.go @@ -276,7 +276,7 @@ func TestHandleImageDataSource(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { - initContainer, volume, volumeMount := handleImageDataSource(context.Background(), tc.workspaceObj) + initContainer, volume, volumeMount := handleImageDataSource(context.Background(), tc.workspaceObj.Tuning.Input.Image) assert.Equal(t, tc.expectedInitContainerName, initContainer.Name) assert.Equal(t, tc.workspaceObj.Tuning.Input.Image, initContainer.Image) From ff3c2e10bb8238470ba12b7743b64d6ba082f5ba Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 22:08:47 -0700 Subject: [PATCH 14/16] use pointers --- pkg/tuning/preset-tuning.go | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pkg/tuning/preset-tuning.go b/pkg/tuning/preset-tuning.go index 1968933fe..7b6744961 100644 --- a/pkg/tuning/preset-tuning.go +++ b/pkg/tuning/preset-tuning.go @@ -190,7 +190,7 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa volumes = append(volumes, dataSourceVolume) volumeMounts = append(volumeMounts, dataSourceVolumeMount) if initContainer.Name != "" { - initContainers = append(initContainers, initContainer) + initContainers = append(initContainers, *initContainer) } sidecarContainer, imagePushSecret, dataDestVolume, dataDestVolumeMount, err := prepareDataDestination(ctx, workspaceObj) @@ -199,11 +199,11 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa } volumes = append(volumes, dataDestVolume) volumeMounts = append(volumeMounts, dataDestVolumeMount) - if sidecarContainer.Name != "" { - sidecarContainers = append(sidecarContainers, sidecarContainer) + if sidecarContainer != nil { + sidecarContainers = append(sidecarContainers, *sidecarContainer) } - if imagePushSecret.Name != "" { - imagePullSecrets = append(imagePullSecrets, imagePushSecret) + if imagePushSecret != nil { + imagePullSecrets = append(imagePullSecrets, *imagePushSecret) } err = EnsureTuningConfigMap(ctx, workspaceObj, tuningObj, kubeClient) @@ -229,15 +229,15 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa } // Now there are two options for data destination 1. HostPath - 2. Image -func prepareDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.LocalObjectReference, corev1.Volume, corev1.VolumeMount, error) { - var sidecarContainer corev1.Container +func prepareDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (*corev1.Container, *corev1.LocalObjectReference, corev1.Volume, corev1.VolumeMount, error) { + var sidecarContainer *corev1.Container var volume corev1.Volume var volumeMount corev1.VolumeMount - var imagePushSecret corev1.LocalObjectReference + var imagePushSecret *corev1.LocalObjectReference switch { case workspaceObj.Tuning.Output.Image != "": image, secret := workspaceObj.Tuning.Output.Image, workspaceObj.Tuning.Output.ImagePushSecret - imagePushSecret = corev1.LocalObjectReference{Name: secret} + imagePushSecret = &corev1.LocalObjectReference{Name: secret} sidecarContainer, volume, volumeMount = handleImageDataDestination(ctx, image, secret) // TODO: Future PR include //case workspaceObj.Tuning.Output.Volume != nil: @@ -245,8 +245,8 @@ func prepareDataDestination(ctx context.Context, workspaceObj *kaitov1alpha1.Wor return sidecarContainer, imagePushSecret, volume, volumeMount, nil } -func handleImageDataDestination(ctx context.Context, image, imagePushSecret string) (corev1.Container, corev1.Volume, corev1.VolumeMount) { - sidecarContainer := corev1.Container{ +func handleImageDataDestination(ctx context.Context, image, imagePushSecret string) (*corev1.Container, corev1.Volume, corev1.VolumeMount) { + sidecarContainer := &corev1.Container{ Name: "docker-sidecar", Image: "docker:dind", SecurityContext: &corev1.SecurityContext{ @@ -261,8 +261,8 @@ func handleImageDataDestination(ctx context.Context, image, imagePushSecret stri } // Now there are three options for DataSource: 1. URL - 2. HostPath - 3. Image -func prepareDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, []corev1.LocalObjectReference, corev1.Volume, corev1.VolumeMount, error) { - var initContainer corev1.Container +func prepareDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (*corev1.Container, []corev1.LocalObjectReference, corev1.Volume, corev1.VolumeMount, error) { + var initContainer *corev1.Container var volume corev1.Volume var volumeMount corev1.VolumeMount var imagePullSecrets []corev1.LocalObjectReference @@ -279,10 +279,10 @@ func prepareDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspac return initContainer, imagePullSecrets, volume, volumeMount, nil } -func handleImageDataSource(ctx context.Context, image string) (corev1.Container, corev1.Volume, corev1.VolumeMount) { +func handleImageDataSource(ctx context.Context, image string) (*corev1.Container, corev1.Volume, corev1.VolumeMount) { // Constructing a multistep command that lists, copies, and then lists the destination command := "ls -la /data && cp -r /data/* " + utils.DefaultDataVolumePath + " && ls -la " + utils.DefaultDataVolumePath - initContainer := corev1.Container{ + initContainer := &corev1.Container{ Name: "data-extractor", Image: image, Command: []string{"sh", "-c", command}, @@ -298,8 +298,8 @@ func handleImageDataSource(ctx context.Context, image string) (corev1.Container, return initContainer, volume, volumeMount } -func handleURLDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (corev1.Container, corev1.Volume, corev1.VolumeMount) { - initContainer := corev1.Container{ +func handleURLDataSource(ctx context.Context, workspaceObj *kaitov1alpha1.Workspace) (*corev1.Container, corev1.Volume, corev1.VolumeMount) { + initContainer := &corev1.Container{ Name: "data-downloader", Image: "curlimages/curl", Command: []string{"sh", "-c", ` From ef039b21daaa7339688217edbe3114779551a588 Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Wed, 1 May 2024 22:09:39 -0700 Subject: [PATCH 15/16] use pointers --- pkg/tuning/preset-tuning_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tuning/preset-tuning_test.go b/pkg/tuning/preset-tuning_test.go index b2d7d4143..d4e410a42 100644 --- a/pkg/tuning/preset-tuning_test.go +++ b/pkg/tuning/preset-tuning_test.go @@ -396,7 +396,7 @@ func TestPrepareDataSource_ImageSource(t *testing.T) { expectedVolumeMount := corev1.VolumeMount{Name: "data-volume", MountPath: "/mnt/data"} expectedImagePullSecrets := []corev1.LocalObjectReference{} - expectedInitContainer := corev1.Container{ + expectedInitContainer := &corev1.Container{ Name: "data-extractor", Image: "custom/data-loader-image", Command: []string{"sh", "-c", "ls -la /data && cp -r /data/* /mnt/data && ls -la /mnt/data"}, From 52c5a76fe417c6755faad47d47962d2ad4cd483a Mon Sep 17 00:00:00 2001 From: ishaansehgal99 Date: Thu, 2 May 2024 17:31:31 -0700 Subject: [PATCH 16/16] removed --- presets/models/falcon/model.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/presets/models/falcon/model.go b/presets/models/falcon/model.go index 4122902db..018dd5265 100644 --- a/presets/models/falcon/model.go +++ b/presets/models/falcon/model.go @@ -40,10 +40,8 @@ var ( PresetFalconTagMap = map[string]string{ "Falcon7B": "0.0.4", "Falcon7BInstruct": "0.0.4", - "Falcon7BTuning": "0.0.2", "Falcon40B": "0.0.5", "Falcon40BInstruct": "0.0.5", - "Falcon40BTuning": "0.0.2", } baseCommandPresetFalcon = "accelerate launch" @@ -84,7 +82,7 @@ func (*falcon7b) GetTuningParameters() *model.PresetParam { //ModelRunPrams: falconRunTuningParams, // TODO ReadinessTimeout: time.Duration(30) * time.Minute, BaseCommand: baseCommandPresetFalcon, - Tag: PresetFalconTagMap["Falcon7BTuning"], + Tag: PresetFalconTagMap["Falcon7B"], } } @@ -157,7 +155,7 @@ func (*falcon40b) GetTuningParameters() *model.PresetParam { //ModelRunPrams: falconRunTuningParams, // TODO ReadinessTimeout: time.Duration(30) * time.Minute, BaseCommand: baseCommandPresetFalcon, - Tag: PresetFalconTagMap["Falcon40BTuning"], + Tag: PresetFalconTagMap["Falcon40B"], } } func (*falcon40b) SupportDistributedInference() bool {