From 7abffc3c3793a74e5d2d43712a4103b7c5a553da Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Tue, 26 Nov 2024 10:31:19 +0000 Subject: [PATCH 1/8] feat(lmeval): Add offline mode as default (#370) Refer to [RHOAIENG-16200](https://issues.redhat.com/browse/RHOAIENG-16200) --- controllers/lmes/lmevaljob_controller.go | 30 ++--- controllers/lmes/lmevaljob_controller_test.go | 106 ++++++++++++++++++ 2 files changed, 123 insertions(+), 13 deletions(-) diff --git a/controllers/lmes/lmevaljob_controller.go b/controllers/lmes/lmevaljob_controller.go index 1db00a85..733e91bb 100644 --- a/controllers/lmes/lmevaljob_controller.go +++ b/controllers/lmes/lmevaljob_controller.go @@ -713,20 +713,24 @@ func CreatePod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo volumes = append(volumes, outputPVC) } - // If the job is supposed to run offline, set the appropriate HuggingFace offline flags - if job.Spec.IsOffline() { + // Enforce offline mode by default + offlineHuggingFaceEnvVars := []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + } + envVars = append(envVars, offlineHuggingFaceEnvVars...) - offlineHuggingFaceEnvVars := []corev1.EnvVar{ - { - Name: "HF_DATASETS_OFFLINE", - Value: "1", - }, - { - Name: "HF_HUB_OFFLINE", - Value: "1", - }, - } - envVars = append(envVars, offlineHuggingFaceEnvVars...) + if job.Spec.IsOffline() { // If the job is offline, a storage must be set. PVC is the only supported storage backend at the moment. 
offlinePVCMount := corev1.VolumeMount{ diff --git a/controllers/lmes/lmevaljob_controller_test.go b/controllers/lmes/lmevaljob_controller_test.go index 9a4faf58..e6d797a0 100644 --- a/controllers/lmes/lmevaljob_controller_test.go +++ b/controllers/lmes/lmevaljob_controller_test.go @@ -116,6 +116,20 @@ func Test_SimplePod(t *testing.T) { MountPath: "/opt/app-root/src/bin", }, }, + Env: []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + }, }, }, SecurityContext: defaultPodSecurityContext, @@ -282,6 +296,7 @@ func Test_WithCustomPod(t *testing.T) { RunAsUser: &runAsUser, RunAsGroup: &runAsGroup, }, + VolumeMounts: []corev1.VolumeMount{ { Name: "shared", @@ -297,6 +312,20 @@ func Test_WithCustomPod(t *testing.T) { corev1.ResourceCPU: resource.MustParse("1"), }, }, + Env: []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + }, }, { Name: "sidecar1", @@ -462,6 +491,18 @@ func Test_EnvSecretsPod(t *testing.T) { }, }, }, + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, }, Command: generateCmd(svcOpts, job), Args: generateArgs(svcOpts, job, log), @@ -591,6 +632,20 @@ func Test_FileSecretsPod(t *testing.T) { Command: generateCmd(svcOpts, job), Args: generateArgs(svcOpts, job, log), SecurityContext: defaultSecurityContext, + Env: []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + }, VolumeMounts: []corev1.VolumeMount{ { Name: "shared", @@ -1018,6 +1073,21 @@ func Test_ManagedPVC(t *testing.T) { Command: generateCmd(svcOpts, job), Args: generateArgs(svcOpts, job, log), SecurityContext: 
defaultSecurityContext, + Env: []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + }, + VolumeMounts: []corev1.VolumeMount{ { Name: "shared", @@ -1134,6 +1204,20 @@ func Test_ExistingPVC(t *testing.T) { Command: generateCmd(svcOpts, job), Args: generateArgs(svcOpts, job, log), SecurityContext: defaultSecurityContext, + Env: []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + }, VolumeMounts: []corev1.VolumeMount{ { Name: "shared", @@ -1268,6 +1352,20 @@ func Test_PVCPreference(t *testing.T) { }, }, }, + Env: []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + }, VolumeMounts: []corev1.VolumeMount{ { Name: "shared", @@ -1442,6 +1540,10 @@ func Test_OfflineMode(t *testing.T) { Name: "HF_HUB_OFFLINE", Value: "1", }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ { @@ -1594,6 +1696,10 @@ func Test_OfflineModeWithOutput(t *testing.T) { Name: "HF_HUB_OFFLINE", Value: "1", }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ { From ea3b07cc1e214634a6b92fd3c4aa3aa7cd252fdf Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Tue, 26 Nov 2024 11:00:19 +0000 Subject: [PATCH 2/8] feat(lmeval): Disable remote code execution (#371) Refer to [RHOAIENG-16203](https://issues.redhat.com/browse/RHOAIENG-16203) --- controllers/lmes/lmevaljob_controller.go | 13 ++++ controllers/lmes/lmevaljob_controller_test.go | 72 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/controllers/lmes/lmevaljob_controller.go b/controllers/lmes/lmevaljob_controller.go index 733e91bb..ce48ddc9 100644 --- 
a/controllers/lmes/lmevaljob_controller.go +++ b/controllers/lmes/lmevaljob_controller.go @@ -713,6 +713,19 @@ func CreatePod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo volumes = append(volumes, outputPVC) } + // Disable remote code execution by default + remoteCodeEnvVars := []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, + } + envVars = append(envVars, remoteCodeEnvVars...) + // Enforce offline mode by default offlineHuggingFaceEnvVars := []corev1.EnvVar{ { diff --git a/controllers/lmes/lmevaljob_controller_test.go b/controllers/lmes/lmevaljob_controller_test.go index e6d797a0..3620471a 100644 --- a/controllers/lmes/lmevaljob_controller_test.go +++ b/controllers/lmes/lmevaljob_controller_test.go @@ -117,6 +117,14 @@ func Test_SimplePod(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -313,6 +321,14 @@ func Test_WithCustomPod(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -491,6 +507,14 @@ func Test_EnvSecretsPod(t *testing.T) { }, }, }, + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -633,6 +657,14 @@ func Test_FileSecretsPod(t *testing.T) { Args: generateArgs(svcOpts, job, log), SecurityContext: defaultSecurityContext, Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -1074,6 +1106,14 @@ func Test_ManagedPVC(t *testing.T) { Args: generateArgs(svcOpts, job, log), SecurityContext: defaultSecurityContext, 
Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -1205,6 +1245,14 @@ func Test_ExistingPVC(t *testing.T) { Args: generateArgs(svcOpts, job, log), SecurityContext: defaultSecurityContext, Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -1353,6 +1401,14 @@ func Test_PVCPreference(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -1532,6 +1588,14 @@ func Test_OfflineMode(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", @@ -1688,6 +1752,14 @@ func Test_OfflineModeWithOutput(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, { Name: "HF_DATASETS_OFFLINE", Value: "1", From 4aa3b33a0d24eba3063a0d3877f5c9b9065a5d81 Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Thu, 28 Nov 2024 14:27:01 +0000 Subject: [PATCH 3/8] feat(lmeval): Disable online evaluation (#373) --- controllers/lmes/lmevaljob_controller.go | 4 +++ controllers/lmes/lmevaljob_controller_test.go | 36 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/controllers/lmes/lmevaljob_controller.go b/controllers/lmes/lmevaljob_controller.go index ce48ddc9..88837454 100644 --- a/controllers/lmes/lmevaljob_controller.go +++ b/controllers/lmes/lmevaljob_controller.go @@ -740,6 +740,10 @@ func CreatePod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: 
"HF_EVALUATE_OFFLINE", + Value: "1", + }, } envVars = append(envVars, offlineHuggingFaceEnvVars...) diff --git a/controllers/lmes/lmevaljob_controller_test.go b/controllers/lmes/lmevaljob_controller_test.go index 3620471a..908231ea 100644 --- a/controllers/lmes/lmevaljob_controller_test.go +++ b/controllers/lmes/lmevaljob_controller_test.go @@ -137,6 +137,10 @@ func Test_SimplePod(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, }, }, @@ -341,6 +345,10 @@ func Test_WithCustomPod(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, }, { @@ -527,6 +535,10 @@ func Test_EnvSecretsPod(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, Command: generateCmd(svcOpts, job), Args: generateArgs(svcOpts, job, log), @@ -677,6 +689,10 @@ func Test_FileSecretsPod(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ { @@ -1126,6 +1142,10 @@ func Test_ManagedPVC(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ @@ -1265,6 +1285,10 @@ func Test_ExistingPVC(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ { @@ -1421,6 +1445,10 @@ func Test_PVCPreference(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ { @@ -1608,6 +1636,10 @@ func Test_OfflineMode(t *testing.T) { Name: "TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ { @@ -1772,6 +1804,10 @@ func Test_OfflineModeWithOutput(t *testing.T) { Name: 
"TRANSFORMERS_OFFLINE", Value: "1", }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, }, VolumeMounts: []corev1.VolumeMount{ { From 53783728668c5832de83481cc70ed21c9f7e04ab Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Mon, 2 Dec 2024 15:57:30 +0000 Subject: [PATCH 4/8] feat(lmeval): Add offline mode as mandatory --- api/lmes/v1alpha1/lmevaljob_types.go | 2 +- config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/api/lmes/v1alpha1/lmevaljob_types.go b/api/lmes/v1alpha1/lmevaljob_types.go index 8ee76807..b72358f2 100644 --- a/api/lmes/v1alpha1/lmevaljob_types.go +++ b/api/lmes/v1alpha1/lmevaljob_types.go @@ -301,7 +301,7 @@ type LMEvalJobSpec struct { // +optional Outputs *Outputs `json:"outputs,omitempty"` // Offline specifies settings for running LMEvalJobs in a offline mode - Offline *OfflineSpec `json:"offline,omitempty"` + Offline *OfflineSpec `json:"offline"` } // IsOffline returns whether this LMEvalJob is configured to run offline diff --git a/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml b/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml index 60d37804..3435b339 100644 --- a/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml +++ b/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml @@ -4636,6 +4636,7 @@ spec: type: object required: - model + - offline - taskList type: object status: From 7dc58f56c6bcd925ddcda23b9dde83b7a9889a75 Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Tue, 3 Dec 2024 09:11:47 +0000 Subject: [PATCH 5/8] feat(lmeval): Add online and code execution flags --- api/lmes/v1alpha1/lmevaljob_types.go | 11 +- api/lmes/v1alpha1/zz_generated.deepcopy.go | 10 + .../trustyai.opendatahub.io_lmevaljobs.yaml | 11 +- controllers/lmes/lmevaljob_controller.go | 60 +-- controllers/lmes/lmevaljob_controller_test.go | 450 ++++++++++++++++++ 5 files changed, 510 insertions(+), 32 deletions(-) diff --git a/api/lmes/v1alpha1/lmevaljob_types.go 
b/api/lmes/v1alpha1/lmevaljob_types.go index b72358f2..f8c7c9b9 100644 --- a/api/lmes/v1alpha1/lmevaljob_types.go +++ b/api/lmes/v1alpha1/lmevaljob_types.go @@ -300,8 +300,15 @@ type LMEvalJobSpec struct { // Outputs specifies storage for evaluation results // +optional Outputs *Outputs `json:"outputs,omitempty"` - // Offline specifies settings for running LMEvalJobs in a offline mode - Offline *OfflineSpec `json:"offline"` + // Offline specifies settings for running LMEvalJobs in an offline mode + // +optional + Offline *OfflineSpec `json:"offline,omitempty"` + // AllowOnly specifies whether the LMEvalJob can directly download remote code, datasets and metrics. Default is false. + // +optional + AllowOnline *bool `json:"allowOnline,omitempty"` + // AllowCodeExecution specifies whether the LMEvalJob can execute remote code. Default is false. + // +optional + AllowCodeExecution *bool `json:"allowCodeExecution,omitempty"` } // IsOffline returns whether this LMEvalJob is configured to run offline diff --git a/api/lmes/v1alpha1/zz_generated.deepcopy.go b/api/lmes/v1alpha1/zz_generated.deepcopy.go index 994ed95e..ea3c9334 100644 --- a/api/lmes/v1alpha1/zz_generated.deepcopy.go +++ b/api/lmes/v1alpha1/zz_generated.deepcopy.go @@ -197,6 +197,16 @@ func (in *LMEvalJobSpec) DeepCopyInto(out *LMEvalJobSpec) { *out = new(OfflineSpec) **out = **in } + if in.AllowOnline != nil { + in, out := &in.AllowOnline, &out.AllowOnline + *out = new(bool) + **out = **in + } + if in.AllowCodeExecution != nil { + in, out := &in.AllowCodeExecution, &out.AllowCodeExecution + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LMEvalJobSpec. 
diff --git a/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml b/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml index 3435b339..10e1f232 100644 --- a/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml +++ b/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml @@ -43,6 +43,14 @@ spec: spec: description: LMEvalJobSpec defines the desired state of LMEvalJob properties: + allowCodeExecution: + description: AllowCodeExecution specifies whether the LMEvalJob can + execute remote code. Default is false. + type: boolean + allowOnline: + description: AllowOnly specifies whether the LMEvalJob can directly + download remote code, datasets and metrics. Default is false. + type: boolean batchSize: description: |- Batch size for the evaluation. This is used by the models that run and are loaded @@ -91,7 +99,7 @@ spec: type: integer offline: description: Offline specifies settings for running LMEvalJobs in - a offline mode + an offline mode properties: storage: description: OfflineStorageSpec defines the storage configuration @@ -4636,7 +4644,6 @@ spec: type: object required: - model - - offline - taskList type: object status: diff --git a/controllers/lmes/lmevaljob_controller.go b/controllers/lmes/lmevaljob_controller.go index 88837454..b87be6c7 100644 --- a/controllers/lmes/lmevaljob_controller.go +++ b/controllers/lmes/lmevaljob_controller.go @@ -714,38 +714,42 @@ func CreatePod(svcOpts *serviceOptions, job *lmesv1alpha1.LMEvalJob, log logr.Lo } // Disable remote code execution by default - remoteCodeEnvVars := []corev1.EnvVar{ - { - Name: "TRUST_REMOTE_CODE", - Value: "0", - }, - { - Name: "HF_DATASETS_TRUST_REMOTE_CODE", - Value: "0", - }, + if job.Spec.AllowCodeExecution == nil || *job.Spec.AllowCodeExecution == false { + remoteCodeEnvVars := []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, + } + envVars = append(envVars, remoteCodeEnvVars...) 
} - envVars = append(envVars, remoteCodeEnvVars...) // Enforce offline mode by default - offlineHuggingFaceEnvVars := []corev1.EnvVar{ - { - Name: "HF_DATASETS_OFFLINE", - Value: "1", - }, - { - Name: "HF_HUB_OFFLINE", - Value: "1", - }, - { - Name: "TRANSFORMERS_OFFLINE", - Value: "1", - }, - { - Name: "HF_EVALUATE_OFFLINE", - Value: "1", - }, + if job.Spec.AllowOnline == nil || *job.Spec.AllowOnline == false { + offlineHuggingFaceEnvVars := []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, + } + envVars = append(envVars, offlineHuggingFaceEnvVars...) } - envVars = append(envVars, offlineHuggingFaceEnvVars...) if job.Spec.IsOffline() { diff --git a/controllers/lmes/lmevaljob_controller_test.go b/controllers/lmes/lmevaljob_controller_test.go index 908231ea..2519e135 100644 --- a/controllers/lmes/lmevaljob_controller_test.go +++ b/controllers/lmes/lmevaljob_controller_test.go @@ -1683,6 +1683,456 @@ func Test_OfflineMode(t *testing.T) { assert.Equal(t, expect, newPod) } +// Test_OnlineMode tests that if the online mode is set the configuration is correct +func Test_OnlineMode(t *testing.T) { + log := log.FromContext(context.Background()) + svcOpts := &serviceOptions{ + PodImage: "podimage:latest", + DriverImage: "driver:latest", + ImagePullPolicy: corev1.PullAlways, + } + + allowOnline := true + jobName := "test" + pvcName := "my-pvc" + var job = &lmesv1alpha1.LMEvalJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: "default", + UID: "for-testing", + }, + TypeMeta: metav1.TypeMeta{ + Kind: lmesv1alpha1.KindName, + APIVersion: lmesv1alpha1.Version, + }, + Spec: lmesv1alpha1.LMEvalJobSpec{ + Model: "test", + ModelArgs: []lmesv1alpha1.Arg{ + {Name: "arg1", Value: "value1"}, + }, + TaskList: lmesv1alpha1.TaskList{ + TaskNames: []string{"task1", "task2"}, + }, + Offline: 
&lmesv1alpha1.OfflineSpec{ + StorageSpec: lmesv1alpha1.OfflineStorageSpec{ + PersistentVolumeClaimName: pvcName, + }, + }, + AllowOnline: &allowOnline, + }, + } + + expect := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + Labels: map[string]string{ + "app.kubernetes.io/name": "ta-lmes", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: lmesv1alpha1.Version, + Kind: lmesv1alpha1.KindName, + Name: "test", + Controller: &isController, + UID: "for-testing", + }, + }, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "v1", + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + { + Name: "driver", + Image: svcOpts.DriverImage, + ImagePullPolicy: svcOpts.ImagePullPolicy, + Command: []string{DriverPath, "--copy", DestDriverPath}, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{ + "ALL", + }, + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared", + MountPath: "/opt/app-root/src/bin", + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "main", + Image: svcOpts.PodImage, + ImagePullPolicy: svcOpts.ImagePullPolicy, + Command: generateCmd(svcOpts, job), + Args: generateArgs(svcOpts, job, log), + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{ + "ALL", + }, + }, + }, + Env: []corev1.EnvVar{ + { + Name: "TRUST_REMOTE_CODE", + Value: "0", + }, + { + Name: "HF_DATASETS_TRUST_REMOTE_CODE", + Value: "0", + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared", + MountPath: "/opt/app-root/src/bin", + }, + { + Name: "offline", + MountPath: "/opt/app-root/src/hf_home", + }, + }, + }, + }, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &runAsNonRootUser, + SeccompProfile: &corev1.SeccompProfile{ + Type: 
corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared", VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + { + Name: "offline", VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + ReadOnly: false, + }, + }, + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + } + + newPod := CreatePod(svcOpts, job, log) + + assert.Equal(t, expect, newPod) +} + +// Test_AllowCodeOnlineMode tests that if both online mode and code execution are allowed the configuration is correct +func Test_AllowCodeOnlineMode(t *testing.T) { + log := log.FromContext(context.Background()) + svcOpts := &serviceOptions{ + PodImage: "podimage:latest", + DriverImage: "driver:latest", + ImagePullPolicy: corev1.PullAlways, + } + + jobName := "test" + pvcName := "my-pvc" + allowOnline := true + allowCode := true + var job = &lmesv1alpha1.LMEvalJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: "default", + UID: "for-testing", + }, + TypeMeta: metav1.TypeMeta{ + Kind: lmesv1alpha1.KindName, + APIVersion: lmesv1alpha1.Version, + }, + Spec: lmesv1alpha1.LMEvalJobSpec{ + Model: "test", + ModelArgs: []lmesv1alpha1.Arg{ + {Name: "arg1", Value: "value1"}, + }, + TaskList: lmesv1alpha1.TaskList{ + TaskNames: []string{"task1", "task2"}, + }, + Offline: &lmesv1alpha1.OfflineSpec{ + StorageSpec: lmesv1alpha1.OfflineStorageSpec{ + PersistentVolumeClaimName: pvcName, + }, + }, + AllowOnline: &allowOnline, + AllowCodeExecution: &allowCode, + }, + } + + expect := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + Labels: map[string]string{ + "app.kubernetes.io/name": "ta-lmes", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: lmesv1alpha1.Version, + Kind: lmesv1alpha1.KindName, + Name: "test", + Controller: &isController, + UID: "for-testing", + }, + }, + }, + TypeMeta: metav1.TypeMeta{ + Kind: 
"Pod", + APIVersion: "v1", + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + { + Name: "driver", + Image: svcOpts.DriverImage, + ImagePullPolicy: svcOpts.ImagePullPolicy, + Command: []string{DriverPath, "--copy", DestDriverPath}, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{ + "ALL", + }, + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared", + MountPath: "/opt/app-root/src/bin", + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "main", + Image: svcOpts.PodImage, + ImagePullPolicy: svcOpts.ImagePullPolicy, + Command: generateCmd(svcOpts, job), + Args: generateArgs(svcOpts, job, log), + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{ + "ALL", + }, + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared", + MountPath: "/opt/app-root/src/bin", + }, + { + Name: "offline", + MountPath: "/opt/app-root/src/hf_home", + }, + }, + }, + }, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &runAsNonRootUser, + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared", VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + { + Name: "offline", VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + ReadOnly: false, + }, + }, + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + } + + newPod := CreatePod(svcOpts, job, log) + + assert.Equal(t, expect, newPod) +} + +// Test_AllowCodeOfflineMode tests that if code execution is allowed while online mode is not the configuration is correct +func Test_AllowCodeOfflineMode(t *testing.T) { + log := log.FromContext(context.Background()) + svcOpts := 
&serviceOptions{ + PodImage: "podimage:latest", + DriverImage: "driver:latest", + ImagePullPolicy: corev1.PullAlways, + } + + jobName := "test" + pvcName := "my-pvc" + allowCode := true + var job = &lmesv1alpha1.LMEvalJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: "default", + UID: "for-testing", + }, + TypeMeta: metav1.TypeMeta{ + Kind: lmesv1alpha1.KindName, + APIVersion: lmesv1alpha1.Version, + }, + Spec: lmesv1alpha1.LMEvalJobSpec{ + Model: "test", + ModelArgs: []lmesv1alpha1.Arg{ + {Name: "arg1", Value: "value1"}, + }, + TaskList: lmesv1alpha1.TaskList{ + TaskNames: []string{"task1", "task2"}, + }, + Offline: &lmesv1alpha1.OfflineSpec{ + StorageSpec: lmesv1alpha1.OfflineStorageSpec{ + PersistentVolumeClaimName: pvcName, + }, + }, + AllowCodeExecution: &allowCode, + }, + } + + expect := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + Labels: map[string]string{ + "app.kubernetes.io/name": "ta-lmes", + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: lmesv1alpha1.Version, + Kind: lmesv1alpha1.KindName, + Name: "test", + Controller: &isController, + UID: "for-testing", + }, + }, + }, + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "v1", + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + { + Name: "driver", + Image: svcOpts.DriverImage, + ImagePullPolicy: svcOpts.ImagePullPolicy, + Command: []string{DriverPath, "--copy", DestDriverPath}, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{ + "ALL", + }, + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared", + MountPath: "/opt/app-root/src/bin", + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "main", + Image: svcOpts.PodImage, + ImagePullPolicy: svcOpts.ImagePullPolicy, + Command: generateCmd(svcOpts, job), + Args: generateArgs(svcOpts, job, log), + SecurityContext: 
&corev1.SecurityContext{ + AllowPrivilegeEscalation: &allowPrivilegeEscalation, + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{ + "ALL", + }, + }, + }, + Env: []corev1.EnvVar{ + { + Name: "HF_DATASETS_OFFLINE", + Value: "1", + }, + { + Name: "HF_HUB_OFFLINE", + Value: "1", + }, + { + Name: "TRANSFORMERS_OFFLINE", + Value: "1", + }, + { + Name: "HF_EVALUATE_OFFLINE", + Value: "1", + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared", + MountPath: "/opt/app-root/src/bin", + }, + { + Name: "offline", + MountPath: "/opt/app-root/src/hf_home", + }, + }, + }, + }, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &runAsNonRootUser, + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared", VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + { + Name: "offline", VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + ReadOnly: false, + }, + }, + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + } + + newPod := CreatePod(svcOpts, job, log) + + assert.Equal(t, expect, newPod) +} + // Test_OfflineModeWithOutput tests that if the offline mode is set the configuration is correct, even when custom output is set func Test_OfflineModeWithOutput(t *testing.T) { log := log.FromContext(context.Background()) From 3ece7985b3521da44ebc161c9a2789b0abab0e7f Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Tue, 3 Dec 2024 11:14:47 +0000 Subject: [PATCH 6/8] Update images --- config/base/params.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/base/params.env b/config/base/params.env index e0de8034..d9cff1e6 100644 --- a/config/base/params.env +++ b/config/base/params.env @@ -1,9 +1,9 @@ trustyaiServiceImage=quay.io/trustyai/trustyai-service:latest 
-trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest +trustyaiOperatorImage=quay.io/ruimvieira/trustyai-service-operator-lmeval:latest oauthProxyImage=quay.io/openshift/origin-oauth-proxy:4.14.0 kServeServerless=enabled -lmes-driver-image=quay.io/trustyai/ta-lmes-driver:latest -lmes-pod-image=quay.io/trustyai/ta-lmes-job:latest +lmes-driver-image=quay.io/ruimvieira/ta-lmes-driver-v2:latest +lmes-pod-image=quay.io/ruimvieira/ta-lmes-job-v2:latest lmes-pod-checking-interval=10s lmes-image-pull-policy=Always lmes-max-batch-size=24 From 48340f722deb7bc5333eb33d63cf741df5d43981 Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Tue, 3 Dec 2024 12:19:19 +0000 Subject: [PATCH 7/8] Add default to flags --- api/lmes/v1alpha1/lmevaljob_types.go | 2 ++ config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/api/lmes/v1alpha1/lmevaljob_types.go b/api/lmes/v1alpha1/lmevaljob_types.go index f8c7c9b9..b00cd52e 100644 --- a/api/lmes/v1alpha1/lmevaljob_types.go +++ b/api/lmes/v1alpha1/lmevaljob_types.go @@ -305,9 +305,11 @@ type LMEvalJobSpec struct { Offline *OfflineSpec `json:"offline,omitempty"` // AllowOnly specifies whether the LMEvalJob can directly download remote code, datasets and metrics. Default is false. // +optional + // +kubebuilder:default:=false AllowOnline *bool `json:"allowOnline,omitempty"` // AllowCodeExecution specifies whether the LMEvalJob can execute remote code. Default is false. 
// +optional + // +kubebuilder:default:=false AllowCodeExecution *bool `json:"allowCodeExecution,omitempty"` } diff --git a/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml b/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml index 10e1f232..1a48f951 100644 --- a/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml +++ b/config/crd/bases/trustyai.opendatahub.io_lmevaljobs.yaml @@ -44,10 +44,12 @@ spec: description: LMEvalJobSpec defines the desired state of LMEvalJob properties: allowCodeExecution: + default: false description: AllowCodeExecution specifies whether the LMEvalJob can execute remote code. Default is false. type: boolean allowOnline: + default: false description: AllowOnly specifies whether the LMEvalJob can directly download remote code, datasets and metrics. Default is false. type: boolean From a0e2fd984ae5788caafdaaa1afe14d6c11f99945 Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Wed, 4 Dec 2024 09:42:05 +0000 Subject: [PATCH 8/8] Restore images --- config/base/params.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/base/params.env b/config/base/params.env index d9cff1e6..e0de8034 100644 --- a/config/base/params.env +++ b/config/base/params.env @@ -1,9 +1,9 @@ trustyaiServiceImage=quay.io/trustyai/trustyai-service:latest -trustyaiOperatorImage=quay.io/ruimvieira/trustyai-service-operator-lmeval:latest +trustyaiOperatorImage=quay.io/trustyai/trustyai-service-operator:latest oauthProxyImage=quay.io/openshift/origin-oauth-proxy:4.14.0 kServeServerless=enabled -lmes-driver-image=quay.io/ruimvieira/ta-lmes-driver-v2:latest -lmes-pod-image=quay.io/ruimvieira/ta-lmes-job-v2:latest +lmes-driver-image=quay.io/trustyai/ta-lmes-driver:latest +lmes-pod-image=quay.io/trustyai/ta-lmes-job:latest lmes-pod-checking-interval=10s lmes-image-pull-policy=Always lmes-max-batch-size=24