# worker-pod.yaml
---
# Pod manifest for a single-container GPU pod that is kept alive by a long
# `sleep`. It mounts two PersistentVolumeClaims plus a memory-backed /dev/shm.
apiVersion: v1
kind: Pod
metadata:
  name: work-pod-1
  namespace: sfr-ns-yihaofeng
spec:
  # Containers are restarted only when they exit with a non-zero status.
  restartPolicy: OnFailure
  volumes:
    - name: sfr-home-pv-yihaofeng
      persistentVolumeClaim:
        claimName: sfr-home-pvc-yihaofeng
    - name: sfr-share-pv-yihaofeng
      persistentVolumeClaim:
        claimName: sfr-share-pvc-yihaofeng
    # Memory-backed (tmpfs) emptyDir; mounted at /dev/shm in the container.
    - name: dshm
      emptyDir:
        medium: Memory
  containers:
    - name: research-container
      # NOTE(review): `:latest` is a mutable tag; consider pinning a specific
      # image version so the pod is reproducible.
      image: "gcr.io/deeplearning-platform-release/pytorch-gpu:latest"
      # The container does no work itself: it sleeps for 40 days (the `d`
      # suffix is a GNU `sleep` extension).
      command: ["/bin/sh", "-c"]
      args: ["sleep 40d"]
      resources:
        # Only limits are given; Kubernetes uses them as the requests too.
        limits:
          nvidia.com/gpu: 8
          cpu: "63"
          # `G` is decimal gigabytes (10^9 bytes), not `Gi`.
          memory: 400G
      volumeMounts:
        - name: sfr-home-pv-yihaofeng
          mountPath: "/export/home"
        - name: sfr-share-pv-yihaofeng
          mountPath: "/export/share"
        - mountPath: /dev/shm
          name: dshm
  # Schedule only onto nodes labelled as having V100 accelerators.
  nodeSelector:
    cloud.google.com/gke-accelerator: nvidia-tesla-v100
  # Tolerate the `gpu_num=gvnic-8:NoSchedule` taint.
  tolerations:
    - key: "gpu_num"
      operator: "Equal"
      value: "gvnic-8"
      effect: "NoSchedule"