-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathcreate_nvidia_runtime_sysext.sh
executable file
·184 lines (152 loc) · 5.93 KB
/
create_nvidia_runtime_sysext.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env bash
#
# Build libnvidia-container and nvidia-container-toolkit from source and
# package the result into a sysext image with bake.sh.
#
# Usage:       create_nvidia_runtime_sysext.sh VERSION SYSEXTNAME
# Environment: ARCH - target architecture, defaults to x86-64.
set -euo pipefail

# Exported so that child processes (bake.sh) see the same value.
export ARCH="${ARCH-x86-64}"
# Directory containing this script; bake.sh is looked up next to it.
SCRIPTFOLDER="$(dirname "$(readlink -f "$0")")"

# Print the help text when arguments are missing or help is requested.
# Note that this path always exits non-zero, also for -h/--help.
if [ $# -lt 2 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
  echo "Usage: $0 VERSION SYSEXTNAME"
  echo "The script will build nvidia-container-toolkit on ubuntu 18.04 and package it into a sysext."
  echo "A temporary directory named SYSEXTNAME in the current folder will be created and deleted again."
  echo "All files in the sysext image will be owned by root."
  echo "To use arm64 pass 'ARCH=arm64' as environment variable (current value is '${ARCH}')."
  "${SCRIPTFOLDER}"/bake.sh --help
  exit 1
fi

# VERSION is the git branch/tag to build, e.g. v1.14.3. It is mandatory;
# no default is applied.
VERSION="$1"
# SYSEXTNAME names both the temporary staging directory and the sysext.
SYSEXTNAME="$2"

# The github release uses different arch identifiers, we map them here
# and rely on bake.sh to map them back to what systemd expects.
# Any other value is passed through unchanged.
case "${ARCH}" in
  amd64 | x86-64) ARCH="amd64" ;;
  arm64) ARCH="arm64" ;;
esac
# Fetch the requested release of both NVIDIA projects (shallow clone).
for repo in libnvidia-container nvidia-container-toolkit; do
  if ! git clone -b "${VERSION}" --depth 1 "https://github.com/NVIDIA/${repo}"; then
    # A failed clone is tolerated only when a directory from an earlier run
    # is already present; that directory is then built as-is and may not
    # correspond to the requested VERSION.
    if [ ! -d "${repo}" ]; then
      echo "error: cloning ${repo} at '${VERSION}' failed and no existing ./${repo} directory was found" >&2
      exit 1
    fi
    echo "warning: could not clone ${repo}; building the existing ./${repo} directory as-is" >&2
  fi
done

# Build the Ubuntu 18.04 packages for the selected architecture; the .deb
# files are picked up from each project's dist/ubuntu18.04/${ARCH} directory.
make -C libnvidia-container "ubuntu18.04-${ARCH}"
make -C nvidia-container-toolkit "ubuntu18.04-${ARCH}"
# Start from an empty staging directory. The :? guard aborts with an error
# instead of operating on a bare path if SYSEXTNAME is empty.
rm -rf "${SYSEXTNAME:?}"
mkdir -p "${SYSEXTNAME}"

# Unpack the libnvidia-container1, libnvidia-container-tools and
# nvidia-container-toolkit packages into the staging directory. An unmatched
# pattern stays literal, so dpkg-deb fails and the script aborts (set -e).
for deb in \
  "libnvidia-container/dist/ubuntu18.04/${ARCH}"/libnvidia-container{1_*,-tools_}*.deb \
  "nvidia-container-toolkit/dist/ubuntu18.04/${ARCH}"/nvidia-container-toolkit*.deb; do
  dpkg-deb -x "${deb}" "${SYSEXTNAME}"/
done

# Discard the usr/share content shipped by the packages.
rm -rf "${SYSEXTNAME:?}"/usr/share
# Move the multiarch library directory (usr/lib/<triplet>-linux-gnu) to usr/lib64.
mv "${SYSEXTNAME}"/usr/lib/*-linux-gnu "${SYSEXTNAME}"/usr/lib64

# Create the parent directories explicitly so the symlinks below do not
# depend on the packages having shipped usr/bin.
mkdir -p "${SYSEXTNAME}"/usr/local "${SYSEXTNAME}"/usr/bin
# These targets are not part of this image.
# NOTE(review): presumably provided by the driver installation under /opt - confirm.
ln -s /opt/nvidia "${SYSEXTNAME}"/usr/local/nvidia
ln -s /opt/bin/nvidia-smi "${SYSEXTNAME}"/usr/bin/nvidia-smi
# docker.service drop-in: order docker after nvidia.service and set DOCKER_OPTS
# so that nvidia-container-runtime is added as the "nvidia" runtime.
# NOTE(review): assumes docker.service passes DOCKER_OPTS to dockerd - confirm.
docker_dropin_dir="${SYSEXTNAME}/usr/lib/systemd/system/docker.service.d"
mkdir -p "${docker_dropin_dir}"
cat >"${docker_dropin_dir}/10-nvidia.conf" <<'EOF'
[Unit]
After=nvidia.service
[Service]
Environment=DOCKER_OPTS=--add-runtime=nvidia=nvidia-container-runtime
EOF
# containerd.service drop-in: order containerd after nvidia.service, clear the
# inherited ExecStart and start containerd with /etc/containerd/config.toml.
containerd_dropin_dir="${SYSEXTNAME}/usr/lib/systemd/system/containerd.service.d"
mkdir -p "${containerd_dropin_dir}"
cat >"${containerd_dropin_dir}/10-nvidia.conf" <<'EOF'
[Unit]
After=nvidia.service
[Service]
ExecStart=
ExecStart=/usr/bin/containerd --config /etc/containerd/config.toml
EOF
# nvidia.service drop-in.
#  - Before start: remove the /run/extensions/nvidia-driver link and restart
#    systemd-sysext (failure ignored because of the "-" prefix).
#  - After start: run nvidia-persistenced and relabel /dev/nvidia* (both with
#    failures ignored), then link /opt/nvidia/current into /run/extensions and
#    restart systemd-sysext.
#  - After stop: unload the nvidia kernel modules.
nvidia_dropin_dir="${SYSEXTNAME}/usr/lib/systemd/system/nvidia.service.d"
mkdir -p "${nvidia_dropin_dir}"
cat >"${nvidia_dropin_dir}/10-persistenced.conf" <<'EOF'
[Service]
ExecStartPre=-/bin/sh -c "rm /run/extensions/nvidia-driver && systemctl restart systemd-sysext"
ExecStartPost=-/opt/bin/nvidia-persistenced
ExecStartPost=-/bin/sh -c "chcon -R -t container_file_t /dev/nvidia*"
ExecStartPost=mkdir -p /run/extensions
ExecStartPost=ln -sf /opt/nvidia/current /run/extensions/nvidia-driver
ExecStartPost=systemctl restart systemd-sysext
ExecStopPost=rmmod nvidia_uvm nvidia_modeset nvidia
EOF
# tmpfiles.d rule: "C" entries copy the default configuration files shipped
# under /usr/share/flatcar/etc into /etc.
tmpfiles_dir="${SYSEXTNAME}/usr/lib/tmpfiles.d"
mkdir -p "${tmpfiles_dir}"
cat >"${tmpfiles_dir}/10-nvidia.conf" <<'EOF'
C /etc/containerd/config.toml - - - - /usr/share/flatcar/etc/containerd/config.toml
C /etc/nvidia-container-runtime/config.toml - - - - /usr/share/flatcar/etc/nvidia-container-runtime/config.toml
EOF
# Default nvidia-container-runtime configuration. It is stored under
# usr/share/flatcar/etc, which is the source path named by the tmpfiles.d rule
# written by this script for /etc/nvidia-container-runtime/config.toml.
runtime_cfg_dir="${SYSEXTNAME}/usr/share/flatcar/etc/nvidia-container-runtime"
mkdir -p "${runtime_cfg_dir}"
cat >"${runtime_cfg_dir}/config.toml" <<'EOF'
#accept-nvidia-visible-devices-as-volume-mounts = false
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
disable-require = false
supported-driver-capabilities = "compat32,compute,display,graphics,ngx,utility,video"
#swarm-resource = "DOCKER_RESOURCE_GPU"
[nvidia-container-cli]
#debug = "/var/log/nvidia-container-toolkit.log"
environment = []
#ldcache = "/etc/ld.so.cache"
ldconfig = "@/sbin/ldconfig"
load-kmods = true
#no-cgroups = false
#path = "/usr/bin/nvidia-container-cli"
#root = "/run/nvidia/driver"
#user = "root:video"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]
[nvidia-container-runtime.modes.cdi]
annotation-prefixes = ["cdi.k8s.io/"]
default-kind = "nvidia.com/gpu"
spec-dirs = ["/etc/cdi", "/var/run/cdi"]
[nvidia-container-runtime.modes.csv]
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
[nvidia-container-runtime-hook]
path = "nvidia-container-runtime-hook"
skip-mode-detection = false
[nvidia-ctk]
path = "nvidia-ctk"
EOF
# Default containerd configuration. It registers an "nvidia" CRI runtime backed
# by /usr/bin/nvidia-container-runtime and makes it the default runtime.
# Stored under usr/share/flatcar/etc, which is the source path named by the
# tmpfiles.d rule written by this script for /etc/containerd/config.toml.
containerd_cfg_dir="${SYSEXTNAME}/usr/share/flatcar/etc/containerd"
mkdir -p "${containerd_cfg_dir}"
cat >"${containerd_cfg_dir}/config.toml" <<'EOF'
version = 2
# persistent data location
root = "/var/lib/containerd"
# runtime state information
state = "/run/containerd"
# set containerd as a subreaper on linux when it is not running as PID 1
subreaper = true
# set containerd's OOM score
oom_score = -999
disabled_plugins = []
# grpc configuration
[grpc]
address = "/run/containerd/containerd.sock"
# socket uid
uid = 0
# socket gid
gid = 0
[plugins."io.containerd.runtime.v1.linux"]
# shim binary name/path
shim = "containerd-shim"
# runtime binary name/path
runtime = "runc"
# do not use a shim when starting containers, saves on memory but
# live restore is not supported
no_shim = false
[plugins."io.containerd.grpc.v1.cri"]
# enable SELinux labeling
enable_selinux = true
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
# setting runc.options unsets parent settings
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
SystemdCgroup = true
EOF
# Ensure usr/bin exists in the staging tree before it is handed to bake.sh.
mkdir -p "${SYSEXTNAME}/usr/bin"
# bake.sh lives next to this script and packages the staging directory.
"${SCRIPTFOLDER}/bake.sh" "${SYSEXTNAME}"
# The staging directory is only temporary; remove it once baking succeeded.
rm -rf "${SYSEXTNAME}"