-
Notifications
You must be signed in to change notification settings - Fork 178
/
Copy pathmatrix.yaml
311 lines (291 loc) · 15 KB
/
matrix.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
workflows:
# If any jobs appear here, they will be executed instead of `pull_request' for PRs.
# This is useful for limiting resource usage when a full matrix is not needed.
# The branch protection checks will fail when using this override workflow.
#
# Example:
# override:
# - {jobs: ['test'], project: 'thrust', std: 17, ctk: 'curr', cxx: ['gcc12', 'clang16']}
#
override:
pull_request:
# Old CTK/compiler
- {jobs: ['build'], std: 'minmax', ctk: '12.0', cxx: ['gcc7', 'gcc9', 'clang14', 'msvc2019']}
# Current CTK build-only
- {jobs: ['build'], std: [11, 14], cxx: ['gcc7', 'clang14'], project: 'libcudacxx'}
- {jobs: ['build'], std: [17], cxx: ['gcc7', 'clang14']}
- {jobs: ['build'], std: 'max', cxx: ['gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], std: 'max', cxx: ['clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], std: 'max', cxx: ['msvc2019']}
- {jobs: ['build'], std: [17, 20], cxx: ['gcc', 'clang', 'msvc']}
# Current CTK testing:
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['gcc']}
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['clang', 'msvc']}
# Split up cub tests:
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc']}
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
# Modded builds:
- {jobs: ['build'], std: [17, 20], ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'}
# Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly.
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
# default_projects: clang-cuda
- {jobs: ['build'], std: [17, 20], cudacxx: 'clang', cxx: 'clang'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90a'}
# nvrtc:
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'}
# verify-codegen:
- {jobs: ['verify_codegen'], project: 'libcudacxx'}
# cudax has different CTK reqs:
- {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20, cxx: ['msvc14.36']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], project: 'cudax', ctk: ['12.5'], std: [17, 20], cxx: ['nvhpc']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['msvc2022']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: [17, 20], cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc']}
# Python and c/parallel jobs:
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'}
# cccl-infra:
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']}
nightly:
# Edge-case jobs
- {jobs: ['limited'], project: 'cub', std: 17}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'}
# Old CTK/compiler
- {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']}
- {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'}
# Current CTK build-only
- {jobs: ['build'], std: 'all', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], std: 'all', cxx: ['msvc2019']}
# Test current CTK
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']}
# Modded builds:
- {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['build'], std: 'all', cxx: ['gcc'], sm: '90a'}
# default_projects: clang-cuda
- {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'}
- {jobs: ['build'], project: 'libcudacxx', std: 'all', cudacxx: 'clang', cxx: 'clang', sm: '90'}
- {jobs: ['build'], project: 'libcudacxx', std: 'all', cudacxx: 'clang', cxx: 'clang', sm: '90a'}
# cudax
- {jobs: ['build'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc9', 'gcc10', 'gcc11']}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], project: 'cudax', ctk: [ '12.5'], std: 'all', cxx: ['nvhpc']}
- {jobs: ['build'], project: 'cudax', ctk: ['12.0', ], std: 'all', cxx: ['msvc14.36']}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['msvc2022']}
- {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['gcc12'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']}
# # These are waiting on the NVKS nodes:
# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc7', std: [11]}
# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang14', std: [17]}
# - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang14', std: [11]}
# # H100 runners are currently flakey, only build since those use CPU-only runners:
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]}
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]}
#
# # nvrtc:
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']}
# Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows.
exclude:
# GPU runners are not available on Windows.
- {jobs: ['test', 'test_gpu', 'test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'], cxx: ['msvc2019', 'msvc14.36', 'msvc2022']}
#############################################################################################
# The version of the devcontainer images to use from https://hub.docker.com/r/rapidsai/devcontainers
devcontainer_version: '25.02'
# All supported C++ standards:
all_stds: [11, 14, 17, 20]
ctk_versions:
12.0: { stds: [11, 14, 17, 20] }
12.5: { stds: [11, 14, 17, 20] }
12.6: { stds: [11, 14, 17, 20], aka: 'curr' }
device_compilers:
nvcc: # Version / stds are taken from CTK
name: 'nvcc'
exe: 'nvcc'
clang: # Requires cxx=clang. Version / stds are taken from cxx compiler.
name: "ClangCUDA"
exe: 'clang++'
host_compilers:
gcc:
name: 'GCC'
container_tag: 'gcc'
exe: 'g++'
versions:
7: { stds: [11, 14, 17, ] }
8: { stds: [11, 14, 17, ] }
9: { stds: [11, 14, 17, ] }
10: { stds: [11, 14, 17, 20] }
11: { stds: [11, 14, 17, 20] }
12: { stds: [11, 14, 17, 20] }
13: { stds: [11, 14, 17, 20] }
clang:
name: 'Clang'
container_tag: 'llvm'
exe: 'clang++'
versions:
14: { stds: [11, 14, 17, 20] }
15: { stds: [11, 14, 17, 20] }
16: { stds: [11, 14, 17, 20] }
17: { stds: [11, 14, 17, 20] }
18: { stds: [11, 14, 17, 20] }
msvc:
name: 'MSVC'
container_tag: 'cl'
exe: cl
versions:
14.29: { stds: [ 14, 17, ], aka: '2019' }
14.36: { stds: [ 14, 17, 20] }
14.39: { stds: [ 14, 17, 20], aka: '2022' }
nvhpc:
name: 'NVHPC'
container_tag: 'nvhpc'
exe: nvc++
versions:
24.7: { stds: [11, 14, 17, 20 ] }
# Jobs support the following properties:
#
# - gpu: Whether the job requires a GPU runner. Default is false.
# - name: The human-readable name of the job. Default is the capitalized job key.
# - needs:
# - A list of jobs that must be completed before this job can run. Default is an empty list.
# - These are automatically added if needed:
# - Eg. "jobs: ['test']" in the workflow def will also create the required 'build' jobs.
# - invoke:
# - Map the job type to the script invocation spec:
# - prefix: The script invocation prefix. Default is the job name.
# - args: Additional arguments to pass to the script. Default is no args.
# - The script is invoked either:
# linux: `ci/windows/<spec[prefix]>_<project>.ps1 <spec[args]>`
# windows: `ci/<spec[prefix]>_<project>.sh <spec[args]>`
jobs:
# General:
build: { gpu: false }
test: { gpu: true, needs: 'build' }
test_nobuild: { gpu: true, name: 'Test', invoke: { prefix: 'test' } }
# CCCL:
infra: { gpu: true } # example project launches a kernel
# libcudacxx:
nvrtc: { gpu: true, name: 'NVRTC' }
verify_codegen: { gpu: false, name: 'VerifyCodegen' }
# CUB:
# NoLid -> The string `lid_X` doesn't appear in the test name. Mostly warp/block tests, old device tests, and examples.
test_nolid: { name: 'TestGPU', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-no-lid'} }
# CUB uses `lid` to indicate launch strategies: whether CUB algorithms are:
# - launched from the host (lid0):
test_lid0: { name: 'HostLaunch', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-lid0'} }
# - launched from the device (lid1):
test_lid1: { name: 'DeviceLaunch', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-lid1'} }
# - captured in a CUDA graph for deferred launch (lid2):
test_lid2: { name: 'GraphCapture', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-lid2'} }
# Limited build reduces the number of runtime test cases, available device memory, etc, and may be used
# to reduce test runtime in limited environments.
limited: { name: "SmallGMem", gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-limited'} }
# Thrust:
test_cpu: { name: 'TestCPU', gpu: false, needs: 'build', invoke: { prefix: 'test', args: '-cpu-only'} }
test_gpu: { name: 'TestGPU', gpu: true, needs: 'build', invoke: { prefix: 'test', args: '-gpu-only'} }
# Project have the following properties:
#
# Keys are project subdirectories names. These will also be used in script names.
#
# - stds: A list of C++ standards to test. Required.
# - name: The human-readable name of the project. Default is the project key.
# - job_map: Map general jobs to arrays of project-specific jobs.
# Useful for things like splitting cpu/gpu testing for a project.
# E.g. "job_map: { test: ['test_cpu', 'test_gpu'] }" replaces
# the "test" job with distinct "test_cpu" and "test_gpu" jobs.
projects:
cccl:
name: 'CCCL'
stds: [11, 14, 17, 20]
libcudacxx:
name: 'libcu++'
stds: [11, 14, 17, 20]
cub:
name: 'CUB'
stds: [17, 20]
job_map: { test: ['test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'] }
thrust:
name: 'Thrust'
stds: [17, 20]
job_map: { test: ['test_cpu', 'test_gpu'] }
cudax:
stds: [17, 20]
python:
name: "cuda (python)"
job_map: { build: [], test: ['test_nobuild'] }
cccl_c_parallel:
name: 'CCCL C Parallel'
stds: [20]
# testing -> Runner with GPU is in a nv-gh-runners testing pool
gpus:
v100: { sm: 70 } # 32 GB, 40 runners
t4: { sm: 75, testing: true } # 16 GB, 8 runners
rtx2080: { sm: 75, testing: true } # 8 GB, 8 runners
rtxa6000: { sm: 86, testing: true } # 48 GB, 12 runners
l4: { sm: 89, testing: true } # 24 GB, 48 runners
rtx4090: { sm: 89, testing: true } # 24 GB, 10 runners
h100: { sm: 90, testing: true } # 80 GB, 16 runners
# Tags are used to define a `matrix job` in the workflow section.
#
# Tags have the following options:
# - required: Whether the tag is required. Default is false.
# - default: The default value for the tag. Default is null.
tags:
# An array of jobs (e.g. 'build', 'test', 'nvrtc', 'infra', 'verify_codegen', ...)
# See the `jobs` map.
jobs: { required: true }
# CUDA ToolKit version
# See the `ctks` map.
ctk: { default: 'curr' }
# CPU architecture
cpu: { default: 'amd64' }
# GPU model
gpu: { default: 'v100' }
# Host compiler {name, version, exe}
# See the `host_compilers` map.
cxx: { default: 'gcc' }
# Device compiler.
# See the `device_compilers` map.
cudacxx: { default: 'nvcc' }
# Project name (e.g. libcudacxx, cub, thrust, cccl)
# See the `projects` map.
project: { default: ['libcudacxx', 'cub', 'thrust'] }
# C++ standard
# If set to 'all', all stds supported by the ctk/compilers/project are used.
# If set to 'min', 'max', or 'minmax', the minimum, maximum, or both stds are used.
# If set, will be passed to script with `-std <std>`.
std: { required: false }
# GPU architecture
# - If set, passed to script with `-arch <sm>`.
# - Format is the same as `CMAKE_CUDA_ARCHITECTURES`:
# - PTX only: 70-virtual
# - SASS only: 70-real
# - Both: 70
# - Can pass multiple architectures via "60;70-real;80-virtual"
# - Defaults to use the settings in the CMakePresets.json file.
# - Will be exploded if an array, e.g. `sm: ['60;70;80;90', '90a']` creates two jobs.
# - Set to 'gpu' to only target the GPU in the `gpu` tag.
sm: { required: false }
# Additional CMake options to pass to the build.
# If set, passed to script with `-cmake_options "<cmake_options>"`.
cmake_options: { required: false }