Commit 6974904
refactor cluster options for GPU resource allocation and update documentation for Nextflow version requirements
Joon-Klaps committed Nov 25, 2024
1 parent e3e948e commit 6974904
Showing 2 changed files with 31 additions and 16 deletions.
36 changes: 24 additions & 12 deletions conf/vsc_kul_uhasselt.config
@@ -63,11 +63,14 @@ profiles {
 }
 
 withLabel: '.*gpu.*'{
-    resourceLimits = [ memory: 703.GB, cpus: 36 , time: 168.h ]
-    // suggested to request 9 cpus per gpu
-    clusterOptions = { "--gpus-per-node=${Math.max(1,Math.floor(task.cpus/9)) as int} --clusters=genius --account=$tier1_project" }
-    apptainer.runOptions = '--containall --cleanenv --nv'
-    singularity.runOptions = '--containall --cleanenv --nv'
+    resourceLimits = [ memory: 703.GB, cpus: 36 , time: 168.h ]
+    apptainer.runOptions = '--containall --cleanenv --nv'
+    singularity.runOptions = '--containall --cleanenv --nv'
+    clusterOptions = {
+        // suggested to use 9 cpus per gpu
+        def gpus = task.accelerator?.request ?: Math.max(1, Math.floor(task.cpus/9) as int)
+        "--gres=gpu:${gpus} --clusters=genius --account=$tier1_project"
+    }
 
     queue = {
         task.memory >= 175.GB ?
@@ -87,7 +90,10 @@ profiles {
 process {
     // 768 - 65 so 65GB for overhead, max is 720000MB
     resourceLimits = [ memory: 703.GB, cpus: 36, time: 168.h]
-    clusterOptions = { "--gpus-per-node=${Math.max(1,Math.floor(task.cpus/9)) as int} --clusters=genius --account=$tier1_project" }
+    clusterOptions = {
+        def gpus = task.accelerator?.request ?: Math.max(1, Math.floor(task.cpus/9) as int)
+        "--gres=gpu:${gpus} --clusters=genius --account=$tier1_project"
+    }
 
     queue = {
         task.memory >= 175.GB ?
@@ -112,10 +118,14 @@ profiles {
 }
 
 withLabel: '.*gpu.*'{
-    resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
-    clusterOptions = { "--gpus-per-node=${Math.max(1,Math.floor(task.cpus/16)) as int} --clusters=wice --account=$tier1_project" }
-    apptainer.runOptions = '--containall --cleanenv --nv'
-    singularity.runOptions = '--containall --cleanenv --nv'
+    resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
+    apptainer.runOptions = '--containall --cleanenv --nv'
+    singularity.runOptions = '--containall --cleanenv --nv'
+    clusterOptions = {
+        // suggested to use 16 cpus per gpu
+        def gpus = task.accelerator?.request ?: Math.max(1, Math.floor(task.cpus/16) as int)
+        "--gres=gpu:${gpus} --clusters=wice --account=$tier1_project"
+    }
 
     queue = {
         task.memory >= 239.GB ?
@@ -135,8 +145,10 @@ profiles {
 process {
     // 768 - 65 so 65GB for overhead, max is 720000MB
     resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
-    // suggested to request 16-18 cpus per gpu
-    clusterOptions = { "--gpus-per-node=${Math.max(1,Math.floor(task.cpus/16)) as int} --clusters=wice --account=$tier1_project" }
+    clusterOptions = {
+        def gpus = task.accelerator?.request ?: Math.max(1, Math.floor(task.cpus/16) as int)
+        "--gres=gpu:${gpus} --clusters=wice --account=$tier1_project"
+    }
 
     queue = {
         task.memory >= 239.GB ?
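The change follows the same pattern on both clusters: the single-line `--gpus-per-node` option becomes a multi-line closure that emits `--gres=gpu:N`, where `N` is taken from the `accelerator` directive if the task sets one and is otherwise estimated from the CPU request. The following standalone Groovy sketch (illustration only; `deriveGpus` and the sample numbers are not part of the config) shows how that fallback behaves:

```groovy
// Sketch of the GPU-count fallback used by the new clusterOptions closures.
// 'accelerator' stands in for task.accelerator?.request;
// 'cpusPerGpu' is 9 on genius and 16 on wice.
int deriveGpus(Integer accelerator, int taskCpus, int cpusPerGpu) {
    // an explicit accelerator request wins; otherwise allocate at least one GPU
    accelerator ?: Math.max(1, Math.floor(taskCpus / cpusPerGpu) as int)
}

assert deriveGpus(null, 4, 9)   == 1  // small task on genius still gets one GPU
assert deriveGpus(null, 18, 9)  == 2  // 18 CPUs on genius -> 2 GPUs
assert deriveGpus(null, 36, 16) == 2  // 36 CPUs on wice   -> 2 GPUs
assert deriveGpus(3, 18, 9)     == 3  // explicit accelerator overrides the heuristic

// e.g. the genius closure would then yield:
println "--gres=gpu:${deriveGpus(null, 18, 9)} --clusters=genius --account=<tier1_project>"
```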
11 changes: 7 additions & 4 deletions docs/vsc_kul_uhasselt.md
@@ -28,14 +28,14 @@ export NXF_CONDA_CACHEDIR="$VSC_SCRATCH/miniconda3/envs"
 
 # Optional tower key
 # export TOWER_ACCESS_TOKEN="<your_tower_access_token>"
-# export NXF_VER="<version>" # make sure it's larger then 24.04.0
+# export NXF_VER="<version>" # make sure it's 24.10.1 or later
 ```
 
 :::warning
-The current config is setup with array jobs. Make sure nextflow version >= 24.04.0, read [array jobs in nextflow](https://www.nextflow.io/docs/latest/process.html#array) you can do this in
+The current config is set up to use array jobs. Make sure your Nextflow version is >= 24.10.1 (see [array jobs in Nextflow](https://www.nextflow.io/docs/latest/process.html#array)); you can set this with:
 
 ```bash
-export NXF_VER=24.04.0
+export NXF_VER=24.10.1
 ```
 
 :::
@@ -64,10 +64,13 @@ nextflow run <pipeline> -profile vsc_kul_uhasselt,<CLUSTER> <Add your other para
 Here the cluster options are:
 
 - genius
+- genius_gpu
 - wice
+- wice_gpu
 - superdome
 
-> **NB:** The vsc_kul_uhasselt profile is based on a selected amount of SLURM partitions. Should you require resources outside of these limits (e.g.gpus) you will need to provide a custom config specifying an appropriate SLURM partition (e.g. 'gpu\*').
+> **NB:** The vsc_kul_uhasselt profile is based on a selected set of SLURM partitions and selects the most appropriate partition for each job to the best of its ability. Modules with a label containing `gpu` are allocated to a GPU partition even when the 'normal' `genius` profile is selected. Use the `genius_gpu` or `wice_gpu` profile to force jobs onto a GPU partition.
+> **NB:** If a module does not set the `accelerator` directive, the number of GPUs is derived from the requested CPUs (roughly one GPU per 9 CPUs on genius and one per 16 CPUs on wice).
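Illustration only (not part of this commit): a sketch of how a pipeline user could set the `accelerator` directive through a custom config so the profile derives the GPU count from an explicit request rather than from the CPU heuristic. The process name `FOO_GPU_TOOL` and the file name `custom.config` are hypothetical.

```groovy
// custom.config (hypothetical) -- pass it to the run with `-c custom.config`
process {
    withName: 'FOO_GPU_TOOL' {   // placeholder process name; adjust to your pipeline
        accelerator = 2          // read as task.accelerator.request -> "--gres=gpu:2"
        cpus        = 18         // CPU request is still applied as usual
    }
}
```

With that config, a run such as `nextflow run <pipeline> -profile vsc_kul_uhasselt,genius_gpu -c custom.config` should request two GPUs for that process instead of the `cpus/9` estimate.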
Use the `--cluster` option to specify the cluster you intend to use when submitting the job:
