diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index bf32a3d2b..5e10307e8 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -348,7 +348,7 @@ jobs: - name: Dry run dynamic quicksilver-openmp on nosite-x86_64 with allocation modifier run: | - ./bin/benchpark experiment init --dest=quicksilver-openmp quicksilver+openmp +weak~single_node + ./bin/benchpark experiment init --dest=quicksilver-openmp quicksilver+openmp +weak ./bin/benchpark setup ./quicksilver-openmp nosite-x86_64 workspace/ . workspace/setup.sh ramble \ @@ -419,7 +419,7 @@ jobs: - name: Dry run dynamic saxpy/openmp with dynamic llnl-cluster ruby run: | - ./bin/benchpark system init --dest=ruby-system llnl-cluster cluster=ruby + ./bin/benchpark system init --dest=ruby-system llnl-cluster cluster=ruby max_node_limit=0 system_id=$(./bin/benchpark system id ./ruby-system) ./bin/benchpark experiment init --dest=saxpy-openmp saxpy+openmp ./bin/benchpark setup ./saxpy-openmp ./ruby-system workspace/ @@ -432,7 +432,7 @@ jobs: - name: Dry run dynamic saxpy/openmp with dynamic llnl-cluster dane run: | - ./bin/benchpark system init --dest=dane-system llnl-cluster cluster=dane + ./bin/benchpark system init --dest=dane-system llnl-cluster cluster=dane max_node_limit=0 system_id=$(./bin/benchpark system id ./dane-system) ./bin/benchpark experiment init --dest=saxpy-openmp2 saxpy+openmp ./bin/benchpark setup ./saxpy-openmp2 ./dane-system workspace/ @@ -445,7 +445,7 @@ jobs: - name: Dry run dynamic saxpy/openmp with dynamic llnl-cluster magma run: | - ./bin/benchpark system init --dest=magma-system llnl-cluster cluster=magma + ./bin/benchpark system init --dest=magma-system llnl-cluster cluster=magma max_node_limit=0 ./bin/benchpark experiment init --dest=saxpy-openmp3 saxpy+openmp ./bin/benchpark setup ./saxpy-openmp3 ./magma-system workspace/ . workspace/setup.sh @@ -457,7 +457,7 @@ jobs: - name: Dry run dynamic saxpy/openmp with dynamic generic x86 run: | - ./bin/benchpark system init --dest=x86-system genericx86 + ./bin/benchpark system init --dest=x86-system genericx86 max_node_limit=0 ./bin/benchpark experiment init --dest=saxpy-omp-generic saxpy+openmp ./bin/benchpark setup ./saxpy-omp-generic ./x86-system workspace/ . workspace/setup.sh @@ -529,12 +529,13 @@ jobs: - name: Dry run dynamic saxpy/openmp with dynamic fugaku run: | - ./bin/benchpark system init --dest=fugaku-system fugaku + ./bin/benchpark system init --dest=fugaku-system fugaku max_node_limit=0 + system_id=$(./bin/benchpark system id ./fugaku-system) ./bin/benchpark experiment init --dest=saxpy-omp-fugaku saxpy+openmp ./bin/benchpark setup ./saxpy-omp-fugaku ./fugaku-system workspace/ . workspace/setup.sh ramble \ - --workspace-dir workspace/saxpy-omp-fugaku/Fugaku-cf3cb1d/workspace \ + --workspace-dir workspace/saxpy-omp-fugaku/$system_id/workspace \ --disable-progress-bar \ --disable-logger \ workspace setup --dry-run diff --git a/experiments/amg2023/experiment.py b/experiments/amg2023/experiment.py index d4ef26666..64d5b0254 100644 --- a/experiments/amg2023/experiment.py +++ b/experiments/amg2023/experiment.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from benchpark.error import BenchparkError from benchpark.directives import variant from benchpark.experiment import Experiment from benchpark.openmp import OpenMPExperiment @@ -53,35 +52,13 @@ class Amg2023( # ) def compute_applications_section(self): - # TODO: Replace with conflicts clause - scaling_modes = { - "strong": self.spec.satisfies("+strong"), - "weak": self.spec.satisfies("+weak"), - "throughput": self.spec.satisfies("+throughput"), - "single_node": self.spec.satisfies("+single_node"), - } - - scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] - if len(scaling_mode_enabled) != 1: - raise BenchparkError( - f"Only one type of scaling per experiment is allowed for application package {self.name}" - ) - # Number of processes in each dimension num_procs = {"px": 2, "py": 2, "pz": 2} # Per-process size (in zones) in each dimension problem_sizes = {"nx": 80, "ny": 80, "nz": 80} - if self.spec.satisfies("+single_node"): - n_resources = 1 - # TODO: Check if n_ranks / n_resources_per_node <= 1 - for pk, pv in num_procs.items(): - self.add_experiment_variable(pk, pv, True) - n_resources *= pv - for nk, nv in problem_sizes.items(): - self.add_experiment_variable(nk, nv, True) - elif self.spec.satisfies("+throughput"): + if self.spec.satisfies("+throughput"): n_resources = 1 for pk, pv in num_procs.items(): self.add_experiment_variable(pk, pv, True) @@ -124,6 +101,14 @@ def compute_applications_section(self): ] for k, v in scaled_variables.items(): self.add_experiment_variable(k, v, True) + else: + n_resources = 1 + # TODO: Check if n_ranks / n_resources_per_node <= 1 + for pk, pv in num_procs.items(): + self.add_experiment_variable(pk, pv, True) + n_resources *= pv + for nk, nv in problem_sizes.items(): + self.add_experiment_variable(nk, nv, True) if self.spec.satisfies("+openmp"): self.add_experiment_variable("n_ranks", n_resources, True) diff --git a/experiments/ior/experiment.py b/experiments/ior/experiment.py index 871e16a4f..ab095f162 100644 --- a/experiments/ior/experiment.py +++ b/experiments/ior/experiment.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from benchpark.error import BenchparkError from benchpark.directives import variant from benchpark.experiment import Experiment from benchpark.scaling import StrongScaling @@ -28,28 +27,11 @@ class Ior( ) def compute_applications_section(self): - # TODO: Replace with conflicts clause - scaling_modes = { - "strong": self.spec.satisfies("+strong"), - "weak": self.spec.satisfies("+weak"), - "single_node": self.spec.satisfies("+single_node"), - } - - scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] - if len(scaling_mode_enabled) != 1: - raise BenchparkError( - f"Only one type of scaling per experiment is allowed for application package {self.name}" - ) - num_nodes = {"n_nodes": 1} t = "{b}/256" self.add_experiment_variable("t", t, True) - if self.spec.satisfies("+single_node"): - for pk, pv in num_nodes.items(): - self.add_experiment_variable(pk, pv, True) - self.add_experiment_variable("b", "268435456", True) - elif self.spec.satisfies("+strong"): + if self.spec.satisfies("+strong"): scaled_variables = self.generate_strong_scaling_params( {tuple(num_nodes.keys()): list(num_nodes.values())}, int(self.spec.variants["scaling-factor"][0]), @@ -70,6 +52,10 @@ def compute_applications_section(self): self.add_experiment_variable(k, v, True) self.add_experiment_variable("b", "268435456", True) + else: + for pk, pv in num_nodes.items(): + self.add_experiment_variable(pk, pv, True) + self.add_experiment_variable("b", "268435456", True) self.add_experiment_variable("t", t, True) self.add_experiment_variable( diff --git a/experiments/kripke/experiment.py b/experiments/kripke/experiment.py index 94d59efa2..0a5b1de1a 100644 --- a/experiments/kripke/experiment.py +++ b/experiments/kripke/experiment.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from benchpark.error import BenchparkError from benchpark.directives import variant from benchpark.experiment import Experiment from benchpark.openmp import OpenMPExperiment @@ -36,20 +35,6 @@ class Kripke( ) def compute_applications_section(self): - # TODO: Replace with conflicts clause - scaling_modes = { - "strong": self.spec.satisfies("+strong"), - "weak": self.spec.satisfies("+weak"), - "throughput": self.spec.satisfies("+throughput"), - "single_node": self.spec.satisfies("+single_node"), - } - - scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] - if len(scaling_mode_enabled) != 1: - raise BenchparkError( - f"Only one type of scaling per experiment is allowed for application package {self.name}" - ) - input_variables = { "ngroups": 64, "gs": 1, @@ -67,15 +52,7 @@ def compute_applications_section(self): for k, v in input_variables.items(): self.add_experiment_variable(k, v, True) - if self.spec.satisfies("+single_node"): - n_resources = 1 - # TODO: Check if n_ranks / n_resources_per_node <= 1 - for pk, pv in num_procs.items(): - self.add_experiment_variable(pk, pv, True) - n_resources *= pv - for nk, nv in problem_sizes.items(): - self.add_experiment_variable(nk, nv, True) - elif self.spec.satisfies("+throughput"): + if self.spec.satisfies("+throughput"): n_resources = 1 for pk, pv in num_procs.items(): self.add_experiment_variable(pk, pv, True) @@ -118,6 +95,14 @@ def compute_applications_section(self): ] for k, v in scaled_variables.items(): self.add_experiment_variable(k, v, True) + else: + n_resources = 1 + # TODO: Check if n_ranks / n_resources_per_node <= 1 + for pk, pv in num_procs.items(): + self.add_experiment_variable(pk, pv, True) + n_resources *= pv + for nk, nv in problem_sizes.items(): + self.add_experiment_variable(nk, nv, True) if self.spec.satisfies("+openmp"): self.add_experiment_variable("n_ranks", n_resources, True) diff --git a/experiments/laghos/experiment.py b/experiments/laghos/experiment.py index 68f390793..a558ccac9 100644 --- a/experiments/laghos/experiment.py +++ b/experiments/laghos/experiment.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from benchpark.error import BenchparkError from benchpark.directives import variant from benchpark.experiment import Experiment from benchpark.scaling import StrongScaling @@ -29,25 +28,10 @@ class Laghos( ) def compute_applications_section(self): - # TODO: Replace with conflicts clause - scaling_modes = { - "strong": self.spec.satisfies("+strong"), - "single_node": self.spec.satisfies("+single_node"), - } - - scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] - if len(scaling_mode_enabled) != 1: - raise BenchparkError( - f"Only one type of scaling per experiment is allowed for application package {self.name}" - ) - # Number of initial nodes num_nodes = {"n_nodes": 1} - if self.spec.satisfies("+single_node"): - for pk, pv in num_nodes.items(): - self.add_experiment_variable(pk, pv, True) - elif self.spec.satisfies("+strong"): + if self.spec.satisfies("+strong"): scaled_variables = self.generate_strong_scaling_params( {tuple(num_nodes.keys()): list(num_nodes.values())}, int(self.spec.variants["scaling-factor"][0]), @@ -55,6 +39,9 @@ def compute_applications_section(self): ) for pk, pv in scaled_variables.items(): self.add_experiment_variable(pk, pv, True) + else: + for pk, pv in num_nodes.items(): + self.add_experiment_variable(pk, pv, True) self.add_experiment_variable( "n_ranks", "{sys_cores_per_node} * {n_nodes}", True diff --git a/experiments/osu-micro-benchmarks/experiment.py b/experiments/osu-micro-benchmarks/experiment.py index 0ef52f33f..db13dcdf8 100644 --- a/experiments/osu-micro-benchmarks/experiment.py +++ b/experiments/osu-micro-benchmarks/experiment.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: Apache-2.0 from benchpark.directives import variant -from benchpark.error import BenchparkError from benchpark.experiment import Experiment @@ -90,21 +89,10 @@ class OsuMicroBenchmarks(Experiment): ) def compute_applications_section(self): - scaling_modes = { - "single_node": self.spec.satisfies("+single_node"), - } - - scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] - if len(scaling_mode_enabled) != 1: - raise BenchparkError( - f"Only one type of scaling per experiment is allowed for application package {self.name}" - ) - num_nodes = {"n_nodes": 2} - if self.spec.satisfies("+single_node"): - for pk, pv in num_nodes.items(): - self.add_experiment_variable(pk, pv, True) + for pk, pv in num_nodes.items(): + self.add_experiment_variable(pk, pv, True) def compute_spack_section(self): system_specs = {} diff --git a/experiments/remhos/experiment.py b/experiments/remhos/experiment.py index 079ecac56..0a71fbecf 100644 --- a/experiments/remhos/experiment.py +++ b/experiments/remhos/experiment.py @@ -3,7 +3,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from benchpark.error import BenchparkError from benchpark.directives import variant from benchpark.experiment import Experiment from benchpark.scaling import StrongScaling @@ -31,26 +30,10 @@ class Remhos( ) def compute_applications_section(self): - # TODO: Replace with conflicts clause - scaling_modes = { - "strong": self.spec.satisfies("+strong"), - "single_node": self.spec.satisfies("+single_node"), - } - - scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] - if len(scaling_mode_enabled) != 1: - print(scaling_mode_enabled) - raise BenchparkError( - f"Only one type of scaling per experiment is allowed for application package {self.name}" - ) - # Number of initial nodes num_nodes = {"n_nodes": 1} - if self.spec.satisfies("+single_node"): - for pk, pv in num_nodes.items(): - self.add_experiment_variable(pk, pv, True) - elif self.spec.satisfies("+strong"): + if self.spec.satisfies("+strong"): scaled_variables = self.generate_strong_scaling_params( {tuple(num_nodes.keys()): list(num_nodes.values())}, int(self.spec.variants["scaling-factor"][0]), @@ -58,6 +41,9 @@ def compute_applications_section(self): ) for pk, pv in scaled_variables.items(): self.add_experiment_variable(pk, pv, True) + else: + for pk, pv in num_nodes.items(): + self.add_experiment_variable(pk, pv, True) self.add_experiment_variable( "n_ranks", "{sys_cores_per_node} * {n_nodes}", True diff --git a/legacy/systems/CSC-LUMI-HPECray-zen3-MI250X-Slingshot/variables.yaml b/legacy/systems/CSC-LUMI-HPECray-zen3-MI250X-Slingshot/variables.yaml index da7333c04..166463ede 100644 --- a/legacy/systems/CSC-LUMI-HPECray-zen3-MI250X-Slingshot/variables.yaml +++ b/legacy/systems/CSC-LUMI-HPECray-zen3-MI250X-Slingshot/variables.yaml @@ -13,6 +13,7 @@ variables: sys_gpus_per_node: "8" sys_mem_per_node: "512" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/CSCS-Daint-HPECray-haswell-P100-Infiniband/variables.yaml b/legacy/systems/CSCS-Daint-HPECray-haswell-P100-Infiniband/variables.yaml index 5ce00dcbe..4d0d5e03f 100644 --- a/legacy/systems/CSCS-Daint-HPECray-haswell-P100-Infiniband/variables.yaml +++ b/legacy/systems/CSCS-Daint-HPECray-haswell-P100-Infiniband/variables.yaml @@ -14,6 +14,7 @@ variables: sys_gpus_per_node: "1" sys_mem_per_node: "64" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/CSCS-Eiger-HPECray-zen2-Slingshot/variables.yaml b/legacy/systems/CSCS-Eiger-HPECray-zen2-Slingshot/variables.yaml index acee05641..e6bde18a4 100644 --- a/legacy/systems/CSCS-Eiger-HPECray-zen2-Slingshot/variables.yaml +++ b/legacy/systems/CSCS-Eiger-HPECray-zen2-Slingshot/variables.yaml @@ -10,6 +10,7 @@ variables: # sys_gpus_per_node unset # sys_mem_per_node unset max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/JSC-JUWELS-Booster-rome-A100-Infiniband/variables.yaml b/legacy/systems/JSC-JUWELS-Booster-rome-A100-Infiniband/variables.yaml index a07feeba3..b8a68c84f 100644 --- a/legacy/systems/JSC-JUWELS-Booster-rome-A100-Infiniband/variables.yaml +++ b/legacy/systems/JSC-JUWELS-Booster-rome-A100-Infiniband/variables.yaml @@ -12,6 +12,7 @@ variables: sys_cores_per_node: "48" sys_gpus_per_node: "4" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/LLNL-Dane-DELL-sapphirerapids-OmniPath/variables.yaml b/legacy/systems/LLNL-Dane-DELL-sapphirerapids-OmniPath/variables.yaml index a106ac8da..061d59492 100644 --- a/legacy/systems/LLNL-Dane-DELL-sapphirerapids-OmniPath/variables.yaml +++ b/legacy/systems/LLNL-Dane-DELL-sapphirerapids-OmniPath/variables.yaml @@ -8,6 +8,7 @@ variables: scheduler: "slurm" sys_cores_per_node: "112" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/LLNL-Magma-Penguin-icelake-OmniPath/variables.yaml b/legacy/systems/LLNL-Magma-Penguin-icelake-OmniPath/variables.yaml index 46ca2504b..7cce678c3 100644 --- a/legacy/systems/LLNL-Magma-Penguin-icelake-OmniPath/variables.yaml +++ b/legacy/systems/LLNL-Magma-Penguin-icelake-OmniPath/variables.yaml @@ -8,6 +8,7 @@ variables: scheduler: "slurm" sys_cores_per_node: "96" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/LLNL-Pascal-Penguin-broadwell-P100-OmniPath/variables.yaml b/legacy/systems/LLNL-Pascal-Penguin-broadwell-P100-OmniPath/variables.yaml index fa6dccf02..e62264f52 100644 --- a/legacy/systems/LLNL-Pascal-Penguin-broadwell-P100-OmniPath/variables.yaml +++ b/legacy/systems/LLNL-Pascal-Penguin-broadwell-P100-OmniPath/variables.yaml @@ -12,6 +12,7 @@ variables: sys_cores_per_node: "36" sys_gpus_per_node: "2" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/LLNL-Ruby-icelake-OmniPath/variables.yaml b/legacy/systems/LLNL-Ruby-icelake-OmniPath/variables.yaml index 5c6d5ed68..ac6e28778 100644 --- a/legacy/systems/LLNL-Ruby-icelake-OmniPath/variables.yaml +++ b/legacy/systems/LLNL-Ruby-icelake-OmniPath/variables.yaml @@ -9,6 +9,7 @@ variables: sys_cores_per_node: "56" sys_gpus_per_node: 0 max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/LLNL-Sierra-IBM-power9-V100-Infiniband/variables.yaml b/legacy/systems/LLNL-Sierra-IBM-power9-V100-Infiniband/variables.yaml index c4c802503..fe6a5a345 100644 --- a/legacy/systems/LLNL-Sierra-IBM-power9-V100-Infiniband/variables.yaml +++ b/legacy/systems/LLNL-Sierra-IBM-power9-V100-Infiniband/variables.yaml @@ -13,6 +13,7 @@ variables: sys_cores_per_node: "44" sys_gpus_per_node: "4" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/variables.yaml b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/variables.yaml index 508744c83..594e437ae 100644 --- a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/variables.yaml +++ b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/variables.yaml @@ -11,6 +11,7 @@ variables: sys_cores_per_node: "64" sys_gpus_per_node: "8" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/RCCS-Fugaku-Fujitsu-A64FX-TofuD/variables.yaml b/legacy/systems/RCCS-Fugaku-Fujitsu-A64FX-TofuD/variables.yaml index 5e29684d6..7db9b954c 100644 --- a/legacy/systems/RCCS-Fugaku-Fujitsu-A64FX-TofuD/variables.yaml +++ b/legacy/systems/RCCS-Fugaku-Fujitsu-A64FX-TofuD/variables.yaml @@ -18,6 +18,7 @@ variables: post_exec_cmds: | for F in $(ls -1v fjmpioutdir/bmexe.*); do cat $F >> {log_file}; done max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/TAMU-Grace-Dell-cascadelake-Infiniband/variables.yaml b/legacy/systems/TAMU-Grace-Dell-cascadelake-Infiniband/variables.yaml index 1d8d9d518..925e437e3 100644 --- a/legacy/systems/TAMU-Grace-Dell-cascadelake-Infiniband/variables.yaml +++ b/legacy/systems/TAMU-Grace-Dell-cascadelake-Infiniband/variables.yaml @@ -8,6 +8,7 @@ variables: scheduler: "slurm" sys_cores_per_node: "24" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/nosite-AWS_PCluster_Hpc6a-zen3-EFA/variables.yaml b/legacy/systems/nosite-AWS_PCluster_Hpc6a-zen3-EFA/variables.yaml index f5db177a1..9b02fcdd5 100644 --- a/legacy/systems/nosite-AWS_PCluster_Hpc6a-zen3-EFA/variables.yaml +++ b/legacy/systems/nosite-AWS_PCluster_Hpc6a-zen3-EFA/variables.yaml @@ -10,5 +10,6 @@ variables: batch_nodes: '#SBATCH -N {n_nodes}' batch_ranks: '#SBATCH -n {n_ranks}' batch_timeout: '#SBATCH -t {batch_time}:00' + max_node_limit: "1" sys_cpus_per_node: 96 sys_gpus_per_node: 0 diff --git a/legacy/systems/nosite-AWS_PCluster_Hpc7a-zen4-EFA/variables.yaml b/legacy/systems/nosite-AWS_PCluster_Hpc7a-zen4-EFA/variables.yaml index d92d39c6e..cdb361081 100644 --- a/legacy/systems/nosite-AWS_PCluster_Hpc7a-zen4-EFA/variables.yaml +++ b/legacy/systems/nosite-AWS_PCluster_Hpc7a-zen4-EFA/variables.yaml @@ -9,6 +9,7 @@ variables: sys_cores_per_node: "1" # sys_gpus_per_node unset max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "1" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/nosite-HPECray-zen3-MI250X-Slingshot/variables.yaml b/legacy/systems/nosite-HPECray-zen3-MI250X-Slingshot/variables.yaml index 59954bdbd..58c0af995 100644 --- a/legacy/systems/nosite-HPECray-zen3-MI250X-Slingshot/variables.yaml +++ b/legacy/systems/nosite-HPECray-zen3-MI250X-Slingshot/variables.yaml @@ -11,6 +11,7 @@ variables: sys_cores_per_node: "1" # sys_gpus_per_node unset max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "1" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/nosite-x86_64/variables.yaml b/legacy/systems/nosite-x86_64/variables.yaml index 2c5b01c5f..ba8c5466d 100644 --- a/legacy/systems/nosite-x86_64/variables.yaml +++ b/legacy/systems/nosite-x86_64/variables.yaml @@ -10,6 +10,7 @@ variables: extra_cmd_opts: | --oversubscribe max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/legacy/systems/test-extra-batch-opts/variables.yaml b/legacy/systems/test-extra-batch-opts/variables.yaml index dee749bd6..3bbbfaab3 100644 --- a/legacy/systems/test-extra-batch-opts/variables.yaml +++ b/legacy/systems/test-extra-batch-opts/variables.yaml @@ -16,6 +16,7 @@ variables: for F in $(ls -1v fjmpioutdir/bmexe.*); do cat $F >> {log_file}; done echo "done" max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "0" n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder" diff --git a/lib/benchpark/experiment.py b/lib/benchpark/experiment.py index 4143e1e11..772799915 100644 --- a/lib/benchpark/experiment.py +++ b/lib/benchpark/experiment.py @@ -48,19 +48,7 @@ def get_spack_variants(self): return None -class SingleNode: - variant( - "single_node", - default=True, - description="Single node execution mode", - ) - - class Helper(ExperimentHelper): - def get_helper_name_prefix(self): - return "single_node" if self.spec.satisfies("+single_node") else "" - - -class Experiment(ExperimentSystemBase, SingleNode): +class Experiment(ExperimentSystemBase): """This is the superclass for all benchpark experiments. ***The Experiment class*** @@ -176,6 +164,8 @@ def compute_applications_section_wrapper(self): self.compute_applications_section() + self.add_experiment_exclude("{n_nodes} > 0 and {n_nodes} <= {max_node_limit}") + expr_helper_list = [] for cls in self.helpers: helper_prefix = cls.get_helper_name_prefix() diff --git a/lib/benchpark/system.py b/lib/benchpark/system.py index 550790727..f874aa251 100644 --- a/lib/benchpark/system.py +++ b/lib/benchpark/system.py @@ -13,6 +13,7 @@ import benchpark.paths from benchpark.directives import ExperimentSystemBase +from benchpark.directives import variant import benchpark.repo from benchpark.runtime import RuntimeResources @@ -74,6 +75,13 @@ class System(ExperimentSystemBase): Tuple["benchpark.variant.Variant", "benchpark.spec.ConcreteSystemSpec"], ] + variant( + "max_node_limit", + default="1", + values=int, + description="Max number of allocatable nodes for experiments, 0 (no limits), default 1", + ) + def __init__(self, spec): self.spec: "benchpark.spec.ConcreteSystemSpec" = spec super().__init__() @@ -87,6 +95,7 @@ def initialize(self): self.scheduler = None self.timeout = "120" self.queue = None + self.max_node_limit = self.spec.variants["max_node_limit"][0] self.required = ["sys_cores_per_node", "scheduler", "timeout"] @@ -185,6 +194,7 @@ def variables_yaml(self): sys_cores_per_node: "{self.sys_cores_per_node}" {extras_as_cfg} max_request: "1000" # n_ranks/n_nodes cannot exceed this + max_node_limit: "{self.max_node_limit}" # 0: no limits, default: 1 n_ranks: '1000001' # placeholder value n_nodes: '1000001' # placeholder value batch_submit: "placeholder"