From 63e698049762ecd7676f858c50ae1461f3aeb294 Mon Sep 17 00:00:00 2001 From: Francis Vitt Date: Tue, 21 Nov 2023 16:21:01 -0700 Subject: [PATCH 1/4] reasonable PE layouts for WACCM(x) and CAMChem --- cime_config/config_pes.xml | 1421 +++++++++++++++++++----------------- 1 file changed, 762 insertions(+), 659 deletions(-) diff --git a/cime_config/config_pes.xml b/cime_config/config_pes.xml index 4f6ac6abbc..912836a1ed 100644 --- a/cime_config/config_pes.xml +++ b/cime_config/config_pes.xml @@ -7,67 +7,67 @@ none - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 none - -1 - -1 - -1 - -1 - -1 - -1 - -1 - -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -77,34 +77,34 @@ none - -4 - -4 - -4 - -4 - -4 - -4 - -4 - -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -114,34 +114,34 @@ none - -4 - -4 - -4 - -4 - -4 - -4 - -4 - -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 - 2 - 2 - 2 - 2 - 2 - 2 - 2 - 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -151,34 +151,34 @@ none - -40 - -40 - -40 - -40 - -40 - -40 - -40 - -40 + -40 + -40 + -40 + -40 + -40 + -40 + -40 + -40 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -188,34 +188,34 @@ none - 2048 - 2048 - 2048 - 2048 - 2048 - 2048 - 2048 - 2048 + 2048 + 2048 + 2048 + 2048 + 2048 + 2048 + 2048 + 2048 - 8 - 8 - 8 - 8 - 8 - 8 - 8 - 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -225,34 +225,34 @@ none - 1200 - 1200 - 1200 - 1200 - 1200 - 1200 - 1200 - 1200 + 1200 + 
1200 + 1200 + 1200 + 1200 + 1200 + 1200 + 1200 - 3 - 3 - 3 - 3 - 3 - 3 - 3 - 3 + 3 + 3 + 3 + 3 + 3 + 3 + 3 + 3 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -260,36 +260,36 @@ - none + none - -16 - -16 - -16 - -16 - -16 - -16 - -16 - -16 + -16 + -16 + -16 + -16 + -16 + -16 + -16 + -16 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -299,34 +299,34 @@ none - 4800 - 4800 - 4800 - 4800 - 4800 - 4800 - 4800 - 4800 + 4800 + 4800 + 4800 + 4800 + 4800 + 4800 + 4800 + 4800 - 4 - 4 - 4 - 4 - 4 - 4 - 4 - 4 + 4 + 4 + 4 + 4 + 4 + 4 + 4 + 4 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -336,34 +336,34 @@ none - 16384 - 16384 - 16384 - 16384 - 16384 - 16384 - 16384 - 16384 + 16384 + 16384 + 16384 + 16384 + 16384 + 16384 + 16384 + 16384 - 8 - 8 - 8 - 8 - 8 - 8 - 8 - 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -373,34 +373,34 @@ none - 8192 - 8192 - 8192 - 8192 - 8192 - 8192 - 8192 - 8192 + 8192 + 8192 + 8192 + 8192 + 8192 + 8192 + 8192 + 8192 - 2 - 2 - 2 - 2 - 2 - 2 - 2 - 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 + 2 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -410,34 +410,34 @@ none - -32 - -32 - -32 - -32 - -32 - -32 - -32 - -32 + -32 + -32 + -32 + -32 + -32 + -32 + -32 + -32 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -447,34 +447,34 @@ none - 64 - 64 - 64 - 64 - 64 - 64 - 64 - 64 + 64 + 64 + 64 + 64 + 64 + 64 + 64 + 64 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -484,34 +484,34 @@ none - -2 - -2 - -2 - -2 - -2 - -2 - -2 - -2 + -2 + -2 + -2 + -2 + -2 + -2 + -2 + -2 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -521,34 +521,34 @@ none - 224 
- 224 - 224 - 224 - 224 - 224 - 224 - 224 + 224 + 224 + 224 + 224 + 224 + 224 + 224 + 224 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -558,34 +558,34 @@ none - 192 - 192 - 192 - 192 - 192 - 192 - 192 - 192 + 192 + 192 + 192 + 192 + 192 + 192 + 192 + 192 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -595,34 +595,34 @@ none - 192 - 192 - 192 - 192 - 192 - 192 - 192 - 192 + 192 + 192 + 192 + 192 + 192 + 192 + 192 + 192 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -632,34 +632,34 @@ none - 240 - 240 - 240 - 240 - 240 - 240 - 240 - 240 + 240 + 240 + 240 + 240 + 240 + 240 + 240 + 240 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -674,10 +674,10 @@ 360 360 360 - 360 - 360 - 360 - 360 + 360 + 360 + 360 + 360 3 @@ -771,40 +771,75 @@ + + + none + + -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + none - 192 - 192 - 192 - 192 - 192 - 192 - 192 - 192 + 192 + 192 + 192 + 192 + 192 + 192 + 192 + 192 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 - + @@ -843,6 +878,74 @@ + + + none + + -8 + -8 + -8 + -8 + -8 + -8 + -8 + -8 + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + none + + -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + @@ -850,34 +953,34 @@ none - -4 - -4 - -4 - -4 - -4 - -4 - -4 - -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 + -4 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -887,34 +990,34 @@ none - -208 - -208 - 
-208 - -208 - -208 - -208 - -208 - -208 + -208 + -208 + -208 + -208 + -208 + -208 + -208 + -208 - 8 - 8 - 8 - 8 - 8 - 8 - 8 - 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -924,34 +1027,34 @@ none - 384 - 384 - 384 - 384 - 384 - 384 - 384 - 384 + 384 + 384 + 384 + 384 + 384 + 384 + 384 + 384 - 3 - 3 - 3 - 3 - 3 - 3 - 3 - 3 + 3 + 3 + 3 + 3 + 3 + 3 + 3 + 3 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -961,34 +1064,34 @@ none - -8 - -8 - -8 - -8 - -8 - -8 - -8 - -8 + -8 + -8 + -8 + -8 + -8 + -8 + -8 + -8 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -996,34 +1099,34 @@ none - 768 - 768 - 768 - 768 - 768 - 768 - 768 - 768 + 768 + 768 + 768 + 768 + 768 + 768 + 768 + 768 - 3 - 3 - 3 - 3 - 3 - 3 - 3 - 3 + 3 + 3 + 3 + 3 + 3 + 3 + 3 + 3 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -1033,34 +1136,34 @@ none - 480 - 480 - 480 - 480 - 480 - 480 - 480 - 480 + 480 + 480 + 480 + 480 + 480 + 480 + 480 + 480 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -1070,34 +1173,34 @@ none - -16 - -16 - -16 - -16 - -16 - -16 - -16 - -16 + -16 + -16 + -16 + -16 + -16 + -16 + -16 + -16 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -1107,34 +1210,34 @@ none - 960 - 960 - 960 - 960 - 960 - 960 - 960 - 960 + 960 + 960 + 960 + 960 + 960 + 960 + 960 + 960 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 @@ -1144,14 +1247,14 @@ - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 From 9b1ff37330df5bdd568ddea42aa8bcfe9397695b Mon Sep 17 00:00:00 2001 From: Cheryl Craig Date: Thu, 30 Nov 2023 14:18:12 -0700 Subject: [PATCH 2/4] mods to get testing to work on derecho --- 
Externals.cfg | 6 +- bld/Makefile.in | 2 +- test/system/CAM_runcmnd.sh | 8 ++ test/system/test_driver.sh | 185 ++++++++++++++++++++++++++++++++++++- 4 files changed, 194 insertions(+), 7 deletions(-) diff --git a/Externals.cfg b/Externals.cfg index c7d4c271db..e905cb7658 100644 --- a/Externals.cfg +++ b/Externals.cfg @@ -9,14 +9,14 @@ required = True local_path = components/cice [cime] -tag = cime5.6.33 +tag = cime5.6.45 protocol = git repo_url = https://github.com/ESMCI/cime required = True local_path = cime [cism] -tag = release-cesm2.0.04 +tag = cism-release-cesm2.1.2_03 protocol = git repo_url = https://github.com/ESCOMP/cism-wrapper externals = Externals_CISM.cfg @@ -24,7 +24,7 @@ required = True local_path = components/cism [clm] -tag = release-clm5.0.27 +tag = release-clm5.0.35 protocol = git repo_url = https://github.com/ESCOMP/ctsm externals = Externals_CLM.cfg diff --git a/bld/Makefile.in b/bld/Makefile.in index 9f16f305b8..b74c81efd2 100644 --- a/bld/Makefile.in +++ b/bld/Makefile.in @@ -408,7 +408,7 @@ ifeq ($(FC_TYPE),intel) CPPDEF += -DCPRINTEL $(NO_CONTIGUOUS_FLAG) CFLAGS += -std=gnu99 FC_FLAGS := -ftz -convert big_endian -fp-model source -no-fma \ - -qno-opt-dynamic-align -assume realloc_lhs -xHost + -qno-opt-dynamic-align -assume realloc_lhs FC_FLAGS_O3OPT:= $(FC_FLAGS) FC_FLAGS_NOOPT:= -ftz -convert big_endian -O0 FREEFLAGS := diff --git a/test/system/CAM_runcmnd.sh b/test/system/CAM_runcmnd.sh index 5065c6c064..4f874564bb 100755 --- a/test/system/CAM_runcmnd.sh +++ b/test/system/CAM_runcmnd.sh @@ -41,6 +41,14 @@ if [ $run_mode = mpi ] || [ $run_mode = hybrid ]; then # cmnd="${cmnd} ddt --connect mpiexec_mpt -np $ntasks omplace -vv " ;; + # Derecho + de* ) + + cmnd="${cmnd} mpiexec -n $2 " + # cmnd="${cmnd} ddt --connect mpiexec_mpt -np $ntasks omplace -vv " + ;; + + # hobart and leehill hob* | h[[:digit:]]* | le* | izu* | i[[:digit:]]* ) diff --git a/test/system/test_driver.sh b/test/system/test_driver.sh index e309e3cbef..77d6f40c6d 100755 --- 
a/test/system/test_driver.sh +++ b/test/system/test_driver.sh @@ -2,7 +2,7 @@ # # test_driver.sh: driver for the testing of CAM with standalone scripts # -# usage on hobart, izumi, leehill, cheyenne +# usage on hobart, izumi, leehill, cheyenne, derecho # ./test_driver.sh # # **more details in the CAM testing user's guide, accessible @@ -323,6 +323,163 @@ cat > ${submit_script_cime} << EOF #PBS -j oe #PBS -l inception=login +EOF + +##^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ writing to batch script ^^^^^^^^^^^^^^^^^^^ + ;; + + ##derecho + derecho* | dec* ) + submit_script="`pwd -P`/test_driver_derecho_${cur_time}.sh" + submit_script_cb="`pwd -P`/test_driver_derecho_cb_${cur_time}.sh" + submit_script_cime="`pwd -P`/test_driver_derecho_cime_${cur_time}.sh" + + if [ -z "$CAM_ACCOUNT" ]; then + echo "ERROR: Must set the environment variable CAM_ACCOUNT" + exit 2 + fi + + if [ -z "$CAM_BATCHQ" ]; then + export CAM_BATCHQ="main" + fi + + # wallclock for run job + wallclock_limit="5:00:00" + + if [ $gmake_j = 0 ]; then + gmake_j=128 + fi + + # run tests using 32 tasks, 2 threads/task + CAM_TASKS=32 + CAM_THREADS=2 + + # change parallel configuration on 2 nodes using 32 tasks, 1 thread/task + CAM_RESTART_TASKS=32 + CAM_RESTART_THREADS=1 + + mach_workspace="/glade/derecho/scratch" + +##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv writing to batch script vvvvvvvvvvvvvvvvvvv + +cat > ${submit_script_cb} << EOF +#!/bin/sh +# +#PBS -N test_dr +#PBS -q $CAM_BATCHQ +#PBS -A $CAM_ACCOUNT +#PBS -l walltime=2:00:00 +#PBS -l select=1:ncpus=128:mpiprocs=128 +#PBS -j oe + +export TMPDIR=/glade/derecho/scratch/$USER + +if [ -n "\$PBS_JOBID" ]; then #batch job + export JOBID=\`echo \${PBS_JOBID} | cut -f1 -d'.'\` + initdir=`pwd -P` + interactive=false +else + interactive=true +fi + +export CAM_RBOPTIONS="build_only" + +## create_newcase looks for account number in ACCOUNT environment variable +export ACCOUNT=$CAM_ACCOUNT + +# tasks and threads need to be set in the cb script because
TCB_ccsm.sh uses +# them to set the pe_layout +export CAM_THREADS=$CAM_THREADS +export CAM_TASKS=$CAM_TASKS + +source $LMOD_ROOT/lmod/init/sh + +##module load intel/19.0.5 +module load mkl +##module list + +export INC_NETCDF=\${NCAR_ROOT_NETCDF}/include +export LIB_NETCDF=\${NCAR_ROOT_NETCDF}/lib + +export CFG_STRING="-cc mpicc -fc mpif90 -fc_type intel -ldflags -mkl=cluster" +export MAKE_CMD="gmake -j $gmake_j" +export CCSM_MACH="derecho" +export MACH_WORKSPACE="$mach_workspace" +dataroot=${CESMDATAROOT} +echo_arg="-e" +input_file="tests_pretag_cheyenne" + +EOF + +#------------------------------------------- + +cat > ${submit_script} << EOF +#!/bin/sh +# +#PBS -N test_dr +#PBS -q $CAM_BATCHQ +#PBS -A $CAM_ACCOUNT +#PBS -l walltime=$wallclock_limit +#PBS -l select=2:ncpus=128:mpiprocs=128:ompthreads=2 +#PBS -j oe + +export TMPDIR=/glade/derecho/scratch/$USER + +if [ -n "\$PBS_JOBID" ]; then #batch job + export JOBID=\`echo \${PBS_JOBID} | cut -f1 -d'.'\` + initdir=`pwd -P` + interactive=false +else + interactive=true +fi + +export CAM_RBOPTIONS="run_only" +ulimit -c unlimited + +##omp threads +export OMP_STACKSIZE=256M +export CAM_THREADS=$CAM_THREADS +export CAM_RESTART_THREADS=$CAM_RESTART_THREADS + +##mpi tasks +export CAM_TASKS=$CAM_TASKS +export CAM_RESTART_TASKS=$CAM_RESTART_TASKS + +##derecho hacks to avoid MPI_LAUNCH_TIMEOUT (exported so child processes of the job see them) +export MPI_IB_CONGESTED=1 +export MPI_LAUNCH_TIMEOUT=40 + +source $LMOD_ROOT/lmod/init/sh + +##module load intel/19.0.5 +##module load mkl +##module list + +export CCSM_MACH="derecho" +export MACH_WORKSPACE="$mach_workspace" +export CPRNC_EXE=${CESMDATAROOT}/tools/cime/tools/cprnc/cprnc + +dataroot=${CESMDATAROOT} + +echo_arg="-e" + +input_file="tests_pretag_cheyenne" + +EOF + +#------------------------------------------- + +cat > ${submit_script_cime} << EOF +#!/bin/bash +# +#PBS -N cime-tests +#PBS -q $CAM_BATCHQ +#PBS -A $CAM_ACCOUNT +#PBS -l walltime=$wallclock_limit +#PBS -l select=2:ncpus=128:mpiprocs=128:ompthreads=2 +#PBS -j oe + + EOF
##^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ writing to batch script ^^^^^^^^^^^^^^^^^^^ @@ -936,7 +1093,7 @@ if [ -n "${submit_script_cb}" ]; then case $hostname in # cheyenne - chey* | r* ) + chey* | r* | derecho* | dec* ) batch_queue_submit='qsub -V' ;; *) @@ -990,6 +1147,9 @@ comp="" if [ "${hostname:0:4}" == "chey" ]; then cesm_test_mach="cheyenne" fi +if [ "${hostname:0:4}" == "dere" ]; then + cesm_test_mach="derecho" +fi if [ "${hostname:0:6}" == "hobart" ]; then cesm_test_mach="hobart" fi @@ -1009,7 +1169,14 @@ if [ "${cesm_test_suite}" != "none" -a -n "${cesm_test_mach}" ]; then fi for cesm_test in ${cesm_test_suite}; do - testargs="--xml-category ${cesm_test} --xml-machine ${cesm_test_mach}" + # Force derecho to run the cheyenne testlist. + # After the transition to derecho is completed, this if statement can be removed and + # just the else needs to remain. + if [ "${cesm_test_mach}" == "derecho" ]; then + testargs="--xml-category ${cesm_test} --xml-machine cheyenne --mach ${cesm_test_mach} --retry 2" + else + testargs="--xml-category ${cesm_test} --xml-machine ${cesm_test_mach} --retry 2" + fi if [ -n "${use_existing}" ]; then test_id="${use_existing}" @@ -1085,6 +1252,13 @@ if [ "${cesm_test_suite}" != "none" -a -n "${cesm_test_mach}" ]; then qsub -V ${submit_script_cime} fi + if [ "${hostname:0:2}" == "de" ]; then + echo "cd ${script_dir}" >> ${submit_script_cime} + echo './create_test' ${testargs} >> ${submit_script_cime} + chmod u+x ${submit_script_cime} + qsub -V ${submit_script_cime} + fi + if [ "${hostname:0:6}" == "hobart" ]; then echo "cd ${script_dir}" >> ${submit_script_cime} echo './create_test' ${testargs} >> ${submit_script_cime} @@ -1119,6 +1293,11 @@ if $run_cam_regression; then qsub -V ${submit_script_cb} ;; + ##derecho + derecho* | dec* ) + qsub -V ${submit_script_cb} + ;; + ##hobart hob* | h[[:digit:]]* ) # qsub ${submit_script} From bf8510a0868341d16313adba5abbf0829620deab Mon Sep 17 00:00:00 2001 From: Cheryl Craig Date: Fri, 8 Dec 2023 
10:01:11 -0700 Subject: [PATCH 3/4] Updates for derecho --- Externals.cfg | 2 +- cime_config/config_pes.xml | 76 +++++++++++++++++++++++++++++++++ test/system/archive_baseline.sh | 10 +++++ 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/Externals.cfg b/Externals.cfg index e905cb7658..7f21dd6348 100644 --- a/Externals.cfg +++ b/Externals.cfg @@ -9,7 +9,7 @@ required = True local_path = components/cice [cime] -tag = cime5.6.45 +tag = cime5.6.47 protocol = git repo_url = https://github.com/ESMCI/cime required = True diff --git a/cime_config/config_pes.xml b/cime_config/config_pes.xml index 912836a1ed..7ce147e2d7 100644 --- a/cime_config/config_pes.xml +++ b/cime_config/config_pes.xml @@ -1243,6 +1243,44 @@ + + + + none + + 36 + 36 + 36 + 36 + 36 + 36 + 36 + 36 + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + + @@ -1260,6 +1298,44 @@ + + + + none + + -35 + -35 + -35 + -35 + -35 + -35 + -35 + -35 + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + + diff --git a/test/system/archive_baseline.sh b/test/system/archive_baseline.sh index 49ed028407..382f80154a 100755 --- a/test/system/archive_baseline.sh +++ b/test/system/archive_baseline.sh @@ -111,6 +111,16 @@ case $hostname in baselinedir="/glade/p/cesm/amwg/cesm_baselines/$cam_tag" ;; + de*) + echo "server: derecho" + if [ -z "$CAM_FC" ]; then + CAM_FC="INTEL" + fi + test_file_list="tests_pretag_derecho" + cam_tag=$1 + baselinedir="/glade/campaign/cesm/community/amwg/cam_baselines/$cam_tag" + ;; + * ) echo "ERROR: machine $hostname not currently supported"; exit 1 ;; esac From 18ca8a4e86056b6c560d920384ee684e590995f9 Mon Sep 17 00:00:00 2001 From: Cheryl Craig Date: Tue, 12 Dec 2023 11:39:14 -0700 Subject: [PATCH 4/4] Update ChangeLog --- doc/ChangeLog | 63 ++++++++++++++++++++++++++++++++++++++++++ doc/ChangeLog_template | 4 +-- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/doc/ChangeLog b/doc/ChangeLog index 
982433ed70..2e7bab5466 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,66 @@ + +=============================================================== + +Tag name: cam_cesm2_1_rel_60 +Originator(s): cacraig, fvitt +Date: Dec 12, 2023 +One-line Summary: Fix derecho tests on cesm2_1 branch +Github PR URL: https://github.com/ESCOMP/CAM/pull/934 + +Purpose of changes (include the issue number and title text for each relevant GitHub issue): + - derecho PE layouts for WACCM(x) and CAMChem in cesm2_1 branch: https://github.com/ESCOMP/CAM/pull/923 + - Get CAM CESM2.1 branch working on derecho: https://github.com/ESCOMP/CAM/issues/921 + +Describe any changes made to build system: Removed -xHost from the Intel Fortran compiler flags (FC_FLAGS) in bld/Makefile.in + +Describe any changes made to the namelist: N/A + +List any changes to the defaults for the boundary datasets: N/A + +Describe any substantial timing or memory changes: N/A + +Code reviewed by: nusbaume + +List all files eliminated: N/A + +List all files added and what they do: N/A + +List all existing files that have been modified, and describe the changes: +M Externals.cfg + - Update externals (cime5.6.47, cism-release-cesm2.1.2_03, release-clm5.0.35) + +M bld/Makefile.in + - Remove -xHost from the Intel Fortran compiler flags (FC_FLAGS) to allow it to work on derecho + +M cime_config/config_pes.xml + - Update PE layouts for derecho + +M doc/ChangeLog_template +M test/system/CAM_runcmnd.sh +M test/system/archive_baseline.sh +M test/system/test_driver.sh + - Add sections for running on derecho + +If there were any failures reported from running test_driver.sh on any test +platform, and checkin with these failures has been OK'd by the gatekeeper, +then copy the lines from the td.*.status files for the failed tests to the +appropriate machine below. All failed tests must be justified.
+ +cheyenne/intel: all BFB + +cheyenne/intel/aux_cam: all BFB + +derecho/intel: all tests run - no baselines for comparison since initial run on this machine + +derecho/intel/aux_cam: all tests run - no baselines for comparison since initial run on this machine + +izumi/nag: all BFB, except that random TER tests would fail after the job completed running, but before the test was complete. + Jesse and Cheryl ran numerous times and were able to get different tests to fail. Since this is an older branch on + a machine which may be nearing its end-of-life, we agreed to tag this without further investigation. + +izumi/pgi: No longer supported - none run + +=============================================================== =============================================================== Tag name: cam_cesm2_1_rel_59 diff --git a/doc/ChangeLog_template b/doc/ChangeLog_template index 9919f58835..a8d5d7af7f 100644 --- a/doc/ChangeLog_template +++ b/doc/ChangeLog_template @@ -29,9 +29,9 @@ platform, and checkin with these failures has been OK'd by the gatekeeper, then copy the lines from the td.*.status files for the failed tests to the appropriate machine below. All failed tests must be justified. -cheyenne/intel: +derecho/intel: -cheyenne/intel/aux_cam: +derecho/intel/aux_cam: izumi/nag: