From 4e7c0fcdfdd37aa63ebf5fd9828dea0508c4fe6a Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 13:52:13 -0800 Subject: [PATCH 01/23] Add github tests --- .github/workflows/main.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a230a3738..ee3ad5b00 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -62,7 +62,15 @@ jobs: shell: bash -l {0} run: | conda activate gacode - echo "TODO" + export GACODE_PLATFORM=CONDA_CPU + export GACODE_ROOT=$PWD + . $GACODE_ROOT/shared/bin/gacode_setup + source $GACODE_ROOT/platform/env/env.${GACODE_PLATFORM} + echo "======= begin env =====" + env + echo "======= end env =====" + # test the code + (cd cgyro/bin && cgyro -r -n 4 -nomp 2) - name: Sanity checks shell: bash -l {0} From 7543586ebb4b28b0f98eefba462ec3d12d318bf4 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 14:11:51 -0800 Subject: [PATCH 02/23] Explicitly add python --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ee3ad5b00..3c4934b57 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,7 +25,7 @@ jobs: shell: bash -l {0} run: | df -h . - conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl + conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl python conda clean --yes -t df -h . 
conda activate gacode From 2748771fbd1f55aad1fb84b50fe50267e4a155bd Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 14:28:32 -0800 Subject: [PATCH 03/23] Add error checking --- .github/workflows/main.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3c4934b57..2e3a10dff 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -70,7 +70,12 @@ jobs: env echo "======= end env =====" # test the code - (cd cgyro/bin && cgyro -r -n 4 -nomp 2) + (cd cgyro/bin && cgyro -r -n 4 -nomp 2) | tee cgyro_reg.log + echo "======= results =====" + grep reg cgyro_reg.log |tee cgyro_reg.tests.log + grep PASS cgyro_reg.tests.log > cgyro_reg.tests.pass.log + cnt=`wc -l cgyro_reg.tests.pass.log |awk '{print $1}'` + if [ "$cnt" -eq 20 ]; then echo "== ALL passed =="; else echo "Some tests FAILED"; test -z "error"; fi - name: Sanity checks shell: bash -l {0} From 7b3b4993b0beb5ef6de5ef29058a12b6152227ab Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 14:34:15 -0800 Subject: [PATCH 04/23] Fix tab vs spaces --- .github/workflows/main.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2e3a10dff..5311c6673 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -71,11 +71,11 @@ jobs: echo "======= end env =====" # test the code (cd cgyro/bin && cgyro -r -n 4 -nomp 2) | tee cgyro_reg.log - echo "======= results =====" - grep reg cgyro_reg.log |tee cgyro_reg.tests.log - grep PASS cgyro_reg.tests.log > cgyro_reg.tests.pass.log - cnt=`wc -l cgyro_reg.tests.pass.log |awk '{print $1}'` - if [ "$cnt" -eq 20 ]; then echo "== ALL passed =="; else echo "Some tests FAILED"; test -z "error"; fi + echo "======= results =====" + grep reg cgyro_reg.log |tee cgyro_reg.tests.log + grep PASS cgyro_reg.tests.log > cgyro_reg.tests.pass.log + cnt=`wc -l 
cgyro_reg.tests.pass.log |awk '{print $1}'` + if [ "$cnt" -eq 20 ]; then echo "== ALL passed =="; else echo "Some tests FAILED"; test -z "error"; fi - name: Sanity checks shell: bash -l {0} From 7e03b13616486986ca13baaa786e7d22b936b39c Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 14:46:53 -0800 Subject: [PATCH 05/23] Add exec.CONDA_CPU --- platform/exec/exec.CONDA_CPU | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100755 platform/exec/exec.CONDA_CPU diff --git a/platform/exec/exec.CONDA_CPU b/platform/exec/exec.CONDA_CPU new file mode 100755 index 000000000..9132a6b7f --- /dev/null +++ b/platform/exec/exec.CONDA_CPU @@ -0,0 +1,13 @@ +#!/bin/sh +# GACODE Parallel execution script + +simdir=${1} +nmpi=${2} +exec=${3} +nomp=${4} +numa=${5} +mpinuma=${6} + +cd $simdir +mpiexec -env OMP_NUM_THREADS $nomp -n $nmpi $exec + From b2276477093935308718686095b82127af143ca1 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 15:01:07 -0800 Subject: [PATCH 06/23] Use openmp version of openblas --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5311c6673..81ea0b470 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,7 +39,7 @@ jobs: which clang clang -v fi - conda install --strict-channel-priority -c conda-forge fftw openblas + conda install --strict-channel-priority -c conda-forge fftw 'libopenblas=*=*openmp*' if [[ "$(uname -s)" == "Linux" ]]; then conda install --strict-channel-priority -c conda-forge mpich From 9c0ff22ce9e22cf3d65d6559b106abc74fd4b103 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 16:10:36 -0800 Subject: [PATCH 07/23] Use openmp version of openblas --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 81ea0b470..1ec294ee6 100644 --- 
a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,10 +39,10 @@ jobs: which clang clang -v fi - conda install --strict-channel-priority -c conda-forge fftw 'libopenblas=*=*openmp*' + conda install --yes --strict-channel-priority -c conda-forge fftw 'openblas=*=*openmp*' if [[ "$(uname -s)" == "Linux" ]]; then - conda install --strict-channel-priority -c conda-forge mpich + conda install --yes --strict-channel-priority -c conda-forge mpich # TODO: install PGI but do not source it # the makefile will do it automatically #./scripts/install_hpc_sdk.sh Date: Fri, 1 Mar 2024 16:19:58 -0800 Subject: [PATCH 08/23] Since this is CPU-only, use ubuntu-latest runner --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1ec294ee6..9623ee874 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: build-and-test: strategy: matrix: - os: [linux-gpu-cuda] + os: [ubuntu-latest] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 From 8dd9311e67abe2c02b6a1776015db8748dce382e Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 1 Mar 2024 17:02:28 -0800 Subject: [PATCH 09/23] Revert "Since this is CPU-only, use ubuntu-latest runner" - Tests take close to an hour there. This reverts commit fadf561a9450494a280c9ce8ae7c337962ebd786. 
--- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9623ee874..1ec294ee6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: build-and-test: strategy: matrix: - os: [ubuntu-latest] + os: [linux-gpu-cuda] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 From 4728ddf225934c2c0c45f6f25c08fb3fd693cff0 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 08:10:13 -0800 Subject: [PATCH 10/23] Add hwflavor matrix --- .github/workflows/main.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1ec294ee6..08734727b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -14,6 +14,7 @@ jobs: strategy: matrix: os: [linux-gpu-cuda] + hwflavor: [cpu, cuda] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 @@ -39,16 +40,20 @@ jobs: which clang clang -v fi - conda install --yes --strict-channel-priority -c conda-forge fftw 'openblas=*=*openmp*' - if [[ "$(uname -s)" == "Linux" ]]; - then + if [[ "$${{ matrix.hwflavor }}" == "cpu" ]]; then + conda install --yes --strict-channel-priority -c conda-forge fftw 'openblas=*=*openmp*' conda install --yes --strict-channel-priority -c conda-forge mpich - # TODO: install PGI but do not source it - # the makefile will do it automatically - #./scripts/install_hpc_sdk.sh Date: Mon, 4 Mar 2024 08:28:06 -0800 Subject: [PATCH 11/23] Add CONDA_NVHPC_GPU platform --- .github/workflows/main.yml | 4 ++-- cgyro/install/make.ext.CONDA_NVHPC_GPU | 2 ++ platform/env/env.CONDA_CPU | 4 ++-- platform/env/env.CONDA_NVHPC_GPU | 11 +++++++++++ 4 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 cgyro/install/make.ext.CONDA_NVHPC_GPU create mode 100644 platform/env/env.CONDA_NVHPC_GPU diff --git a/.github/workflows/main.yml 
b/.github/workflows/main.yml index 08734727b..7a58c6bae 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,8 +45,8 @@ jobs: conda install --yes --strict-channel-priority -c conda-forge mpich export GACODE_PLATFORM=CONDA_CPU else - # TODO: install PGI - export GACODE_PLATFORM=CONDA_GPU + # TODO: install NVIDIA HPC SDK + export GACODE_PLATFORM=CONDA_NVHPC_GPU if [[ "$${{ matrix.hwflavor }}" == "ompgpu" ]]; then export GACODE_OMPGPU=1 else diff --git a/cgyro/install/make.ext.CONDA_NVHPC_GPU b/cgyro/install/make.ext.CONDA_NVHPC_GPU new file mode 100644 index 000000000..f0747935a --- /dev/null +++ b/cgyro/install/make.ext.CONDA_NVHPC_GPU @@ -0,0 +1,2 @@ +cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90 + $(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90 diff --git a/platform/env/env.CONDA_CPU b/platform/env/env.CONDA_CPU index 8459a7dc9..04da4778d 100644 --- a/platform/env/env.CONDA_CPU +++ b/platform/env/env.CONDA_CPU @@ -5,8 +5,8 @@ if [ -n "$SSH_TTY" ] ; then fi # Recommended installation options -# conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl +# conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl python # conda activate gacode -# conda install --strict-channel-priority -c conda-forge fftw openblas +# conda install --strict-channel-priority -c conda-forge fftw 'openblas=*=*openmp*' # conda install --strict-channel-priority -c conda-forge mpich diff --git a/platform/env/env.CONDA_NVHPC_GPU b/platform/env/env.CONDA_NVHPC_GPU new file mode 100644 index 000000000..db411ad1b --- /dev/null +++ b/platform/env/env.CONDA_NVHPC_GPU @@ -0,0 +1,11 @@ +#!/bin/bash + +if [ -n "$SSH_TTY" ] ; then + echo "Setting up $GACODE_PLATFORM environment for gacode" +fi + +# Recommended installation options +# conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl 
python +# conda activate gacode +# on NVIDIA systems + From b249c7eb6aa9535134911a2636f0b7acdb4f2b4f Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 08:42:06 -0800 Subject: [PATCH 12/23] Add NVIDIA HPC download script --- .github/support_scripts/install_hpc_sdk.sh | 107 +++++++++++++++++++++ .github/workflows/main.yml | 19 +++- 2 files changed, 123 insertions(+), 3 deletions(-) create mode 100755 .github/support_scripts/install_hpc_sdk.sh diff --git a/.github/support_scripts/install_hpc_sdk.sh b/.github/support_scripts/install_hpc_sdk.sh new file mode 100755 index 000000000..32ccdefb8 --- /dev/null +++ b/.github/support_scripts/install_hpc_sdk.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# +# This is a helper script for installing the NVIDIA HPC SDK +# needed to compile a GPU-enabled version of CGYRO. +# +# Note: The script currently assumes Linux_x86_64 platform. +# + +if [ "x${SYSROOT_DIR}" == "x" ]; then + SYSROOT_DIR=${CONDA_PREFIX}/x86_64-conda-linux-gnu/sysroot/usr/lib64 +fi + +# Create GCC symbolic links +# since NVIDIA HPC SDK does not use the env variables +if [ "x${GCC}" == "x" ]; then + echo "ERROR: GCC not defined" + exit 1 +fi + +# usually $CONDA_PREFIX/bin/x86_64-conda_cos6-linux-gnu- +EXE_PREFIX=`echo "$GCC" |sed 's/gcc$//g'` + +echo "GCC pointing to ${EXE_PREFIX}gcc" +ls -l ${EXE_PREFIX}gcc + +mkdir conda_nv_bins +(cd conda_nv_bins && for f in \ + ar as c++ cc cpp g++ gcc ld nm ranlib gfortran strip; \ + do \ + ln -s ${EXE_PREFIX}${f} ${f}; \ + done ) + +export PATH=$PWD/conda_nv_bins:$PATH + +# Install the NVIDIA HPC SDK + +# This link may need to be updated, as new compiler versions are released +# Note: Verified that it works with v23.5 +if [ "x${NV_URL}" == "x" ]; then + NV_URL=https://developer.download.nvidia.com/hpc-sdk/24.1/nvhpc_2024_241_Linux_x86_64_cuda_multi.tar.gz +fi + +echo "Downloading the NVIDIA HPC SDK" +# Defaults to using curl +# set USE_CURL=N if you want to use aria2 or wget +if [ "x${USE_CURL}" == "x" ]; then + # 
defaults to using inline untarrring + # set INLINE_CURL=N if you want a temp copy of file on disk + if [ "x${INLINE_CURL}" == "xN" ]; then + curl "${NV_URL}" -o nvhpc.tgz + tar xpzf nvhpc.tgz + rm -f nvhpc.tgz + else + # Do not unpack things we do not use for cgyro + curl -s "${NV_URL}" | tar xpzf - --exclude '*libcusparse*' --exclude '*libcusolver*' --exclude '*libcurand*' --exclude '*profilers*' --exclude '*/doc/*' --exclude '*/plugin*' + fi +elif [ "x${USE_ARIA2}" == "x" ]; then + aria2c "${NV_URL}" + tar xpzf nvhpc_*.tar.gz + rm -f nvhpc_*.tar.gz +else + wget "${NV_URL}" + tar xpzf nvhpc_*.tar.gz + rm -f nvhpc_*.tar.gz +fi + +echo "Installing NVIDIA HPC SDK" + +# must patch the install scripts to find the right gcc +for f in nvhpc_*/install_components/install nvhpc_*/install_components/*/*/compilers/bin/makelocalrc nvhpc_*/install_components/install_cuda; do + sed -i -e "s#PATH=/#PATH=$PWD/conda_nv_bins:/#g" $f +done + + +export NVHPC_INSTALL_DIR=$PWD/hpc_sdk +export NVHPC_SILENT=true + +(cd nvhpc_*; ./install) + +# create helper scripts +mkdir setup_scripts +cat > setup_scripts/setup_nv_hpc_bins.sh << EOF +PATH=$PWD/conda_nv_bins:`ls -d $PWD/hpc_sdk/*/202*/compilers/bin`:\$PATH + +unset CPPFLAGS +unset CXXFLAGS +unset CFLAGS + +unset DEBUG_CPPFLAGS +unset DEBUG_CXXFLAGS +unset DEBUG_CFLAGS + +EOF + +# patch localrc to find crt1.o +for f in ${NVHPC_INSTALL_DIR}/*/202*/compilers/bin/localrc; do + echo "set DEFSTDOBJDIR=${SYSROOT_DIR};" >> $f + #echo "====localrc $f ====" + #cat $f + #echo "====" +done + +# we don't need the install dir anymore +rm -fr nvhpc_* + +echo "Setup script avaiabile in $PWD/setup_scripts/setup_nv_hpc_bins.sh" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7a58c6bae..e207ecb84 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -26,7 +26,7 @@ jobs: shell: bash -l {0} run: | df -h . 
- conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl python + conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl wget python conda clean --yes -t df -h . conda activate gacode @@ -45,7 +45,9 @@ jobs: conda install --yes --strict-channel-priority -c conda-forge mpich export GACODE_PLATFORM=CONDA_CPU else - # TODO: install NVIDIA HPC SDK + # install NVIDIA HPC SDK + .github/support_scripts/install_hpc_sdk.sh + source setup_scripts/setup_nv_hpc_bins.sh export GACODE_PLATFORM=CONDA_NVHPC_GPU if [[ "$${{ matrix.hwflavor }}" == "ompgpu" ]]; then export GACODE_OMPGPU=1 @@ -67,7 +69,18 @@ jobs: shell: bash -l {0} run: | conda activate gacode - export GACODE_PLATFORM=CONDA_CPU + setup_scripts/setup_nv_hpc_bins.sh + if [[ "$${{ matrix.hwflavor }}" == "cpu" ]]; then + export GACODE_PLATFORM=CONDA_CPU + else + source setup_scripts/setup_nv_hpc_bins.sh + export GACODE_PLATFORM=CONDA_NVHPC_GPU + if [[ "$${{ matrix.hwflavor }}" == "ompgpu" ]]; then + export GACODE_OMPGPU=1 + else + export GACODE_OMPGPU=0 + fi + fi export GACODE_ROOT=$PWD . 
$GACODE_ROOT/shared/bin/gacode_setup source $GACODE_ROOT/platform/env/env.${GACODE_PLATFORM} From 54d12652f8546f4c5a9ce57c5e6594f9ddcbe8d6 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 08:48:44 -0800 Subject: [PATCH 13/23] Add diagnostics --- .github/workflows/main.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e207ecb84..edc29016c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,13 +16,15 @@ jobs: os: [linux-gpu-cuda] hwflavor: [cpu, cuda] runs-on: ${{ matrix.os }} + env: + hwflavor: ${{ matrix.hwflavor }} steps: - uses: actions/checkout@v3 - uses: conda-incubator/setup-miniconda@v2 with: miniconda-version: "latest" auto-update-conda: true - - name: Install + - name: Install shell: bash -l {0} run: | df -h . @@ -30,7 +32,7 @@ jobs: conda clean --yes -t df -h . conda activate gacode - echo "$(uname -s)" + echo "OS: '$(uname -s)'" if [[ "$(uname -s)" == "Linux" ]]; then which x86_64-conda-linux-gnu-gcc @@ -40,7 +42,8 @@ jobs: which clang clang -v fi - if [[ "$${{ matrix.hwflavor }}" == "cpu" ]]; then + echo "HW flavor: '${hwflavor}'" + if [[ "${hwflavor}" == "cpu" ]]; then conda install --yes --strict-channel-priority -c conda-forge fftw 'openblas=*=*openmp*' conda install --yes --strict-channel-priority -c conda-forge mpich export GACODE_PLATFORM=CONDA_CPU @@ -49,7 +52,7 @@ jobs: .github/support_scripts/install_hpc_sdk.sh source setup_scripts/setup_nv_hpc_bins.sh export GACODE_PLATFORM=CONDA_NVHPC_GPU - if [[ "$${{ matrix.hwflavor }}" == "ompgpu" ]]; then + if [[ "${hwflavor}" == "ompgpu" ]]; then export GACODE_OMPGPU=1 else export GACODE_OMPGPU=0 @@ -68,14 +71,14 @@ jobs: - name: Tests shell: bash -l {0} run: | + echo "HW flavor: '${hwflavor}'" conda activate gacode - setup_scripts/setup_nv_hpc_bins.sh - if [[ "$${{ matrix.hwflavor }}" == "cpu" ]]; then + if [[ "${hwflavor}" == "cpu" ]]; then export 
GACODE_PLATFORM=CONDA_CPU else source setup_scripts/setup_nv_hpc_bins.sh export GACODE_PLATFORM=CONDA_NVHPC_GPU - if [[ "$${{ matrix.hwflavor }}" == "ompgpu" ]]; then + if [[ "${hwflavor}" == "ompgpu" ]]; then export GACODE_OMPGPU=1 else export GACODE_OMPGPU=0 From b7a12eb47d0bce6dcd9e22494a68b2b30bf88be5 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 09:56:56 -0800 Subject: [PATCH 14/23] Fix gpu build --- .github/support_scripts/install_hpc_sdk.sh | 2 +- platform/build/make.inc.CONDA_NVHPC_GPU | 33 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 platform/build/make.inc.CONDA_NVHPC_GPU diff --git a/.github/support_scripts/install_hpc_sdk.sh b/.github/support_scripts/install_hpc_sdk.sh index 32ccdefb8..86b51660a 100755 --- a/.github/support_scripts/install_hpc_sdk.sh +++ b/.github/support_scripts/install_hpc_sdk.sh @@ -81,7 +81,7 @@ export NVHPC_SILENT=true # create helper scripts mkdir setup_scripts cat > setup_scripts/setup_nv_hpc_bins.sh << EOF -PATH=$PWD/conda_nv_bins:`ls -d $PWD/hpc_sdk/*/202*/compilers/bin`:\$PATH +PATH=$PWD/conda_nv_bins:`ls -d $PWD/hpc_sdk/*/202*/comm_libs/openmpi4/bin`:`ls -d $PWD/hpc_sdk/*/202*/compilers/bin`:\$PATH unset CPPFLAGS unset CXXFLAGS diff --git a/platform/build/make.inc.CONDA_NVHPC_GPU b/platform/build/make.inc.CONDA_NVHPC_GPU new file mode 100644 index 000000000..0cf9df630 --- /dev/null +++ b/platform/build/make.inc.CONDA_NVHPC_GPU @@ -0,0 +1,33 @@ +IDENTITY="Generic NVHPC GPU" +CORES_PER_NODE=16 +NUMAS_PER_NODE=4 + +# Fortran 90/95 compiler +FC = mpif90 -module ${GACODE_ROOT}/modules -Mpreprocess -DUSE_INLINE -Mdefaultunit + +# Fortran 77 compiler +F77 = ${FC} + +# Compiler options/flags +ifneq ($(GACODE_OMPGPU),1) +FACC =-acc -Minfo=accel -Mcudalib=cufft +else +FACC =-mp=gpu -DOMPGPU -Minfo=mp,accel -Mcudalib=cufft +endif +FOMP =-mp -Mstack_arrays +FMATH =-r8 +FOPT =-fast +FDEBUG =-g -Kieee -Ktrap=fp,divz -Mbounds -Mchkptr -Mchkstk -traceback -Minform=inform 
+F2PY = f2py --fcompiler=pg + + +# System math libraries +LMATH=-llapack -lblas + +# NetCDF +NETCDF=-L${NETCDF_DIR}/lib -lnetcdff -lnetcdf +NETCDF_INC = ${NETCDF_DIR}/include + +# Archive +ARCH = ar cr + From 499a0b31c9abd78b17a8dd132004aae3d1e52fd6 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 10:53:40 -0800 Subject: [PATCH 15/23] Fix gpu test exec --- platform/exec/exec.CONDA_NVHPC_GPU | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100755 platform/exec/exec.CONDA_NVHPC_GPU diff --git a/platform/exec/exec.CONDA_NVHPC_GPU b/platform/exec/exec.CONDA_NVHPC_GPU new file mode 100755 index 000000000..71bd37fbf --- /dev/null +++ b/platform/exec/exec.CONDA_NVHPC_GPU @@ -0,0 +1,14 @@ +#!/bin/sh +# GACODE Parallel execution script + +simdir=${1} +nmpi=${2} +exec=${3} +nomp=${4} +numa=${5} +mpinuma=${6} + +cd $simdir +export OMP_NUM_THREADS=$nomp +mpiexec --mca plm_rsh_agent /bin/bash -oversubscribe mpiexec -n $nmpi $exec + From cd21d42aa69771818e8801064f60d0ff80b414d3 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 16:17:30 -0800 Subject: [PATCH 16/23] Fix gpu test exec --- platform/exec/exec.CONDA_NVHPC_GPU | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/exec/exec.CONDA_NVHPC_GPU b/platform/exec/exec.CONDA_NVHPC_GPU index 71bd37fbf..40b5b07b2 100755 --- a/platform/exec/exec.CONDA_NVHPC_GPU +++ b/platform/exec/exec.CONDA_NVHPC_GPU @@ -10,5 +10,5 @@ mpinuma=${6} cd $simdir export OMP_NUM_THREADS=$nomp -mpiexec --mca plm_rsh_agent /bin/bash -oversubscribe mpiexec -n $nmpi $exec +mpiexec -oversubscribe -n $nmpi $exec From dc269e6d164528a1c54e32a46a2ebcff7f696b5e Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 16:38:01 -0800 Subject: [PATCH 17/23] Add better diagnostics --- .github/workflows/main.yml | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index edc29016c..529c44acd 
100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -66,7 +66,11 @@ jobs: env echo "======= end env =====" # build the code + echo "======= building =====" (cd cgyro && make) + echo "======= cgyro bin =====" + (cd cgyro/src; ls -l cgyro) + (cd cgyro/src; ldd cgyro) - name: Tests shell: bash -l {0} @@ -90,13 +94,30 @@ jobs: echo "======= begin env =====" env echo "======= end env =====" + echo "======= cgyro bin =====" + (cd cgyro/src; ls -l cgyro) + (cd cgyro/src; ldd cgyro) + echo "======= testing =====" # test the code (cd cgyro/bin && cgyro -r -n 4 -nomp 2) | tee cgyro_reg.log + echo "======= last out =====" + if [ -f cgyro/bin/cgyro_regression_test/out ]; then cat cgyro/bin/cgyro_regression_test/out; fi echo "======= results =====" - grep reg cgyro_reg.log |tee cgyro_reg.tests.log - grep PASS cgyro_reg.tests.log > cgyro_reg.tests.pass.log + (grep reg cgyro_reg.log || test -f cgyro_reg.log) |tee cgyro_reg.tests.log + (grep PASS cgyro_reg.tests.log || test -f cgyro_reg.tests.log) > cgyro_reg.tests.pass.log cnt=`wc -l cgyro_reg.tests.pass.log |awk '{print $1}'` - if [ "$cnt" -eq 20 ]; then echo "== ALL passed =="; else echo "Some tests FAILED"; test -z "error"; fi + if [ "$cnt" -eq 20 ]; then + echo "== ALL passed ==" + else + echo "== Some tests FAILED" + find cgyro/bin/cgyro_regression_test + echo "==== cgyro_regression_test/out" + if [ -f cgyro/bin/cgyro_regression_test/out ]; then cat cgyro/bin/cgyro_regression_test/out; fi + echo "==== cgyro_regression_test/.../out.cgyro.info" + cat cgyro/bin/cgyro_regression_test/*/out.cgyro.info + echo "== Aborting" + test -z "error" + fi - name: Sanity checks shell: bash -l {0} From c3e9c9ca71c1573db1fe07ba9d196d3d75831b36 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Mon, 4 Mar 2024 19:07:33 -0800 Subject: [PATCH 18/23] Remove aggressive filters --- .github/support_scripts/install_hpc_sdk.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/.github/support_scripts/install_hpc_sdk.sh b/.github/support_scripts/install_hpc_sdk.sh index 86b51660a..d959e5667 100755 --- a/.github/support_scripts/install_hpc_sdk.sh +++ b/.github/support_scripts/install_hpc_sdk.sh @@ -53,7 +53,7 @@ if [ "x${USE_CURL}" == "x" ]; then rm -f nvhpc.tgz else # Do not unpack things we do not use for cgyro - curl -s "${NV_URL}" | tar xpzf - --exclude '*libcusparse*' --exclude '*libcusolver*' --exclude '*libcurand*' --exclude '*profilers*' --exclude '*/doc/*' --exclude '*/plugin*' + curl -s "${NV_URL}" | tar xpzf - --exclude '*profilers*' --exclude '*/doc/*' --exclude '*/plugin*' fi elif [ "x${USE_ARIA2}" == "x" ]; then aria2c "${NV_URL}" From 4dc902d55fa9b22c80ae153bf2a32d81c648320a Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 5 Mar 2024 09:57:16 -0800 Subject: [PATCH 19/23] Rename CONDA_CPU and CONDA_NVHPC_GPU to CI_CPU and CI_GPU --- .github/workflows/main.yml | 8 ++++---- .../install/{make.ext.CONDA_NVHPC_GPU => make.ext.CI_GPU} | 0 platform/build/{make.inc.CONDA_CPU => make.inc.CI_CPU} | 0 .../build/{make.inc.CONDA_NVHPC_GPU => make.inc.CI_GPU} | 6 +++--- platform/env/{env.CONDA_CPU => env.CI_CPU} | 0 platform/env/{env.CONDA_NVHPC_GPU => env.CI_GPU} | 0 platform/exec/{exec.CONDA_CPU => exec.CI_CPU} | 0 platform/exec/{exec.CONDA_NVHPC_GPU => exec.CI_GPU} | 0 8 files changed, 7 insertions(+), 7 deletions(-) rename cgyro/install/{make.ext.CONDA_NVHPC_GPU => make.ext.CI_GPU} (100%) rename platform/build/{make.inc.CONDA_CPU => make.inc.CI_CPU} (100%) rename platform/build/{make.inc.CONDA_NVHPC_GPU => make.inc.CI_GPU} (83%) rename platform/env/{env.CONDA_CPU => env.CI_CPU} (100%) rename platform/env/{env.CONDA_NVHPC_GPU => env.CI_GPU} (100%) rename platform/exec/{exec.CONDA_CPU => exec.CI_CPU} (100%) rename platform/exec/{exec.CONDA_NVHPC_GPU => exec.CI_GPU} (100%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 529c44acd..bf00ec603 100644 --- a/.github/workflows/main.yml +++ 
b/.github/workflows/main.yml @@ -46,12 +46,12 @@ jobs: if [[ "${hwflavor}" == "cpu" ]]; then conda install --yes --strict-channel-priority -c conda-forge fftw 'openblas=*=*openmp*' conda install --yes --strict-channel-priority -c conda-forge mpich - export GACODE_PLATFORM=CONDA_CPU + export GACODE_PLATFORM=CI_CPU else # install NVIDIA HPC SDK .github/support_scripts/install_hpc_sdk.sh source setup_scripts/setup_nv_hpc_bins.sh - export GACODE_PLATFORM=CONDA_NVHPC_GPU + export GACODE_PLATFORM=CI_GPU if [[ "${hwflavor}" == "ompgpu" ]]; then export GACODE_OMPGPU=1 else @@ -78,10 +78,10 @@ jobs: echo "HW flavor: '${hwflavor}'" conda activate gacode if [[ "${hwflavor}" == "cpu" ]]; then - export GACODE_PLATFORM=CONDA_CPU + export GACODE_PLATFORM=CI_CPU else source setup_scripts/setup_nv_hpc_bins.sh - export GACODE_PLATFORM=CONDA_NVHPC_GPU + export GACODE_PLATFORM=CI_GPU if [[ "${hwflavor}" == "ompgpu" ]]; then export GACODE_OMPGPU=1 else diff --git a/cgyro/install/make.ext.CONDA_NVHPC_GPU b/cgyro/install/make.ext.CI_GPU similarity index 100% rename from cgyro/install/make.ext.CONDA_NVHPC_GPU rename to cgyro/install/make.ext.CI_GPU diff --git a/platform/build/make.inc.CONDA_CPU b/platform/build/make.inc.CI_CPU similarity index 100% rename from platform/build/make.inc.CONDA_CPU rename to platform/build/make.inc.CI_CPU diff --git a/platform/build/make.inc.CONDA_NVHPC_GPU b/platform/build/make.inc.CI_GPU similarity index 83% rename from platform/build/make.inc.CONDA_NVHPC_GPU rename to platform/build/make.inc.CI_GPU index 0cf9df630..3a2996c9b 100644 --- a/platform/build/make.inc.CONDA_NVHPC_GPU +++ b/platform/build/make.inc.CI_GPU @@ -1,6 +1,6 @@ IDENTITY="Generic NVHPC GPU" CORES_PER_NODE=16 -NUMAS_PER_NODE=4 +NUMAS_PER_NODE=1 # Fortran 90/95 compiler FC = mpif90 -module ${GACODE_ROOT}/modules -Mpreprocess -DUSE_INLINE -Mdefaultunit @@ -10,9 +10,9 @@ F77 = ${FC} # Compiler options/flags ifneq ($(GACODE_OMPGPU),1) -FACC =-acc -Minfo=accel -Mcudalib=cufft +FACC =-acc 
-Minfo=accel -cudalib=cufft else -FACC =-mp=gpu -DOMPGPU -Minfo=mp,accel -Mcudalib=cufft +FACC =-mp=gpu -DOMPGPU -Minfo=mp,accel -cudalib=cufft endif FOMP =-mp -Mstack_arrays FMATH =-r8 diff --git a/platform/env/env.CONDA_CPU b/platform/env/env.CI_CPU similarity index 100% rename from platform/env/env.CONDA_CPU rename to platform/env/env.CI_CPU diff --git a/platform/env/env.CONDA_NVHPC_GPU b/platform/env/env.CI_GPU similarity index 100% rename from platform/env/env.CONDA_NVHPC_GPU rename to platform/env/env.CI_GPU diff --git a/platform/exec/exec.CONDA_CPU b/platform/exec/exec.CI_CPU similarity index 100% rename from platform/exec/exec.CONDA_CPU rename to platform/exec/exec.CI_CPU diff --git a/platform/exec/exec.CONDA_NVHPC_GPU b/platform/exec/exec.CI_GPU similarity index 100% rename from platform/exec/exec.CONDA_NVHPC_GPU rename to platform/exec/exec.CI_GPU From 828bad622d89b00900908148846351d59f08e101 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 5 Mar 2024 13:39:34 -0800 Subject: [PATCH 20/23] Use local compilers with NVIDIA HPC SDK for CI_GPU --- .github/support_scripts/install_hpc_sdk.sh | 80 +--------------------- .github/workflows/main.yml | 23 +++---- platform/env/env.CI_GPU | 5 +- 3 files changed, 12 insertions(+), 96 deletions(-) diff --git a/.github/support_scripts/install_hpc_sdk.sh b/.github/support_scripts/install_hpc_sdk.sh index d959e5667..dcc4c8ee9 100755 --- a/.github/support_scripts/install_hpc_sdk.sh +++ b/.github/support_scripts/install_hpc_sdk.sh @@ -2,77 +2,22 @@ # # This is a helper script for installing the NVIDIA HPC SDK -# needed to compile a GPU-enabled version of CGYRO. -# # Note: The script currently assumes Linux_x86_64 platform.
# -if [ "x${SYSROOT_DIR}" == "x" ]; then - SYSROOT_DIR=${CONDA_PREFIX}/x86_64-conda-linux-gnu/sysroot/usr/lib64 -fi - -# Create GCC symbolic links -# since NVIDIA HPC SDK does not use the env variables -if [ "x${GCC}" == "x" ]; then - echo "ERROR: GCC not defined" - exit 1 -fi - -# usually $CONDA_PREFIX/bin/x86_64-conda_cos6-linux-gnu- -EXE_PREFIX=`echo "$GCC" |sed 's/gcc$//g'` - -echo "GCC pointing to ${EXE_PREFIX}gcc" -ls -l ${EXE_PREFIX}gcc - -mkdir conda_nv_bins -(cd conda_nv_bins && for f in \ - ar as c++ cc cpp g++ gcc ld nm ranlib gfortran strip; \ - do \ - ln -s ${EXE_PREFIX}${f} ${f}; \ - done ) - -export PATH=$PWD/conda_nv_bins:$PATH - # Install the NVIDIA HPC SDK # This link may need to be updated, as new compiler versions are released -# Note: Verified that it works with v23.5 +# Note: Verified that it works with v24.1 if [ "x${NV_URL}" == "x" ]; then NV_URL=https://developer.download.nvidia.com/hpc-sdk/24.1/nvhpc_2024_241_Linux_x86_64_cuda_multi.tar.gz fi echo "Downloading the NVIDIA HPC SDK" -# Defaults to using curl -# set USE_CURL=N if you want to use aria2 or wget -if [ "x${USE_CURL}" == "x" ]; then - # defaults to using inline untarrring - # set INLINE_CURL=N if you want a temp copy of file on disk - if [ "x${INLINE_CURL}" == "xN" ]; then - curl "${NV_URL}" -o nvhpc.tgz - tar xpzf nvhpc.tgz - rm -f nvhpc.tgz - else - # Do not unpack things we do not use for cgyro - curl -s "${NV_URL}" | tar xpzf - --exclude '*profilers*' --exclude '*/doc/*' --exclude '*/plugin*' - fi -elif [ "x${USE_ARIA2}" == "x" ]; then - aria2c "${NV_URL}" - tar xpzf nvhpc_*.tar.gz - rm -f nvhpc_*.tar.gz -else - wget "${NV_URL}" - tar xpzf nvhpc_*.tar.gz - rm -f nvhpc_*.tar.gz -fi +curl -s "${NV_URL}" | tar xpzf - echo "Installing NVIDIA HPC SDK" -# must patch the install scripts to find the right gcc -for f in nvhpc_*/install_components/install nvhpc_*/install_components/*/*/compilers/bin/makelocalrc nvhpc_*/install_components/install_cuda; do - sed -i -e 
"s#PATH=/#PATH=$PWD/conda_nv_bins:/#g" $f -done - - export NVHPC_INSTALL_DIR=$PWD/hpc_sdk export NVHPC_SILENT=true @@ -81,27 +26,8 @@ export NVHPC_SILENT=true # create helper scripts mkdir setup_scripts cat > setup_scripts/setup_nv_hpc_bins.sh << EOF -PATH=$PWD/conda_nv_bins:`ls -d $PWD/hpc_sdk/*/202*/comm_libs/openmpi4/bin`:`ls -d $PWD/hpc_sdk/*/202*/compilers/bin`:\$PATH - -unset CPPFLAGS -unset CXXFLAGS -unset CFLAGS - -unset DEBUG_CPPFLAGS -unset DEBUG_CXXFLAGS -unset DEBUG_CFLAGS +module add ./hpc_sdk/modulefiles/nvhpc-openmpi3/* EOF -# patch localrc to find crt1.o -for f in ${NVHPC_INSTALL_DIR}/*/202*/compilers/bin/localrc; do - echo "set DEFSTDOBJDIR=${SYSROOT_DIR};" >> $f - #echo "====localrc $f ====" - #cat $f - #echo "====" -done - -# we don't need the install dir anymore -rm -fr nvhpc_* - echo "Setup script avaiabile in $PWD/setup_scripts/setup_nv_hpc_bins.sh" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bf00ec603..7d275d640 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -28,26 +28,20 @@ jobs: shell: bash -l {0} run: | df -h . - conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl wget python - conda clean --yes -t - df -h . - conda activate gacode - echo "OS: '$(uname -s)'" - if [[ "$(uname -s)" == "Linux" ]]; - then + echo "HW flavor: '${hwflavor}'" + if [[ "${hwflavor}" == "cpu" ]]; then + conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl python + conda clean --yes -t + df -h . 
+ conda activate gacode which x86_64-conda-linux-gnu-gcc x86_64-conda-linux-gnu-gcc -v x86_64-conda-linux-gnu-g++ -v - else - which clang - clang -v - fi - echo "HW flavor: '${hwflavor}'" - if [[ "${hwflavor}" == "cpu" ]]; then conda install --yes --strict-channel-priority -c conda-forge fftw 'openblas=*=*openmp*' conda install --yes --strict-channel-priority -c conda-forge mpich export GACODE_PLATFORM=CI_CPU else + # Assuming gfortran and modules are pre-installed # install NVIDIA HPC SDK .github/support_scripts/install_hpc_sdk.sh source setup_scripts/setup_nv_hpc_bins.sh @@ -76,8 +70,8 @@ jobs: shell: bash -l {0} run: | echo "HW flavor: '${hwflavor}'" - conda activate gacode if [[ "${hwflavor}" == "cpu" ]]; then + conda activate gacode export GACODE_PLATFORM=CI_CPU else source setup_scripts/setup_nv_hpc_bins.sh @@ -122,5 +116,4 @@ jobs: - name: Sanity checks shell: bash -l {0} run: | - conda activate gacode echo "TODO" diff --git a/platform/env/env.CI_GPU b/platform/env/env.CI_GPU index db411ad1b..42e468e46 100644 --- a/platform/env/env.CI_GPU +++ b/platform/env/env.CI_GPU @@ -4,8 +4,5 @@ if [ -n "$SSH_TTY" ] ; then echo "Setting up $GACODE_PLATFORM environment for gacode" fi -# Recommended installation options -# conda create -q --yes --strict-channel-priority -n gacode -c conda-forge gxx_linux-64 gfortran_linux-64 make curl python -# conda activate gacode -# on NVIDIA systems +# Expects NVIDIA HPC SDK to be installed From 62e2c4c71633533cdec72e6147927d0411cf67dc Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 5 Mar 2024 14:42:17 -0800 Subject: [PATCH 21/23] Add ompgpu test --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7d275d640..19b11aaef 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: os: [linux-gpu-cuda] - hwflavor: [cpu, cuda] + hwflavor: [cpu, cuda, ompgpu] runs-on: ${{ 
matrix.os }} env: hwflavor: ${{ matrix.hwflavor }} From fba1fa8d235200224508a59be4a7288161a7e5f2 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 5 Mar 2024 15:19:34 -0800 Subject: [PATCH 22/23] Clarify it is a CGYRO CI --- .github/workflows/{main.yml => cgyro.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{main.yml => cgyro.yml} (99%) diff --git a/.github/workflows/main.yml b/.github/workflows/cgyro.yml similarity index 99% rename from .github/workflows/main.yml rename to .github/workflows/cgyro.yml index 19b11aaef..0b4493477 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/cgyro.yml @@ -1,4 +1,4 @@ -name: gacode CI +name: cgyro CI on: push: From 49d35732d5cbb87eb9977da5cf30674c30e9303c Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 5 Mar 2024 15:42:31 -0800 Subject: [PATCH 23/23] Use openacc, not cuda, flavor --- .github/workflows/cgyro.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cgyro.yml b/.github/workflows/cgyro.yml index 0b4493477..4d80eedc2 100644 --- a/.github/workflows/cgyro.yml +++ b/.github/workflows/cgyro.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: os: [linux-gpu-cuda] - hwflavor: [cpu, cuda, ompgpu] + hwflavor: [cpu, openacc, ompgpu] runs-on: ${{ matrix.os }} env: hwflavor: ${{ matrix.hwflavor }}