diff --git a/apptainer/moose-dev.def b/apptainer/moose-dev.def
index 0df39422fad5..97cfb1d53ee9 100644
--- a/apptainer/moose-dev.def
+++ b/apptainer/moose-dev.def
@@ -29,8 +29,8 @@
 {%- set GPERF_DIR = '/opt/gperftools' -%}
 {#- The installation location for pprof -#}
 {%- set PPROF_DIR = '/opt/pprof' -%}
-{#- The installation location for libtorch -#}
-{%- set LIBTORCH_DEST = '/opt' -%}
+{#- We need this here because more than one block will use it -#}
+{%- set MOOSE_PYTHON_VERSION = '3.11' -%}
 {#- The script used to install wasp -#}
 {%- set WASP_BUILD_SCRIPT = 'update_and_rebuild_wasp.sh' -%}
 
@@ -52,7 +52,7 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB
 
 {%- if WITH_LIBTORCH %}
   # Make libtorch visible to moose
-  export LIBTORCH_DIR={{ LIBTORCH_DEST }}/libtorch
+  export LIBTORCH_DIR={{ MOOSE_MINIFORGE }}/lib/python{{ MOOSE_PYTHON_VERSION }}/site-packages/torch
   # Adding this to not get GPU initialization errors from MPICH
   export MPIR_CVAR_ENABLE_GPU=0
 {%- endif %}
@@ -81,7 +81,8 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB
 
   # Pinned versions
   MINIFORGE_VERSION=23.3.1-1
-  PYTHON_VERSION=3.11
+  PYTHON_VERSION={{ MOOSE_PYTHON_VERSION }}
+  echo $PYTHON_VERSION
   CODE_SERVER_VERSION=4.92.2
 
   # Install code-server
@@ -113,26 +114,9 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB
   git checkout ${WASP_GIT_SHA}
   git submodule update --init --recursive
 
-{%- if WITH_LIBTORCH %}
-  # Libtorch-related vars
-  LIBTORCH_DEST={{ LIBTORCH_DEST }}
-  LIBTORCH_VER=2.1.0
-  LIBTORCH_DISTRIBUTION={{ WITH_LIBTORCH }}
-
-  # Install libtorch from a precompiled package
-  ${ROOT_BUILD_DIR}/setup_libtorch.sh --version=${LIBTORCH_VER} --libtorch-dest=${LIBTORCH_DEST} --libtorch-distribution=${LIBTORCH_DISTRIBUTION} --cleanup
-
-{%- if WITH_LIBTORCH.startswith('cu') %}
-  # We install CUDA Toolkit if the user wants cuda-based libtorch.
-  # Right now this assumes that cuda-based distributions start with -cu-
-  dnf -y install epel-release
-  CUDA_RPM=${BUILD_DIR}/cuda.rpm
-  curl -L https://developer.download.nvidia.com/compute/cuda/11.4.0/local_installers/cuda-repo-rhel8-11-4-local-11.4.0_470.42.01-1.x86_64.rpm -o ${CUDA_RPM}
-  rpm -i ${CUDA_RPM}
-  dnf -y install cuda
-  rm -rf ${CUDA_RPM}
-{%- endif %}
-{%- endif %}
+  # Build and install wasp
+  # This is redundant; hopefully we can use the one from conda in the future
+  WASP_SRC_DIR=${WASP_SRC_DIR} ${WASP_BUILD_SCRIPT} -D CMAKE_INSTALL_PREFIX:STRING=${WASP_DIR}
 
   # Install miniforge in MOOSE_MINIFORGE
   MINIFORGE_SCRIPT=${ROOT_BUILD_DIR}/miniforge3.sh
@@ -156,9 +140,47 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB
   # Needed for coverage
   pip3 --no-cache install lcov-cobertura
 
-  # Build and install wasp
-  # This is redundant; hopefully we can use the one from conda in the future
-  WASP_SRC_DIR=${WASP_SRC_DIR} ${WASP_BUILD_SCRIPT} -D CMAKE_INSTALL_PREFIX:STRING=${WASP_DIR}
+{%- if WITH_LIBTORCH %}
+  # Libtorch-related vars
+  LIBTORCH_VER=2.3.0
+  LIBTORCH_DISTRIBUTION={{ WITH_LIBTORCH }}
+
+  # Clone pytorch
+  cd ${ROOT_BUILD_DIR}
+  git clone -b v${LIBTORCH_VER} --recursive https://github.com/pytorch/pytorch
+  cd pytorch
+
+  # Install the python requirements
+  pip install -r requirements.txt
+
+  # Below a certain version we need to downgrade our numpy
+  if { echo ${LIBTORCH_VER}; echo "2.2.0"; } | sort --version-sort --check=quiet; then
+    conda install -yq "numpy<2.0"
+  fi
+
+  # Fetch the patch that enables the successful compilation on this linux distro (-f: fail on HTTP errors instead of saving an error page)
+  curl -fL https://github.com/pytorch/pytorch/pull/99468.patch > 99468.patch
+  git apply 99468.patch
+
+  # We have to disable some errors because they emerge from compiler optimizations on certain distributions (sadly on this one too)
+  export CFLAGS+=" -Wno-error=maybe-uninitialized -Wno-error=uninitialized -Wno-error=restrict"
+  export CXXFLAGS+=" -Wno-error=maybe-uninitialized -Wno-error=uninitialized -Wno-error=restrict"
+
+  # Build and install; when CONDA_PREFIX is unset, fall back to the parent of the directory that holds the conda executable
+  export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-$(dirname "$(which conda)")/../}:${CMAKE_PREFIX_PATH}"
+  MAX_JOBS=${MOOSE_JOBS} python setup.py install
+
+{%- if WITH_LIBTORCH.startswith('cu') %}
+  # We install CUDA Toolkit if the user wants cuda-based libtorch.
+  # Right now this assumes that cuda-based distributions start with -cu-
+  dnf -y install epel-release
+  CUDA_RPM=${BUILD_DIR}/cuda.rpm
+  curl -L https://developer.download.nvidia.com/compute/cuda/11.4.0/local_installers/cuda-repo-rhel8-11-4-local-11.4.0_470.42.01-1.x86_64.rpm -o ${CUDA_RPM}
+  rpm -i ${CUDA_RPM}
+  dnf -y install cuda
+  rm -rf ${CUDA_RPM}
+{%- endif %}
+{%- endif %}
 
   # Install node.js. Requested by dschwen for bison
   dnf install -y nodejs npm
@@ -206,6 +228,3 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB
   {{ FILES_DIR }}/opt/code-server/bin/code-server-start /opt/code-server/bin/code-server-start
   {{ MOOSE_DIR }}/scripts/{{ WASP_BUILD_SCRIPT }} {{ ROOT_BUILD_DIR }}/{{ WASP_BUILD_SCRIPT }}
   {{ MOOSE_DIR }}/scripts/configure_wasp.sh {{ ROOT_BUILD_DIR }}/configure_wasp.sh
-{%- if WITH_LIBTORCH %}
-  {{ MOOSE_DIR }}/scripts/setup_libtorch.sh {{ ROOT_BUILD_DIR }}/setup_libtorch.sh
-{%- endif %}
diff --git a/scripts/setup_libtorch.sh b/scripts/setup_libtorch.sh
index 6fe725e2c6f5..a9bb631902d2 100755
--- a/scripts/setup_libtorch.sh
+++ b/scripts/setup_libtorch.sh
@@ -80,7 +80,7 @@ version_check() {
       loc_2=$1
       ;;
     *)
-      echo "'version_check' function does only supports '-g' and '-l' for comparison!"
+      echo "'version_check' function does only support '-g' and '-l' for comparison!"
       exit 0
       ;;
   esac