-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Compiled pytorch within MOOSE-dev container. #29690
base: next
Are you sure you want to change the base?
Changes from all commits
2367de3
0ef2288
a3ab37b
ac918e6
9ec061d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,8 +29,8 @@ | |
{%- set GPERF_DIR = '/opt/gperftools' -%} | ||
{#- The installation location for pprof -#} | ||
{%- set PPROF_DIR = '/opt/pprof' -%} | ||
{#- The installation location for libtorch -#} | ||
{%- set LIBTORCH_DEST = '/opt' -%} | ||
{#- We need this here because more than one block will use it -#} | ||
{%- set MOOSE_PYTHON_VERSION = '3.11' -%} | ||
|
||
{#- The script used to install wasp -#} | ||
{%- set WASP_BUILD_SCRIPT = 'update_and_rebuild_wasp.sh' -%} | ||
|
@@ -52,7 +52,7 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB | |
|
||
{%- if WITH_LIBTORCH %} | ||
# Make libtorch visible to moose | ||
export LIBTORCH_DIR={{ LIBTORCH_DEST }}/libtorch | ||
export LIBTORCH_DIR={{ MOOSE_MINIFORGE }}/lib/python{{ MOOSE_PYTHON_VERSION }}/site-packages/torch | ||
# Adding this to not get GPU initialization errors from MPICH | ||
export MPIR_CVAR_ENABLE_GPU=0 | ||
{%- endif %} | ||
|
@@ -81,7 +81,8 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB | |
|
||
# Pinned versions | ||
MINIFORGE_VERSION=23.3.1-1 | ||
PYTHON_VERSION=3.11 | ||
PYTHON_VERSION={{ MOOSE_PYTHON_VERSION }} | ||
echo $PYTHON_VERSION | ||
CODE_SERVER_VERSION=4.92.2 | ||
|
||
# Install code-server | ||
|
@@ -113,26 +114,9 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB | |
git checkout ${WASP_GIT_SHA} | ||
git submodule update --init --recursive | ||
|
||
{%- if WITH_LIBTORCH %} | ||
# Libtorch-related vars | ||
LIBTORCH_DEST={{ LIBTORCH_DEST }} | ||
LIBTORCH_VER=2.1.0 | ||
LIBTORCH_DISTRIBUTION={{ WITH_LIBTORCH }} | ||
|
||
# Install libtorch from a precompiled package | ||
${ROOT_BUILD_DIR}/setup_libtorch.sh --version=${LIBTORCH_VER} --libtorch-dest=${LIBTORCH_DEST} --libtorch-distribution=${LIBTORCH_DISTRIBUTION} --cleanup | ||
|
||
{%- if WITH_LIBTORCH.startswith('cu') %} | ||
# We install CUDA Toolkit if the user wants cuda-based libtorch. | ||
# Right now this assumes that the names of cuda-based distributions start with 'cu' | ||
dnf -y install epel-release | ||
CUDA_RPM=${BUILD_DIR}/cuda.rpm | ||
curl -L https://developer.download.nvidia.com/compute/cuda/11.4.0/local_installers/cuda-repo-rhel8-11-4-local-11.4.0_470.42.01-1.x86_64.rpm -o ${CUDA_RPM} | ||
rpm -i ${CUDA_RPM} | ||
dnf -y install cuda | ||
rm -rf ${CUDA_RPM} | ||
{%- endif %} | ||
{%- endif %} | ||
# Build and install wasp | ||
# This is redundant; hopefully we can use the one from conda in the future | ||
WASP_SRC_DIR=${WASP_SRC_DIR} ${WASP_BUILD_SCRIPT} -D CMAKE_INSTALL_PREFIX:STRING=${WASP_DIR} | ||
|
||
# Install miniforge in MOOSE_MINIFORGE | ||
MINIFORGE_SCRIPT=${ROOT_BUILD_DIR}/miniforge3.sh | ||
|
@@ -156,9 +140,47 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB | |
# Needed for coverage | ||
pip3 --no-cache install lcov-cobertura | ||
|
||
# Build and install wasp | ||
# This is redundant; hopefully we can use the one from conda in the future | ||
WASP_SRC_DIR=${WASP_SRC_DIR} ${WASP_BUILD_SCRIPT} -D CMAKE_INSTALL_PREFIX:STRING=${WASP_DIR} | ||
{%- if WITH_LIBTORCH %} | ||
# Libtorch-related vars | ||
LIBTORCH_VER=2.3.0 | ||
LIBTORCH_DISTRIBUTION={{ WITH_LIBTORCH }} | ||
|
||
# Clone pytorch | ||
cd ${ROOT_BUILD_DIR} | ||
git clone -b v${LIBTORCH_VER} --recursive https://github.com/pytorch/pytorch | ||
cd pytorch | ||
|
||
# Install the python requirements | ||
pip install -r requirements.txt | ||
|
||
# For PyTorch versions up to and including 2.2.0 we need to downgrade numpy to a release below 2.0 | ||
if { echo ${LIBTORCH_VER}; echo "2.2.0"; } | sort --version-sort --check=quiet; then | ||
conda install -yq "numpy<2.0" | ||
fi | ||
|
||
# Fetch the patch that enables the successful compilation on this linux distro | ||
curl -L https://github.com/pytorch/pytorch/pull/99468.patch > 99468.patch | ||
git apply 99468.patch | ||
|
||
# We have to disable some errors, since they are triggered by compiler optimizations on certain distributions (sadly on this one too) | ||
export CFLAGS+=" -Wno-error=maybe-uninitialized -Wno-error=uninitialized -Wno-error=restrict" | ||
export CXXFLAGS+=" -Wno-error=maybe-uninitialized -Wno-error=uninitialized -Wno-error=restrict" | ||
|
||
# Build and install | ||
export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-'$(dirname $(which conda))/../'}:${CMAKE_PREFIX_PATH}" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Installing something like this directly into CONDA_PREFIX isn't great, because conda doesn't know about it. Is this what they suggest you do? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, these are the official instructions. So you are suggesting installing this somewhere else and adding it to the Python path? |
||
MAX_JOBS=${MOOSE_JOBS} python setup.py install | ||
|
||
{%- if WITH_LIBTORCH.startswith('cu') %} | ||
# We install CUDA Toolkit if the user wants cuda-based libtorch. | ||
# Right now this assumes that the names of cuda-based distributions start with 'cu' | ||
dnf -y install epel-release | ||
CUDA_RPM=${BUILD_DIR}/cuda.rpm | ||
curl -L https://developer.download.nvidia.com/compute/cuda/11.4.0/local_installers/cuda-repo-rhel8-11-4-local-11.4.0_470.42.01-1.x86_64.rpm -o ${CUDA_RPM} | ||
rpm -i ${CUDA_RPM} | ||
dnf -y install cuda | ||
rm -rf ${CUDA_RPM} | ||
{%- endif %} | ||
{%- endif %} | ||
|
||
# Install node.js. Requested by dschwen for bison | ||
dnf install -y nodejs npm | ||
|
@@ -206,6 +228,3 @@ Fingerprints: 0CFFCAB55E806363601C442D211817B01E0911DB | |
{{ FILES_DIR }}/opt/code-server/bin/code-server-start /opt/code-server/bin/code-server-start | ||
{{ MOOSE_DIR }}/scripts/{{ WASP_BUILD_SCRIPT }} {{ ROOT_BUILD_DIR }}/{{ WASP_BUILD_SCRIPT }} | ||
{{ MOOSE_DIR }}/scripts/configure_wasp.sh {{ ROOT_BUILD_DIR }}/configure_wasp.sh | ||
{%- if WITH_LIBTORCH %} | ||
{{ MOOSE_DIR }}/scripts/setup_libtorch.sh {{ ROOT_BUILD_DIR }}/setup_libtorch.sh | ||
{%- endif %} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.