Skip to content

Commit

Permalink
Attempt to remove AWS S3 flaky cache for sccache (#2953)
Browse files Browse the repository at this point in the history
* backend(trtllm): attempt to remove AWS S3 flaky cache for sccache

* backend(trtllm): what if we expose ENV instead of inline?

* backend(trtllm): and with the right env var for gha sccache

* backend(trtllm): relax the way to detect sccache

* backend(trtllm): make sccache definition manually

* backend(trtllm): ok let's try to define the launchers in build.rs when rustc_wrapper is present

* backend(trtllm): export env variable in run, maybe?

* backend(trtllm): Cache mode max to cache intermediate layers

* backend(trtllm): inject ompi_version build arg in dependent step
  • Loading branch information
mfuntowicz authored Jan 27, 2025
1 parent 6cb41a8 commit 40b0027
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 80 deletions.
47 changes: 19 additions & 28 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ on:
hardware:
type: string
description: Hardware
# options:
# - cuda
# - cuda-trtllm
# - rocm
# - intel
# options:
# - cuda
# - cuda-trtllm
# - rocm
# - intel
required: true
release-tests:
description: "Run release integration tests"
Expand Down Expand Up @@ -41,19 +41,18 @@ jobs:
uses: actions/checkout@v4
- name: Inject slug/short variables
uses: rlespinasse/[email protected]
- name: Inject required variables for sccache to interact with Github Actions Cache
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
- name: Extract TensorRT-LLM version
run: |
echo "TENSORRT_LLM_VERSION=$(grep -oP '([a-z,0-9]{40})' $GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake)" >> $GITHUB_ENV
echo "TensorRT-LLM version: ${{ env.TENSORRT_LLM_VERSION }}"
- name: "Configure AWS Credentials"
id: aws-creds
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: us-east-1
role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
role-duration-seconds: 7200
output-credentials: true
- name: Construct harware variables
- name: Construct hardware variables
shell: bash
run: |
case ${{ inputs.hardware }} in
Expand All @@ -75,9 +74,6 @@ jobs:
export runs_on="ubuntu-latest"
export platform=""
export extra_pytest=""
export target="ci-runtime"
export sccache_s3_key_prefix="trtllm"
export sccache_region="us-east-1"
export build_type="dev"
;;
rocm)
Expand Down Expand Up @@ -128,8 +124,6 @@ jobs:
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
echo "TARGET=${target}" >> $GITHUB_ENV
echo "SCCACHE_S3_KEY_PREFIX=${sccache_s3_key_prefix}" >> $GITHUB_ENV
echo "SCCACHE_REGION=${sccache_region}" >> $GITHUB_ENV
echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
- name: Initialize Docker Buildx
uses: docker/setup-buildx-action@v3
Expand Down Expand Up @@ -196,17 +190,14 @@ jobs:
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
PLATFORM=${{ env.PLATFORM }}
build_type=${{ env.BUILD_TYPE }}
is_gha_build=true
aws_access_key_id=${{ steps.aws-creds.outputs.aws-access-key-id }}
aws_secret_access_key=${{ steps.aws-creds.outputs.aws-secret-access-key }}
aws_session_token=${{ steps.aws-creds.outputs.aws-session-token }}
sccache_bucket=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
sccache_s3_key_prefix=${{ env.SCCACHE_S3_KEY_PREFIX }}
sccache_region=${{ env.SCCACHE_REGION }}
sccache_gha_enabled=on
actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=max
- name: Final
id: final
run: |
Expand Down
60 changes: 28 additions & 32 deletions Dockerfile_trtllm
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
ARG cuda_arch_list="75-real;80-real;86-real;89-real;90-real"
ARG ompi_version="4.1.7"
ARG build_type=release
ARG is_gha_build=false
ARG ompi_version=4.1.7
ARG sccache_gha_enabled=no
ARG actions_cache_url=""
ARG actions_runtime_token=""

# CUDA dependent dependencies resolver stage
FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
Expand Down Expand Up @@ -34,19 +36,19 @@ ENV TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt

# Install OpenMPI
FROM cuda-builder AS mpi-builder
ARG ompi_version
WORKDIR /opt/src/mpi

ENV OMPI_TARBALL_FILENAME="openmpi-$ompi_version.tar.bz2"
ARG ompi_version
ENV OMPI_VERSION=${ompi_version}
ENV OMPI_TARBALL_FILENAME=openmpi-${OMPI_VERSION}.tar.bz2
ADD --checksum=sha256:54a33cb7ad81ff0976f15a6cc8003c3922f0f3d8ceed14e1813ef3603f22cd34 \
https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILENAME \
/opt/src/mpi/
https://download.open-mpi.org/release/open-mpi/v4.1/${OMPI_TARBALL_FILENAME} .

WORKDIR /opt/src/mpi
RUN tar --strip-components=1 -xf $OMPI_TARBALL_FILENAME &&\
RUN tar --strip-components=1 -xf ${OMPI_TARBALL_FILENAME} &&\
./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda --with-slurm && \
make -j all && \
make install && \
rm -rf "/opt/src/$OMPI_TARBALL_FILENAME"
rm -rf ${OMPI_TARBALL_FILENAME}/..

# Install TensorRT
FROM cuda-builder AS trt-builder
Expand All @@ -59,8 +61,11 @@ FROM cuda-builder AS tgi-builder
WORKDIR /usr/src/text-generation-inference

# Scoped global args reuse
ARG is_gha_build
ARG cuda_arch_list
ARG build_type
ARG sccache_gha_enabled
ARG actions_cache_url
ARG actions_runtime_token

# Install Rust
ENV PATH="/root/.cargo/bin:$PATH"
Expand All @@ -69,28 +74,17 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y &&
chmod -R a+w /root/.cargo && \
cargo install sccache --locked

# sccache-specific args - until we find a better, more generic way...
ARG aws_access_key_id
ARG aws_secret_access_key
ARG aws_session_token
ARG sccache_bucket
ARG sccache_s3_key_prefix
ARG sccache_region

ENV AWS_ACCESS_KEY_ID=$aws_access_key_id
ENV AWS_SECRET_ACCESS_KEY=$aws_secret_access_key
ENV AWS_SESSION_TOKEN=$aws_session_token
ENV SCCACHE_BUCKET=$sccache_bucket
ENV SCCACHE_S3_KEY_PREFIX=$sccache_s3_key_prefix
ENV SCCACHE_REGION=$sccache_region

ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig:$PKG_CONFIG_PATH"
ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
ENV PKG_CONFIG_PATH="/usr/local/mpi/lib/pkgconfig"
ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt"

ENV USE_LLD_LINKER=ON
ENV CUDA_ARCH_LIST=${cuda_arch_list}
ENV IS_GHA_BUILD=${is_gha_build}

# sccache-specific args - until we find a better, more generic way...
ENV SCCACHE_GHA_ENABLED=${sccache_gha_enabled}
ENV ACTIONS_CACHE_URL=${actions_cache_url}
ENV ACTIONS_RUNTIME_TOKEN=${actions_runtime_token}

COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
Expand All @@ -102,10 +96,12 @@ COPY launcher launcher
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi

RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
python3 backends/trtllm/scripts/setup_sccache.py --is-gha-build ${is_gha_build} && \
CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX \
RUSTC_WRAPPER=sccache \
ENV RUSTC_WRAPPER=sccache
ENV CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX
RUN export CMAKE_C_COMPILER_LAUNCHER=sccache && \
export CMAKE_CXX_COMPILER_LAUNCHER=sccache && \
export CMAKE_CUDA_COMPILER_LAUNCHER=sccache && \
mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
cargo build --profile ${build_type} --package text-generation-backends-trtllm --bin text-generation-backends-trtllm && \
sccache --show-stats

Expand Down
15 changes: 0 additions & 15 deletions backends/trtllm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,5 @@
cmake_minimum_required(VERSION 3.20)

# Select the compiler launcher (compilation cache) used for C, C++ and CUDA.
#  - When CMAKE_CXX_COMPILER_LAUNCHER is already defined, that value is reported
#    and mirrored onto the C and CUDA launchers as well.
#  - Otherwise ccache is looked up with find_program() and, if found, used for
#    all three languages. When it is not found, no launcher is configured.
if (DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
    message(STATUS "Using user specified cmake cxx compiler launcher: ${CMAKE_CXX_COMPILER_LAUNCHER}")
    foreach (tgi_launcher_lang IN ITEMS C CXX CUDA)
        set(CMAKE_${tgi_launcher_lang}_COMPILER_LAUNCHER "${CMAKE_CXX_COMPILER_LAUNCHER}")
    endforeach ()
else ()
    find_program(CCACHE_EXECUTABLE "ccache")
    if (CCACHE_EXECUTABLE)
        message(STATUS "Using ccache")
        foreach (tgi_launcher_lang IN ITEMS C CXX CUDA)
            set(CMAKE_${tgi_launcher_lang}_COMPILER_LAUNCHER "${CCACHE_EXECUTABLE}")
        endforeach ()
    endif ()
endif ()
# Do not leave the loop variable behind (pre-CMP0124 foreach() keeps it defined as empty).
unset(tgi_launcher_lang)

if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
endif ()
Expand Down
9 changes: 4 additions & 5 deletions backends/trtllm/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const TENSORRT_ROOT_DIR: Option<&str> = option_env!("TENSORRT_ROOT_DIR");
const NCCL_ROOT_DIR: Option<&str> = option_env!("NCCL_ROOT_DIR");

const IS_GHA_BUILD: LazyLock<bool> = LazyLock::new(|| {
option_env!("IS_GHA_BUILD").map_or(false, |value| match value.to_lowercase().as_str() {
option_env!("SCCACHE_GHA_ENABLED").map_or(false, |value| match value.to_lowercase().as_str() {
"on" => true,
"true" => true,
"1" => true,
Expand Down Expand Up @@ -138,10 +138,9 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf

if let Some(wrapper) = option_env!("RUSTC_WRAPPER") {
println!("cargo:warning=Using caching tool: {wrapper}");

env::set_var("CMAKE_C_COMPILER_LAUNCHER", wrapper);
env::set_var("CMAKE_CXX_COMPILER_LAUNCHER", wrapper);
env::set_var("CMAKE_CUDA_COMPILER_LAUNCHER", wrapper);
config.define("CMAKE_C_COMPILER_LAUNCHER", wrapper);
config.define("CMAKE_CXX_COMPILER_LAUNCHER", wrapper);
config.define("CMAKE_CUDA_COMPILER_LAUNCHER", wrapper);
}

// Allow to override which Python to use ...
Expand Down

0 comments on commit 40b0027

Please sign in to comment.