-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
containers/tei/{cpu,gpu}/1.6.0
(#132)
* Add `containers/tei/{cpu,gpu}/1.6.0` baseline * Update TEI `Dockerfile` for CPU and add `entrypoint.sh` * Update TEI `Dockerfile` and `entrypoint.sh` for GPU * Update `tei/{cpu,gpu}/1.6.0/Dockerfile` * Fix `FromAsCasing` warning
- Loading branch information
1 parent
4a97ad3
commit 1c31c51
Showing
4 changed files
with
314 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# Stage "tei": download the TEI v1.6.0 source tarball and unpack it under /tei
FROM alpine AS tei

RUN mkdir -p /tei
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.6.0.tar.gz /tei/sources.tar.gz
# Strip the archive's top-level directory so the sources sit directly in /tei
RUN tar -C /tei -x -f /tei/sources.tar.gz --strip-components=1
|
||
# Stage "chef": cargo-chef toolchain image with sccache configured as the
# rustc wrapper (adapted from the original TEI Dockerfile)
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
WORKDIR /usr/src

ENV SCCACHE=0.5.4 \
    RUSTC_WRAPPER=/usr/local/bin/sccache

# Download the sccache release named by $SCCACHE and unpack only its binary
# into /usr/local/bin
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v${SCCACHE}/sccache-v${SCCACHE}-x86_64-unknown-linux-musl.tar.gz \
        | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v${SCCACHE}-x86_64-unknown-linux-musl/sccache && \
    chmod +x /usr/local/bin/sccache
|
||
# Stage "planner": produce the cargo-chef recipe (recipe.json) from the TEI workspace
FROM chef AS planner

# Workspace manifests first, then the crates they reference
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router

RUN cargo chef prepare --recipe-path recipe.json
|
||
# Stage "builder": install Intel MKL, build the libfakeintel shim, cook the
# dependency recipe and bring in the TEI sources for the final builds
FROM chef AS builder

# Register the Intel oneAPI APT repository together with its signing key
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
        | gpg --dearmor \
        | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
        | tee /etc/apt/sources.list.d/oneAPI.list

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        intel-oneapi-mkl-devel=2024.0.0-49656 \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Shared object whose mkl_serv_intel_cpu_true() always returns 1; the runtime
# stage copies it and lists it in LD_PRELOAD
RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \
    gcc -shared -fPIC -o libfakeintel.so fakeintel.c

COPY --from=planner /usr/src/recipe.json recipe.json

# Build the dependencies described by the recipe, then print sccache statistics
RUN cargo chef cook --release --features ort --features candle --features mkl-dynamic --features google --no-default-features --recipe-path recipe.json && sccache -s

# Full TEI sources, shared by the http-builder and grpc-builder stages
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
|
||
# Stage "http-builder": build text-embeddings-router with the `http` feature
FROM builder AS http-builder

RUN cargo build --release --bin text-embeddings-router --no-default-features \
        --features ort --features candle --features mkl-dynamic --features http --features google \
    && sccache -s
|
||
# Stage "grpc-builder": build text-embeddings-router with the `grpc` feature
FROM builder AS grpc-builder

# Install protoc 21.12 (binary and bundled includes) under /usr/local
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/${PROTOC_ZIP} && \
    unzip -o ${PROTOC_ZIP} -d /usr/local bin/protoc && \
    unzip -o ${PROTOC_ZIP} -d /usr/local 'include/*' && \
    rm -f ${PROTOC_ZIP}

# Protobuf definitions from the TEI sources
COPY --from=tei /tei/proto proto

RUN cargo build --release --bin text-embeddings-router --no-default-features \
        --features grpc --features ort --features candle --features mkl-dynamic --features google \
    && sccache -s
|
||
# Stage "base": slim runtime image shared by the `grpc` and `http` targets.
# It carries the MKL shared objects, the libfakeintel shim, the Google Cloud
# CLI (used by entrypoint.sh to download models from GCS) and the entrypoint.
FROM debian:bookworm-slim AS base

# LD_PRELOAD points at the shim copied from the builder stage below;
# LD_LIBRARY_PATH covers the MKL shared objects copied into /usr/local/lib.
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    PORT=8080 \
    MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
    RAYON_NUM_THREADS=4 \
    LD_PRELOAD=/usr/local/libfakeintel.so \
    LD_LIBRARY_PATH=/usr/local/lib

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    libomp-dev \
    ca-certificates \
    libssl-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch...
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2
COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so

# Install the Google Cloud CLI in a single layer.
# The repository key is dearmored straight into the keyring referenced by
# `signed-by` (apt-key is deprecated), and the APT lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y google-cloud-sdk && \
    rm -rf /var/lib/apt/lists/*

# COPY custom entrypoint for Google
COPY --chmod=775 containers/tei/cpu/1.6.0/entrypoint.sh entrypoint.sh
|
||
# Target "grpc": runtime image shipping the router binary built by grpc-builder
FROM base AS grpc

COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

# entrypoint.sh receives the CMD values (or user-supplied arguments) as "$@"
ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]
|
||
# Target "http": runtime image shipping the router binary built by http-builder
FROM base AS http

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

# entrypoint.sh receives the CMD values (or user-supplied arguments) as "$@"
ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/bin/bash
# Entrypoint for the TEI CPU container.
#
# When AIP_STORAGE_URI is set to a GCS location (gs://...), its contents are
# copied to /tmp/model and MODEL_ID is exported to point at that directory.
# The script then execs text-embeddings-router with the arguments it received.
# Exits with status 1 if the GCS download fails.

# Check if AIP_STORAGE_URI starts with "gs://"
if [[ ${AIP_STORAGE_URI:-} == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Define the target directory
    TARGET_DIR="/tmp/model"
    mkdir -p "$TARGET_DIR"

    # Use `gcloud storage` to copy the content from GCS to the target directory.
    # The wildcard is quoted so that gcloud, not the shell, expands it.
    echo "Running: gcloud storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
    if gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Update MODEL_ID to point to the local directory
        echo "Updating MODEL_ID to point to the local directory."
        export MODEL_ID="$TARGET_DIR"
    else
        echo "Failed to download model from GCS."
        exit 1
    fi
fi

# Best effort: a failure to refresh the linker cache is reported but not fatal
ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

# "$@" must be quoted so arguments containing spaces or glob characters are
# forwarded unchanged; exec replaces the shell so the router receives signals.
exec text-embeddings-router "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# Stage "tei": download the TEI v1.6.0 source tarball and unpack it under /tei
FROM alpine AS tei

RUN mkdir -p /tei
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.6.0.tar.gz /tei/sources.tar.gz
# Strip the archive's top-level directory so the sources sit directly in /tei
RUN tar -C /tei -x -f /tei/sources.tar.gz --strip-components=1
|
||
# Stage "base-builder": CUDA devel image with sccache, a rustup-installed Rust
# toolchain and cargo-chef (adapted from the original TEI Dockerfile)
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder

# /root/.cargo/bin is where rustup places cargo and rustc
ENV SCCACHE=0.5.4 \
    RUSTC_WRAPPER=/usr/local/bin/sccache \
    PATH="/root/.cargo/bin:${PATH}"

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Download the sccache release named by $SCCACHE and unpack only its binary
# into /usr/local/bin
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v${SCCACHE}/sccache-v${SCCACHE}-x86_64-unknown-linux-musl.tar.gz \
        | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v${SCCACHE}-x86_64-unknown-linux-musl/sccache && \
    chmod +x /usr/local/bin/sccache

# Install Rust through rustup (non-interactive), then cargo-chef
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
RUN cargo install cargo-chef --locked
|
||
# Stage "planner": produce the cargo-chef recipe (recipe.json) from the TEI workspace
FROM base-builder AS planner

WORKDIR /usr/src

# Workspace manifests first, then the crates they reference
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router

RUN cargo chef prepare --recipe-path recipe.json
|
||
# Stage "builder": cook the dependency recipe with the `google` feature; the
# builder-75/80/90 stages all start from this layer
FROM base-builder AS builder

WORKDIR /usr/src

COPY --from=planner /usr/src/recipe.json recipe.json

# Build the dependencies described by the recipe, then print sccache statistics
RUN cargo chef cook --release --recipe-path recipe.json --features google && sccache -s
|
||
# Stage "builder-75": router binary built with CUDA_COMPUTE_CAP=75 and the
# `candle-cuda-turing` feature
FROM builder AS builder-75

# Cook the dependencies with this stage's feature set
RUN CUDA_COMPUTE_CAP=75 cargo chef cook --release --recipe-path recipe.json \
        --features google --features candle-cuda-turing \
    && sccache -s

COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router

RUN CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router \
        --features candle-cuda-turing --features google \
    && sccache -s
|
||
# Stage "builder-80": router binary built with CUDA_COMPUTE_CAP=80 and the
# `candle-cuda` feature
FROM builder AS builder-80

# Cook the dependencies with this stage's feature set
RUN CUDA_COMPUTE_CAP=80 cargo chef cook --release --recipe-path recipe.json \
        --features google --features candle-cuda \
    && sccache -s

COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router

RUN CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router \
        --features candle-cuda --features google \
    && sccache -s
|
||
# Stage "builder-90": router binary built with CUDA_COMPUTE_CAP=90 and the
# `candle-cuda` feature
FROM builder AS builder-90

# Cook the dependencies with this stage's feature set
RUN CUDA_COMPUTE_CAP=90 cargo chef cook --release --recipe-path recipe.json \
        --features google --features candle-cuda \
    && sccache -s

COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router

RUN CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router \
        --features candle-cuda --features google \
    && sccache -s
|
||
# Final stage "base": CUDA runtime image carrying one router binary per
# compute-capability build (75, 80, 90), the Google Cloud CLI (used by
# entrypoint.sh to download models from GCS) and the entrypoint, which selects
# the binary to exec.
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base

ENV HUGGINGFACE_HUB_CACHE=/tmp \
    PORT=8080 \
    USE_FLASH_ATTENTION=True

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    ca-certificates \
    libssl-dev \
    curl \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder-75 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-75
COPY --from=builder-80 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-80
COPY --from=builder-90 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-90

# Install the Google Cloud CLI in a single layer.
# The repository key is dearmored straight into the keyring referenced by
# `signed-by` (apt-key is deprecated), and the APT lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y google-cloud-sdk && \
    rm -rf /var/lib/apt/lists/*

# COPY custom entrypoint for Google
COPY --chmod=775 containers/tei/gpu/1.6.0/entrypoint.sh entrypoint.sh
# entrypoint.sh receives the CMD values (or user-supplied arguments) as "$@"
ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/bin/bash
# Entrypoint for the TEI GPU container.
#
# When AIP_STORAGE_URI is set to a GCS location (gs://...), its contents are
# copied to /tmp/model and MODEL_ID is exported to point at that directory.
# The script then reads the GPU compute capability from nvidia-smi and execs
# the matching text-embeddings-router-{75,80,90} binary with the arguments it
# received. Exits with status 1 if the download fails, nvidia-smi is missing,
# or the compute capability is unknown or unsupported.

# Check if AIP_STORAGE_URI starts with "gs://"
if [[ ${AIP_STORAGE_URI:-} == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Define the target directory
    TARGET_DIR="/tmp/model"
    mkdir -p "$TARGET_DIR"

    # Use `gcloud storage` to copy the content from GCS to the target directory.
    # The wildcard is quoted so that gcloud, not the shell, expands it.
    echo "Running: gcloud storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
    if gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Update MODEL_ID to point to the local directory
        echo "Updating MODEL_ID to point to the local directory."
        export MODEL_ID="$TARGET_DIR"
    else
        echo "Failed to download model from GCS."
        exit 1
    fi
fi

# Best effort: a failure to refresh the linker cache is reported but not fatal
ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

# Below is the original `cuda-all-entrypoint.sh` script.
# Reference: https://github.com/huggingface/text-embeddings-inference/blob/v1.5.1/cuda-all-entrypoint.sh
if ! command -v nvidia-smi &>/dev/null; then
    echo "Error: 'nvidia-smi' command not found."
    exit 1
fi

# Second CSV line is the first GPU's value (line 1 is the header); the dot is
# removed so e.g. "7.5" becomes "75".
compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
# Drop stray whitespace so the numeric check below sees digits only
compute_cap=${compute_cap//[[:space:]]/}

# Guard the integer comparisons: an empty or non-numeric value would otherwise
# make `[ ... -eq ... ]` fail with a confusing shell error.
if ! [[ $compute_cap =~ ^[0-9]+$ ]]; then
    echo "Error: unable to determine the cuda compute cap (got '${compute_cap}')."
    exit 1
fi

if [ "$compute_cap" -eq 75 ]; then
    exec text-embeddings-router-75 "$@"
elif [ "$compute_cap" -ge 80 ] && [ "$compute_cap" -lt 90 ]; then
    exec text-embeddings-router-80 "$@"
elif [ "$compute_cap" -eq 90 ]; then
    exec text-embeddings-router-90 "$@"
else
    echo "cuda compute cap ${compute_cap} is not supported"
    exit 1
fi