-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
containers/tei/{cpu,gpu}/1.5.1
(#61)
* Add `containers/tei/gpu/1.5.0/Dockerfile` starting image Copied from https://github.com/huggingface/text-embeddings-inference/blob/v1.5.0/Dockerfile-cuda-all * Add `containers/tei/cpu/1.5.0/Dockerfile` starting image Copied from https://github.com/huggingface/text-embeddings-inference/blob/v1.5.0/Dockerfile * Update `containers/tei/gpu/1.5.0/*` * Update `containers/tei/cpu/1.5.0/*` * Upgrade TEI DLC from 1.5.0 to 1.5.1 * Update reference in `entrypoint.sh` * Fix `FromAsCasting` warning
- Loading branch information
1 parent
b174d63
commit 18a9859
Showing
4 changed files
with
284 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Fetch and extract the TEI (text-embeddings-inference) sources.
# The base image tag is pinned so rebuilds do not silently move to a new Alpine release.
FROM alpine:3.20 AS tei

# Download the v1.5.1 source tarball. ADD creates the missing /tei directory itself,
# so no separate `mkdir` layer is required.
# NOTE(review): consider `ADD --checksum=sha256:<digest>` once the tarball digest is recorded.
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.5.1.tar.gz /tei/sources.tar.gz

# Unpack into /tei, stripping the leading path component of every archive entry.
RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1
|
||
# Build cargo components (adapted from TEI original Dockerfile)
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
WORKDIR /usr/src

# Run RUN steps under bash with pipefail so that a failure of any command in a
# pipeline (e.g. the `curl` feeding `tar` below) fails the build step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# SCCACHE is the sccache release to install; RUSTC_WRAPPER routes rustc
# invocations through the installed sccache binary.
ENV SCCACHE=0.5.4
ENV RUSTC_WRAPPER=/usr/local/bin/sccache

# Download and configure sccache: extract only the `sccache` binary from the
# release tarball into /usr/local/bin and make it executable.
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
    chmod +x /usr/local/bin/sccache
|
||
# Planner stage: derives the cargo-chef dependency recipe from the TEI workspace.
FROM chef AS planner

# Workspace manifest and lockfile, followed by the crate directories.
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/router router
COPY --from=tei /tei/core core
COPY --from=tei /tei/backends backends

# Writes recipe.json into the working directory.
RUN cargo chef prepare --recipe-path recipe.json
|
||
# Builder stage: pre-builds the dependencies described by the recipe, then adds
# the real sources so later stages only have to compile the workspace crates.
FROM chef AS builder

# Recipe produced by the planner stage.
COPY --from=planner /usr/src/recipe.json recipe.json

# Cook the dependencies in release mode and print sccache statistics afterwards.
RUN cargo chef cook --release \
        --no-default-features \
        --features ort \
        --recipe-path recipe.json \
    && sccache -s

# Workspace manifest and lockfile, followed by the crate directories.
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/router router
COPY --from=tei /tei/core core
COPY --from=tei /tei/backends backends
|
||
# Builds the router binary with the `google`, `ort` and `http` features enabled
# (default features disabled), then prints sccache statistics.
FROM builder AS http-builder

RUN cargo build --release \
        --no-default-features \
        --bin text-embeddings-router \
        --features google,ort,http \
    && sccache -s
|
||
# Builds the router binary with the `google`, `grpc` and `ort` features enabled.
FROM builder AS grpc-builder

# Install protoc 21.12 under /usr/local: the `bin/protoc` executable plus the
# bundled `include/` tree; the downloaded archive is removed in the same layer.
RUN PROTOC_VERSION=21.12 \
    && PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-x86_64.zip \
    && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/$PROTOC_ZIP \
    && unzip -o $PROTOC_ZIP -d /usr/local bin/protoc \
    && unzip -o $PROTOC_ZIP -d /usr/local 'include/*' \
    && rm -f $PROTOC_ZIP

# Protobuf definitions from the TEI sources.
COPY --from=tei /tei/proto proto

RUN cargo build --release \
        --no-default-features \
        --bin text-embeddings-router \
        --features google,grpc,ort \
    && sccache -s
|
||
# Runtime base image shared by the `grpc` and `http` targets.
FROM debian:bookworm-slim AS base

# Runtime defaults baked into the image.
# NOTE(review): presumably the Hub cache directory and the router's listening
# port -- confirm against the TEI documentation.
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    PORT=8080

# Runtime libraries and tools; the apt lists are removed in the same layer.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Install the Google Cloud CLI in a single layer.
# - The repository key is downloaded with `curl --fail` to a file and converted
#   with `gpg --dearmor` instead of being piped into the deprecated `apt-key`,
#   so a failed download aborts the build instead of producing a bad keyring.
# - The apt source file is written with `>` so the entry is never duplicated.
# - The apt lists are removed at the end so they do not persist in the layer.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg -o /tmp/cloud.google.key && \
    gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg /tmp/cloud.google.key && \
    rm -f /tmp/cloud.google.key && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y google-cloud-sdk && \
    rm -rf /var/lib/apt/lists/*

# COPY custom entrypoint for Google. The destination is spelled as an absolute
# path; with this image's default working directory (/) it is the same location
# the relative `entrypoint.sh` destination resolved to.
COPY --chmod=775 containers/tei/cpu/1.5.1/entrypoint.sh /entrypoint.sh
|
||
# gRPC target: runtime base plus the router binary built with the `grpc` feature.
FROM base AS grpc

COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

# The entrypoint script is copied to the image root by the base stage; an
# absolute path keeps the container startable when the working directory is
# overridden (e.g. `docker run -w ...`).
ENTRYPOINT ["/entrypoint.sh"]
# Default arguments forwarded to the entrypoint; overridable at `docker run`.
CMD ["--json-output"]
|
||
# HTTP target: runtime base plus the router binary built with the `http` feature.
# As the last stage in the file, this is what a build without `--target` produces.
FROM base AS http

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

# The entrypoint script is copied to the image root by the base stage; an
# absolute path keeps the container startable when the working directory is
# overridden (e.g. `docker run -w ...`).
ENTRYPOINT ["/entrypoint.sh"]
# Default arguments forwarded to the entrypoint; overridable at `docker run`.
CMD ["--json-output"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/bin/bash

# Entrypoint wrapper for text-embeddings-router.
#
# When AIP_STORAGE_URI starts with "gs://", its contents are downloaded to a
# local directory with `gcloud storage cp` and MODEL_ID is exported to point at
# that directory. All script arguments are then forwarded to the router.

# Check if AIP_STORAGE_URI starts with "gs://"
if [[ $AIP_STORAGE_URI == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Define the target directory
    TARGET_DIR="/tmp/model"
    mkdir -p "$TARGET_DIR"

    # Drop a single trailing slash so the wildcard below never becomes "//*".
    SOURCE_URI="${AIP_STORAGE_URI%/}"

    # Use the gcloud CLI to copy the content from GCS to the target directory.
    # The wildcard is quoted on purpose: it is expanded by gcloud, not by the shell.
    echo "Running: gcloud storage cp $SOURCE_URI/* $TARGET_DIR --recursive"
    if gcloud storage cp "$SOURCE_URI/*" "$TARGET_DIR" --recursive; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Update MODEL_ID to point to the local directory
        echo "Updating MODEL_ID to point to the local directory."
        export MODEL_ID="$TARGET_DIR"
    else
        echo "Failed to download model from GCS."
        exit 1
    fi
fi

# Best effort: a failure to refresh the linker cache is reported but not fatal.
ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

# Replace this shell with the router. "$@" is quoted so that arguments
# containing whitespace are forwarded unchanged.
exec text-embeddings-router "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# Fetch and extract the TEI (text-embeddings-inference) sources.
# The base image tag is pinned so rebuilds do not silently move to a new Alpine release.
FROM alpine:3.20 AS tei

# Download the v1.5.1 source tarball. ADD creates the missing /tei directory itself,
# so no separate `mkdir` layer is required.
# NOTE(review): consider `ADD --checksum=sha256:<digest>` once the tarball digest is recorded.
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.5.1.tar.gz /tei/sources.tar.gz

# Unpack into /tei, stripping the leading path component of every archive entry.
RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1
|
||
# Build cargo components (adapted from TEI original Dockerfile)
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder

# Run RUN steps under bash with pipefail so that a failure of any command in a
# pipeline (the `curl ... | tar` and `curl ... | bash` steps below) fails the build.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# SCCACHE is the sccache release to install; RUSTC_WRAPPER routes rustc
# invocations through it; PATH exposes the cargo binaries installed by rustup.
ENV SCCACHE=0.5.4
ENV RUSTC_WRAPPER=/usr/local/bin/sccache
ENV PATH="/root/.cargo/bin:${PATH}"

# Build prerequisites; the apt lists are removed in the same layer.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Download and configure sccache: extract only the `sccache` binary from the
# release tarball into /usr/local/bin and make it executable.
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
    chmod +x /usr/local/bin/sccache

# Install the Rust toolchain non-interactively, then cargo-chef.
# NOTE(review): neither the toolchain nor cargo-chef version is pinned here.
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
RUN cargo install cargo-chef --locked
|
||
# Planner stage: derives the cargo-chef dependency recipe from the TEI workspace.
FROM base-builder AS planner

WORKDIR /usr/src

# Workspace manifest and lockfile, followed by the crate directories.
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/router router
COPY --from=tei /tei/core core
COPY --from=tei /tei/backends backends

# Writes recipe.json into the working directory.
RUN cargo chef prepare --recipe-path recipe.json
|
||
# Common builder stage: pre-builds the dependencies shared by the per-compute-
# capability stages that derive from it.
FROM base-builder AS builder

WORKDIR /usr/src

# Recipe produced by the planner stage.
COPY --from=planner /usr/src/recipe.json recipe.json

# Cook the dependencies with the `google` feature and print sccache statistics.
RUN cargo chef cook --release \
        --features google \
        --recipe-path recipe.json \
    && sccache -s
|
||
# Router build for CUDA_COMPUTE_CAP=75 using the `candle-cuda-turing` feature.
FROM builder AS builder-75

# Pre-build the dependencies for this compute capability.
RUN CUDA_COMPUTE_CAP=75 cargo chef cook --release \
        --features google,candle-cuda-turing \
        --recipe-path recipe.json \
    && sccache -s

# Workspace manifest and lockfile, followed by the crate directories.
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/router router
COPY --from=tei /tei/core core
COPY --from=tei /tei/backends backends

# Compile the router binary and print sccache statistics.
RUN CUDA_COMPUTE_CAP=75 cargo build --release \
        --bin text-embeddings-router \
        --features candle-cuda-turing,google \
    && sccache -s
|
||
# Router build for CUDA_COMPUTE_CAP=80 using the `candle-cuda` feature.
FROM builder AS builder-80

# Pre-build the dependencies for this compute capability.
RUN CUDA_COMPUTE_CAP=80 cargo chef cook --release \
        --features google,candle-cuda \
        --recipe-path recipe.json \
    && sccache -s

# Workspace manifest and lockfile, followed by the crate directories.
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/router router
COPY --from=tei /tei/core core
COPY --from=tei /tei/backends backends

# Compile the router binary and print sccache statistics.
RUN CUDA_COMPUTE_CAP=80 cargo build --release \
        --bin text-embeddings-router \
        --features candle-cuda,google \
    && sccache -s
|
||
# Router build for CUDA_COMPUTE_CAP=90 using the `candle-cuda` feature.
FROM builder AS builder-90

# Pre-build the dependencies for this compute capability.
RUN CUDA_COMPUTE_CAP=90 cargo chef cook --release \
        --features google,candle-cuda \
        --recipe-path recipe.json \
    && sccache -s

# Workspace manifest and lockfile, followed by the crate directories.
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./
COPY --from=tei /tei/router router
COPY --from=tei /tei/core core
COPY --from=tei /tei/backends backends

# Compile the router binary and print sccache statistics.
RUN CUDA_COMPUTE_CAP=90 cargo build --release \
        --bin text-embeddings-router \
        --features candle-cuda,google \
    && sccache -s
|
||
# Runtime image: CUDA runtime plus one router binary per supported compute
# capability; the entrypoint script selects the binary to execute at start-up.
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base

# Runtime defaults baked into the image.
# NOTE(review): presumably the Hub cache directory, the router's listening port
# and a flash-attention toggle -- confirm against the TEI documentation.
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    PORT=8080 \
    USE_FLASH_ATTENTION=True

# Runtime libraries and tools; the apt lists are removed in the same layer.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Router binaries, suffixed with the compute capability they were built for.
COPY --from=builder-75 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-75
COPY --from=builder-80 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-80
COPY --from=builder-90 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-90

# Install the Google Cloud CLI in a single layer.
# - The repository key is downloaded with `curl --fail` to a file and converted
#   with `gpg --dearmor` instead of being piped into the deprecated `apt-key`,
#   so a failed download aborts the build instead of producing a bad keyring.
# - The apt source file is written with `>` so the entry is never duplicated.
# - The apt lists are removed at the end so they do not persist in the layer.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg -o /tmp/cloud.google.key && \
    gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg /tmp/cloud.google.key && \
    rm -f /tmp/cloud.google.key && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y google-cloud-sdk && \
    rm -rf /var/lib/apt/lists/*

# COPY custom entrypoint for Google. Both the destination and the ENTRYPOINT use
# an absolute path so start-up does not depend on the working directory.
COPY --chmod=775 containers/tei/gpu/1.5.1/entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
# Default arguments forwarded to the entrypoint; overridable at `docker run`.
CMD ["--json-output"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/bin/bash

# Entrypoint wrapper for the CUDA text-embeddings-router binaries.
#
# 1. When AIP_STORAGE_URI starts with "gs://", its contents are downloaded to a
#    local directory with `gcloud storage cp` and MODEL_ID is exported to point
#    at that directory.
# 2. The GPU compute capability is read from `nvidia-smi` and the matching
#    `text-embeddings-router-<cap>` binary is exec'd with all script arguments.

# Check if AIP_STORAGE_URI starts with "gs://"
if [[ $AIP_STORAGE_URI == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Define the target directory
    TARGET_DIR="/tmp/model"
    mkdir -p "$TARGET_DIR"

    # Drop a single trailing slash so the wildcard below never becomes "//*".
    SOURCE_URI="${AIP_STORAGE_URI%/}"

    # Use the gcloud CLI to copy the content from GCS to the target directory.
    # The wildcard is quoted on purpose: it is expanded by gcloud, not by the shell.
    echo "Running: gcloud storage cp $SOURCE_URI/* $TARGET_DIR --recursive"
    if gcloud storage cp "$SOURCE_URI/*" "$TARGET_DIR" --recursive; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Update MODEL_ID to point to the local directory
        echo "Updating MODEL_ID to point to the local directory."
        export MODEL_ID="$TARGET_DIR"
    else
        echo "Failed to download model from GCS."
        exit 1
    fi
fi

# Best effort: a failure to refresh the linker cache is reported but not fatal.
ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

# Below is the original `cuda-all-entrypoint.sh` script.
# Reference: https://github.com/huggingface/text-embeddings-inference/blob/v1.5.1/cuda-all-entrypoint.sh
if ! command -v nvidia-smi &>/dev/null; then
    echo "Error: 'nvidia-smi' command not found."
    exit 1
fi

# Second line of the CSV output (the first is the header) with the dot removed,
# e.g. "7.5" -> "75". Any stray whitespace is stripped as well.
compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
compute_cap="${compute_cap//[[:space:]]/}"

# Guard against an empty or non-numeric value (e.g. nvidia-smi failed or found
# no GPU), which would otherwise make the numeric comparisons below error out.
if ! [[ $compute_cap =~ ^[0-9]+$ ]]; then
    echo "Error: unable to determine the CUDA compute capability (got '${compute_cap}')."
    exit 1
fi

# Dispatch to the binary built for the detected compute capability.
if [ "${compute_cap}" -eq 75 ]; then
    exec text-embeddings-router-75 "$@"
elif [ "${compute_cap}" -ge 80 ] && [ "${compute_cap}" -lt 90 ]; then
    exec text-embeddings-router-80 "$@"
elif [ "${compute_cap}" -eq 90 ]; then
    exec text-embeddings-router-90 "$@"
else
    echo "cuda compute cap ${compute_cap} is not supported"
    exit 1
fi