Skip to content

Commit

Permalink
Add containers/tei/{cpu,gpu}/1.5.1 (#61)
Browse files Browse the repository at this point in the history
* Add `containers/tei/gpu/1.5.0/Dockerfile` starting image

Copied from
https://github.com/huggingface/text-embeddings-inference/blob/v1.5.0/Dockerfile-cuda-all

* Add `containers/tei/cpu/1.5.0/Dockerfile` starting image

Copied from
https://github.com/huggingface/text-embeddings-inference/blob/v1.5.0/Dockerfile

* Update `containers/tei/gpu/1.5.0/*`

* Update `containers/tei/cpu/1.5.0/*`

* Upgrade TEI DLC from 1.5.0 to 1.5.1

* Update reference in `entrypoint.sh`

* Fix `FromAsCasting` warning
  • Loading branch information
alvarobartt authored Dec 12, 2024
1 parent b174d63 commit 18a9859
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 0 deletions.
93 changes: 93 additions & 0 deletions containers/tei/cpu/1.5.1/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Fetch and extract the TGI sources
FROM alpine AS tei

RUN mkdir -p /tei
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.5.1.tar.gz /tei/sources.tar.gz
RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TEI original Dockerfile)
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
WORKDIR /usr/src

ENV SCCACHE=0.5.4
ENV RUSTC_WRAPPER=/usr/local/bin/sccache

# Donwload, configure sccache
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
chmod +x /usr/local/bin/sccache

FROM chef AS planner

COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml ./
COPY --from=tei /tei/Cargo.lock ./

RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

COPY --from=planner /usr/src/recipe.json recipe.json

RUN cargo chef cook --release --features ort --no-default-features --recipe-path recipe.json && sccache -s

COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml ./
COPY --from=tei /tei/Cargo.lock ./

FROM builder AS http-builder

RUN cargo build --release --bin text-embeddings-router -F google -F ort -F http --no-default-features && sccache -s

FROM builder AS grpc-builder

RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
rm -f $PROTOC_ZIP

COPY --from=tei /tei/proto proto

RUN cargo build --release --bin text-embeddings-router -F google -F grpc -F ort --no-default-features && sccache -s

FROM debian:bookworm-slim AS base

ENV HUGGINGFACE_HUB_CACHE=/tmp \
PORT=8080

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
libssl-dev \
curl \
&& rm -rf /var/lib/apt/lists/*

# Install Google CLI single command
RUN apt-get update -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
| tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
| apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
apt-get update -y && \
apt-get install google-cloud-sdk -y

# COPY custom entrypoint for Google
COPY --chmod=775 containers/tei/cpu/1.5.1/entrypoint.sh entrypoint.sh

FROM base AS grpc

COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]

FROM base AS http

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]
30 changes: 30 additions & 0 deletions containers/tei/cpu/1.5.1/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash

# Check if MODEL_ID starts with "gcs://"
if [[ $AIP_STORAGE_URI == gs://* ]]; then
echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

# Define the target directory
TARGET_DIR="/tmp/model"
mkdir -p "$TARGET_DIR"

# Use gsutil to copy the content from GCS to the target directory
echo "Running: gcloud storage storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive

# Check if gsutil command was successful
if [ $? -eq 0 ]; then
echo "Model downloaded successfully to ${TARGET_DIR}."
# Update MODEL_ID to point to the local directory
echo "Updating MODEL_ID to point to the local directory."
export MODEL_ID="$TARGET_DIR"
else
echo "Failed to download model from GCS."
exit 1
fi
fi

ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

exec text-embeddings-router $@
113 changes: 113 additions & 0 deletions containers/tei/gpu/1.5.1/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Fetch and extract the TGI sources
FROM alpine AS tei

RUN mkdir -p /tei
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.5.1.tar.gz /tei/sources.tar.gz
RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TEI original Dockerfile)
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder

ENV SCCACHE=0.5.4
ENV RUSTC_WRAPPER=/usr/local/bin/sccache
ENV PATH="/root/.cargo/bin:${PATH}"

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
curl \
libssl-dev \
pkg-config \
&& rm -rf /var/lib/apt/lists/*

# Donwload and configure sccache
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
chmod +x /usr/local/bin/sccache

RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
RUN cargo install cargo-chef --locked

FROM base-builder AS planner

WORKDIR /usr/src

COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml ./
COPY --from=tei /tei/Cargo.lock ./

RUN cargo chef prepare --recipe-path recipe.json

FROM base-builder AS builder

WORKDIR /usr/src

COPY --from=planner /usr/src/recipe.json recipe.json

RUN cargo chef cook --release --features google --recipe-path recipe.json && sccache -s

FROM builder AS builder-75

RUN CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s

COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml ./
COPY --from=tei /tei/Cargo.lock ./

RUN CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s

FROM builder AS builder-80

RUN CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s

COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml ./
COPY --from=tei /tei/Cargo.lock ./

RUN CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s

FROM builder AS builder-90

RUN CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s

COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml ./
COPY --from=tei /tei/Cargo.lock ./

RUN CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s

FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base

ENV HUGGINGFACE_HUB_CACHE=/tmp \
PORT=8080 \
USE_FLASH_ATTENTION=True

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
libssl-dev \
curl \
&& rm -rf /var/lib/apt/lists/*

COPY --from=builder-75 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-75
COPY --from=builder-80 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-80
COPY --from=builder-90 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-90

# Install Google CLI single command
RUN apt-get update -y && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
| tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
| apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
apt-get update -y && \
apt-get install google-cloud-sdk -y

# COPY custom entrypoint for Google
COPY --chmod=775 containers/tei/gpu/1.5.1/entrypoint.sh entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]
48 changes: 48 additions & 0 deletions containers/tei/gpu/1.5.1/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash

# Check if MODEL_ID starts with "gcs://"
if [[ $AIP_STORAGE_URI == gs://* ]]; then
echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

# Define the target directory
TARGET_DIR="/tmp/model"
mkdir -p "$TARGET_DIR"

# Use gsutil to copy the content from GCS to the target directory
echo "Running: gcloud storage storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive

# Check if gsutil command was successful
if [ $? -eq 0 ]; then
echo "Model downloaded successfully to ${TARGET_DIR}."
# Update MODEL_ID to point to the local directory
echo "Updating MODEL_ID to point to the local directory."
export MODEL_ID="$TARGET_DIR"
else
echo "Failed to download model from GCS."
exit 1
fi
fi

ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

# Below is the original `cuda-all-entrypoint.sh` script.
# Reference: https://github.com/huggingface/text-embeddings-inference/blob/v1.5.1/cuda-all-entrypoint.sh
if ! command -v nvidia-smi &>/dev/null; then
echo "Error: 'nvidia-smi' command not found."
exit 1
fi

compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')

if [ ${compute_cap} -eq 75 ]; then
exec text-embeddings-router-75 "$@"
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then
exec text-embeddings-router-80 "$@"
elif [ ${compute_cap} -eq 90 ]; then
exec text-embeddings-router-90 "$@"
else
echo "cuda compute cap ${compute_cap} is not supported"
exit 1
fi

0 comments on commit 18a9859

Please sign in to comment.