-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
pytorch/inference/{gpu,cpu}/2.3.1/transformers/4.46.0/py311
(#117)
* Add `pytorch/inference/gpu/2.3.1/transformers/4.46.0/py311` (WIP) - Include missing `requirements.txt` installation in `entrypoint.sh` (required to install custom dependencies with custom models) - Fix Python 3.11 installation as it was not properly installed and Python 3.10 was used instead - Use `uv` to install the dependencies as it's way faster than default `pip` - Also `uv` is able to successfully install `kenlm` which is a `transformers` dependency that `pip` is not able to install when building the `Dockerfile` - Tested with some of the latest models that those bumped dependencies support as Gemma2, Llama3.2, StableDiffusion 3.5, and much more * Remove `uv` and don't upgrade `setuptools` Just by fixing the Python 3.11 and the `pip` installation, the installation issue affecting `kenlm` is solved already; so no need to add `uv` for the moment even though it would be a nice addition * Add `pytorch/inference/cpu/2.3.1/transformers/4.46.0/py311` * Update `pip install` syntax when installing from URL * Add `exec` to `uvicorn` in `entrypoint.sh` Kudos to @co42 for the catch at huggingface/huggingface-inference-toolkit#94 * Remove extra line-break in `Dockerfile` * Update `HF_INFERENCE_TOOLKIT_VERSION` to 0.5.1 See the latest `huggingface-inference-toolkit` release at https://github.com/huggingface/huggingface-inference-toolkit/releases/tag/0.5.1 * Bump `transformers` to 4.46.1 in `huggingface-inference-toolkit` `transformers` 4.46.0 was yanked because Python 3.8 support was unintentionally dropped, whilst also fixes some issues affecting both `torch.fx` and `onnx` Co-authored-by: Philipp Schmid <[email protected]> --------- Co-authored-by: Philipp Schmid <[email protected]>
- Loading branch information
1 parent
183c9c8
commit afcb6ad
Showing
4 changed files
with
210 additions
and
0 deletions.
There are no files selected for viewing
61 changes: 61 additions & 0 deletions
61
containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# syntax=docker/dockerfile:1
# CPU inference image: PyTorch 2.3.1 / transformers 4.46.1 / Python 3.11,
# served via the Hugging Face Inference Toolkit.
FROM ubuntu:22.04
# -o pipefail so piped RUN commands (curl | gpg below) fail loudly (hadolint DL4006)
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

LABEL maintainer="Hugging Face"

# Build-time only: suppress interactive apt prompts. Declared as ARG, not ENV,
# so it does not leak into the runtime environment of derived images.
ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# System dependencies for building/running transformers and its extras
# (libsndfile1/ffmpeg for audio, protobuf for tokenizers, deadsnakes PPA for
# Python 3.11 on Ubuntu 22.04). `gnupg` is required later to dearmor the
# Google Cloud apt signing key.
RUN apt-get update && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa && \
    # Patch security-sensitive system packages without a blanket upgrade
    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
    apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        cmake \
        curl \
        ffmpeg \
        g++ \
        gcc \
        git \
        git-lfs \
        gnupg \
        libprotobuf-dev \
        libsndfile1-dev \
        protobuf-compiler \
        python3.11 \
        python3.11-dev \
        tar && \
    rm -rf /var/lib/apt/lists/*

# Make Python 3.11 the default `python3` and `python`
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
    ln -sf /usr/bin/python3.11 /usr/bin/python

# Install pip via the official bootstrap script: the distro `python3-pip`
# package targets the system default Python, not the deadsnakes 3.11
RUN curl -fsSL -O https://bootstrap.pypa.io/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Hugging Face Inference Toolkit, pinned to a release tag for reproducibility
ARG HF_INFERENCE_TOOLKIT_VERSION=0.5.2
ARG HF_INFERENCE_TOOLKIT_URL=git+https://github.com/huggingface/huggingface-inference-toolkit.git@${HF_INFERENCE_TOOLKIT_VERSION}
RUN pip install --no-cache-dir --upgrade "huggingface-inference-toolkit[torch,diffusers,st,google] @ ${HF_INFERENCE_TOOLKIT_URL}"

# Faster model downloads from the Hugging Face Hub
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Install the Google Cloud SDK (provides `gsutil`, used by entrypoint.sh to
# pull models from GCS on Vertex AI). The key is dearmored into a dedicated
# keyring instead of using the deprecated `apt-key` tool.
RUN curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends google-cloud-sdk && \
    apt-get clean autoremove --yes && \
    rm -rf /var/lib/{apt,dpkg,cache,log}

# Copy the entrypoint (made executable at copy time) and run it directly in
# exec form — no extra `bash -c` wrapper, so signals reach the script
COPY --chmod=0755 containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/entrypoint.sh entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
44 changes: 44 additions & 0 deletions
44
containers/pytorch/inference/cpu/2.3.1/transformers/4.46.1/py311/entrypoint.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash
# Entrypoint for the Hugging Face PyTorch inference container. Optionally
# downloads model artifacts from Google Cloud Storage (Vertex AI) before
# starting the inference HTTP server.

# Default port used when not running on Vertex AI
PORT=5000

# On Vertex AI, AIP_MODE is set and the serving port comes from AIP_HTTP_PORT;
# fall back to the default so PORT is never empty if AIP_HTTP_PORT is unset
if [[ -n "${AIP_MODE}" ]]; then
    PORT=${AIP_HTTP_PORT:-5000}
fi

# When AIP_STORAGE_URI points at a GCS bucket ("gs://" scheme), download the
# model artifacts to a local directory before starting the server
if [[ ${AIP_STORAGE_URI} == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: ${AIP_STORAGE_URI}"

    # Local directory where the model artifacts will be stored
    TARGET_DIR="/opt/huggingface/model"
    mkdir -p "${TARGET_DIR}"

    # Copy the bucket contents in parallel (-m), skipping symlinks (-e);
    # branch directly on the command's exit status instead of testing $?
    echo "Running: gsutil -m cp -e -r '${AIP_STORAGE_URI}/*' '${TARGET_DIR}'"
    if gsutil -m cp -e -r "${AIP_STORAGE_URI}/*" "${TARGET_DIR}"; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Point the toolkit at the local copy instead of the remote URI
        echo "Updating MODEL_ID to point to the local directory."
        export HF_MODEL_DIR="${TARGET_DIR}"
        export AIP_STORAGE_URI=""
    else
        echo "Failed to download model from GCS."
        exit 1
    fi

    # Install any custom dependencies shipped alongside the model weights
    if [[ -f "${HF_MODEL_DIR}/requirements.txt" ]]; then
        echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt"
        pip install --no-cache-dir -r "${HF_MODEL_DIR}/requirements.txt"
    fi
fi

# Replace the shell with uvicorn so it runs as PID 1 and receives signals
exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port "${PORT}"
61 changes: 61 additions & 0 deletions
61
containers/pytorch/inference/gpu/2.3.1/transformers/4.46.1/py311/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# syntax=docker/dockerfile:1
# GPU inference image: PyTorch 2.3.1 / transformers 4.46.1 / Python 3.11 on
# CUDA 12.1, served via the Hugging Face Inference Toolkit.
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
# -o pipefail so piped RUN commands (curl | gpg below) fail loudly (hadolint DL4006)
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

LABEL maintainer="Hugging Face"

# Build-time only: suppress interactive apt prompts. Declared as ARG, not ENV,
# so it does not leak into the runtime environment of derived images.
ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# System dependencies for building/running transformers and its extras
# (libsndfile1/ffmpeg for audio, protobuf for tokenizers, deadsnakes PPA for
# Python 3.11 on Ubuntu 22.04). `gnupg` is required later to dearmor the
# Google Cloud apt signing key.
RUN apt-get update && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa && \
    # Patch security-sensitive system packages without a blanket upgrade
    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
    apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        cmake \
        curl \
        ffmpeg \
        g++ \
        gcc \
        git \
        git-lfs \
        gnupg \
        libprotobuf-dev \
        libsndfile1-dev \
        protobuf-compiler \
        python3.11 \
        python3.11-dev \
        tar && \
    rm -rf /var/lib/apt/lists/*

# Make Python 3.11 the default `python3` and `python`
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
    ln -sf /usr/bin/python3.11 /usr/bin/python

# Install pip via the official bootstrap script: the distro `python3-pip`
# package targets the system default Python, not the deadsnakes 3.11
RUN curl -fsSL -O https://bootstrap.pypa.io/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Hugging Face Inference Toolkit, pinned to a release tag for reproducibility
ARG HF_INFERENCE_TOOLKIT_VERSION=0.5.2
ARG HF_INFERENCE_TOOLKIT_URL=git+https://github.com/huggingface/huggingface-inference-toolkit.git@${HF_INFERENCE_TOOLKIT_VERSION}
RUN pip install --no-cache-dir --upgrade "huggingface-inference-toolkit[torch,diffusers,st,google] @ ${HF_INFERENCE_TOOLKIT_URL}"

# Faster model downloads from the Hugging Face Hub
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Install the Google Cloud SDK (provides `gsutil`, used by entrypoint.sh to
# pull models from GCS on Vertex AI). The key is dearmored into a dedicated
# keyring instead of using the deprecated `apt-key` tool.
RUN curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        > /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends google-cloud-sdk && \
    apt-get clean autoremove --yes && \
    rm -rf /var/lib/{apt,dpkg,cache,log}

# Copy the entrypoint (made executable at copy time) and run it directly in
# exec form — no extra `bash -c` wrapper, so signals reach the script
COPY --chmod=0755 containers/pytorch/inference/gpu/2.3.1/transformers/4.46.1/py311/entrypoint.sh entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
44 changes: 44 additions & 0 deletions
44
containers/pytorch/inference/gpu/2.3.1/transformers/4.46.1/py311/entrypoint.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash
# Entrypoint for the Hugging Face PyTorch inference container. Optionally
# downloads model artifacts from Google Cloud Storage (Vertex AI) before
# starting the inference HTTP server.

# Default port used when not running on Vertex AI
PORT=5000

# On Vertex AI, AIP_MODE is set and the serving port comes from AIP_HTTP_PORT;
# fall back to the default so PORT is never empty if AIP_HTTP_PORT is unset
if [[ -n "${AIP_MODE}" ]]; then
    PORT=${AIP_HTTP_PORT:-5000}
fi

# When AIP_STORAGE_URI points at a GCS bucket ("gs://" scheme), download the
# model artifacts to a local directory before starting the server
if [[ ${AIP_STORAGE_URI} == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: ${AIP_STORAGE_URI}"

    # Local directory where the model artifacts will be stored
    TARGET_DIR="/opt/huggingface/model"
    mkdir -p "${TARGET_DIR}"

    # Copy the bucket contents in parallel (-m), skipping symlinks (-e);
    # branch directly on the command's exit status instead of testing $?
    echo "Running: gsutil -m cp -e -r '${AIP_STORAGE_URI}/*' '${TARGET_DIR}'"
    if gsutil -m cp -e -r "${AIP_STORAGE_URI}/*" "${TARGET_DIR}"; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Point the toolkit at the local copy instead of the remote URI
        echo "Updating MODEL_ID to point to the local directory."
        export HF_MODEL_DIR="${TARGET_DIR}"
        export AIP_STORAGE_URI=""
    else
        echo "Failed to download model from GCS."
        exit 1
    fi

    # Install any custom dependencies shipped alongside the model weights
    if [[ -f "${HF_MODEL_DIR}/requirements.txt" ]]; then
        echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt"
        pip install --no-cache-dir -r "${HF_MODEL_DIR}/requirements.txt"
    fi
fi

# Replace the shell with uvicorn so it runs as PID 1 and receives signals
exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port "${PORT}"