Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
BenjaminBraunDev committed Jan 13, 2025
2 parents 6d8d0ec + b84cc26 commit cdff4c9
Show file tree
Hide file tree
Showing 71 changed files with 1,487 additions and 856 deletions.
2 changes: 1 addition & 1 deletion Dockerfile.sdk
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#

# Base image on the minimum Triton container
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.11-py3-min
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.12-py3-min

ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
Expand Down
32 changes: 18 additions & 14 deletions Dockerfile.win10.min
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ RUN choco install unzip -y
#
# Installing TensorRT
#
ARG TENSORRT_VERSION=10.4.0.26
ARG TENSORRT_VERSION=10.7.0.23
ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows.win10.cuda-12.6.zip"
ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip
ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip
# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
RUN unzip /tmp/%TENSORRT_ZIP%
Expand All @@ -51,9 +51,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
#
# Installing cuDNN
#
ARG CUDNN_VERSION=9.4.0.58
ARG CUDNN_VERSION=9.6.0.74
ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.4.0.58_cuda12-archive.zip
ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.6.0.74_cuda12-archive.zip
ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
RUN unzip /tmp/%CUDNN_ZIP%
RUN move cudnn-* cudnn
Expand All @@ -75,20 +75,19 @@ RUN choco install git docker unzip -y
#
# Installing python
#
ARG PYTHON_VERSION=3.10.11
ARG PYTHON_VERSION=3.12.3
ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe"
RUN pip install --upgrade wheel setuptools docker
RUN pip install grpcio-tools psutil

LABEL PYTHON_VERSION=${PYTHON_VERSION}

#
# Installing CMake
#
ARG CMAKE_VERSION=3.30.0
ARG CMAKE_VERSION=3.30.5
RUN pip install cmake==%CMAKE_VERSION%

ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
Expand All @@ -101,14 +100,16 @@ LABEL CMAKE_VERSION=${CMAKE_VERSION}
#
# Installing Visual Studio BuildTools: VS17 2022
#
ARG BUILDTOOLS_VERSION=17.10.35201.131
# Download collect.exe in case of an install failure.
ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"

# Use the latest release channel. For more control, specify the location of an internal layout.
# Download the Build Tools bootstrapper.
# ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/28626b4b-f88f-4b55-a0cf-f3eaa2c643fb/e6c43d4dfb36338d954cdb3ad9010ab2a479e712088f4f6b016eadcc721bab28/vs_BuildTools.exe

ARG BUILDTOOLS_VERSION=17.12.35506.116
ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/5536698c-711c-4834-876f-2817d31a2ef2/58894fc272e86d3c3a6d85bf3a1df1e5a0685be8b9ab65d9f3cc5c2a8c6921cc/vs_BuildTools.exe

ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
# Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
ARG VS_INSTALL_PATH_WP="C:\BuildTools"
Expand Down Expand Up @@ -149,12 +150,13 @@ WORKDIR /
# Installing CUDA
#
ARG CUDA_MAJOR=12
ARG CUDA_MINOR=5
ARG CUDA_PATCH=1
ARG CUDA_MINOR=6
ARG CUDA_PATCH=3
ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
nvml_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
nvrtc_${CUDA_MAJOR}.${CUDA_MINOR} nvrtc_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
cublas_${CUDA_MAJOR}.${CUDA_MINOR} cublas_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
cufft_${CUDA_MAJOR}.${CUDA_MINOR} cufft_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
curand_${CUDA_MAJOR}.${CUDA_MINOR} curand_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
Expand All @@ -175,21 +177,23 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi

RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"

ARG CUDNN_VERSION=9.4.0.58
ENV CUDA_VERSION=${CUDA_VERSION}
LABEL CUDA_VERSION="${CUDA_VERSION}"

ARG CUDNN_VERSION=9.6.0.74
ENV CUDNN_VERSION ${CUDNN_VERSION}
COPY --from=dependency_base /cudnn /cudnn
RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
LABEL CUDNN_VERSION="${CUDNN_VERSION}"

ARG TENSORRT_VERSION=10.4.0.26
ARG TENSORRT_VERSION=10.7.0.23
ENV TRT_VERSION ${TENSORRT_VERSION}
COPY --from=dependency_base /TensorRT /TensorRT
RUN setx PATH "c:\TensorRT\lib;%PATH%"
LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"

LABEL CUDA_VERSION="${CUDA_VERSION}"
# It is important that the entrypoint initialize VisualStudio
# environment otherwise the build will fail. Also set
# CMAKE_TOOLCHAIN_FILE and VCPKG_TARGET_TRIPLET so
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@

>[!WARNING]
>You are currently on the `main` branch which tracks under-development progress
>towards the next release. The current release is version [2.52.0](https://github.com/triton-inference-server/server/releases/latest)
>and corresponds to the 24.11 container release on NVIDIA GPU Cloud (NGC).
>towards the next release. The current release is version [2.53.0](https://github.com/triton-inference-server/server/releases/latest)
>and corresponds to the 24.12 container release on NVIDIA GPU Cloud (NGC).
Triton Inference Server is an open source inference serving software that
streamlines AI inferencing. Triton enables teams to deploy any AI model from
Expand Down Expand Up @@ -91,16 +91,16 @@ Inference Server with the

```bash
# Step 1: Create the example model repository
git clone -b r24.11 https://github.com/triton-inference-server/server.git
git clone -b r24.12 https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh

# Step 2: Launch triton from the NGC Triton container
docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.11-py3 tritonserver --model-repository=/models
docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.12-py3 tritonserver --model-repository=/models

# Step 3: Sending an Inference Request
# In a separate console, launch the image_client example from the NGC Triton SDK container
docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.11-py3-sdk
docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.12-py3-sdk
/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg

# Inference should return the following
Expand Down
2 changes: 1 addition & 1 deletion TRITON_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.53.0dev
2.54.0dev
18 changes: 13 additions & 5 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@
#

# Default component versions used when none are given explicitly.
# This listing is a rendered diff: where a key appears twice, the first entry
# is the superseded value and the second is the current one (Python keeps the
# last occurrence of a duplicated key).
DEFAULT_TRITON_VERSION_MAP = {
    "release_version": "2.53.0dev",
    "triton_container_version": "24.12dev",
    "upstream_container_version": "24.11",
    "ort_version": "1.19.2",
    "release_version": "2.54.0dev",
    # The development container that follows upstream 24.12 is the
    # January 2025 one, so the year component rolls over to 25.
    "triton_container_version": "25.01dev",
    "upstream_container_version": "24.12",
    "ort_version": "1.20.1",
    "ort_openvino_version": "2024.4.0",
    "standalone_openvino_version": "2024.4.0",
    "dcgm_version": "3.3.6",
    "vllm_version": "0.5.5",
    "vllm_version": "0.6.3.post1",
    "rhel_py_version": "3.12.3",
}

Expand Down Expand Up @@ -986,6 +986,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
RUN pip3 install --upgrade pip \\
&& pip3 install --upgrade \\
build \\
wheel \\
setuptools \\
docker \\
Expand Down Expand Up @@ -1105,6 +1106,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
&& rm -rf /var/lib/apt/lists/*
RUN pip3 install --upgrade \\
build \\
docker \\
virtualenv
Expand Down Expand Up @@ -1236,6 +1238,8 @@ def create_dockerfile_linux(
find /opt/tritonserver/python -maxdepth 1 -type f -name \\
"tritonfrontend-*.whl" | xargs -I {} pip install --upgrade {}[all]
RUN pip3 install -r python/openai/requirements.txt
"""
if not FLAGS.no_core_build:
# Add feature labels for SageMaker endpoint
Expand Down Expand Up @@ -1932,6 +1936,10 @@ def core_build(
os.path.join(install_dir, "include", "triton", "core"),
)

cmake_script.cpdir(
os.path.join(repo_dir, "python", "openai"), os.path.join(install_dir, "python")
)

cmake_script.cp(os.path.join(repo_dir, "LICENSE"), install_dir)
cmake_script.cp(os.path.join(repo_dir, "TRITON_VERSION"), install_dir)

Expand Down
4 changes: 3 additions & 1 deletion compose.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -88,6 +88,8 @@ def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):

df += """
FROM {}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
""".format(
images["min"]
)
Expand Down
2 changes: 1 addition & 1 deletion deploy/aws/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.11-py3
imageName: nvcr.io/nvidia/tritonserver:24.12-py3
pullPolicy: IfNotPresent
modelRepositoryPath: s3://triton-inference-server-repository/model_repository
numGpus: 1
Expand Down
2 changes: 1 addition & 1 deletion deploy/fleetcommand/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

apiVersion: v1
# appVersion is the Triton version; update when changing release
appVersion: "2.51.0"
appVersion: "2.53.0"
description: Triton Inference Server (Fleet Command)
name: triton-inference-server
# version is the Chart version; update when changing anything in the chart
Expand Down
6 changes: 3 additions & 3 deletions deploy/fleetcommand/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.11-py3
imageName: nvcr.io/nvidia/tritonserver:24.12-py3
pullPolicy: IfNotPresent
numGpus: 1
serverCommand: tritonserver
Expand All @@ -47,13 +47,13 @@ image:
#
# To set model control mode, uncomment and configure below
# See https://github.com/triton-inference-server/server/blob/r24.11/docs/model_management.md
# See https://github.com/triton-inference-server/server/blob/r24.12/docs/user_guide/model_management.md
# for more details
#- --model-control-mode=explicit|poll|none
#
# Additional server args
#
# see https://github.com/triton-inference-server/server/blob/r24.11/README.md
# see https://github.com/triton-inference-server/server/blob/r24.12/README.md
# for more details

service:
Expand Down
2 changes: 1 addition & 1 deletion deploy/gcp/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.11-py3
imageName: nvcr.io/nvidia/tritonserver:24.12-py3
pullPolicy: IfNotPresent
modelRepositoryPath: gs://triton-inference-server-repository/model_repository
numGpus: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ metadata:
namespace: default
spec:
containers:
- image: nvcr.io/nvidia/tritonserver:24.11-py3-sdk
- image: nvcr.io/nvidia/tritonserver:24.12-py3-sdk
imagePullPolicy: Always
name: nv-triton-client
securityContext:
Expand Down
6 changes: 3 additions & 3 deletions deploy/gke-marketplace-app/server-deployer/build_and_push.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@

export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
export APP_NAME=tritonserver
export MAJOR_VERSION=2.51
export MINOR_VERSION=2.51.0
export NGC_VERSION=24.11-py3
export MAJOR_VERSION=2.53
export MINOR_VERSION=2.53.0
export NGC_VERSION=24.12-py3

docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

apiVersion: v1
appVersion: "2.51"
appVersion: "2.53"
description: Triton Inference Server
name: triton-inference-server
version: 2.51.0
version: 2.53.0
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ maxReplicaCount: 3
tritonProtocol: HTTP
# HPA GPU utilization autoscaling target
HPATargetAverageValue: 85
modelRepositoryPath: gs://triton_sample_models/24.11
publishedVersion: '2.51.0'
modelRepositoryPath: gs://triton_sample_models/24.12
publishedVersion: '2.53.0'
gcpMarketplace: true

image:
registry: gcr.io
repository: nvidia-ngc-public/tritonserver
tag: 24.11-py3
tag: 24.12-py3
pullPolicy: IfNotPresent
# modify the model repository here to match your GCP storage bucket
numGpus: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
x-google-marketplace:
schemaVersion: v2
applicationApiVersion: v1beta1
publishedVersion: '2.51.0'
publishedVersion: '2.53.0'
publishedVersionMetadata:
releaseNote: >-
Initial release.
Expand Down
4 changes: 2 additions & 2 deletions deploy/gke-marketplace-app/server-deployer/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
x-google-marketplace:
schemaVersion: v2
applicationApiVersion: v1beta1
publishedVersion: '2.51.0'
publishedVersion: '2.53.0'
publishedVersionMetadata:
releaseNote: >-
Initial release.
Expand Down Expand Up @@ -89,7 +89,7 @@ properties:
modelRepositoryPath:
type: string
title: Bucket where models are stored. Please make sure the user/service account to create the GKE app has permission to this GCS bucket. Read Triton documentation on configs and formatting details, supporting TensorRT, TensorFlow, Pytorch, Onnx ... etc.
default: gs://triton_sample_models/24.11
default: gs://triton_sample_models/24.12
image.ldPreloadPath:
type: string
title: Leave this empty by default. Triton allows users to create custom layers for backend such as TensorRT plugin or Tensorflow custom ops, the compiled shared library must be provided via LD_PRELOAD environment variable.
Expand Down
6 changes: 3 additions & 3 deletions deploy/gke-marketplace-app/trt-engine/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
```
docker run --gpus all -it --network host \
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-v ~:/scripts nvcr.io/nvidia/tensorrt:24.11-py3
-v ~:/scripts nvcr.io/nvidia/tensorrt:24.12-py3
pip install onnx six torch tf2onnx tensorflow
Expand All @@ -57,7 +57,7 @@ mkdir -p engines
python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh
gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/24.11/bert/1/model.plan
gsutil cp bert_large_int8_bs1_s128.engine gs://triton_sample_models/24.12/bert/1/model.plan
```

For each Triton upgrade, container version used to generate the model, and the model path in GCS `gs://triton_sample_models/24.11/` should be updated accordingly with the correct version.
For each Triton upgrade, the container version used to generate the model and the model path in GCS (`gs://triton_sample_models/24.12/`) should both be updated to match the new release.
2 changes: 1 addition & 1 deletion deploy/k8s-onprem/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ tags:
loadBalancing: true

image:
imageName: nvcr.io/nvidia/tritonserver:24.11-py3
imageName: nvcr.io/nvidia/tritonserver:24.12-py3
pullPolicy: IfNotPresent
modelRepositoryServer: < Replace with the IP Address of your file server >
modelRepositoryPath: /srv/models
Expand Down
2 changes: 1 addition & 1 deletion deploy/oci/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
replicaCount: 1

image:
imageName: nvcr.io/nvidia/tritonserver:24.11-py3
imageName: nvcr.io/nvidia/tritonserver:24.12-py3
pullPolicy: IfNotPresent
modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
numGpus: 1
Expand Down
Loading

0 comments on commit cdff4c9

Please sign in to comment.