Merge branch 'main' into r24.10

triton-inference-server · Jan 16, 2025 · d47a547
2 parents 13c10fe + f7fe649
commit d47a547
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 22 deletions.
diff --git a/build.py b/build.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -72,7 +72,7 @@
 
 DEFAULT_TRITON_VERSION_MAP = {
     "release_version": "2.54.0dev",
-    "triton_container_version": "24.01dev",
+    "triton_container_version": "25.01dev",
     "upstream_container_version": "24.12",
     "ort_version": "1.20.1",
     "ort_openvino_version": "2024.4.0",
@@ -1048,6 +1048,8 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
     # Install the windows- or linux-specific buildbase dependencies
     if target_platform() == "windows":
         df += """
+RUN python3 -m pip install build
+
 SHELL ["cmd", "/S", "/C"]
 """
     else:

diff --git a/qa/L0_implicit_state/models/growable_memory/config.pbtxt b/qa/L0_implicit_state/models/growable_memory/config.pbtxt
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -28,6 +28,8 @@ name: "growable_memory"
 backend: "implicit_state"
 max_batch_size: 0
 sequence_batching {
+  # Set large idle timeout to avoid inter-request timeouts for test consistency
+  max_sequence_idle_microseconds: 10000000
   control_input [
     {
       name: "START"

diff --git a/qa/L0_lifecycle/test.sh b/qa/L0_lifecycle/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -1576,7 +1576,7 @@ if [ `grep -c "Model 'custom_zero_1_float32' (version 1) has 1 in-flight inferen
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 rm -f $CLIENT_LOG
@@ -1614,7 +1614,7 @@ if [ `grep -c "Model 'custom_sequence_int32' (version 1) has 1 in-flight inferen
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 rm -f $CLIENT_LOG
@@ -1655,7 +1655,7 @@ if [ `grep -c "Model 'ensemble_zero_1_float32' (version 1) has 1 in-flight infer
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 LOG_IDX=$((LOG_IDX+1))
@@ -2128,7 +2128,7 @@ if [ $? -ne 0 ]; then
 fi
 set -e
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 LOG_IDX=$((LOG_IDX+1))

diff --git a/qa/L0_perf_analyzer_capi/test.sh b/qa/L0_perf_analyzer_capi/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -218,6 +218,7 @@ if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then
 fi
 
 $PERF_ANALYZER -v -m  simple_savedmodel_sequence_object -p 2000 -t5 --sync \
+-s ${STABILITY_THRESHOLD} \
 --input-data=$SEQ_JSONDATAFILE \
 --service-kind=triton_c_api --model-repository=$DATADIR \
 --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1
@@ -234,6 +235,7 @@ fi
 
 set +e
 $PERF_ANALYZER -v -m graphdef_sequence_float32 --shape INPUT:2 \
+-s ${STABILITY_THRESHOLD} \
 --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \
 --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 \
 --service-kind=triton_c_api --model-repository=$DATADIR \
@@ -250,21 +252,9 @@ if [ $(cat $CLIENT_LOG |  grep -P "The supplied shape .+ is incompatible with th
 fi
 set -e
 
-# Negative test for the async mode.
-set +e
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 -a \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $(cat $CLIENT_LOG | grep "not supported by triton_c_api service" | wc -l) -ne 1 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
 for SHARED_MEMORY_TYPE in system cuda; do
     $PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \
+    -s ${STABILITY_THRESHOLD} \
     --shared-memory=$SHARED_MEMORY_TYPE \
     --service-kind=triton_c_api --model-repository=$DATADIR \
     --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1