triton-inference-server · rmccorm4 · Jan 13, 2025 · Jan 13, 2025 · Jan 13, 2025
diff --git a/qa/L0_pinned_memory/test.sh b/qa/L0_pinned_memory/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,6 +38,9 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then
     REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
 fi
 
+# Use "--request-count" throughout the test to PA stability criteria and
+# reduce flaky failures from PA unstable measurements.
+REQUEST_COUNT=10
 CLIENT=../clients/perf_client
 # Only use libtorch as it accepts GPU I/O and it can handle variable shape
 BACKENDS=${BACKENDS:="libtorch"}
@@ -91,7 +94,7 @@ for BACKEND in $BACKENDS; do
 
     # Sanity check that the server allocates pinned memory for large size
     set +e
-    $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:16777216
+    $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:16777216 --request-count ${REQUEST_COUNT}
     if (( $? != 0 )); then
         RET=1
     fi
@@ -128,6 +131,7 @@ for BACKEND in $BACKENDS; do
     for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
         $CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
                 --shape INPUT0:${TENSOR_SIZE} \
+                --request-count ${REQUEST_COUNT} \
                 >> ${BACKEND}.${TENSOR_SIZE}.pinned.log 2>&1
         if (( $? != 0 )); then
             RET=1
@@ -150,7 +154,7 @@ for BACKEND in $BACKENDS; do
 
     # Sanity check that the server allocates non-pinned memory
     set +e
-    $CLIENT  -m${ENSEMBLE_NAME} --shape INPUT0:1
+    $CLIENT  -m${ENSEMBLE_NAME} --shape INPUT0:1 --request-count ${REQUEST_COUNT}
     if (( $? != 0 )); then
         RET=1
     fi
@@ -180,6 +184,7 @@ for BACKEND in $BACKENDS; do
     for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
         $CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
                 --shape INPUT0:${TENSOR_SIZE} \
+                --request-count ${REQUEST_COUNT} \
                 >> ${BACKEND}.${TENSOR_SIZE}.nonpinned.log 2>&1
         if (( $? != 0 )); then
             RET=1