From 7044e920ee65d3ff655b424f580d234e0a7ba2c2 Mon Sep 17 00:00:00 2001
From: Somasundaram
Date: Fri, 31 Jan 2025 15:38:51 -0800
Subject: [PATCH] [ci] convert nxdi tests to aot compiled

---
 tests/integration/tests.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index f020af0ae..fcc58239b 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -902,15 +902,21 @@ def test_llama_speculative_compiled(self):
                 "transformers_neuronx_rolling_batch llama-speculative-compiled-rb"
                 .split())
 
-    def test_llama_vllm_nxdi(self):
+    def test_llama_8b_vllm_nxdi(self):
         # For neuron, handler is names as transformers_neuronx, but this handler supports, TNX, NXDI and optimum.
         with Runner('pytorch-inf2', 'llama-3-1-8b-instruct-vllm-nxdi') as r:
             prepare.build_transformers_neuronx_handler_model(
                 "llama-3-1-8b-instruct-vllm-nxdi")
-            r.launch(container='pytorch-inf2-4')
+            r.launch(
+                container="pytorch-inf2-4",
+                cmd=
+                "partition --model-dir /opt/ml/input/data/training --save-mp-checkpoint-path /opt/ml/input/data/training/aot --skip-copy"
+            )
+            r.launch(container="pytorch-inf2-4",
+                     cmd="serve -m test=file:/opt/ml/model/test/aot")
             client.run(
                 "transformers_neuronx_rolling_batch llama-3-1-8b-instruct-vllm-nxdi"
-            )
+                .split())
 
     def test_llama_vllm_nxdi_aot(self):
         with Runner('pytorch-inf2',
@@ -926,7 +932,7 @@ def test_llama_vllm_nxdi_aot(self):
                      cmd="serve -m test=file:/opt/ml/model/test/aot")
             client.run(
                 "transformers_neuronx_rolling_batch llama-3-2-1b-instruct-vllm-nxdi-aot"
-            )
+                .split())
 
 
 @pytest.mark.correctness