From 1c4ab1ace1ce4ee60ab448e6114cd524a347f551 Mon Sep 17 00:00:00 2001 From: Sindhu Somasundaram <56774226+sindhuvahinis@users.noreply.github.com> Date: Sun, 2 Feb 2025 20:08:20 -0800 Subject: [PATCH] [python] offload setting block size to neuron (#2705) --- .../setup/djl_python/properties_manager/vllm_rb_properties.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/engines/python/setup/djl_python/properties_manager/vllm_rb_properties.py b/engines/python/setup/djl_python/properties_manager/vllm_rb_properties.py index 539874123..4309467ae 100644 --- a/engines/python/setup/djl_python/properties_manager/vllm_rb_properties.py +++ b/engines/python/setup/djl_python/properties_manager/vllm_rb_properties.py @@ -187,9 +187,6 @@ def generate_vllm_engine_arg_dict(self, if self.max_rolling_batch_prefill_tokens is not None: vllm_engine_args[ 'max_num_batched_tokens'] = self.max_rolling_batch_prefill_tokens - if self.device == 'neuron': - vllm_engine_args['block_size'] = passthrough_vllm_engine_args.get( - "max_model_len") vllm_engine_args.update(passthrough_vllm_engine_args) return vllm_engine_args