Skip to content

Commit

Permalink
[python] offload setting block size to neuron (#2705)
Browse files Browse the repository at this point in the history
  • Loading branch information
sindhuvahinis authored Feb 3, 2025
1 parent 950dc7b commit 1c4ab1a
Showing 1 changed file with 0 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,6 @@ def generate_vllm_engine_arg_dict(self,
if self.max_rolling_batch_prefill_tokens is not None:
vllm_engine_args[
'max_num_batched_tokens'] = self.max_rolling_batch_prefill_tokens
if self.device == 'neuron':
vllm_engine_args['block_size'] = passthrough_vllm_engine_args.get(
"max_model_len")
vllm_engine_args.update(passthrough_vllm_engine_args)
return vllm_engine_args

Expand Down

0 comments on commit 1c4ab1a

Please sign in to comment.