From 32235f239529b65838c301776a4128f7b89a66c3 Mon Sep 17 00:00:00 2001 From: Matthias Reso <13337103+mreso@users.noreply.github.com> Date: Mon, 7 Oct 2024 23:01:56 +0000 Subject: [PATCH] Update vllm to 0.6.2 --- examples/large_models/vllm/requirements.txt | 2 +- ts/torch_handler/vllm_handler.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/large_models/vllm/requirements.txt b/examples/large_models/vllm/requirements.txt index 6d0209c820..a3f418ffdd 100644 --- a/examples/large_models/vllm/requirements.txt +++ b/examples/large_models/vllm/requirements.txt @@ -1 +1 @@ -vllm==0.6.1.post2 +vllm==0.6.2 diff --git a/ts/torch_handler/vllm_handler.py b/ts/torch_handler/vllm_handler.py index 910a9461cc..31d84eb758 100644 --- a/ts/torch_handler/vllm_handler.py +++ b/ts/torch_handler/vllm_handler.py @@ -13,7 +13,7 @@ ) from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion -from vllm.entrypoints.openai.serving_engine import LoRAModulePath +from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath from ts.handler_utils.utils import send_intermediate_predict_response from ts.service import PredictionException @@ -54,6 +54,11 @@ def initialize(self, ctx): else: served_model_names = [vllm_engine_config.model] + base_model_paths = [ + BaseModelPath(name=name, model_path=vllm_engine_config.model) + for name in served_model_names + ] + chat_template = ctx.model_yaml_config.get("handler", {}).get( "chat_template", None ) @@ -64,7 +69,7 @@ def initialize(self, ctx): self.completion_service = OpenAIServingCompletion( self.vllm_engine, model_config, - served_model_names, + base_model_paths, lora_modules=lora_modules, prompt_adapters=None, request_logger=None, @@ -73,7 +78,7 @@ def initialize(self, ctx): self.chat_completion_service = OpenAIServingChat( self.vllm_engine, model_config, - served_model_names, + base_model_paths, "assistant", lora_modules=lora_modules, prompt_adapters=None,