From a34a3059aa86fa302d4bf256fe3f627193716146 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Mon, 13 Nov 2023 20:43:13 +0000
Subject: [PATCH] Added support for multiple GPUs

---
 examples/large_models/vllm/mistral/Readme.md         | 2 +-
 examples/large_models/vllm/mistral/custom_handler.py | 3 ++-
 examples/large_models/vllm/mistral/model-config.yaml | 7 ++++++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/examples/large_models/vllm/mistral/Readme.md b/examples/large_models/vllm/mistral/Readme.md
index ba15867758..b88c91efe9 100644
--- a/examples/large_models/vllm/mistral/Readme.md
+++ b/examples/large_models/vllm/mistral/Readme.md
@@ -18,7 +18,7 @@ huggingface-cli login --token $HUGGINGFACE_TOKEN
 ```
 
 ```bash
-python ../Download_model.py --model_path model --model_name mistralai/Mistral-7B-v0.1
+python ../../Huggingface_accelerate/Download_model.py --model_path model --model_name mistralai/Mistral-7B-v0.1
 ```
 Model will be saved in the following path, `mistralai/Mistral-7B-v0.1`.
 
diff --git a/examples/large_models/vllm/mistral/custom_handler.py b/examples/large_models/vllm/mistral/custom_handler.py
index ae4c29ab57..cedca0c5bb 100644
--- a/examples/large_models/vllm/mistral/custom_handler.py
+++ b/examples/large_models/vllm/mistral/custom_handler.py
@@ -34,10 +34,11 @@ def initialize(self, ctx: Context):
         self.max_new_tokens = int(ctx.model_yaml_config["handler"]["max_new_tokens"])
         model_name = ctx.model_yaml_config["handler"]["model_name"]
         model_path = ctx.model_yaml_config["handler"]["model_path"]
+        tp_size = ctx.model_yaml_config["handler"]["tensor_parallel_size"]
         seed = int(ctx.model_yaml_config["handler"]["manual_seed"])
         torch.manual_seed(seed)
 
-        self.model = LLM(model=model_path)
+        self.model = LLM(model=model_path, tensor_parallel_size=tp_size)
 
         logger.info("Model %s loaded successfully", ctx.model_name)
         self.initialized = True
diff --git a/examples/large_models/vllm/mistral/model-config.yaml b/examples/large_models/vllm/mistral/model-config.yaml
index c4282cac71..dbd73251b5 100644
--- a/examples/large_models/vllm/mistral/model-config.yaml
+++ b/examples/large_models/vllm/mistral/model-config.yaml
@@ -4,10 +4,15 @@ maxWorkers: 1
 maxBatchDelay: 100
 responseTimeout: 1200
 deviceType: "gpu"
+# example of user specified GPU deviceIds
+deviceIds: [0,1,2,3] # setting CUDA_VISIBLE_DEVICES
+
+torchrun:
+    nproc-per-node: 4
 
 handler:
     model_name: "mistralai/Mistral-7B-v0.1"
     model_path: "/home/ubuntu/serve/examples/large_models/vllm/mistral/model/models--mistralai--Mistral-7B-v0.1/snapshots/5e9c98b96d071dce59368012254c55b0ec6f8658"
     max_new_tokens: 100
     manual_seed: 40
-    fast_kernels: True
+    tensor_parallel_size: 4
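
---
For reviewers who want to sanity-check the tensor-parallel path outside TorchServe, below is a minimal sketch (not part of the patch) of the load the updated handler performs. It assumes vLLM is installed with four visible GPUs; `MODEL_PATH` is a hypothetical placeholder for the snapshot directory that `Download_model.py` produces, and the sampling value mirrors `max_new_tokens: 100` from model-config.yaml.

```python
# Minimal standalone sketch of the handler's new tensor-parallel load.
# Assumptions: vLLM is installed, 4 GPUs are visible, and MODEL_PATH is a
# placeholder for the downloaded snapshot directory.
from vllm import LLM, SamplingParams

MODEL_PATH = "model/models--mistralai--Mistral-7B-v0.1/snapshots/<snapshot-hash>"  # placeholder

# tensor_parallel_size=4 shards the model weights across 4 GPUs, mirroring
# tensor_parallel_size: 4 in model-config.yaml.
llm = LLM(model=MODEL_PATH, tensor_parallel_size=4)

# max_tokens mirrors the handler's max_new_tokens setting.
params = SamplingParams(max_tokens=100)
outputs = llm.generate(["What is the capital of France?"], params)
print(outputs[0].outputs[0].text)
```

Note that `deviceIds` only controls which GPUs TorchServe exposes through CUDA_VISIBLE_DEVICES (per the comment in model-config.yaml), so `tensor_parallel_size` should not exceed the number of exposed devices.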