From a34a3059aa86fa302d4bf256fe3f627193716146 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Mon, 13 Nov 2023 20:43:13 +0000
Subject: [PATCH] Added support for multiple GPUs

---
 examples/large_models/vllm/mistral/Readme.md         | 2 +-
 examples/large_models/vllm/mistral/custom_handler.py | 3 ++-
 examples/large_models/vllm/mistral/model-config.yaml | 7 ++++++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/examples/large_models/vllm/mistral/Readme.md b/examples/large_models/vllm/mistral/Readme.md
index ba15867758..b88c91efe9 100644
--- a/examples/large_models/vllm/mistral/Readme.md
+++ b/examples/large_models/vllm/mistral/Readme.md
@@ -18,7 +18,7 @@ huggingface-cli login --token $HUGGINGFACE_TOKEN
 ```
 
 ```bash
-python ../Download_model.py --model_path model --model_name mistralai/Mistral-7B-v0.1
+python ../../Huggingface_accelerate/Download_model.py --model_path model --model_name mistralai/Mistral-7B-v0.1
 ```
 Model will be saved in the following path, `mistralai/Mistral-7B-v0.1`.
 
diff --git a/examples/large_models/vllm/mistral/custom_handler.py b/examples/large_models/vllm/mistral/custom_handler.py
index ae4c29ab57..cedca0c5bb 100644
--- a/examples/large_models/vllm/mistral/custom_handler.py
+++ b/examples/large_models/vllm/mistral/custom_handler.py
@@ -34,10 +34,11 @@ def initialize(self, ctx: Context):
         self.max_new_tokens = int(ctx.model_yaml_config["handler"]["max_new_tokens"])
         model_name = ctx.model_yaml_config["handler"]["model_name"]
         model_path = ctx.model_yaml_config["handler"]["model_path"]
+        tp_size = ctx.model_yaml_config["handler"]["tensor_parallel_size"]
         seed = int(ctx.model_yaml_config["handler"]["manual_seed"])
         torch.manual_seed(seed)
 
-        self.model = LLM(model=model_path)
+        self.model = LLM(model=model_path, tensor_parallel_size=tp_size)
 
         logger.info("Model %s loaded successfully", ctx.model_name)
         self.initialized = True
diff --git a/examples/large_models/vllm/mistral/model-config.yaml b/examples/large_models/vllm/mistral/model-config.yaml
index c4282cac71..dbd73251b5 100644
--- a/examples/large_models/vllm/mistral/model-config.yaml
+++ b/examples/large_models/vllm/mistral/model-config.yaml
@@ -4,10 +4,15 @@ maxWorkers: 1
 maxBatchDelay: 100
 responseTimeout: 1200
 deviceType: "gpu"
+# example of user specified GPU deviceIds
+deviceIds: [0,1,2,3] # setting CUDA_VISIBLE_DEVICES
+
+torchrun:
+    nproc-per-node: 4
 
 handler:
     model_name: "mistralai/Mistral-7B-v0.1"
     model_path: "/home/ubuntu/serve/examples/large_models/vllm/mistral/model/models--mistralai--Mistral-7B-v0.1/snapshots/5e9c98b96d071dce59368012254c55b0ec6f8658"
     max_new_tokens: 100
     manual_seed: 40
-    fast_kernels: True
+    tensor_parallel_size: 4
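
---
For reviewers who want to sanity-check the tensor-parallel path outside TorchServe, below is a minimal sketch (not part of the patch) of the load the updated handler performs. It assumes vLLM is installed with four visible GPUs; `MODEL_PATH` is a hypothetical placeholder for the snapshot directory that `Download_model.py` produces, and the sampling value mirrors `max_new_tokens: 100` from model-config.yaml.

```python
# Minimal standalone sketch of the handler's new tensor-parallel load.
# Assumptions: vLLM is installed, 4 GPUs are visible, and MODEL_PATH is a
# placeholder for the downloaded snapshot directory.
from vllm import LLM, SamplingParams

MODEL_PATH = "model/models--mistralai--Mistral-7B-v0.1/snapshots/<snapshot-hash>"  # placeholder

# tensor_parallel_size=4 shards the model weights across 4 GPUs, mirroring
# tensor_parallel_size: 4 in model-config.yaml.
llm = LLM(model=MODEL_PATH, tensor_parallel_size=4)

# max_tokens mirrors the handler's max_new_tokens setting.
params = SamplingParams(max_tokens=100)
outputs = llm.generate(["What is the capital of France?"], params)
print(outputs[0].outputs[0].text)
```

Note that `deviceIds` only controls which GPUs TorchServe exposes through CUDA_VISIBLE_DEVICES (per the comment in model-config.yaml), so `tensor_parallel_size` should not exceed the number of exposed devices.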