Added support for multiple GPUs
agunapal committed Nov 13, 2023
1 parent 5fedf7a commit a34a305
Showing 3 changed files with 9 additions and 3 deletions.
2 changes: 1 addition & 1 deletion examples/large_models/vllm/mistral/Readme.md
@@ -18,7 +18,7 @@ huggingface-cli login --token $HUGGINGFACE_TOKEN
```

```bash
python ../Download_model.py --model_path model --model_name mistralai/Mistral-7B-v0.1
python ../../Huggingface_accelerate/Download_model.py --model_path model --model_name mistralai/Mistral-7B-v0.1
```
The model will be saved under the following path: `model/models--mistralai--Mistral-7B-v0.1/`.

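`Download_model.py` itself is not part of this diff; assuming it wraps `huggingface_hub`'s `snapshot_download` (an assumption based on the command above, not something shown in this commit), the download step is roughly equivalent to:

```python
# Hypothetical stand-in for the Download_model.py step above.
# Assumes `huggingface-cli login` has already stored a token with access to the model.
from huggingface_hub import snapshot_download

snapshot_path = snapshot_download(
    repo_id="mistralai/Mistral-7B-v0.1",
    cache_dir="model",  # mirrors --model_path model
)
print(f"Model snapshot stored at: {snapshot_path}")
```

The returned snapshot directory is what `model_path` in `model-config.yaml` points to.
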
3 changes: 2 additions & 1 deletion examples/large_models/vllm/mistral/custom_handler.py
@@ -34,10 +34,11 @@ def initialize(self, ctx: Context):
        self.max_new_tokens = int(ctx.model_yaml_config["handler"]["max_new_tokens"])
        model_name = ctx.model_yaml_config["handler"]["model_name"]
        model_path = ctx.model_yaml_config["handler"]["model_path"]
        tp_size = ctx.model_yaml_config["handler"]["tensor_parallel_size"]
        seed = int(ctx.model_yaml_config["handler"]["manual_seed"])
        torch.manual_seed(seed)

        self.model = LLM(model=model_path)
        self.model = LLM(model=model_path, tensor_parallel_size=tp_size)

        logger.info("Model %s loaded successfully", ctx.model_name)
        self.initialized = True
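
For reference, the effect of `tensor_parallel_size` can be tried outside TorchServe with a minimal standalone vLLM sketch (the model name and 4-GPU value mirror this example; the prompt is illustrative):

```python
# Standalone sketch: shard Mistral-7B across 4 GPUs with vLLM tensor parallelism.
from vllm import LLM, SamplingParams

# tensor_parallel_size=4 matches the value added to model-config.yaml in this commit.
llm = LLM(model="mistralai/Mistral-7B-v0.1", tensor_parallel_size=4)

# max_tokens mirrors max_new_tokens: 100 in the handler config.
sampling_params = SamplingParams(max_tokens=100)
outputs = llm.generate(["What is the capital of France?"], sampling_params)
print(outputs[0].outputs[0].text)
```
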
7 changes: 6 additions & 1 deletion examples/large_models/vllm/mistral/model-config.yaml
@@ -4,10 +4,15 @@ maxWorkers: 1
maxBatchDelay: 100
responseTimeout: 1200
deviceType: "gpu"
# example of user-specified GPU deviceIds
deviceIds: [0,1,2,3] # setting CUDA_VISIBLE_DEVICES

torchrun:
    nproc-per-node: 4

handler:
    model_name: "mistralai/Mistral-7B-v0.1"
    model_path: "/home/ubuntu/serve/examples/large_models/vllm/mistral/model/models--mistralai--Mistral-7B-v0.1/snapshots/5e9c98b96d071dce59368012254c55b0ec6f8658"
    max_new_tokens: 100
    manual_seed: 40
    fast_kernels: True
    tensor_parallel_size: 4
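
Since the config comments above describe `deviceIds` as setting `CUDA_VISIBLE_DEVICES` and the handler consumes `tensor_parallel_size`, a quick pre-flight check on the host can confirm enough GPUs are available before registering the model. This is an illustrative sketch, not part of the example:

```python
# Illustrative pre-flight check: are there enough GPUs for the configured
# tensor_parallel_size? (TorchServe itself applies deviceIds per worker.)
import torch
import yaml

with open("model-config.yaml") as f:  # assumes you run this next to the config file
    config = yaml.safe_load(f)

tp_size = int(config["handler"]["tensor_parallel_size"])
visible = torch.cuda.device_count()
if visible < tp_size:
    raise RuntimeError(
        f"tensor_parallel_size={tp_size} but only {visible} GPU(s) are visible"
    )
print(f"OK: {visible} GPU(s) visible for tensor_parallel_size={tp_size}")
```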
