From 00d24101f3b8061f1f2c469af5c020c31ec1cdd5 Mon Sep 17 00:00:00 2001
From: zhanghy-sketchzh <1750410339@qq.com>
Date: Wed, 14 Jun 2023 00:22:02 +0800
Subject: [PATCH] support multi gpus

---
 pilot/model/adapter.py | 4 +---
 pilot/model/loader.py  | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index 7892e4b1b..f648ea650 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -107,7 +107,7 @@ class GuanacoAdapter(BaseLLMAdaper):
     def loader(self, model_path: str, from_pretrained_kwargs: dict):
         tokenizer = LlamaTokenizer.from_pretrained(model_path)
         model = AutoModelForCausalLM.from_pretrained(
-            model_path, load_in_4bit=True, device_map={"": 0}, **from_pretrained_kwargs
+            model_path, load_in_4bit=True, **from_pretrained_kwargs
         )
         return model, tokenizer
 
@@ -126,7 +126,6 @@ class FalconAdapater(BaseLLMAdaper):
                 model_path,
                 load_in_4bit=True,  # quantize
                 quantization_config=bnb_config,
-                device_map={"": 0},
                 trust_remote_code=True,
                 **from_pretrained_kwagrs,
             )
@@ -134,7 +133,6 @@ class FalconAdapater(BaseLLMAdaper):
             model = AutoModelForCausalLM.from_pretrained(
                 model_path,
                 trust_remote_code=True,
-                device_map={"": 0},
                 **from_pretrained_kwagrs,
             )
         return model, tokenizer
diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 6fd6143ff..6acbc9234 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -73,12 +73,12 @@ class ModelLoader(metaclass=Singleton):
         elif self.device == "cuda":
             kwargs = {"torch_dtype": torch.float16}
-            num_gpus = int(num_gpus)
+            num_gpus = torch.cuda.device_count()
 
             if num_gpus != 1:
                 kwargs["device_map"] = "auto"
-                if max_gpu_memory is None:
-                    kwargs["device_map"] = "sequential"
+                # if max_gpu_memory is None:
+                #     kwargs["device_map"] = "sequential"
                     available_gpu_memory = get_gpu_memory(num_gpus)
                     kwargs["max_memory"] = {
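For reference, here is a minimal standalone sketch of the loading behavior this patch moves toward: removing the hard-coded `device_map={"": 0}` (which pins every module to GPU 0) and letting `device_map="auto"` shard the model across all visible GPUs, with `max_memory` capping per-device usage. `MODEL_PATH` and the 0.85 headroom factor are illustrative assumptions, not values taken from this diff.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "your-org/your-model"  # hypothetical; substitute a real checkpoint

num_gpus = torch.cuda.device_count()
kwargs = {"torch_dtype": torch.float16}

if num_gpus > 1:
    # "auto" lets accelerate shard layers across all visible GPUs instead of
    # pinning the whole model to GPU 0 as device_map={"": 0} did.
    kwargs["device_map"] = "auto"
    # Leave headroom for activations; 85% of each device's total memory is an
    # assumed rule of thumb, not a value from this patch.
    kwargs["max_memory"] = {
        i: f"{int(torch.cuda.get_device_properties(i).total_memory * 0.85 / 1024**3)}GiB"
        for i in range(num_gpus)
    }

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kwargs)
```

With the pin removed, a `device_map` supplied through `from_pretrained_kwargs` (or the loader's `"auto"`) can actually take effect, which is what makes the single-GPU adapters usable in a multi-GPU deployment.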