From 00d24101f3b8061f1f2c469af5c020c31ec1cdd5 Mon Sep 17 00:00:00 2001
From: zhanghy-sketchzh <1750410339@qq.com>
Date: Wed, 14 Jun 2023 00:22:02 +0800
Subject: [PATCH] support multi gpus

---
 pilot/model/adapter.py | 4 +---
 pilot/model/loader.py  | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pilot/model/adapter.py b/pilot/model/adapter.py
index 7892e4b1b..f648ea650 100644
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -107,7 +107,7 @@ class GuanacoAdapter(BaseLLMAdaper):
     def loader(self, model_path: str, from_pretrained_kwargs: dict):
         tokenizer = LlamaTokenizer.from_pretrained(model_path)
         model = AutoModelForCausalLM.from_pretrained(
-            model_path, load_in_4bit=True, device_map={"": 0}, **from_pretrained_kwargs
+            model_path, load_in_4bit=True, **from_pretrained_kwargs
         )
         return model, tokenizer
 
@@ -126,7 +126,6 @@ class FalconAdapater(BaseLLMAdaper):
                 model_path,
                 load_in_4bit=True,  # quantize
                 quantization_config=bnb_config,
-                device_map={"": 0},
                 trust_remote_code=True,
                 **from_pretrained_kwagrs,
             )
@@ -134,7 +133,6 @@ class FalconAdapater(BaseLLMAdaper):
             model = AutoModelForCausalLM.from_pretrained(
                 model_path,
                 trust_remote_code=True,
-                device_map={"": 0},
                 **from_pretrained_kwagrs,
             )
         return model, tokenizer
diff --git a/pilot/model/loader.py b/pilot/model/loader.py
index 6fd6143ff..6acbc9234 100644
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -73,12 +73,12 @@ class ModelLoader(metaclass=Singleton):
         elif self.device == "cuda":
             kwargs = {"torch_dtype": torch.float16}
-            num_gpus = int(num_gpus)
+            num_gpus = torch.cuda.device_count()
 
             if num_gpus != 1:
                 kwargs["device_map"] = "auto"
-                if max_gpu_memory is None:
-                    kwargs["device_map"] = "sequential"
+                # if max_gpu_memory is None:
+                #     kwargs["device_map"] = "sequential"
                     available_gpu_memory = get_gpu_memory(num_gpus)
                     kwargs["max_memory"] = {
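For reference, here is a minimal standalone sketch of the loading behavior this patch moves toward: removing the hard-coded `device_map={"": 0}` (which pins every module to GPU 0) and letting `device_map="auto"` shard the model across all visible GPUs, with `max_memory` capping per-device usage. `MODEL_PATH` and the 0.85 headroom factor are illustrative assumptions, not values taken from this diff.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "your-org/your-model"  # hypothetical; substitute a real checkpoint

num_gpus = torch.cuda.device_count()
kwargs = {"torch_dtype": torch.float16}

if num_gpus > 1:
    # "auto" lets accelerate shard layers across all visible GPUs instead of
    # pinning the whole model to GPU 0 as device_map={"": 0} did.
    kwargs["device_map"] = "auto"
    # Leave headroom for activations; 85% of each device's total memory is an
    # assumed rule of thumb, not a value from this patch.
    kwargs["max_memory"] = {
        i: f"{int(torch.cuda.get_device_properties(i).total_memory * 0.85 / 1024**3)}GiB"
        for i in range(num_gpus)
    }

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kwargs)
```

With the pin removed, a `device_map` supplied through `from_pretrained_kwargs` (or the loader's `"auto"`) can actually take effect, which is what makes the single-GPU adapters usable in a multi-GPU deployment.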