Support multiple GPUs

2025-09-28 21:12:13 +00:00 · 2023-06-14 00:22:02 +08:00
parent 44bb5135cd
commit 00d24101f3
2 changed files with 4 additions and 6 deletions
--- a/pilot/model/adapter.py
+++ b/pilot/model/adapter.py
@@ -107,7 +107,7 @@ class GuanacoAdapter(BaseLLMAdaper):
    def loader(self, model_path: str, from_pretrained_kwargs: dict):
        tokenizer = LlamaTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(
-            model_path, load_in_4bit=True, device_map={"": 0}, **from_pretrained_kwargs
+            model_path, load_in_4bit=True, **from_pretrained_kwargs
        )
        return model, tokenizer
@@ -126,7 +126,6 @@ class FalconAdapater(BaseLLMAdaper):
                model_path,
                load_in_4bit=True,  # quantize
                quantization_config=bnb_config,
-                device_map={"": 0},
                trust_remote_code=True,
                **from_pretrained_kwagrs,
            )
@@ -134,7 +133,6 @@ class FalconAdapater(BaseLLMAdaper):
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                trust_remote_code=True,
-                device_map={"": 0},
                **from_pretrained_kwagrs,
            )
        return model, tokenizer
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -73,12 +73,12 @@ class ModelLoader(metaclass=Singleton):
        elif self.device == "cuda":
            kwargs = {"torch_dtype": torch.float16}
-            num_gpus = int(num_gpus)
+            num_gpus = torch.cuda.device_count()
            if num_gpus != 1:
                kwargs["device_map"] = "auto"
-                if max_gpu_memory is None:
+                # if max_gpu_memory is None:
-                    kwargs["device_map"] = "sequential"
+                #     kwargs["device_map"] = "sequential"
                available_gpu_memory = get_gpu_memory(num_gpus)
                kwargs["max_memory"] = {