mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-10 12:42:34 +00:00
commit
713bdb77db
@ -107,7 +107,7 @@ class GuanacoAdapter(BaseLLMAdaper):
|
||||
def loader(self, model_path: str, from_pretrained_kwargs: dict):
|
||||
tokenizer = LlamaTokenizer.from_pretrained(model_path)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_path, load_in_4bit=True, device_map={"": 0}, **from_pretrained_kwargs
|
||||
model_path, load_in_4bit=True, **from_pretrained_kwargs
|
||||
)
|
||||
return model, tokenizer
|
||||
|
||||
@ -126,7 +126,6 @@ class FalconAdapater(BaseLLMAdaper):
|
||||
model_path,
|
||||
load_in_4bit=True, # quantize
|
||||
quantization_config=bnb_config,
|
||||
device_map={"": 0},
|
||||
trust_remote_code=True,
|
||||
**from_pretrained_kwagrs,
|
||||
)
|
||||
@ -134,7 +133,6 @@ class FalconAdapater(BaseLLMAdaper):
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_path,
|
||||
trust_remote_code=True,
|
||||
device_map={"": 0},
|
||||
**from_pretrained_kwagrs,
|
||||
)
|
||||
return model, tokenizer
|
||||
|
@ -73,12 +73,12 @@ class ModelLoader(metaclass=Singleton):
|
||||
|
||||
elif self.device == "cuda":
|
||||
kwargs = {"torch_dtype": torch.float16}
|
||||
num_gpus = int(num_gpus)
|
||||
num_gpus = torch.cuda.device_count()
|
||||
|
||||
if num_gpus != 1:
|
||||
kwargs["device_map"] = "auto"
|
||||
if max_gpu_memory is None:
|
||||
kwargs["device_map"] = "sequential"
|
||||
# if max_gpu_memory is None:
|
||||
# kwargs["device_map"] = "sequential"
|
||||
|
||||
available_gpu_memory = get_gpu_memory(num_gpus)
|
||||
kwargs["max_memory"] = {
|
||||
|
Loading…
Reference in New Issue
Block a user