fix load model gpu oom
parent ca29dacc37
commit eca14bc038
@@ -7,6 +7,8 @@ from transformers import (
     AutoModelForCausalLM,
 )
 
+from fastchat.serve.compression import compress_module
+
 class ModerLoader:
 
     kwargs = {}
@@ -29,6 +31,9 @@ class ModerLoader:
         if debug:
             print(model)
 
+        if load_8bit:
+            compress_module(model, self.device)
+
         # if self.device == "cuda":
         #     model.to(self.device)
 
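The fix avoids the GPU out-of-memory error by compressing the model weights to 8 bits with fastchat's compress_module instead of keeping full-precision weights resident on the GPU. Below is a minimal sketch of how the patched load path could fit together; the diff only shows two hunks, so the constructor and the loader() signature (model_path, load_8bit, debug) are assumptions for illustration, not the repository's actual code.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from fastchat.serve.compression import compress_module


    class ModerLoader:
        # Hypothetical reconstruction around the diff context; only the
        # load_8bit branch and the compress_module call come from the commit.
        kwargs = {}

        def __init__(self, model_path, device="cuda"):
            self.model_path = model_path
            self.device = device
            self.kwargs = {"torch_dtype": torch.float16}

        def loader(self, load_8bit=False, debug=False):
            tokenizer = AutoTokenizer.from_pretrained(self.model_path)
            model = AutoModelForCausalLM.from_pretrained(self.model_path, **self.kwargs)

            if debug:
                print(model)

            if load_8bit:
                # Compress the model's weights to 8 bits in place, cutting
                # GPU memory use to avoid the OOM this commit targets.
                compress_module(model, self.device)

            return model, tokenizer

With load_8bit=True the compressed weights take roughly half the memory of the float16 baseline, which is what resolves the load-time OOM.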
|