Mirror of https://github.com/csunny/DB-GPT.git, synced 2025-09-07 03:50:42 +00:00
fix load model gpu oom
@@ -7,6 +7,8 @@ from transformers import (
     AutoModelForCausalLM,
 )
 
+from fastchat.serve.compression import compress_module
+
 class ModerLoader:
 
     kwargs = {}
@@ -29,6 +31,9 @@ class ModerLoader:
         if debug:
             print(model)
 
+        if load_8bit:
+            compress_module(model, self.device)
+
         # if self.device == "cuda":
         #     model.to(self.device)
 
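The fix routes 8-bit loading through FastChat's compress_module(model, device) instead of copying the full-precision model onto the GPU with model.to(device): the linear layers are quantized and placed on the device one at a time, so the complete fp16 weights are never resident in GPU memory, which appears to be what caused the OOM the commit title refers to. Below is a minimal sketch of how the surrounding loader might look after this change; the loader() method name, constructor arguments, and the AutoTokenizer/from_pretrained calls are illustrative assumptions, not the repository's exact code.

# Sketch only: names and signatures below (loader(), model_path, the else
# branch) are illustrative assumptions, not the repository's implementation.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastchat.serve.compression import compress_module


class ModerLoader:

    kwargs = {}

    def __init__(self, model_path, device="cuda"):
        self.model_path = model_path
        self.device = device

    def loader(self, load_8bit=False, debug=False):
        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
        # from_pretrained materializes the weights on the CPU; nothing is
        # placed on the GPU until the branches below.
        model = AutoModelForCausalLM.from_pretrained(
            self.model_path, torch_dtype=torch.float16, **self.kwargs
        )

        if debug:
            print(model)

        if load_8bit:
            # Quantize each linear layer and move it to self.device as it is
            # converted, so the full fp16 model never sits in GPU memory at once.
            compress_module(model, self.device)
        else:
            model.to(self.device)

        return model, tokenizer

Calling loader(load_8bit=True) then avoids the full-precision GPU copy that the commented-out model.to(self.device) call in the diff used to perform.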