Mirror of https://github.com/csunny/DB-GPT.git, synced 2025-09-07 03:50:42 +00:00
fix load model gpu oom
@@ -7,6 +7,8 @@ from transformers import (
     AutoModelForCausalLM,
 )
 
+from fastchat.serve.compression import compress_module
+
 class ModerLoader:
 
     kwargs = {}
@@ -29,6 +31,9 @@ class ModerLoader:
         if debug:
             print(model)
 
+        if load_8bit:
+            compress_module(model, self.device)
+
         # if self.device == "cuda":
         #     model.to(self.device)
 
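The fix routes 8-bit loading through FastChat's compress_module(model, device) instead of copying the full-precision model onto the GPU with model.to(device): the linear layers are quantized and placed on the device one at a time, so the complete fp16 weights are never resident in GPU memory, which appears to be what caused the OOM the commit title refers to. Below is a minimal sketch of how the surrounding loader might look after this change; the loader() method name, constructor arguments, and the AutoTokenizer/from_pretrained calls are illustrative assumptions, not the repository's exact code.

# Sketch only: names and signatures below (loader(), model_path, the else
# branch) are illustrative assumptions, not the repository's implementation.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastchat.serve.compression import compress_module


class ModerLoader:

    kwargs = {}

    def __init__(self, model_path, device="cuda"):
        self.model_path = model_path
        self.device = device

    def loader(self, load_8bit=False, debug=False):
        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
        # from_pretrained materializes the weights on the CPU; nothing is
        # placed on the GPU until the branches below.
        model = AutoModelForCausalLM.from_pretrained(
            self.model_path, torch_dtype=torch.float16, **self.kwargs
        )

        if debug:
            print(model)

        if load_8bit:
            # Quantize each linear layer and move it to self.device as it is
            # converted, so the full fp16 model never sits in GPU memory at once.
            compress_module(model, self.device)
        else:
            model.to(self.device)

        return model, tokenizer

Calling loader(load_8bit=True) then avoids the full-precision GPU copy that the commented-out model.to(self.device) call in the diff used to perform.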