init

2025-09-05 11:01:09 +00:00 · 2023-04-28 21:59:18 +08:00
parent d308f0f5d8
commit 38f57e157c
7 changed files with 92 additions and 1 deletions
--- a/pilot/model/loader.py
+++ b/pilot/model/loader.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import torch
+from utils import get_gpu_memory
+from fastchat.serve.inference import compress_module
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+)
+
+class ModerLoader:
+
+    kwargs = {}
+
+    def __init__(self, 
+                 model_path) -> None:
+        
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model_path = model_path 
+        self.kwargs = {
+            "torch_dtype": torch.float16,
+            "device_map": "auto",
+            "max_memory": get_gpu_memory(),
+        }
+
+    def loader(self, load_8bit=False, debug=False):
+       
+        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
+        model = AutoModelForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **self.kwargs)
+
+        if load_8bit:
+            compress_module(model, self.device)
+
+        if debug:
+            print(model)
+
+        return model, tokenizer
+