feat: Modify config for quantization and doc

Author: FangYin Cheng
Date:   2023-08-02 19:29:25 +08:00
Parent: d8a4b776d5
Commit: bceb609cf6
6 changed files with 51 additions and 34 deletions


@@ -39,7 +39,11 @@ class ModelWorker:
print(f"Loading {model_name} LLM ModelServer in {device}! Please Wait......")
self.ml = ModelLoader(model_path=model_path, model_name=self.model_name)
self.model, self.tokenizer = self.ml.loader(
num_gpus, load_8bit=ISLOAD_8BIT, debug=ISDEBUG
num_gpus,
load_8bit=CFG.IS_LOAD_8BIT,
load_4bit=CFG.IS_LOAD_4BIT,
debug=ISDEBUG,
max_gpu_memory=CFG.MAX_GPU_MEMORY,
)
if not isinstance(self.model, str):
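
For context, the new CFG flags control 8-bit/4-bit quantization and a per-GPU memory cap when the model is loaded. The ModelLoader internals are not part of this diff; the following is only a minimal sketch of how such flags are commonly honored with Hugging Face transformers and bitsandbytes, with the function name and structure being illustrative assumptions rather than the repository's actual code.

    # Illustrative sketch only -- not the repository's ModelLoader implementation.
    # Assumes transformers and bitsandbytes are installed.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    def load_quantized(model_path, load_8bit=False, load_4bit=False, max_gpu_memory=None):
        """Load a causal LM, optionally in 8-bit or 4-bit precision."""
        quant_config = None
        if load_4bit:
            quant_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
            )
        elif load_8bit:
            quant_config = BitsAndBytesConfig(load_in_8bit=True)

        # max_gpu_memory (e.g. "16GiB") caps usage on each visible GPU when sharding.
        max_memory = None
        if max_gpu_memory:
            max_memory = {i: max_gpu_memory for i in range(torch.cuda.device_count())}

        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=quant_config,
            device_map="auto",
            max_memory=max_memory,
        )
        return model, tokenizer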