From fc4a9a953bac8b7bd3d4fbfd1cd0c8cc97bd7734 Mon Sep 17 00:00:00 2001
From: csunny
Date: Fri, 28 Apr 2023 22:48:39 +0800
Subject: [PATCH] update

---
 pilot/model/__init__.py |  2 --
 pilot/server/sqlgpt.py  | 24 ++++++++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/pilot/model/__init__.py b/pilot/model/__init__.py
index 0c23b3d79..e69de29bb 100644
--- a/pilot/model/__init__.py
+++ b/pilot/model/__init__.py
@@ -1,2 +0,0 @@
-
-from model.loader import *
\ No newline at end of file
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index 65966006a..582775953 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,15 +1,23 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
 
-from pilot.model.loader import ModerLoader
-from fastchat.serve.inference import generate_stream
-from pilot.configs.model_config import *
+import torch
+from fastchat.serve.inference import generate_stream, compress_module
+
+BASE_MODE = "/home/magic/workspace/github/DB-GPT/models/vicuna-13b"
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 if __name__ == "__main__":
-    model_path = llm_model_config[LLM_MODEL]
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODE, use_fast=False)
+    model = AutoModelForCausalLM.from_pretrained(
+        BASE_MODE,
+        low_cpu_mem_usage=True,
+        torch_dtype=torch.float16,
+        device_map="auto",
+    )
 
-    ml = ModerLoader(model_path)
-    model, tokenizer = ml.loader(load_8bit=True)
-    print(model)
-    print(tokenizer)
\ No newline at end of file
+    print(device)
+    #compress_module(model, device)
+    print(model, tokenizer)
\ No newline at end of file
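
A minimal, self-contained sketch of the loading flow the new pilot/server/sqlgpt.py sets up, for readers who want to try it outside the repo. It assumes transformers and accelerate are installed and a local vicuna-13b checkout exists; the relative path, the BASE_MODEL spelling (the patch itself uses BASE_MODE), and the final log line are illustrative, not part of the patch. As in the patch, FastChat's compress_module stays commented out, and the device string is computed only for logging, since device_map="auto" handles weight placement.

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    # Hypothetical path; point this at your own vicuna-13b checkout.
    BASE_MODEL = "./models/vicuna-13b"

    if __name__ == "__main__":
        # Only used for logging here; device_map="auto" below does the placement.
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # use_fast=False: vicuna/llama checkpoints of this era shipped only a
        # slow (SentencePiece) tokenizer.
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)

        # low_cpu_mem_usage and device_map="auto" both require accelerate;
        # together they stream fp16 weights onto the available GPUs (or CPU)
        # without first materializing the full model in host memory.
        model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            low_cpu_mem_usage=True,
            torch_dtype=torch.float16,
            device_map="auto",
        )

        print(f"loaded {BASE_MODEL} on {device}")
        print(model, tokenizer)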