From 38f57e157c97915f68e7b35a43cbf2f6083f99c7 Mon Sep 17 00:00:00 2001
From: csunny
Date: Fri, 28 Apr 2023 21:59:18 +0800
Subject: [PATCH] init

---
 configs/model_config.py         | 18 ++++++++++++++++++
 pilot/connections/mysql_conn.py |  2 ++
 pilot/connections/pg_conn.py    |  2 ++
 pilot/model/loader.py           | 39 +++++++++++++++++++++++++++++++++++++++
 pilot/model/vicuna_llm.py       |  9 +++++++++
 pilot/server/sqlgpt.py          |  1 -
 pilot/utils.py                  | 22 ++++++++++++++++++++++
 7 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 configs/model_config.py
 create mode 100644 pilot/connections/mysql_conn.py
 create mode 100644 pilot/connections/pg_conn.py
 create mode 100644 pilot/model/loader.py
 create mode 100644 pilot/model/vicuna_llm.py
 create mode 100644 pilot/utils.py

diff --git a/configs/model_config.py b/configs/model_config.py
new file mode 100644
index 000000000..b843a900c
--- /dev/null
+++ b/configs/model_config.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import torch
+import os
+
+
+root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+model_path = os.path.join(root_path, "models")
+vector_storepath = os.path.join(root_path, "vector_store")
+
+
+llm_model_config = {
+    "flan-t5-base": os.path.join(model_path, "flan-t5-base"),
+    "vicuna-13b": os.path.join(model_path, "vicuna-13b")
+}
+
+LLM_MODEL = "vicuna-13b"
\ No newline at end of file
diff --git a/pilot/connections/mysql_conn.py b/pilot/connections/mysql_conn.py
new file mode 100644
index 000000000..1f776fc63
--- /dev/null
+++ b/pilot/connections/mysql_conn.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
\ No newline at end of file
diff --git a/pilot/connections/pg_conn.py b/pilot/connections/pg_conn.py
new file mode 100644
index 000000000..1f776fc63
--- /dev/null
+++ b/pilot/connections/pg_conn.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
\ No newline at end of file
diff --git a/pilot/model/loader.py b/pilot/model/loader.py
new file mode 100644
index 000000000..768bcea1d
--- /dev/null
+++ b/pilot/model/loader.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import torch
+from utils import get_gpu_memory
+from fastchat.serve.inference import compress_module
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+)
+
+class ModerLoader:
+
+    kwargs = {}
+
+    def __init__(self,
+                 model_path) -> None:
+
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model_path = model_path
+        self.kwargs = {
+            "torch_dtype": torch.float16,
+            "device_map": "auto",
+            "max_memory": get_gpu_memory(),
+        }
+
+    def loader(self, load_8bit=False, debug=False):
+
+        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
+        model = AutoModelForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **self.kwargs)
+
+        if load_8bit:
+            compress_module(model, self.device)
+
+        if debug:
+            print(model)
+
+        return model, tokenizer
+
diff --git a/pilot/model/vicuna_llm.py b/pilot/model/vicuna_llm.py
new file mode 100644
index 000000000..1cc0ca3c3
--- /dev/null
+++ b/pilot/model/vicuna_llm.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+from transformers import pipeline
+from langchain.llms.base import LLM
+from configs.model_config import *
+
+class VicunaLLM(LLM):
+    model_name = llm_model_config[LLM_MODEL]
diff --git a/pilot/server/sqlgpt.py b/pilot/server/sqlgpt.py
index f5a71c392..867053afa 100644
--- a/pilot/server/sqlgpt.py
+++ b/pilot/server/sqlgpt.py
@@ -1,4 +1,3 @@
 #!/usr/bin/env python3
 #-*- coding: utf-8 -*-
 
-
diff --git a/pilot/utils.py b/pilot/utils.py
new file mode 100644
index 000000000..093b14f99
--- /dev/null
+++ b/pilot/utils.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import torch
+
+def get_gpu_memory(max_gpus=None):
+    gpu_memory = []
+    num_gpus = (
+        torch.cuda.device_count()
+        if max_gpus is None
+        else min(max_gpus, torch.cuda.device_count())
+    )
+
+    for gpu_id in range(num_gpus):
+        with torch.cuda.device(gpu_id):
+            device = torch.cuda.current_device()
+            gpu_properties = torch.cuda.get_device_properties(device)
+            total_memory = gpu_properties.total_memory / (1024 ** 3)
+            allocated_memory = torch.cuda.memory_allocated() / (1024 ** 3)
+            available_memory = total_memory - allocated_memory
+            gpu_memory.append(available_memory)
+    return gpu_memory
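
A minimal sketch of how the new loader is meant to be driven, assuming the
vicuna-13b weights already sit under models/ and that pilot/ is on sys.path
(loader.py does `from utils import get_gpu_memory`, which only resolves in
that case). `ModerLoader` is the class name as spelled in the patch:

    from configs.model_config import llm_model_config, LLM_MODEL
    from pilot.model.loader import ModerLoader

    # Resolve the configured model name to its on-disk path,
    # e.g. <repo>/models/vicuna-13b.
    model_path = llm_model_config[LLM_MODEL]

    # load_8bit=True routes through FastChat's compress_module;
    # debug=True prints the loaded module tree.
    loader = ModerLoader(model_path)
    model, tokenizer = loader.loader(load_8bit=False, debug=True)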
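
One caveat on the loader's kwargs: `get_gpu_memory()` returns a plain list of
free-memory figures in GiB, while the `max_memory` option that
transformers/accelerate honor together with `device_map="auto"` expects a
mapping from device id to a size string such as `{0: "13GiB"}`. A sketch of
the conversion that would likely be needed before passing it through to
from_pretrained:

    from pilot.utils import get_gpu_memory

    # Turn e.g. [13.5, 14.2] into {0: "13GiB", 1: "14GiB"}, the shape
    # accelerate expects for per-device memory caps.
    max_memory = {
        gpu_id: f"{int(free)}GiB"
        for gpu_id, free in enumerate(get_gpu_memory())
    }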
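
vicuna_llm.py so far only pins `model_name`; LangChain's `LLM` base class also
needs at least a `_call` method and an `_llm_type` property before the
subclass can be instantiated. A sketch of what the completed class might look
like - the text-generation pipeline task and the generation parameters are
illustrative assumptions, not something this patch fixes:

    from typing import List, Optional

    from langchain.llms.base import LLM
    from transformers import pipeline

    from configs.model_config import llm_model_config, LLM_MODEL


    class VicunaLLM(LLM):
        model_name: str = llm_model_config[LLM_MODEL]

        @property
        def _llm_type(self) -> str:
            return "vicuna"

        def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
            # Assumption: a plain HF text-generation pipeline over the local
            # checkpoint; a real implementation would cache the pipeline
            # instead of rebuilding it per call.
            generator = pipeline("text-generation", model=self.model_name)
            return generator(prompt, max_new_tokens=256)[0]["generated_text"]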