Mirror of https://github.com/csunny/DB-GPT.git

Commit 38f57e157c: init
Parent: d308f0f5d8
configs/model_config.py (new file, 18 lines)
@@ -0,0 +1,18 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import torch
import os


root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
model_path = os.path.join(root_path, "models")
vector_storepath = os.path.join(root_path, "vector_store")


llm_model_config = {
    "flan-t5-base": os.path.join(model_path, "flan-t5-base"),
    "vicuna-13b": os.path.join(model_path, "vicuna-13b")
}

LLM_MODEL = "vicuna-13b"
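Not part of the diff: a minimal usage sketch of how these config values resolve at import time, assuming the repository is laid out with models/ and vector_store/ directories beside configs/ (neither directory is created by this commit).

# Illustrative only, not part of this commit: resolving the configured model path.
from configs.model_config import LLM_MODEL, llm_model_config, vector_storepath

checkpoint_dir = llm_model_config[LLM_MODEL]  # <repo root>/models/vicuna-13b
print(checkpoint_dir)
print(vector_storepath)                       # <repo root>/vector_store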
pilot/connections/mysql_conn.py (new file, 2 lines)
@@ -0,0 +1,2 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
pilot/connections/pg_conn.py (new file, 2 lines)
@@ -0,0 +1,2 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
pilot/model/loader.py (new file, 39 lines)
@@ -0,0 +1,39 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import torch
from utils import get_gpu_memory
from fastchat.serve.inference import compress_module
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
)


class ModerLoader:

    kwargs = {}

    def __init__(self,
                 model_path) -> None:

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_path = model_path
        self.kwargs = {
            "torch_dtype": torch.float16,
            "device_map": "auto",
            "max_memory": get_gpu_memory(),
        }

    def loader(self, load_8bit=False, debug=False):

        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **self.kwargs)

        if load_8bit:
            compress_module(model, self.device)

        if debug:
            print(model)

        return model, tokenizer
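Not part of the diff: a hedged usage sketch of ModerLoader, assuming the vicuna-13b weights already exist locally (the path below is hypothetical), a CUDA-capable machine, and that pilot/ is on sys.path so the `from utils import get_gpu_memory` inside loader.py resolves.

# Illustrative only, not part of this commit. The weights path is hypothetical.
from pilot.model.loader import ModerLoader

loader = ModerLoader(model_path="/path/to/models/vicuna-13b")
# debug=True prints the loaded model; load_8bit=True would call fastchat's compress_module.
model, tokenizer = loader.loader(load_8bit=False, debug=True)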
pilot/model/vicuna_llm.py (new file, 9 lines)
@@ -0,0 +1,9 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from transformers import pipeline
from langchain.llms.base import LLM
from configs.model_config import *


class VicunaLLM(LLM):
    model_name = llm_model_config[LLM_MODEL]
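Not part of the diff: at this commit the class body stops at the model_name attribute. Below is a hypothetical sketch of how a langchain.llms.base.LLM subclass of that era is typically completed, with a transformers text-generation pipeline standing in for the real inference call; every name in it is illustrative and not taken from the repository.

# Illustrative only, not part of this commit. Sketch of the two members the
# langchain LLM base class of this era expects a subclass to provide.
from typing import List, Optional

from langchain.llms.base import LLM
from transformers import pipeline


class LocalVicunaSketch(LLM):
    model_name: str = "vicuna-13b"

    @property
    def _llm_type(self) -> str:
        return "local_vicuna_sketch"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # Run a local text-generation pipeline over the prompt and return the
        # generated text; max_new_tokens is an arbitrary example value.
        generator = pipeline("text-generation", model=self.model_name)
        return generator(prompt, max_new_tokens=64)[0]["generated_text"]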
(modified file; the file name is not preserved in this extracted view)
@@ -1,4 +1,3 @@
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
pilot/utils.py (new file, 22 lines)
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import torch

def get_gpu_memory(max_gpus=None):
    gpu_memory = []
    num_gpus = (
        torch.cuda.device_count()
        if max_gpus is None
        else min(max_gpus, torch.cuda.device_count())
    )

    for gpu_id in range(num_gpus):
        with torch.cuda.device(gpu_id):
            device = torch.cuda.current_device()
            gpu_properties = torch.cuda.get_device_properties(device)
            total_memory = gpu_properties.total_memory / (1024 ** 3)
            allocated_memory = torch.cuda.memory_allocated() / (1024 ** 3)
            available_memory = total_memory - allocated_memory
            gpu_memory.append(available_memory)
    return gpu_memory
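Not part of the diff: a small sketch of how get_gpu_memory is used on a CUDA machine; it returns a list with the currently free memory of each visible GPU, in GiB.

# Illustrative only, not part of this commit: print free memory per GPU.
from pilot.utils import get_gpu_memory

if __name__ == "__main__":
    for gpu_id, free_gib in enumerate(get_gpu_memory(max_gpus=2)):
        print(f"GPU {gpu_id}: {free_gib:.2f} GiB available")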