csunny 2023-04-28 21:59:18 +08:00
parent d308f0f5d8
commit 38f57e157c
7 changed files with 92 additions and 1 deletion

18 configs/model_config.py Normal file

@@ -0,0 +1,18 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import torch
import os

# Project root, local model directory, and vector store directory
root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
model_path = os.path.join(root_path, "models")
vector_storepath = os.path.join(root_path, "vector_store")

# Map each supported model name to its local checkpoint directory
llm_model_config = {
    "flan-t5-base": os.path.join(model_path, "flan-t5-base"),
    "vicuna-13b": os.path.join(model_path, "vicuna-13b"),
}

# Default model served by the pilot
LLM_MODEL = "vicuna-13b"
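A minimal sketch of how this config is consumed; the callers are not part of this commit, so the import and variable names below are an assumption based on the repository layout:

# Hypothetical usage of configs/model_config.py (illustrative, not in this commit)
from configs.model_config import llm_model_config, LLM_MODEL

# Resolve the default model name to its checkpoint directory,
# e.g. <repo_root>/models/vicuna-13b
model_checkpoint = llm_model_config[LLM_MODEL]
print(model_checkpoint)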


@@ -0,0 +1,2 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


@@ -0,0 +1,2 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

39 pilot/model/loader.py Normal file

@@ -0,0 +1,39 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import torch

from utils import get_gpu_memory
from fastchat.serve.inference import compress_module
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
)


class ModelLoader:
    """Load a causal LM and its tokenizer from a local checkpoint path."""

    kwargs = {}

    def __init__(self, model_path) -> None:
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_path = model_path
        self.kwargs = {
            "torch_dtype": torch.float16,
            "device_map": "auto",
            "max_memory": get_gpu_memory(),  # per-GPU memory figures, in GiB
        }

    def loader(self, load_8bit=False, debug=False):
        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(
            self.model_path, low_cpu_mem_usage=True, **self.kwargs
        )
        if load_8bit:
            # Compress weights to 8-bit in place (fastchat helper)
            compress_module(model, self.device)
        if debug:
            print(model)
        return model, tokenizer
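A hedged usage sketch of the loader above; the checkpoint path comes from configs/model_config.py, while the module path and flag values are assumptions for illustration:

# Hypothetical usage of pilot/model/loader.py (illustrative, not in this commit)
from configs.model_config import llm_model_config, LLM_MODEL
from pilot.model.loader import ModelLoader

loader = ModelLoader(model_path=llm_model_config[LLM_MODEL])
model, tokenizer = loader.loader(load_8bit=False, debug=True)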


@@ -0,0 +1,9 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from transformers import pipeline
from langchain.llms.base import LLM
from configs.model_config import *


class VicunaLLM(LLM):
    # Local checkpoint path of the configured default model
    model_name = llm_model_config[LLM_MODEL]
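The class body shown here is only a stub. For orientation, a langchain LLM subclass has to provide at least _call and _llm_type before it can be invoked; the sketch below illustrates that contract and is not the implementation from this commit:

# Illustrative sketch of a minimal langchain LLM subclass (assumption, not this commit's code)
from typing import List, Optional
from langchain.llms.base import LLM

class SketchVicunaLLM(LLM):
    model_name: str = "vicuna-13b"

    @property
    def _llm_type(self) -> str:
        return "vicuna"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # A real implementation would send the prompt to the vicuna model
        # (e.g. a local inference server) and return the generated text.
        raise NotImplementedError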


@@ -1,4 +1,3 @@
#!/usr/bin/env python3
#-*- coding: utf-8 -*-

22 pilot/utils.py Normal file

@@ -0,0 +1,22 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import torch


def get_gpu_memory(max_gpus=None):
    """Return total minus currently allocated memory, in GiB, for each visible GPU."""
    gpu_memory = []
    num_gpus = (
        torch.cuda.device_count()
        if max_gpus is None
        else min(max_gpus, torch.cuda.device_count())
    )

    for gpu_id in range(num_gpus):
        with torch.cuda.device(gpu_id):
            device = torch.cuda.current_device()
            gpu_properties = torch.cuda.get_device_properties(device)
            total_memory = gpu_properties.total_memory / (1024 ** 3)
            allocated_memory = torch.cuda.memory_allocated() / (1024 ** 3)
            available_memory = total_memory - allocated_memory
            gpu_memory.append(available_memory)
    return gpu_memory
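A short usage example for the helper above; the returned values depend on the machine, and the import path assumes the pilot package layout shown in this commit:

# Hypothetical usage of pilot/utils.get_gpu_memory (illustrative, not in this commit)
from pilot.utils import get_gpu_memory

free_gib = get_gpu_memory(max_gpus=2)
# e.g. [21.4, 22.0] on a two-GPU machine; an empty list when CUDA sees no device
print(free_gib)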