From cbf1d0662a0b9a39eab5d0491859206b7c09c033 Mon Sep 17 00:00:00 2001
From: csunny
Date: Sat, 20 May 2023 16:06:32 +0800
Subject: [PATCH] llms: add models

---
 .gitignore                    |  1 +
 pilot/configs/model_config.py |  5 ++++-
 pilot/server/chat_adapter.py  | 13 +++++++++++++
 pilot/server/llmserver.py     |  1 -
 4 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 pilot/server/chat_adapter.py

diff --git a/.gitignore b/.gitignore
index 5043f7db0..22bb204db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ lib/
 lib64/
 parts/
 sdist/
+models
 var/
 wheels/
 models/
diff --git a/pilot/configs/model_config.py b/pilot/configs/model_config.py
index 9699061b7..265007ae5 100644
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@@ -20,10 +20,13 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 LLM_MODEL_CONFIG = {
     "flan-t5-base": os.path.join(MODEL_PATH, "flan-t5-base"),
     "vicuna-13b": os.path.join(MODEL_PATH, "vicuna-13b"),
+    "vicuna-7b": os.path.join(MODEL_PATH, "vicuna-7b"),
     "text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
     "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
-    "codegen2-7b": os.path.join(MODEL_PATH, ""),
+    "codegen2-1b": os.path.join(MODEL_PATH, "codegen2-1B"),
     "codet5p-2b": os.path.join(MODEL_PATH, "codet5p-2b"),
+    "chatglm-6b-int4": os.path.join(MODEL_PATH, "chatglm-6b-int4"),
+    "chatglm-6b": os.path.join(MODEL_PATH, "chatglm-6b"),
 }
 
 # Load model config
diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
new file mode 100644
index 000000000..9c32c911d
--- /dev/null
+++ b/pilot/server/chat_adapter.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
+class BaseChatAdpter:
+    """Base class for chatting with LLM models: it matches the model
+    and fetches output from it."""
+
+    def match(self, model_path: str):
+        return True
+
+    def get_generate_stream_func(self):
+        pass
\ No newline at end of file
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index e1c7556f6..33d3d545d 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -30,7 +30,6 @@ model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
 
 ml = ModelLoader(model_path=model_path)
 model, tokenizer = ml.loader(num_gpus=1, load_8bit=ISLOAD_8BIT, debug=ISDEBUG)
-#model, tokenizer = load_model(model_path=model_path, device=DEVICE, num_gpus=1, load_8bit=True, debug=False)
 
 class ModelWorker:
     def __init__(self):
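
For context, here is a minimal sketch of how a concrete adapter could plug into
the new BaseChatAdpter base class. The registry helpers
(register_llm_model_chat_adapter, get_llm_chat_adapter), the
ChatGLMChatAdapter subclass, and the stub streaming function are all
hypothetical illustrations, not code shipped by this patch; the sketch assumes
the patched repository is importable.

    #!/usr/bin/env python3
    # Hypothetical usage sketch -- not part of the patch above.
    from pilot.server.chat_adapter import BaseChatAdpter

    llm_model_chat_adapters = []  # assumed process-wide adapter registry


    def register_llm_model_chat_adapter(cls):
        """Instantiate and register an adapter class."""
        llm_model_chat_adapters.append(cls())


    def get_llm_chat_adapter(model_path: str) -> BaseChatAdpter:
        """Return the first registered adapter whose match() accepts the path."""
        for adapter in llm_model_chat_adapters:
            if adapter.match(model_path):
                return adapter
        raise ValueError(f"No chat adapter found for model path: {model_path}")


    class ChatGLMChatAdapter(BaseChatAdpter):
        """Matches the chatglm-6b / chatglm-6b-int4 paths added to LLM_MODEL_CONFIG."""

        def match(self, model_path: str):
            return "chatglm" in model_path

        def get_generate_stream_func(self):
            # A real adapter would return the model-specific streaming
            # generate function; this stub only marks where that hook lives.
            def chatglm_generate_stream(model, tokenizer, params, device, max_new_tokens):
                raise NotImplementedError("model-specific streaming goes here")

            return chatglm_generate_stream


    register_llm_model_chat_adapter(ChatGLMChatAdapter)

    adapter = get_llm_chat_adapter("/data/models/chatglm-6b")
    print(type(adapter).__name__)  # -> ChatGLMChatAdapter

With a registry along these lines, llmserver.py could select the right
streaming function from model_path instead of hard-coding one implementation
per model.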