Mirror of https://github.com/csunny/DB-GPT.git, synced 2025-07-30 15:21:02 +00:00

Commit message: llms: add models

parent: c0532246af
commit: cbf1d0662a
.gitignore (vendored): 1 addition

@@ -23,6 +23,7 @@ lib/
 lib64/
 parts/
 sdist/
+models
 var/
 wheels/
 models/
@@ -20,10 +20,13 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 LLM_MODEL_CONFIG = {
     "flan-t5-base": os.path.join(MODEL_PATH, "flan-t5-base"),
     "vicuna-13b": os.path.join(MODEL_PATH, "vicuna-13b"),
+    "vicuna-7b": os.path.join(MODEL_PATH, "vicuna-7b"),
     "text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
     "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
-    "codegen2-7b": os.path.join(MODEL_PATH, ""),
+    "codegen2-1b": os.path.join(MODEL_PATH, "codegen2-1B"),
     "codet5p-2b": os.path.join(MODEL_PATH, "codet5p-2b"),
+    "chatglm-6b-int4": os.path.join(MODEL_PATH, "chatglm-6b-int4"),
+    "chatglm-6b": os.path.join(MODEL_PATH, "chatglm-6b"),
 }

 # Load model config
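For orientation, here is a minimal sketch of how a name-to-path registry like LLM_MODEL_CONFIG is typically consumed downstream. The resolve_model_path helper and the hard-coded MODEL_PATH below are illustrative assumptions, not code from this commit:

import os

MODEL_PATH = os.path.expanduser("~/models")  # assumed base dir; the repo derives this from its path config

LLM_MODEL_CONFIG = {
    "vicuna-13b": os.path.join(MODEL_PATH, "vicuna-13b"),
    "chatglm-6b": os.path.join(MODEL_PATH, "chatglm-6b"),
}

def resolve_model_path(model_name: str) -> str:
    """Map a logical model name to its local checkpoint directory, failing loudly on unknown names."""
    try:
        return LLM_MODEL_CONFIG[model_name]
    except KeyError:
        raise ValueError(f"unknown model {model_name!r}; known: {sorted(LLM_MODEL_CONFIG)}") from None

Keeping every checkpoint behind one dict means callers such as the model server only ever see a logical name, which also lines up with the .gitignore change above excluding the locally downloaded models directory.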
pilot/server/chat_adapter.py (new file): 13 additions

@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
+class BaseChatAdpter:
+    """The Base class for chat with llm models. it will match the model,
+    and fetch output from model"""
+
+    def match(self, model_path: str):
+        return True
+
+    def get_generate_stream_func(self):
+        pass
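BaseChatAdpter is an extension point: a concrete adapter claims a model via match() and hands back that model's streaming generation function via get_generate_stream_func(). A hedged sketch of how such adapters are usually registered and dispatched; the register_chat_adapter decorator, the VicunaChatAdapter subclass, and the get_llm_chat_adapter lookup are illustrative assumptions, not part of this commit:

chat_adapters = []

def register_chat_adapter(cls):
    """Instantiate and remember an adapter so it can be matched later (assumed pattern)."""
    chat_adapters.append(cls())
    return cls

@register_chat_adapter
class VicunaChatAdapter(BaseChatAdpter):  # hypothetical subclass for illustration
    def match(self, model_path: str):
        return "vicuna" in model_path

    def get_generate_stream_func(self):
        def generate_stream(model, tokenizer, params):
            yield "..."  # placeholder for the model-specific token stream
        return generate_stream

def get_llm_chat_adapter(model_path: str) -> BaseChatAdpter:
    """Return the first registered adapter that claims this model path."""
    for adapter in chat_adapters:
        if adapter.match(model_path):
            return adapter
    raise ValueError(f"no chat adapter matches {model_path}")

Dispatching on model_path keeps model-specific streaming logic out of the server loop, and the base class's match() returning True suggests it is meant to serve as a catch-all fallback.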
@@ -30,7 +30,6 @@ model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
 ml = ModelLoader(model_path=model_path)
 model, tokenizer = ml.loader(num_gpus=1, load_8bit=ISLOAD_8BIT, debug=ISDEBUG)
-#model, tokenizer = load_model(model_path=model_path, device=DEVICE, num_gpus=1, load_8bit=True, debug=False)
 
 
 class ModelWorker:
     def __init__(self):
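This hunk retires the commented-out direct load_model() call in favor of the ModelLoader abstraction that ModelWorker builds on. A minimal sketch of the shape that loader call implies; only the constructor argument and the loader() keyword names are taken from the diff, the body below is an assumed generic Hugging Face load, not the repo's implementation:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

class ModelLoader:
    """Sketch: hide checkpoint loading behind one object so the server only sees (model, tokenizer)."""

    def __init__(self, model_path: str):
        self.model_path = model_path

    def loader(self, num_gpus: int, load_8bit: bool = False, debug: bool = False):
        # Assumed generic loading; the real implementation branches per model
        # family and honors load_8bit via quantized weights (omitted here).
        tokenizer = AutoTokenizer.from_pretrained(self.model_path)
        model = AutoModelForCausalLM.from_pretrained(self.model_path)
        if num_gpus > 0 and torch.cuda.is_available():
            model = model.to("cuda")
        if debug:
            print(model)
        return model, tokenizer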