llms: add models

2025-07-24 12:45:45 +00:00 · 2023-05-20 16:06:32 +08:00 · 2023-05-20 16:06:32 +08:00 · cbf1d0662a
commit cbf1d0662a
parent c0532246af
4 changed files with 18 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -23,6 +23,7 @@ lib/
 lib64/
 parts/
 sdist/
+models
 var/
 wheels/
 models/
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@ -20,10 +20,13 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 LLM_MODEL_CONFIG = {
    "flan-t5-base": os.path.join(MODEL_PATH, "flan-t5-base"),
    "vicuna-13b": os.path.join(MODEL_PATH, "vicuna-13b"),
+    "vicuna-7b": os.path.join(MODEL_PATH, "vicuna-7b"),
    "text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
    "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
-    "codegen2-7b": os.path.join(MODEL_PATH, ""),
+    "codegen2-1b": os.path.join(MODEL_PATH, "codegen2-1B"),
    "codet5p-2b": os.path.join(MODEL_PATH, "codet5p-2b"),
+    "chatglm-6b-int4": os.path.join(MODEL_PATH, "chatglm-6b-int4"),
+    "chatglm-6b": os.path.join(MODEL_PATH, "chatglm-6b"),
 }

 # Load model config
--- a/pilot/server/chat_adapter.py
+++ b/pilot/server/chat_adapter.py
@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
+class BaseChatAdpter:
+    """Base class for chat adapters for LLM models: `match` decides whether the
+    adapter handles a model path; `get_generate_stream_func` is the output hook."""
+
+    def match(self, model_path: str):
+        return True  # the base adapter accepts every model_path
+
+    def get_generate_stream_func(self):
+        pass  # stub: returns None here; presumably overridden by subclasses
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@ -30,7 +30,6 @@ model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]

 ml = ModelLoader(model_path=model_path)
 model, tokenizer = ml.loader(num_gpus=1, load_8bit=ISLOAD_8BIT, debug=ISDEBUG)
-#model, tokenizer = load_model(model_path=model_path, device=DEVICE, num_gpus=1, load_8bit=True, debug=False)

 class ModelWorker:
    def __init__(self):