From cbf1d0662a0b9a39eab5d0491859206b7c09c033 Mon Sep 17 00:00:00 2001
From: csunny
Date: Sat, 20 May 2023 16:06:32 +0800
Subject: [PATCH] llms: add models

---
 .gitignore                    |  1 +
 pilot/configs/model_config.py |  5 ++++-
 pilot/server/chat_adapter.py  | 13 +++++++++++++
 pilot/server/llmserver.py     |  1 -
 4 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 pilot/server/chat_adapter.py

diff --git a/.gitignore b/.gitignore
index 5043f7db0..22bb204db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ lib/
 lib64/
 parts/
 sdist/
+models
 var/
 wheels/
 models/
diff --git a/pilot/configs/model_config.py b/pilot/configs/model_config.py
index 9699061b7..265007ae5 100644
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@@ -20,10 +20,13 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 LLM_MODEL_CONFIG = {
     "flan-t5-base": os.path.join(MODEL_PATH, "flan-t5-base"),
     "vicuna-13b": os.path.join(MODEL_PATH, "vicuna-13b"),
+    "vicuna-7b": os.path.join(MODEL_PATH, "vicuna-7b"),
     "text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
     "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
-    "codegen2-7b": os.path.join(MODEL_PATH, ""),
+    "codegen2-1b": os.path.join(MODEL_PATH, "codegen2-1B"),
     "codet5p-2b": os.path.join(MODEL_PATH, "codet5p-2b"),
+    "chatglm-6b-int4": os.path.join(MODEL_PATH, "chatglm-6b-int4"),
+    "chatglm-6b": os.path.join(MODEL_PATH, "chatglm-6b"),
 }
 
 # Load model config
diff --git a/pilot/server/chat_adapter.py b/pilot/server/chat_adapter.py
new file mode 100644
index 000000000..9c32c911d
--- /dev/null
+++ b/pilot/server/chat_adapter.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+
+class BaseChatAdpter:
+    """Base class for chatting with LLM models: it matches the model
+    and fetches output from it."""
+
+    def match(self, model_path: str):
+        return True
+
+    def get_generate_stream_func(self):
+        pass
\ No newline at end of file
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index e1c7556f6..33d3d545d 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -30,7 +30,6 @@ model_path = LLM_MODEL_CONFIG[CFG.LLM_MODEL]
 
 ml = ModelLoader(model_path=model_path)
 model, tokenizer = ml.loader(num_gpus=1, load_8bit=ISLOAD_8BIT, debug=ISDEBUG)
-#model, tokenizer = load_model(model_path=model_path, device=DEVICE, num_gpus=1, load_8bit=True, debug=False)
 
 class ModelWorker:
     def __init__(self):
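
For context, here is a minimal sketch of how a concrete adapter could plug into
the new BaseChatAdpter base class. The registry helpers
(register_llm_model_chat_adapter, get_llm_chat_adapter), the
ChatGLMChatAdapter subclass, and the stub streaming function are all
hypothetical illustrations, not code shipped by this patch; the sketch assumes
the patched repository is importable.

    #!/usr/bin/env python3
    # Hypothetical usage sketch -- not part of the patch above.
    from pilot.server.chat_adapter import BaseChatAdpter

    llm_model_chat_adapters = []  # assumed process-wide adapter registry


    def register_llm_model_chat_adapter(cls):
        """Instantiate and register an adapter class."""
        llm_model_chat_adapters.append(cls())


    def get_llm_chat_adapter(model_path: str) -> BaseChatAdpter:
        """Return the first registered adapter whose match() accepts the path."""
        for adapter in llm_model_chat_adapters:
            if adapter.match(model_path):
                return adapter
        raise ValueError(f"No chat adapter found for model path: {model_path}")


    class ChatGLMChatAdapter(BaseChatAdpter):
        """Matches the chatglm-6b / chatglm-6b-int4 paths added to LLM_MODEL_CONFIG."""

        def match(self, model_path: str):
            return "chatglm" in model_path

        def get_generate_stream_func(self):
            # A real adapter would return the model-specific streaming
            # generate function; this stub only marks where that hook lives.
            def chatglm_generate_stream(model, tokenizer, params, device, max_new_tokens):
                raise NotImplementedError("model-specific streaming goes here")

            return chatglm_generate_stream


    register_llm_model_chat_adapter(ChatGLMChatAdapter)

    adapter = get_llm_chat_adapter("/data/models/chatglm-6b")
    print(type(adapter).__name__)  # -> ChatGLMChatAdapter

With a registry along these lines, llmserver.py could select the right
streaming function from model_path instead of hard-coding one implementation
per model.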