Merge branch 'main' of https://github.com/csunny/DB-GPT into tt_dev

This commit is contained in:
csunny 2023-07-21 14:33:34 +08:00
commit fe522ac067
25 changed files with 1182 additions and 185 deletions

View File

@ -124,10 +124,17 @@ The core capabilities mainly consist of the following parts:
- [DB-GPT-Plugins](https://github.com/csunny/DB-GPT-Plugins) DB-GPT plugins; can run Auto-GPT plugins directly
- [DB-GPT-Web](https://github.com/csunny/DB-GPT-Web) ChatUI for DB-GPT
## Image
🌐 [AutoDL Image](https://www.codewithgpu.com/i/csunny/DB-GPT/dbgpt-0.3.1-v2)
## Install
[Quickstart](https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html)
### Language Switching
In the .env configuration file, modify the LANGUAGE parameter to switch between languages. The default is English (zh for Chinese, en for English; more languages will be added later).
### Platform Deployment
- autodl
[autodl image](https://www.codewithgpu.com/i/csunny/DB-GPT/csunny-db-gpt). You can build from scratch by following the image instructions, or pull the shared image with `docker pull` and follow the instructions in the documentation. If you have any questions, please leave a comment.
## Usage Instructions
- [Multi LLMs Usage](https://db-gpt.readthedocs.io/en/latest/modules/llms.html)

View File

@ -118,12 +118,19 @@ DB-GPT基于 [FastChat](https://github.com/lm-sys/FastChat) 构建大模型运
- [DB-GPT-Plugins](https://github.com/csunny/DB-GPT-Plugins) DB-GPT 插件仓库, 兼容Auto-GPT
- [DB-GPT-Web](https://github.com/csunny/DB-GPT-Web) 多端交互前端界面
## Image
🌐 [AutoDL镜像](https://www.codewithgpu.com/i/csunny/DB-GPT/dbgpt-0.3.1-v2)
## 安装
[快速开始](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/getting_started.html)
### 多语言切换
在.env 配置文件当中修改LANGUAGE参数来切换使用不同的语言默认是英文(中文zh, 英文en, 其他语言待补充)
### 平台部署
- autodl
[autodl镜像](https://www.codewithgpu.com/i/csunny/DB-GPT/csunny-db-gpt),从头搭建可参考镜像说明,或通过`docker pull`获取共享镜像,按照文档中的说明操作即可,若有问题,欢迎评论。
## 使用说明
### 多模型使用

Binary files added (images not shown in the diff): two new PNG assets (45 KiB and 16 KiB) and assets/faq/proxyerror.png (148 KiB).

View File

@ -1,3 +1,5 @@
CREATE DATABASE knowledge_management;
use knowledge_management;
CREATE TABLE `knowledge_space` (
`id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
`name` varchar(100) NOT NULL COMMENT 'knowledge space name',
@ -38,4 +40,37 @@ CREATE TABLE `document_chunk` (
`gmt_modified` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
PRIMARY KEY (`id`),
KEY `idx_document_id` (`document_id`) COMMENT 'index:document_id'
) ENGINE=InnoDB AUTO_INCREMENT=100001 DEFAULT CHARSET=utf8mb4 COMMENT='knowledge document chunk detail';
CREATE DATABASE EXAMPLE_1;
use EXAMPLE_1;
CREATE TABLE `users` (
`id` int NOT NULL AUTO_INCREMENT,
`username` varchar(50) NOT NULL COMMENT '用户名',
`password` varchar(50) NOT NULL COMMENT '密码',
`email` varchar(50) NOT NULL COMMENT '邮箱',
`phone` varchar(20) DEFAULT NULL COMMENT '电话',
PRIMARY KEY (`id`),
KEY `idx_username` (`username`) COMMENT '索引:按用户名查询'
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COMMENT='聊天用户表';
INSERT INTO users (username, password, email, phone) VALUES ('user_1', 'password_1', 'user_1@example.com', '12345678901');
INSERT INTO users (username, password, email, phone) VALUES ('user_2', 'password_2', 'user_2@example.com', '12345678902');
INSERT INTO users (username, password, email, phone) VALUES ('user_3', 'password_3', 'user_3@example.com', '12345678903');
INSERT INTO users (username, password, email, phone) VALUES ('user_4', 'password_4', 'user_4@example.com', '12345678904');
INSERT INTO users (username, password, email, phone) VALUES ('user_5', 'password_5', 'user_5@example.com', '12345678905');
INSERT INTO users (username, password, email, phone) VALUES ('user_6', 'password_6', 'user_6@example.com', '12345678906');
INSERT INTO users (username, password, email, phone) VALUES ('user_7', 'password_7', 'user_7@example.com', '12345678907');
INSERT INTO users (username, password, email, phone) VALUES ('user_8', 'password_8', 'user_8@example.com', '12345678908');
INSERT INTO users (username, password, email, phone) VALUES ('user_9', 'password_9', 'user_9@example.com', '12345678909');
INSERT INTO users (username, password, email, phone) VALUES ('user_10', 'password_10', 'user_10@example.com', '12345678900');
INSERT INTO users (username, password, email, phone) VALUES ('user_11', 'password_11', 'user_11@example.com', '12345678901');
INSERT INTO users (username, password, email, phone) VALUES ('user_12', 'password_12', 'user_12@example.com', '12345678902');
INSERT INTO users (username, password, email, phone) VALUES ('user_13', 'password_13', 'user_13@example.com', '12345678903');
INSERT INTO users (username, password, email, phone) VALUES ('user_14', 'password_14', 'user_14@example.com', '12345678904');
INSERT INTO users (username, password, email, phone) VALUES ('user_15', 'password_15', 'user_15@example.com', '12345678905');
INSERT INTO users (username, password, email, phone) VALUES ('user_16', 'password_16', 'user_16@example.com', '12345678906');
INSERT INTO users (username, password, email, phone) VALUES ('user_17', 'password_17', 'user_17@example.com', '12345678907');
INSERT INTO users (username, password, email, phone) VALUES ('user_18', 'password_18', 'user_18@example.com', '12345678908');
INSERT INTO users (username, password, email, phone) VALUES ('user_19', 'password_19', 'user_19@example.com', '12345678909');
INSERT INTO users (username, password, email, phone) VALUES ('user_20', 'password_20', 'user_20@example.com', '12345678900');
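A quick way to sanity-check the seed data (an illustrative query; it assumes the `pymysql` package is installed and the script above has been executed against the Docker MySQL instance from the quickstart):

```python
# Illustrative: count the demo rows inserted into EXAMPLE_1.users.
import pymysql

conn = pymysql.connect(host="127.0.0.1", port=3306, user="root",
                       password="aa12345678", database="EXAMPLE_1")
with conn.cursor() as cur:
    cur.execute("SELECT COUNT(*) FROM users")
    print(cur.fetchone())  # (20,) after the INSERT statements above
conn.close()
```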

97
docs/faq.md Normal file
View File

@ -0,0 +1,97 @@
# FAQ
##### Q1: text2vec-large-chinese not found
##### A1: make sure you have downloaded the text2vec-large-chinese embedding model correctly
```bash
# CentOS
yum install git-lfs
# Ubuntu
apt-get install git-lfs -y
# macOS
brew install git-lfs
```
```bash
cd models
git lfs clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
```
##### Q2: `pip install -r requirements.txt` fails because some packages cannot be found at the required version.
##### A2: change the pip source to another mirror.
```bash
# pypi
$ pip install -r requirements.txt -i https://pypi.python.org/simple
```
or
```bash
# tsinghua
$ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
```
or
```bash
# aliyun
$ pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/
```
##### Q3: Access denied for user 'root'@'localhost' (using password: NO)
##### A3: make sure your MySQL instance is installed and running correctly
Docker:
```bash
docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
```
Normal:
[download mysql instance](https://dev.mysql.com/downloads/mysql/)
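To quickly verify that the instance is reachable (an illustrative check; it assumes the `pymysql` package is installed):

```python
# Illustrative connectivity check against the Docker instance above.
import pymysql

conn = pymysql.connect(host="127.0.0.1", port=3306,
                       user="root", password="aa12345678")
with conn.cursor() as cur:
    cur.execute("SELECT VERSION()")
    print(cur.fetchone())  # e.g. ('8.0.33',)
conn.close()
```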
##### Q4: When I use OpenAI (MODEL_SERVER=proxyllm) to chat, I see the following error
<p align="left">
<img src="../assets/faq/proxyerror.png" width="800px" />
</p>
##### A4: make sure your OpenAI API_KEY is valid
##### Q5: When I use Chat Data or Chat Meta Data, I see the following error
<p align="left">
<img src="../assets/faq/chatdataerror.png" width="800px" />
</p>
##### A5: you have not created your database and tables yet
1. Create your database:
```bash
mysql> create database {$your_name};
mysql> use {$your_name};
```
2. Create table {$your_table} and insert your data, e.g.:
```bash
mysql>CREATE TABLE `users` (
`id` int NOT NULL AUTO_INCREMENT,
`username` varchar(50) NOT NULL COMMENT '用户名',
`password` varchar(50) NOT NULL COMMENT '密码',
`email` varchar(50) NOT NULL COMMENT '邮箱',
`phone` varchar(20) DEFAULT NULL COMMENT '电话',
PRIMARY KEY (`id`),
KEY `idx_username` (`username`) COMMENT '索引:按用户名查询'
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COMMENT='聊天用户表';
```
##### Q6: When I use vicuna-13b, I see illegal characters like this:
<p align="left">
<img src="../assets/faq/illegal_character.png" width="800px" />
</p>
##### A6: set KNOWLEDGE_SEARCH_TOP_SIZE or KNOWLEDGE_CHUNK_SIZE to a smaller value and restart the server.

View File

@ -17,11 +17,15 @@ As our project has the ability to achieve ChatGPT performance of over 85%, there
### 2. Install
1. This project relies on a local MySQL database service, which you need to install locally. We recommend using Docker for installation.
```bash
$ docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
```
2. prepare server sql script
```bash
$ mysql -h127.0.0.1 -uroot -paa12345678 < ./assets/schema/knowledge_management.sql
```
We use [Chroma embedding database](https://github.com/chroma-core/chroma) as the default for our vector database, so there is no need for special installation. If you choose to connect to other databases, you can follow our tutorial for installation and configuration.
For the entire installation process of DB-GPT, we use the miniconda3 virtual environment. Create a virtual environment and install the Python dependencies.
@ -63,16 +67,10 @@ You can refer to this document to obtain the Vicuna weights: [Vicuna](https://gi
If you have difficulty with this step, you can also directly use the model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a replacement.
1. prepare server sql script
```bash
mysql> CREATE DATABASE knowledge_management;
mysql> use knowledge_management;
mysql> source ./assets/schema/knowledge_management.sql
```
set .env configuration set your vector store type, eg:VECTOR_STORE_TYPE=Chroma, now we support Chroma and Milvus(version > 2.1)
1. Run db-gpt server
```bash
$ python pilot/server/dbgpt_server.py

View File

@ -130,6 +130,18 @@ Reference
./reference.md
FAQ
-----------
| DB-GPT FAQ.
.. toctree::
:maxdepth: 1
:caption: FAQ
:name: FAQ
:hidden:
./faq.md
Ecosystem
----------

View File

@ -0,0 +1,100 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2023, csunny
# This file is distributed under the same license as the DB-GPT package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-07-20 10:53+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../faq.md:1 81cedd396b274db9b2a69448df98a28d
msgid "FAQ"
msgstr "FAQ"
#: ../../faq.md:2 1a3f8c7661e34721a4465a34281416b1
msgid "Q1: text2vec-large-chinese not found"
msgstr "Q1: text2vec-large-chinese not found"
#: ../../faq.md:4 5d2844bcbcc843fc97de41a491f914fe
msgid ""
"A1: make sure you have download text2vec-large-chinese embedding model in"
" right way"
msgstr "按照正确的姿势下载text2vec-large-chinese模型"
#: ../../faq.md:16 82ade01884534030b81be7c3e06f1504
msgid ""
"Q2: execute `pip install -r requirements.txt` error, found some package "
"cannot find correct version."
msgstr "执行`pip install -r requirements.txt`报错"
#: ../../faq.md:19 451ede833da642788e5224811a71ba0f
msgid "A2: change the pip source."
msgstr "修改pip源"
#: ../../faq.md:26 ../../faq.md:33 0142abf1050d4fee9caf056322b52247
#: 0adb87dd67fa4122997871f7ab064637
msgid "or"
msgstr "或"
#: ../../faq.md:41 d6981160ecc6491284fd32e1098fc10e
msgid "Q3:Access denied for user 'root@localhost'(using password :NO)"
msgstr "或"
#: ../../faq.md:43 e50b195057804f28b84d0d10859e4f1b
msgid "A3: make sure you have installed mysql instance in right way"
msgstr "按照正确姿势安装mysql"
#: ../../faq.md:45 03ba25aa7bd241d3b32cc1916f858a3e
msgid "Docker:"
msgstr "Docker:"
#: ../../faq.md:49 dd5336b44673459c93a1408097cb76f9
msgid "Normal: [download mysql instance](https://dev.mysql.com/downloads/mysql/)"
msgstr "[download mysql instance](https://dev.mysql.com/downloads/mysql/)"
#: ../../faq.md:52 2b290c4653a2410c8d330ed5b0e9a821
msgid "Q4:When I use openai(MODEL_SERVER=proxyllm) to chat"
msgstr "使用openai-chatgpt模型时(MODEL_SERVER=proxyllm)"
#: ../../faq.md:57 f4d0e8e8113f4ca4bc55f167b661fd6a
msgid "A4: make sure your openapi API_KEY is available"
msgstr "确认openapi API_KEY是否可用"
#: ../../faq.md:59 092ca3dea0c5466ab6e22ab0049f166e
msgid "Q5:When I Chat Data and Chat Meta Data, I found the error"
msgstr "Chat Data and Chat Meta Data报如下错"
#: ../../faq.md:64 dbf61e6ea2c64ecebfdbbde83cb74e3e
msgid "A5: you have not create your database and table"
msgstr "需要创建自己的数据库"
#: ../../faq.md:65 0505bb716e6445c2a7960436d93cb407
msgid "1.create your database."
msgstr "1.先创建数据库"
#: ../../faq.md:71 fd689b541ee549bd85385647c219b4cb
msgid "2.create table {$your_table} and insert your data. eg:"
msgstr "然后创建数据表,模拟数据"
#: ../../faq.md:85 de2d78db5fb6450cb08b0f15385ed525
msgid "Q6:When I use vicuna-13b, found some illegal character like this."
msgstr "使用vicuna-13b知识库问答出现乱码"
#: ../../faq.md:90 0cb1d0c2ec434763ae80e6f87d4a1665
msgid ""
"A6: set KNOWLEDGE_SEARCH_TOP_SIZE smaller or set KNOWLEDGE_CHUNK_SIZE "
"smaller, and reboot server."
msgstr "将KNOWLEDGE_SEARCH_TOP_SIZE和KNOWLEDGE_CHUNK_SIZE设置小点然后重启"

View File

@ -8,7 +8,7 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: DB-GPT 0.3.0\n" "Project-Id-Version: DB-GPT 0.3.0\n"
"Report-Msgid-Bugs-To: \n" "Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-07-13 15:39+0800\n" "POT-Creation-Date: 2023-07-20 10:53+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n" "Language: zh_CN\n"
@ -19,29 +19,29 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n" "Generated-By: Babel 2.12.1\n"
#: ../../getting_started/getting_started.md:1 0b2e795438a3413c875fd80191e85bad #: ../../getting_started/getting_started.md:1 7c12b6d7d5be4528be005cdadec568db
msgid "Quickstart Guide" msgid "Quickstart Guide"
msgstr "使用指南" msgstr "使用指南"
#: ../../getting_started/getting_started.md:3 7b84c9776f8a4f9fb55afc640f37f45c #: ../../getting_started/getting_started.md:3 ba312f5a132541be89dcc09012076784
msgid "" msgid ""
"This tutorial gives you a quick walkthrough about use DB-GPT with you " "This tutorial gives you a quick walkthrough about use DB-GPT with you "
"environment and data." "environment and data."
msgstr "本教程为您提供了关于如何使用DB-GPT的使用指南。" msgstr "本教程为您提供了关于如何使用DB-GPT的使用指南。"
#: ../../getting_started/getting_started.md:5 1b2880e1ef674bfdbf39ac9f330aeec9 #: ../../getting_started/getting_started.md:5 8e56b3f0726740abaaafa57415b10bea
msgid "Installation" msgid "Installation"
msgstr "安装" msgstr "安装"
#: ../../getting_started/getting_started.md:7 d0a8c6654bfe4bbdb0eb40ceb2ea3388 #: ../../getting_started/getting_started.md:7 3af8fe74db1043349e8f784c109b0417
msgid "To get started, install DB-GPT with the following steps." msgid "To get started, install DB-GPT with the following steps."
msgstr "请按照以下步骤安装DB-GPT" msgstr "请按照以下步骤安装DB-GPT"
#: ../../getting_started/getting_started.md:9 0a4e0b06c7fe49a9b2ca56ba2eb7b8ba #: ../../getting_started/getting_started.md:9 0b33cf4604f846e781a63d857dde72b2
msgid "1. Hardware Requirements" msgid "1. Hardware Requirements"
msgstr "1. 硬件要求" msgstr "1. 硬件要求"
#: ../../getting_started/getting_started.md:10 2b42f6546ef141f696943ba2120584e5 #: ../../getting_started/getting_started.md:10 f1d4abb176494bcb85cead7f3f8b719d
msgid "" msgid ""
"As our project has the ability to achieve ChatGPT performance of over " "As our project has the ability to achieve ChatGPT performance of over "
"85%, there are certain hardware requirements. However, overall, the " "85%, there are certain hardware requirements. However, overall, the "
@ -49,62 +49,67 @@ msgid ""
"specific hardware requirements for deployment are as follows:" "specific hardware requirements for deployment are as follows:"
msgstr "由于我们的项目有能力达到85%以上的ChatGPT性能所以对硬件有一定的要求。但总体来说我们在消费级的显卡上即可完成项目的部署使用具体部署的硬件说明如下:" msgstr "由于我们的项目有能力达到85%以上的ChatGPT性能所以对硬件有一定的要求。但总体来说我们在消费级的显卡上即可完成项目的部署使用具体部署的硬件说明如下:"
#: ../../getting_started/getting_started.md 4df0c44eff8741f39ca0fdeff222f90c #: ../../getting_started/getting_started.md e8516902f29d4ca2bb46f19b5e3deb81
msgid "GPU" msgid "GPU"
msgstr "GPU" msgstr "GPU"
#: ../../getting_started/getting_started.md b740a2991ce546cca43a426b760e9901 #: ../../getting_started/getting_started.md a951ccca67364cf7ad5f0af2ec0ece8d
msgid "VRAM Size" msgid "VRAM Size"
msgstr "显存大小" msgstr "显存大小"
#: ../../getting_started/getting_started.md 222b91ff82f14d12acaac5aa238758c8 #: ../../getting_started/getting_started.md 01b7e055ee4543bdb619fbc14fea4d86
msgid "Performance" msgid "Performance"
msgstr "显存大小" msgstr "显存大小"
#: ../../getting_started/getting_started.md c2d2ae6a4c964c4f90a9009160754782 #: ../../getting_started/getting_started.md 0b20b224ff8a4e2c890a8b4ff43b6045
msgid "RTX 4090" msgid "RTX 4090"
msgstr "RTX 4090" msgstr "RTX 4090"
#: ../../getting_started/getting_started.md 529220ec6a294e449dc460ba2e8829a1 #: ../../getting_started/getting_started.md 17a343c4359d45c987f29de1c73760b4
#: 5e0c5900842e4d66b2064b13cc31a3ad #: c9daaf0578434a7e812a8d3f3edde3f0
msgid "24 GB" msgid "24 GB"
msgstr "24 GB" msgstr "24 GB"
#: ../../getting_started/getting_started.md 84d29eef342f4d6282295c0e32487548 #: ../../getting_started/getting_started.md 6a35be039a0a43eaaf7d1aa40aece6f7
msgid "Smooth conversation inference" msgid "Smooth conversation inference"
msgstr "可以流畅的进行对话推理,无卡顿" msgstr "可以流畅的进行对话推理,无卡顿"
#: ../../getting_started/getting_started.md 5a10effe322e4afb8315415c04dc05a4 #: ../../getting_started/getting_started.md a414f5ae7bdd450f8d171d1f075d6b66
msgid "RTX 3090" msgid "RTX 3090"
msgstr "RTX 3090" msgstr "RTX 3090"
#: ../../getting_started/getting_started.md 8924059525ab43329a8bb6659e034d5e #: ../../getting_started/getting_started.md 109ed9bda6e541b88eb3400a44e15df7
msgid "Smooth conversation inference, better than V100" msgid "Smooth conversation inference, better than V100"
msgstr "可以流畅进行对话推理有卡顿感但好于V100" msgstr "可以流畅进行对话推理有卡顿感但好于V100"
#: ../../getting_started/getting_started.md 10f5bc076f524127a956d7a23f3666ba #: ../../getting_started/getting_started.md 3f2a05dc610f461faa989f3c12750d00
msgid "V100" msgid "V100"
msgstr "V100" msgstr "V100"
#: ../../getting_started/getting_started.md 7d664e81984847c7accd08db93fad404 #: ../../getting_started/getting_started.md 90e24795876546ecbb4796ca5d313514
msgid "16 GB" msgid "16 GB"
msgstr "16 GB" msgstr "16 GB"
#: ../../getting_started/getting_started.md 86765bc9ab01409fb7f5edf04f9b32a5 #: ../../getting_started/getting_started.md 34061757e69e4691b7a5ff3c2953f1e3
msgid "Conversation inference possible, noticeable stutter" msgid "Conversation inference possible, noticeable stutter"
msgstr "可以进行对话推理,有明显卡顿" msgstr "可以进行对话推理,有明显卡顿"
#: ../../getting_started/getting_started.md:18 a0ac5591c0ac4ac6a385e562353daf22 #: ../../getting_started/getting_started.md:18 61d59cb27daf43eb9aa2775fa6dac820
msgid "2. Install" msgid "2. Install"
msgstr "2. 安装" msgstr "2. 安装"
#: ../../getting_started/getting_started.md:20 a64a9a5945074ece872509f8cb425da9 #: ../../getting_started/getting_started.md:20 28a6b66c92464929bed8f04d5b841e8c
#, fuzzy
msgid "" msgid ""
"This project relies on a local MySQL database service, which you need to " "1.This project relies on a local MySQL database service, which you need "
"install locally. We recommend using Docker for installation." "to install locally. We recommend using Docker for installation."
msgstr "本项目依赖一个本地的 MySQL 数据库服务,你需要本地安装,推荐直接使用 Docker 安装。" msgstr "本项目依赖一个本地的 MySQL 数据库服务,你需要本地安装,推荐直接使用 Docker 安装。"
#: ../../getting_started/getting_started.md:25 11e799a372ab4d0f8269cd7be98bebc6 #: ../../getting_started/getting_started.md:24 7793b19e96b44ba481b527a877c81170
msgid "prepare server sql script"
msgstr "准备db-gpt server sql脚本"
#: ../../getting_started/getting_started.md:29 e3743699e95e45eab3e140df266bb3b5
msgid "" msgid ""
"We use [Chroma embedding database](https://github.com/chroma-core/chroma)" "We use [Chroma embedding database](https://github.com/chroma-core/chroma)"
" as the default for our vector database, so there is no need for special " " as the default for our vector database, so there is no need for special "
@ -117,11 +122,11 @@ msgstr ""
"向量数据库我们默认使用的是Chroma内存数据库所以无需特殊安装如果有需要连接其他的同学可以按照我们的教程进行安装配置。整个DB-" "向量数据库我们默认使用的是Chroma内存数据库所以无需特殊安装如果有需要连接其他的同学可以按照我们的教程进行安装配置。整个DB-"
"GPT的安装过程我们使用的是miniconda3的虚拟环境。创建虚拟环境并安装python依赖包" "GPT的安装过程我们使用的是miniconda3的虚拟环境。创建虚拟环境并安装python依赖包"
#: ../../getting_started/getting_started.md:34 dcab69c83d4c48b9bb19c4336ee74a66 #: ../../getting_started/getting_started.md:38 be9f388e255c4bc7837029ce8237ef0b
msgid "Before use DB-GPT Knowledge Management" msgid "Before use DB-GPT Knowledge Management"
msgstr "使用知识库管理功能之前" msgstr "使用知识库管理功能之前"
#: ../../getting_started/getting_started.md:40 735aeb6ae8aa4344b7ff679548279acc #: ../../getting_started/getting_started.md:44 9175b34ed78c4310b048f53ac07b13cb
msgid "" msgid ""
"Once the environment is installed, we have to create a new folder " "Once the environment is installed, we have to create a new folder "
"\"models\" in the DB-GPT project, and then we can put all the models " "\"models\" in the DB-GPT project, and then we can put all the models "
@ -130,33 +135,33 @@ msgstr ""
"环境安装完成后我们必须在DB-" "环境安装完成后我们必须在DB-"
"GPT项目中创建一个新文件夹\"models\"然后我们可以把从huggingface下载的所有模型放到这个目录下。" "GPT项目中创建一个新文件夹\"models\"然后我们可以把从huggingface下载的所有模型放到这个目录下。"
#: ../../getting_started/getting_started.md:43 7cbefe131b24488b9be39b3e8ed4f563 #: ../../getting_started/getting_started.md:47 40715051bd844b0187265d425debfbee
#, fuzzy #, fuzzy
msgid "Notice make sure you have install git-lfs" msgid "Notice make sure you have install git-lfs"
msgstr "确保你已经安装了git-lfs" msgstr "确保你已经安装了git-lfs"
#: ../../getting_started/getting_started.md:53 54ec90ebb969475988451cd66e6ff412 #: ../../getting_started/getting_started.md:57 7d7e7b1fe72b4e0c96eba721a8aa2113
msgid "" msgid ""
"The model files are large and will take a long time to download. During " "The model files are large and will take a long time to download. During "
"the download, let's configure the .env file, which needs to be copied and" "the download, let's configure the .env file, which needs to be copied and"
" created from the .env.template" " created from the .env.template"
msgstr "模型文件很大,需要很长时间才能下载。在下载过程中,让我们配置.env文件它需要从。env.template中复制和创建。" msgstr "模型文件很大,需要很长时间才能下载。在下载过程中,让我们配置.env文件它需要从。env.template中复制和创建。"
#: ../../getting_started/getting_started.md:56 9bdadbee88af4683a4eb7b4f221fb4b8 #: ../../getting_started/getting_started.md:60 6a25e5307bdb49a0afc69b9d17395a5a
msgid "cp .env.template .env" msgid "cp .env.template .env"
msgstr "cp .env.template .env" msgstr "cp .env.template .env"
#: ../../getting_started/getting_started.md:59 6357c4a0154b4f08a079419ac408442d #: ../../getting_started/getting_started.md:63 14567647544f4036beaae158b59833f6
msgid "" msgid ""
"You can configure basic parameters in the .env file, for example setting " "You can configure basic parameters in the .env file, for example setting "
"LLM_MODEL to the model to be used" "LLM_MODEL to the model to be used"
msgstr "您可以在.env文件中配置基本参数例如将LLM_MODEL设置为要使用的模型。" msgstr "您可以在.env文件中配置基本参数例如将LLM_MODEL设置为要使用的模型。"
#: ../../getting_started/getting_started.md:61 2f349f3ed3184b849ade2a15d5bf0c6c #: ../../getting_started/getting_started.md:65 1b459d413a4d4b7e883d1ec17384ca30
msgid "3. Run" msgid "3. Run"
msgstr "3. 运行" msgstr "3. 运行"
#: ../../getting_started/getting_started.md:62 fe408e4405bd48288e2e746386615925 #: ../../getting_started/getting_started.md:66 ed15ee15450e4a028bf5aa05a9309697
msgid "" msgid ""
"You can refer to this document to obtain the Vicuna weights: " "You can refer to this document to obtain the Vicuna weights: "
"[Vicuna](https://github.com/lm-sys/FastChat/blob/main/README.md#model-" "[Vicuna](https://github.com/lm-sys/FastChat/blob/main/README.md#model-"
@ -165,7 +170,7 @@ msgstr ""
"关于基础模型, 可以根据[Vicuna](https://github.com/lm-" "关于基础模型, 可以根据[Vicuna](https://github.com/lm-"
"sys/FastChat/blob/main/README.md#model-weights) 合成教程进行合成。" "sys/FastChat/blob/main/README.md#model-weights) 合成教程进行合成。"
#: ../../getting_started/getting_started.md:64 c0acfe28007f459ca21174f968763fa3 #: ../../getting_started/getting_started.md:68 de15a70920a94192a1f2017cbe3cdb55
msgid "" msgid ""
"If you have difficulty with this step, you can also directly use the " "If you have difficulty with this step, you can also directly use the "
"model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a " "model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a "
@ -174,11 +179,7 @@ msgstr ""
"如果此步有困难的同学,也可以直接使用[此链接](https://huggingface.co/Tribbiani/vicuna-" "如果此步有困难的同学,也可以直接使用[此链接](https://huggingface.co/Tribbiani/vicuna-"
"7b)上的模型进行替代。" "7b)上的模型进行替代。"
#: ../../getting_started/getting_started.md:66 cc0f4c4e43f24b679f857a8d937528ee #: ../../getting_started/getting_started.md:70 763aaed45fd948fab761552a7e06061a
msgid "prepare server sql script"
msgstr "准备db-gpt server sql脚本"
#: ../../getting_started/getting_started.md:72 386948064fe646f2b9f51a262dd64bf2
msgid "" msgid ""
"set .env configuration set your vector store type, " "set .env configuration set your vector store type, "
"eg:VECTOR_STORE_TYPE=Chroma, now we support Chroma and Milvus(version > " "eg:VECTOR_STORE_TYPE=Chroma, now we support Chroma and Milvus(version > "
@ -187,17 +188,17 @@ msgstr ""
"在.env文件设置向量数据库环境变量eg:VECTOR_STORE_TYPE=Chroma, 目前我们支持了 Chroma and " "在.env文件设置向量数据库环境变量eg:VECTOR_STORE_TYPE=Chroma, 目前我们支持了 Chroma and "
"Milvus(version >2.1) " "Milvus(version >2.1) "
#: ../../getting_started/getting_started.md:75 e6f6b06459944f2d8509703af365c664 #: ../../getting_started/getting_started.md:73 a8f0dc3546c54a1098ff10157f980cef
#, fuzzy #, fuzzy
msgid "Run db-gpt server" msgid "1.Run db-gpt server"
msgstr "运行模型服务" msgstr "运行模型服务"
#: ../../getting_started/getting_started.md:80 489b595dc08a459ca2fd83b1389d3bbd #: ../../getting_started/getting_started.md:78 1715948545154c10af585de8960bf853
#, fuzzy #, fuzzy
msgid "Open http://localhost:5000 with your browser to see the product." msgid "Open http://localhost:5000 with your browser to see the product."
msgstr "打开浏览器访问http://localhost:5000" msgstr "打开浏览器访问http://localhost:5000"
#: ../../getting_started/getting_started.md:82 699afb01c9f243ab837cdc73252f624c #: ../../getting_started/getting_started.md:80 8ea9964df477473e866fe844dcf4be54
msgid "" msgid ""
"If you want to access an external LLM service, you need to 1.set the " "If you want to access an external LLM service, you need to 1.set the "
"variables LLM_MODEL=YOUR_MODEL_NAME " "variables LLM_MODEL=YOUR_MODEL_NAME "
@ -205,7 +206,7 @@ msgid ""
"file. 2.execute dbgpt_server.py in light mode" "file. 2.execute dbgpt_server.py in light mode"
msgstr "如果你想访问外部的大模型服务1.需要在.env文件设置模型名和外部模型服务地址。2.使用light模式启动服务" msgstr "如果你想访问外部的大模型服务1.需要在.env文件设置模型名和外部模型服务地址。2.使用light模式启动服务"
#: ../../getting_started/getting_started.md:89 7df7f3870e1140d3a17dc322a46d6476 #: ../../getting_started/getting_started.md:87 4c409a0e2a994f428712ab94b475e9bd
msgid "" msgid ""
"If you want to learn about dbgpt-webui, read https://github.com/csunny" "If you want to learn about dbgpt-webui, read https://github.com/csunny"
"/DB-GPT/tree/new-page-framework/datacenter" "/DB-GPT/tree/new-page-framework/datacenter"

View File

@ -8,7 +8,7 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: DB-GPT 0.3.0\n" "Project-Id-Version: DB-GPT 0.3.0\n"
"Report-Msgid-Bugs-To: \n" "Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-06-30 17:16+0800\n" "POT-Creation-Date: 2023-07-20 10:53+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n" "Language: zh_CN\n"
@ -19,31 +19,35 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n" "Generated-By: Babel 2.12.1\n"
#: ../../index.rst:34 ../../index.rst:45 00f3369727374a2da4b3e02c35c63363 #: ../../index.rst:34 ../../index.rst:45 5bab1511780c442e9dd9e18519ad7ef3
msgid "Getting Started" msgid "Getting Started"
msgstr "开始" msgstr "开始"
#: ../../index.rst:56 ../../index.rst:77 f3b85d983f0141b5872a46451d20252a #: ../../index.rst:57 ../../index.rst:78 dbdeae6c9a344e7889d399a5dbbca872
msgid "Modules" msgid "Modules"
msgstr "模块" msgstr "模块"
#: ../../index.rst:91 ../../index.rst:107 614a2220f0004aadab14402dab5c926d #: ../../index.rst:92 ../../index.rst:108 43594aec45bf49609d7c7435c6472a9b
msgid "Use Cases" msgid "Use Cases"
msgstr "示例" msgstr "示例"
#: ../../index.rst:121 ../../index.rst:124 fae0a63e09f049a79ffbd8c1fd5b8f12 #: ../../index.rst:122 ../../index.rst:125 013a0e5d909e4332ac49f6200924043e
msgid "Reference" msgid "Reference"
msgstr "参考" msgstr "参考"
#: ../../index.rst:148 ../../index.rst:154 d4113b3e7b2f4a1ba462e6a54062adff #: ../../index.rst:134 ../../index.rst:137 42907cf1d1bb491bae2fcd7ede06e421
msgid "FAQ"
msgstr ""
#: ../../index.rst:161 ../../index.rst:167 64f4a587bc1f4ca9a456b97fac5d6def
msgid "Resources" msgid "Resources"
msgstr "资源" msgstr "资源"
#: ../../index.rst:7 6548011d66b14e71bf3f028b82bb58d6 #: ../../index.rst:7 df1ad6da308f49debd4053e5899a4c8d
msgid "Welcome to DB-GPT!" msgid "Welcome to DB-GPT!"
msgstr "欢迎来到DB-GPT中文文档" msgstr "欢迎来到DB-GPT中文文档"
#: ../../index.rst:8 da9fdd494e664e7e83448d4993c30f60 #: ../../index.rst:8 cc5b388ea2924968b2769e4b4a159761
msgid "" msgid ""
"As large models are released and iterated upon, they are becoming " "As large models are released and iterated upon, they are becoming "
"increasingly intelligent. However, in the process of using large models, " "increasingly intelligent. However, in the process of using large models, "
@ -61,7 +65,7 @@ msgstr ""
"我们启动了DB-" "我们启动了DB-"
"GPT项目为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署既可应用于“独立私有环境”又可根据业务模块进行“独立部署”和“隔离”确保“大模型”的能力绝对私有、安全、可控。" "GPT项目为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署既可应用于“独立私有环境”又可根据业务模块进行“独立部署”和“隔离”确保“大模型”的能力绝对私有、安全、可控。"
#: ../../index.rst:10 7650af0ff7bc4a93ad82c930c81cb7f5 #: ../../index.rst:10 b831cd62a1ba46108dcf343792e8d67a
msgid "" msgid ""
"**DB-GPT** is an experimental open-source project that uses localized GPT" "**DB-GPT** is an experimental open-source project that uses localized GPT"
" large models to interact with your data and environment. With this " " large models to interact with your data and environment. With this "
@ -71,102 +75,102 @@ msgstr ""
"DB-GPT 是一个开源的以数据库为基础的GPT实验项目使用本地化的GPT大模型与您的数据和环境进行交互无数据泄露风险100% 私密100%" "DB-GPT 是一个开源的以数据库为基础的GPT实验项目使用本地化的GPT大模型与您的数据和环境进行交互无数据泄露风险100% 私密100%"
" 安全。" " 安全。"
#: ../../index.rst:12 18249811cea0456096f06da05638d474 #: ../../index.rst:12 3baac7e36a824395a57c246e05560c67
msgid "**Features**" msgid "**Features**"
msgstr "特性" msgstr "特性"
#: ../../index.rst:13 367c092acf1a49a0906f78433e6d9926 #: ../../index.rst:13 6638568ff94e47f6a0b04a6775ca45d1
msgid "" msgid ""
"Currently, we have released multiple key features, which are listed below" "Currently, we have released multiple key features, which are listed below"
" to demonstrate our current capabilities:" " to demonstrate our current capabilities:"
msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。" msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。"
#: ../../index.rst:15 01334c212ef44244a306f8cfb584c48f #: ../../index.rst:15 b49710ac36ed4a188280aeb3589f11fe
msgid "SQL language capabilities - SQL generation - SQL diagnosis" msgid "SQL language capabilities - SQL generation - SQL diagnosis"
msgstr "SQL语言能力 - SQL生成 - SQL诊断" msgstr "SQL语言能力 - SQL生成 - SQL诊断"
#: ../../index.rst:19 fe7396137ed84517bd99c93500f21bb6 #: ../../index.rst:19 6bd495632b39477e8948677aeb4ab1ee
msgid "" msgid ""
"Private domain Q&A and data processing - Database knowledge Q&A - Data " "Private domain Q&A and data processing - Database knowledge Q&A - Data "
"processing" "processing"
msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理" msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理"
#: ../../index.rst:23 c12af975ed2d451496a476d91b6bb4c5 #: ../../index.rst:23 d1d3665a412246b9814139a938a29d1b
msgid "" msgid ""
"Plugins - Support custom plugin execution tasks and natively support the " "Plugins - Support custom plugin execution tasks and natively support the "
"Auto-GPT plugin, such as:" "Auto-GPT plugin, such as:"
msgstr "插件模型 - 支持自定义插件执行任务并原生支持Auto-GPT插件例如:* SQL自动执行获取查询结果 * 自动爬取学习知识" msgstr "插件模型 - 支持自定义插件执行任务并原生支持Auto-GPT插件例如:* SQL自动执行获取查询结果 * 自动爬取学习知识"
#: ../../index.rst:26 97507eb30ab548fb9443b252e481d6be #: ../../index.rst:26 41e37b63a8f44fc09a4da3574b25fcd3
msgid "" msgid ""
"Unified vector storage/indexing of knowledge base - Support for " "Unified vector storage/indexing of knowledge base - Support for "
"unstructured data such as PDF, Markdown, CSV, and WebURL" "unstructured data such as PDF, Markdown, CSV, and WebURL"
msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL" msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL"
#: ../../index.rst:29 b2ff661c13d54cd79f27923163079706 #: ../../index.rst:29 1acac8a6cbe54f7b8c31135189d726d6
msgid "" msgid ""
"Milti LLMs Support - Supports multiple large language models, currently " "Milti LLMs Support - Supports multiple large language models, currently "
"supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8) - TODO: codegen2, " "supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8) - TODO: codegen2, "
"codet5p" "codet5p"
msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)" msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)"
#: ../../index.rst:35 15178a36df624fa9b4a5acdab5060752 #: ../../index.rst:35 481edf63bcd348b4b3e14353dc5be952
msgid "" msgid ""
"How to get started using DB-GPT to interact with your data and " "How to get started using DB-GPT to interact with your data and "
"environment." "environment."
msgstr "开始使用DB-GPT与您的数据环境进行交互。" msgstr "开始使用DB-GPT与您的数据环境进行交互。"
#: ../../index.rst:36 8aa3a44286b244a4b9b92db285e5382b #: ../../index.rst:36 6b26a04661f7445198b3797442d3f178
#, fuzzy #, fuzzy
msgid "`Quickstart Guide <./getting_started/getting_started.html>`_" msgid "`Quickstart Guide <./getting_started/getting_started.html>`_"
msgstr "`使用指南 <./getting_started/getting_started.html>`_" msgstr "`使用指南 <./getting_started/getting_started.html>`_"
#: ../../index.rst:38 3f4ec91e5df44629aff6c94ffbaa37d7 #: ../../index.rst:38 ff435ef9964d44508ad840de2950c1c8
msgid "Concepts and terminology" msgid "Concepts and terminology"
msgstr "相关概念" msgstr "相关概念"
#: ../../index.rst:40 d0126674e0e24aefbf51d610b2fcf5da #: ../../index.rst:40 db9dfacbe93e4f858843c4d2a0e2dd25
#, fuzzy #, fuzzy
msgid "`Concepts and Terminology <./getting_started/concepts.html>`_" msgid "`Concepts and Terminology <./getting_started/concepts.html>`_"
msgstr "`相关概念 <./getting_started/concepts.html>`_" msgstr "`相关概念 <./getting_started/concepts.html>`_"
#: ../../index.rst:42 c6d67e4c1b8346c18f2b99c8f5795627 #: ../../index.rst:42 50faf8302ba140ce84897daef2c90ca2
msgid "Coming soon..." msgid "Coming soon..."
msgstr "" msgstr ""
#: ../../index.rst:44 18d63d2d729246648743d1b7470029e0 #: ../../index.rst:44 547bb007f90e43c096f25647cebefd7b
msgid "`Tutorials <.getting_started/tutorials.html>`_" msgid "`Tutorials <.getting_started/tutorials.html>`_"
msgstr "`教程 <.getting_started/tutorials.html>`_" msgstr "`教程 <.getting_started/tutorials.html>`_"
#: ../../index.rst:58 5f93833b5e8f42b7b8728587f5054d8b #: ../../index.rst:59 89bc50c752b84e3fb789ce5da1b654dc
msgid "" msgid ""
"These modules are the core abstractions with which we can interact with " "These modules are the core abstractions with which we can interact with "
"data and environment smoothly." "data and environment smoothly."
msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。" msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。"
#: ../../index.rst:59 d68173b40df146818ddf68b309bbd27d #: ../../index.rst:60 81f3860920964720958c62b3a6769f12
msgid "" msgid ""
"It's very important for DB-GPT, DB-GPT also provide standard, extendable " "It's very important for DB-GPT, DB-GPT also provide standard, extendable "
"interfaces." "interfaces."
msgstr "DB-GPT还提供了标准的、可扩展的接口。" msgstr "DB-GPT还提供了标准的、可扩展的接口。"
#: ../../index.rst:61 00257e3907b346d5bd007f2cfe52bac9 #: ../../index.rst:62 15faf914d05544df86359e50fdc70483
msgid "" msgid ""
"The docs for each module contain quickstart examples, how to guides, " "The docs for each module contain quickstart examples, how to guides, "
"reference docs, and conceptual guides." "reference docs, and conceptual guides."
msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。" msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。"
#: ../../index.rst:63 e04ba16ec943405080a26ec874e67823 #: ../../index.rst:64 18a673690afa47df8e6dd3d065b2580f
msgid "The modules are as follows" msgid "The modules are as follows"
msgstr "组成模块如下:" msgstr "组成模块如下:"
#: ../../index.rst:65 12d9ce3eda484316b3fcbbb2adb48b6e #: ../../index.rst:66 d2c20ed1b6ba4aa697090453b0f775a1
msgid "" msgid ""
"`LLMs <./modules/llms.html>`_: Supported multi models management and " "`LLMs <./modules/llms.html>`_: Supported multi models management and "
"integrations." "integrations."
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 " msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
#: ../../index.rst:67 9d7ce6fea46c41c0940198499336430e #: ../../index.rst:68 d6c72e6fee3348e89422f9ff45804e3a
msgid "" msgid ""
"`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and" "`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and"
" serialization for multi database." " serialization for multi database."
@ -174,59 +178,59 @@ msgstr ""
"`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt " "`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt "
",并进行优化,提高系统的响应效率" ",并进行优化,提高系统的响应效率"
#: ../../index.rst:69 0847158883424a98a02e58fd9c2d6744 #: ../../index.rst:70 1756de47335d4eb7916a6c8b8b5ab70b
msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler." msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler."
msgstr "`Agent与插件 <./modules/plugins.html>`_:提供Agent和插件机制使得用户可以自定义并增强系统的行为。" msgstr "`Agent与插件 <./modules/plugins.html>`_:提供Agent和插件机制使得用户可以自定义并增强系统的行为。"
#: ../../index.rst:71 6b2a8143adff4395b6a86a6d22078a87 #: ../../index.rst:72 542efb88411f4cc192a08e7d28c84863
#, fuzzy #, fuzzy
msgid "" msgid ""
"`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding," "`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding,"
" and search." " and search."
msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, " msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, "
#: ../../index.rst:73 70e831c592ca431791a614934061c148 #: ../../index.rst:74 0204630cd5f14c68ada3bf11e0b0fbf5
msgid "" msgid ""
"`Connections <./modules/connections.html>`_: Supported multi databases " "`Connections <./modules/connections.html>`_: Supported multi databases "
"connection. management connections and interact with this." "connection. management connections and interact with this."
msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 " msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 "
#: ../../index.rst:75 1a5eaebe25174828b60c17475dae7928 #: ../../index.rst:76 f9363a963be44d0ea01bb5d65b69d0f8
#, fuzzy #, fuzzy
msgid "`Vector <./modules/vector.html>`_: Supported multi vector database." msgid "`Vector <./modules/vector.html>`_: Supported multi vector database."
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 " msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
#: ../../index.rst:93 8a2ea497d36449febd1560dbecd5ec44 #: ../../index.rst:94 7223c3b95e9446bcae40a355e6b02324
msgid "Best Practices and built-in implementations for common DB-GPT use cases:" msgid "Best Practices and built-in implementations for common DB-GPT use cases:"
msgstr "DB-GPT用例的最佳实践和内置方法:" msgstr "DB-GPT用例的最佳实践和内置方法:"
#: ../../index.rst:95 fcc65a470c5643619688488cfe010f61 #: ../../index.rst:96 c088f2ab9f2247ac9a8c9af31d0da7a6
msgid "" msgid ""
"`Sql generation and diagnosis " "`Sql generation and diagnosis "
"<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and " "<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and "
"diagnosis." "diagnosis."
msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。" msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。"
#: ../../index.rst:97 26b983fbb9394f94bf31636175cc6f95 #: ../../index.rst:98 23973b07c6ba42088a714b048d4b43c4
msgid "" msgid ""
"`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A " "`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A "
"important scene for user to chat with database documents, codes, bugs and" "important scene for user to chat with database documents, codes, bugs and"
" schemas." " schemas."
msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景\"" msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景\""
#: ../../index.rst:99 aa4cfc95a3bf463682952bf40f38c99b #: ../../index.rst:100 6a1d77bc56a14803b66ebc9f2b6c4b7b
msgid "" msgid ""
"`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use" "`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use"
" multi models to chat." " multi models to chat."
msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话" msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话"
#: ../../index.rst:101 e9b540aab91c44ae911bf96aa18c0f36 #: ../../index.rst:102 621774a68cba46daa7112f865e6e3af9
msgid "" msgid ""
"`Querying Database Data <./use_cases/query_database_data.html>`_: Query " "`Querying Database Data <./use_cases/query_database_data.html>`_: Query "
"and Analysis data from databases and give charts." "and Analysis data from databases and give charts."
msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。" msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。"
#: ../../index.rst:103 88a99b638b4a42abae39fe9de4f16927 #: ../../index.rst:104 9f379fbe8aac47f4a1a53d84ab2a2f51
msgid "" msgid ""
"`Interacting with apis <./use_cases/interacting_with_api.html>`_: " "`Interacting with apis <./use_cases/interacting_with_api.html>`_: "
"Interact with apis, such as create a table, deploy a database cluster, " "Interact with apis, such as create a table, deploy a database cluster, "
@ -235,33 +239,37 @@ msgstr ""
"`API交互 <./use_cases/interacting_with_api.html>`_: " "`API交互 <./use_cases/interacting_with_api.html>`_: "
"与API交互例如创建表、部署数据库集群、创建数据库等。" "与API交互例如创建表、部署数据库集群、创建数据库等。"
#: ../../index.rst:105 53cb38b16ed04420934233aec02fd44b #: ../../index.rst:106 3dbb717e1b024a20bb6049facb616b1b
msgid "" msgid ""
"`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to" "`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to"
" Plugin use tools to manage databases autonomoly." " Plugin use tools to manage databases autonomoly."
msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。" msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。"
#: ../../index.rst:122 fbc2fe1374ac4909b1c66ea11bbfafec #: ../../index.rst:123 4acedc39ccf34e79b805189a11285a3a
msgid "" msgid ""
"Full documentation on all methods, classes, installation methods, and " "Full documentation on all methods, classes, installation methods, and "
"integration setups for DB-GPT." "integration setups for DB-GPT."
msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。" msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。"
#: ../../index.rst:133 c45530a6c3a34a42a0fd8bca6efe3b07 #: ../../index.rst:135 622112f3cce34461ba7e0d52fa81d438
msgid "DB-GPT FAQ."
msgstr ""
#: ../../index.rst:146 a989256fd69f4bbfae73191b505c59fa
msgid "Ecosystem" msgid "Ecosystem"
msgstr "环境系统" msgstr "环境系统"
#: ../../index.rst:135 e9f68f4abbdc4832889d0e343ce43d27 #: ../../index.rst:148 1b5c410dd94842f2801f08540dd57647
msgid "Guides for how other companies/products can be used with DB-GPT" msgid "Guides for how other companies/products can be used with DB-GPT"
msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南" msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南"
#: ../../index.rst:150 c33686daaa7c44d28c96dc22c0d9480c #: ../../index.rst:163 8368b4c62ebe41ad91551e241dbcc4df
msgid "" msgid ""
"Additional resources we think may be useful as you develop your " "Additional resources we think may be useful as you develop your "
"application!" "application!"
msgstr "“我们认为在您开发应用程序时可能有用的其他资源!”" msgstr "“我们认为在您开发应用程序时可能有用的其他资源!”"
#: ../../index.rst:152 f6d108be181f47af80c4b8a5931ff172 #: ../../index.rst:165 2a4cd897b20c4683979b44fb9d7470e6
msgid "" msgid ""
"`Discord <https://discord.com/invite/twmZk3vv>`_: if your have some " "`Discord <https://discord.com/invite/twmZk3vv>`_: if your have some "
"problem or ideas, you can talk from discord." "problem or ideas, you can talk from discord."

View File

@ -11,7 +11,7 @@ cp .env.template .env
LLM_MODEL=vicuna-13b
MODEL_SERVER=http://127.0.0.1:8000
```
now we support models vicuna-13b, vicuna-7b, chatglm-6b, flan-t5-base, guanaco-33b-merged, falcon-40b, gorilla-7b, llama-2-7b, llama-2-13b.
if you want to use another model, such as chatglm-6b, you just need to update the .env config file.
```

View File

@ -47,6 +47,9 @@ LLM_MODEL_CONFIG = {
"gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"), "gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
"gptj-6b": os.path.join(MODEL_PATH, "ggml-gpt4all-j-v1.3-groovy.bin"), "gptj-6b": os.path.join(MODEL_PATH, "ggml-gpt4all-j-v1.3-groovy.bin"),
"proxyllm": "proxyllm", "proxyllm": "proxyllm",
"llama-2-7b": os.path.join(MODEL_PATH, "Llama-2-7b-chat-hf"),
"llama-2-13b": os.path.join(MODEL_PATH, "Llama-2-13b-chat-hf"),
"llama-2-70b": os.path.join(MODEL_PATH, "Llama-2-70b-chat-hf"),
}
# Load model config

View File

@ -263,12 +263,26 @@ class ProxyllmAdapter(BaseLLMAdaper):
return "proxyllm", None return "proxyllm", None
class Llama2Adapter(BaseLLMAdaper):
"""The model adapter for llama-2"""
def match(self, model_path: str):
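# Case-insensitive substring check: model paths such as
# "models/Llama-2-7b-chat-hf" (see LLM_MODEL_CONFIG) will match.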
return "llama-2" in model_path.lower()
def loader(self, model_path: str, from_pretrained_kwargs: dict):
model, tokenizer = super().loader(model_path, from_pretrained_kwargs)
model.config.eos_token_id = tokenizer.eos_token_id
model.config.pad_token_id = tokenizer.pad_token_id
return model, tokenizer
register_llm_model_adapters(VicunaLLMAdapater)
register_llm_model_adapters(ChatGLMAdapater)
register_llm_model_adapters(GuanacoAdapter)
register_llm_model_adapters(FalconAdapater)
register_llm_model_adapters(GorillaAdapter)
register_llm_model_adapters(GPT4AllAdapter)
register_llm_model_adapters(Llama2Adapter)
# TODO Default support vicuna, other model need to tests and Evaluate
# just for test_py, remove this later
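A minimal usage sketch for the new adapter (illustrative; it assumes only the classes shown in this diff):

```python
# Illustrative: how Llama2Adapter decides whether it handles a checkpoint.
adapter = Llama2Adapter()
print(adapter.match("models/Llama-2-7b-chat-hf"))  # True, match is case-insensitive
print(adapter.match("models/vicuna-13b"))          # False

# loader() reuses BaseLLMAdaper.loader() and then copies the tokenizer's
# eos/pad token ids onto the model config, which llama-2 checkpoints need
# so generation terminates cleanly.
```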

308
pilot/model/conversation.py Normal file
View File

@ -0,0 +1,308 @@
"""
Fork from fastchat: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
Conversation prompt templates.
"""
import dataclasses
from enum import auto, IntEnum
from typing import List, Any, Dict, Callable
class SeparatorStyle(IntEnum):
"""Separator styles."""
ADD_COLON_SINGLE = auto()
ADD_COLON_TWO = auto()
ADD_COLON_SPACE_SINGLE = auto()
NO_COLON_SINGLE = auto()
NO_COLON_TWO = auto()
ADD_NEW_LINE_SINGLE = auto()
LLAMA2 = auto()
CHATGLM = auto()
CHATML = auto()
CHATINTERN = auto()
DOLLY = auto()
RWKV = auto()
PHOENIX = auto()
ROBIN = auto()
@dataclasses.dataclass
class Conversation:
"""A class that manages prompt templates and keeps all conversation history."""
# The name of this template
name: str
# The system prompt
system: str
# Two roles
roles: List[str]
# All messages. Each item is (role, message).
messages: List[List[str]]
# The number of few shot examples
offset: int
# Separators
sep_style: SeparatorStyle
sep: str
sep2: str = None
# Stop criteria (the default one is EOS token)
stop_str: str = None
# Stops generation if meeting any token in this list
stop_token_ids: List[int] = None
# format system message
system_formatter: Callable = None
def get_prompt(self) -> str:
"""Get the prompt for generation."""
if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
ret = self.system + self.sep
for role, message in self.messages:
if message:
ret += role + ": " + message + self.sep
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.ADD_COLON_TWO:
seps = [self.sep, self.sep2]
ret = self.system + seps[0]
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + ": " + message + seps[i % 2]
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
ret = self.system + self.sep
for role, message in self.messages:
if message:
ret += role + ": " + message + self.sep
else:
ret += role + ": " # must be end with a space
return ret
elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
ret = "" if self.system == "" else self.system + self.sep
for role, message in self.messages:
if message:
ret += role + "\n" + message + self.sep
else:
ret += role + "\n"
return ret
elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
ret = self.system
for role, message in self.messages:
if message:
ret += role + message + self.sep
else:
ret += role
return ret
elif self.sep_style == SeparatorStyle.NO_COLON_TWO:
seps = [self.sep, self.sep2]
ret = self.system
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + message + seps[i % 2]
else:
ret += role
return ret
elif self.sep_style == SeparatorStyle.RWKV:
ret = self.system
for i, (role, message) in enumerate(self.messages):
if message:
ret += (
role
+ ": "
+ message.replace("\r\n", "\n").replace("\n\n", "\n")
)
ret += "\n\n"
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.LLAMA2:
seps = [self.sep, self.sep2]
ret = ""
for i, (role, message) in enumerate(self.messages):
if message:
if i == 0:
ret += self.system + message
else:
ret += role + " " + message + seps[i % 2]
else:
ret += role
return ret
elif self.sep_style == SeparatorStyle.CHATGLM:
# source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
# source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
round_add_n = 1 if self.name == "chatglm2" else 0
if self.system:
ret = self.system + self.sep
else:
ret = ""
for i, (role, message) in enumerate(self.messages):
if i % 2 == 0:
ret += f"[Round {i//2 + round_add_n}]{self.sep}"
if message:
ret += f"{role}{message}{self.sep}"
else:
ret += f"{role}"
return ret
elif self.sep_style == SeparatorStyle.CHATML:
ret = "" if self.system == "" else self.system + self.sep + "\n"
for role, message in self.messages:
if message:
ret += role + "\n" + message + self.sep + "\n"
else:
ret += role + "\n"
return ret
elif self.sep_style == SeparatorStyle.CHATINTERN:
# source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
seps = [self.sep, self.sep2]
ret = self.system
for i, (role, message) in enumerate(self.messages):
if i % 2 == 0:
ret += "<s>"
if message:
ret += role + ":" + message + seps[i % 2] + "\n"
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.DOLLY:
seps = [self.sep, self.sep2]
ret = self.system
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + ":\n" + message + seps[i % 2]
if i % 2 == 1:
ret += "\n\n"
else:
ret += role + ":\n"
return ret
elif self.sep_style == SeparatorStyle.PHOENIX:
ret = self.system
for role, message in self.messages:
if message:
ret += role + ": " + "<s>" + message + "</s>"
else:
ret += role + ": " + "<s>"
return ret
elif self.sep_style == SeparatorStyle.ROBIN:
ret = self.system + self.sep
for role, message in self.messages:
if message:
ret += role + ":\n" + message + self.sep
else:
ret += role + ":\n"
return ret
else:
raise ValueError(f"Invalid style: {self.sep_style}")
def append_message(self, role: str, message: str):
"""Append a new message."""
self.messages.append([role, message])
def update_last_message(self, message: str):
"""Update the last output.
The last message is typically set to be None when constructing the prompt,
so we need to update it in-place after getting the response from a model.
"""
self.messages[-1][1] = message
def update_system_message(self, system_message: str):
"""Update system message"""
if self.system_formatter:
self.system = self.system_formatter(system_message)
else:
self.system = system_message
def to_gradio_chatbot(self):
"""Convert the conversation to gradio chatbot format."""
ret = []
for i, (role, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
ret.append([msg, None])
else:
ret[-1][-1] = msg
return ret
def to_openai_api_messages(self):
"""Convert the conversation to OpenAI chat completion format."""
ret = [{"role": "system", "content": self.system}]
for i, (_, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
ret.append({"role": "user", "content": msg})
else:
if msg is not None:
ret.append({"role": "assistant", "content": msg})
return ret
def copy(self):
return Conversation(
name=self.name,
system=self.system,
roles=self.roles,
messages=[[x, y] for x, y in self.messages],
offset=self.offset,
sep_style=self.sep_style,
sep=self.sep,
sep2=self.sep2,
stop_str=self.stop_str,
stop_token_ids=self.stop_token_ids,
system_formatter=self.system_formatter,
)
def dict(self):
return {
"template_name": self.name,
"system": self.system,
"roles": self.roles,
"messages": self.messages,
"offset": self.offset,
}
# A global registry for all conversation templates
conv_templates: Dict[str, Conversation] = {}
def register_conv_template(template: Conversation, override: bool = False):
"""Register a new conversation template."""
if not override:
assert (
template.name not in conv_templates
), f"{template.name} has been registered."
conv_templates[template.name] = template
def get_conv_template(name: str) -> Conversation:
"""Get a conversation template."""
return conv_templates[name].copy()
# llama2 template
# reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212
register_conv_template(
Conversation(
name="llama-2",
system="<s>[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. "
"Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. "
"Please ensure that your responses are socially unbiased and positive in nature.\n\n"
"If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. "
"If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n\n",
roles=("[INST]", "[/INST]"),
messages=(),
offset=0,
sep_style=SeparatorStyle.LLAMA2,
sep=" ",
sep2=" </s><s>",
stop_token_ids=[2],
system_formatter=lambda msg: f"<s>[INST] <<SYS>>\n{msg}\n<</SYS>>\n\n",
)
)
# TODO: Support conversation templates for other models
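
As a quick illustration of how these templates are consumed, the sketch below builds a Llama-2 prompt from the registry defined above; the user message is invented for the example, while `get_conv_template`, `append_message`, and `get_prompt` come from this module.

# A minimal sketch, assuming only the registry defined above; the message is invented.
conv = get_conv_template("llama-2")
conv.append_message(conv.roles[0], "What is DB-GPT?")  # "[INST]" user turn
conv.append_message(conv.roles[1], None)  # blank assistant slot to be filled by the model
prompt = conv.get_prompt()
# prompt is roughly "<s>[INST] <<SYS>>\n...\n<</SYS>>\n\nWhat is DB-GPT? [/INST]"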

pilot/model/inference.py (new file, 242 lines)

@@ -0,0 +1,242 @@
"""
Fork from fastchat: https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/inference.py
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import gc
from typing import Iterable, Dict
import torch
from transformers.generation.logits_process import (
LogitsProcessorList,
RepetitionPenaltyLogitsProcessor,
TemperatureLogitsWarper,
TopKLogitsWarper,
TopPLogitsWarper,
)
from pilot.model.llm_utils import is_sentence_complete, is_partial_stop
def prepare_logits_processor(
temperature: float, repetition_penalty: float, top_p: float, top_k: int
) -> LogitsProcessorList:
processor_list = LogitsProcessorList()
# TemperatureLogitsWarper doesn't accept 0.0, 1.0 makes it a no-op so we skip two cases.
if temperature >= 1e-5 and temperature != 1.0:
processor_list.append(TemperatureLogitsWarper(temperature))
if repetition_penalty > 1.0:
processor_list.append(RepetitionPenaltyLogitsProcessor(repetition_penalty))
if 1e-8 <= top_p < 1.0:
processor_list.append(TopPLogitsWarper(top_p))
if top_k > 0:
processor_list.append(TopKLogitsWarper(top_k))
return processor_list
@torch.inference_mode()
def generate_stream(
model,
tokenizer,
params: Dict,
device: str,
context_len: int,
stream_interval: int = 2,
judge_sent_end: bool = False,
):
# Read parameters
prompt = params["prompt"]
print(f"Prompt of model: \n{prompt}")
len_prompt = len(prompt)
temperature = float(params.get("temperature", 1.0))
repetition_penalty = float(params.get("repetition_penalty", 1.0))
top_p = float(params.get("top_p", 1.0))
top_k = int(params.get("top_k", -1)) # -1 means disable
max_new_tokens = int(params.get("max_new_tokens", 2048))
echo = bool(params.get("echo", True))
stop_str = params.get("stop", None)
stop_token_ids = params.get("stop_token_ids", None) or []
stop_token_ids.append(tokenizer.eos_token_id)
logits_processor = prepare_logits_processor(
temperature, repetition_penalty, top_p, top_k
)
input_ids = tokenizer(prompt).input_ids
if model.config.is_encoder_decoder:
max_src_len = context_len
else: # truncate
max_src_len = context_len - max_new_tokens - 1
input_ids = input_ids[-max_src_len:]
output_ids = list(input_ids)
input_echo_len = len(input_ids)
if model.config.is_encoder_decoder:
encoder_output = model.encoder(
input_ids=torch.as_tensor([input_ids], device=device)
)[0]
start_ids = torch.as_tensor(
[[model.generation_config.decoder_start_token_id]],
dtype=torch.int64,
device=device,
)
past_key_values = out = None
sent_interrupt = False
for i in range(max_new_tokens):
if i == 0: # prefill
if model.config.is_encoder_decoder:
out = model.decoder(
input_ids=start_ids,
encoder_hidden_states=encoder_output,
use_cache=True,
)
logits = model.lm_head(out[0])
else:
out = model(torch.as_tensor([input_ids], device=device), use_cache=True)
logits = out.logits
past_key_values = out.past_key_values
else: # decoding
if model.config.is_encoder_decoder:
out = model.decoder(
input_ids=torch.as_tensor(
[[token] if not sent_interrupt else output_ids], device=device
),
encoder_hidden_states=encoder_output,
use_cache=True,
past_key_values=past_key_values if not sent_interrupt else None,
)
sent_interrupt = False
logits = model.lm_head(out[0])
else:
out = model(
input_ids=torch.as_tensor(
[[token] if not sent_interrupt else output_ids], device=device
),
use_cache=True,
past_key_values=past_key_values if not sent_interrupt else None,
)
sent_interrupt = False
logits = out.logits
past_key_values = out.past_key_values
if logits_processor:
if repetition_penalty > 1.0:
tmp_output_ids = torch.as_tensor([output_ids], device=logits.device)
else:
tmp_output_ids = None
last_token_logits = logits_processor(tmp_output_ids, logits[:, -1, :])[0]
else:
last_token_logits = logits[0, -1, :]
if device == "mps":
# Switch to CPU to avoid some bugs in the mps backend.
last_token_logits = last_token_logits.float().to("cpu")
if temperature < 1e-5 or top_p < 1e-8: # greedy
_, indices = torch.topk(last_token_logits, 2)
tokens = [int(index) for index in indices.tolist()]
else:
probs = torch.softmax(last_token_logits, dim=-1)
indices = torch.multinomial(probs, num_samples=2)
tokens = [int(token) for token in indices.tolist()]
token = tokens[0]
output_ids.append(token)
if token in stop_token_ids:
stopped = True
else:
stopped = False
# Yield the output tokens
if i % stream_interval == 0 or i == max_new_tokens - 1 or stopped:
if echo:
tmp_output_ids = output_ids
rfind_start = len_prompt
else:
tmp_output_ids = output_ids[input_echo_len:]
rfind_start = 0
output = tokenizer.decode(
tmp_output_ids,
skip_special_tokens=True,
spaces_between_special_tokens=False,
clean_up_tokenization_spaces=True,
)
# TODO: Patch for incomplete sentences interrupting output; this can be reworked more elegantly.
if judge_sent_end and stopped and not is_sentence_complete(output):
if len(tokens) > 1:
token = tokens[1]
output_ids[-1] = token
else:
output_ids.pop()
stopped = False
sent_interrupt = True
partially_stopped = False
if stop_str:
if isinstance(stop_str, str):
pos = output.rfind(stop_str, rfind_start)
if pos != -1:
output = output[:pos]
stopped = True
else:
partially_stopped = is_partial_stop(output, stop_str)
elif isinstance(stop_str, Iterable):
for each_stop in stop_str:
pos = output.rfind(each_stop, rfind_start)
if pos != -1:
output = output[:pos]
stopped = True
break
else:
partially_stopped = is_partial_stop(output, each_stop)
if partially_stopped:
break
else:
raise ValueError("Invalid stop field type.")
# Prevent yielding partial stop sequence
if not partially_stopped:
yield output
# yield {
# "text": output,
# "usage": {
# "prompt_tokens": input_echo_len,
# "completion_tokens": i,
# "total_tokens": input_echo_len + i,
# },
# "finish_reason": None,
# }
if stopped:
break
# Finish stream event, which contains finish reason
if i == max_new_tokens - 1:
finish_reason = "length"
elif stopped:
finish_reason = "stop"
else:
finish_reason = None
yield output
# yield {
# "text": output,
# "usage": {
# "prompt_tokens": input_echo_len,
# "completion_tokens": i,
# "total_tokens": input_echo_len + i,
# },
# "finish_reason": finish_reason,
# }
# Clean
del past_key_values, out
gc.collect()
torch.cuda.empty_cache()
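
For orientation, here is a hedged sketch of driving this generator directly. The checkpoint path, device, and sampling values are assumptions; only `generate_stream` and the `params` keys come from the file above.

# Illustrative driver; model path, device and sampling values are assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer

from pilot.model.inference import generate_stream

model_path = "lmsys/vicuna-7b-v1.5"  # hypothetical checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_path).to("cuda")

params = {
    "prompt": "USER: Hello! ASSISTANT:",
    "temperature": 0.7,
    "max_new_tokens": 64,
    "echo": False,
}
for output in generate_stream(model, tokenizer, params, "cuda", context_len=2048):
    print(output)  # each yield is the full decoded completion so far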

@@ -8,6 +8,11 @@ import copy

 import torch
 from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
+from pilot.scene.base_message import ModelMessage, _parse_model_messages
+
+# TODO move sep to scene prompt of model
+_CHATGLM_SEP = "\n"
+_CHATGLM2_SEP = "\n\n"

 @torch.inference_mode()
@@ -32,42 +37,20 @@ def chatglm_generate_stream(
     generate_kwargs["temperature"] = temperature

     # TODO, Fix this
-    print(prompt)
-    messages = prompt.split(stop)
-
-    # Add history conversation
-    hist = [HistoryEntry()]
-    system_messages = []
-    for message in messages[:-2]:
-        if len(message) <= 0:
-            continue
-        if "human:" in message:
-            hist[-1].add_question(message.split("human:")[1])
-        elif "system:" in message:
-            msg = message.split("system:")[1]
-            hist[-1].add_question(msg)
-            system_messages.append(msg)
-        elif "ai:" in message:
-            hist[-1].add_answer(message.split("ai:")[1])
-            hist.append(HistoryEntry())
-        else:
-            # TODO
-            # hist[-1].add_question(message.split("system:")[1])
-            # once_conversation.append(f"""###system:{message} """)
-            pass
-    try:
-        query = messages[-2].split("human:")[1]
-    except IndexError:
-        query = messages[-3].split("human:")[1]
-    hist = build_history(hist)
+    # print(prompt)
+    # messages = prompt.split(stop)
+    messages: List[ModelMessage] = params["messages"]
+    query, system_messages, hist = _parse_model_messages(messages)
+    system_messages_str = "".join(system_messages)

     if not hist:
         # No history conversation, but has system messages, merge to user`s query
-        query = prompt_adaptation(system_messages, query)
+        query = prompt_adaptation(system_messages_str, query)
+    else:
+        # history exist, add system message to head of history
+        hist[0][0] = system_messages_str + _CHATGLM2_SEP + hist[0][0]

     print("Query Message: ", query)
     print("hist: ", hist)
-    # output = ""
-    # i = 0

     for i, (response, new_hist) in enumerate(
         model.stream_chat(tokenizer, query, hist, **generate_kwargs)
@@ -103,10 +86,10 @@ def build_history(hist: List[HistoryEntry]) -> List[List[str]]:
     return list(filter(lambda hl: hl is not None, map(lambda h: h.to_list(), hist)))

-def prompt_adaptation(system_messages: List[str], human_message: str) -> str:
-    if not system_messages:
+def prompt_adaptation(system_messages_str: str, human_message: str) -> str:
+    if not system_messages_str or system_messages_str == "":
         return human_message
-    system_messages_str = " ".join(system_messages)
+    # TODO Multi-model prompt adaptation
     adaptation_rules = [
         r"Question:\s*{}\s*",  # chat_db scene
         r"Goals:\s*{}\s*",  # chat_execution
@@ -119,4 +102,4 @@ def prompt_adaptation(system_messages_str: str, human_message: str) -> str:
         if re.search(pattern, system_messages_str):
             return system_messages_str
     # https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
-    return f"{system_messages_str}\n\n问:{human_message}\n\n答:"
+    return system_messages_str + _CHATGLM2_SEP + human_message

@@ -3,8 +3,10 @@

 import json
 import requests
+from typing import List

 from pilot.configs.config import Config
 from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
+from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

 CFG = Config()
@@ -20,36 +22,17 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048):
         "Token": CFG.proxy_api_key,
     }

-    messages = prompt.split(stop)
+    messages: List[ModelMessage] = params["messages"]

     # Add history conversation
     for message in messages:
-        if len(message) <= 0:
-            continue
-        if "human:" in message:
-            history.append(
-                {"role": "user", "content": message.split("human:")[1]},
-            )
-        elif "system:" in message:
-            history.append(
-                {
-                    "role": "system",
-                    "content": message.split("system:")[1],
-                }
-            )
-        elif "ai:" in message:
-            history.append(
-                {
-                    "role": "assistant",
-                    "content": message.split("ai:")[1],
-                }
-            )
+        if message.role == ModelMessageRoleType.HUMAN:
+            history.append({"role": "user", "content": message.content})
+        elif message.role == ModelMessageRoleType.SYSTEM:
+            history.append({"role": "system", "content": message.content})
+        elif message.role == ModelMessageRoleType.AI:
+            history.append({"role": "assistant", "content": message.content})
         else:
-            history.append(
-                {
-                    "role": "system",
-                    "content": message,
-                }
-            )
+            pass

     # Move the last user's information to the end
     temp_his = history[::-1]

@@ -10,7 +10,6 @@ from typing import List, Optional

 from pilot.configs.config import Config
 from pilot.model.base import Message
-from pilot.server.llmserver import generate_output

 def create_chat_completion(
@@ -115,3 +114,17 @@ class Iteratorize:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.stop_now = True
+
+def is_sentence_complete(output: str):
+    """Check whether the output is a complete sentence."""
+    end_symbols = (".", "?", "!", "...", "。", "?", "!", "…", '"', "'", "”")
+    return output.endswith(end_symbols)
+
+def is_partial_stop(output: str, stop_str: str):
+    """Check whether the output contains a partial stop str."""
+    for i in range(0, min(len(output), len(stop_str))):
+        if stop_str.startswith(output[-i:]):
+            return True
+    return False
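
`is_partial_stop` reports whether the tail of the current output could be the start of a stop string, which is what lets the streamer hold back a half-generated stop sequence. A small demo with invented strings:

# Demo of the partial-stop check above (strings invented).
from pilot.model.llm_utils import is_partial_stop

print(is_partial_stop("The answer is ##", "###"))  # True: output ends with a prefix of "###"
print(is_partial_stop("The answer is 42", "###"))  # False: no overlap with the stop string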

@@ -53,8 +53,15 @@ class BaseOutputParser(ABC):
         """ TODO Multi mode output handler, rewrite this for multi model, use adapter mode.
         """
+        model_context = data.get("model_context")
+        if model_context and "prompt_echo_len_char" in model_context:
+            prompt_echo_len_char = int(model_context.get("prompt_echo_len_char", -1))
+            if prompt_echo_len_char != -1:
+                skip_echo_len = prompt_echo_len_char
+
         if data.get("error_code", 0) == 0:
-            if "vicuna" in CFG.LLM_MODEL:
+            if "vicuna" in CFG.LLM_MODEL or "llama-2" in CFG.LLM_MODEL:
+                # TODO Judging from model_context
                 # output = data["text"][skip_echo_len + 11:].strip()
                 output = data["text"][skip_echo_len:].strip()
             elif "guanaco" in CFG.LLM_MODEL:

@@ -2,6 +2,7 @@ import time
 from abc import ABC, abstractmethod
 import datetime
 import traceback
+import warnings
 import json
 from pydantic import BaseModel, Field, root_validator, validator, Extra
 from typing import (
@@ -37,6 +38,8 @@ from pilot.scene.base_message import (
     HumanMessage,
     AIMessage,
     ViewMessage,
+    ModelMessage,
+    ModelMessageRoleType,
 )

 from pilot.configs.config import Config
@@ -116,6 +119,7 @@ class BaseChat(ABC):
         payload = {
             "model": self.llm_model,
             "prompt": self.generate_llm_text(),
+            "messages": self.generate_llm_messages(),
             "temperature": float(self.prompt_template.temperature),
             "max_new_tokens": int(self.prompt_template.max_new_tokens),
             "stop": self.prompt_template.sep,
@@ -227,6 +231,7 @@ class BaseChat(ABC):
             return self.nostream_call()

     def generate_llm_text(self) -> str:
+        warnings.warn("This method is deprecated - please use `generate_llm_messages`.")
         text = ""
         ### Load scene setting or character definition
         if self.prompt_template.template_define:
@@ -244,24 +249,62 @@ class BaseChat(ABC):
         text += self.__load_user_message()
         return text

-    def __load_system_message(self):
+    def generate_llm_messages(self) -> List[ModelMessage]:
+        """
+        Structured prompt messages interaction between dbgpt-server and llm-server
+        See https://github.com/csunny/DB-GPT/issues/328
+        """
+        messages = []
+        ### Load scene setting or character definition as system message
+        if self.prompt_template.template_define:
+            messages.append(
+                ModelMessage(
+                    role=ModelMessageRoleType.SYSTEM,
+                    content=self.prompt_template.template_define,
+                )
+            )
+        ### Load prompt
+        messages += self.__load_system_message(str_message=False)
+        ### Load examples
+        messages += self.__load_example_messages(str_message=False)
+        ### Load History
+        messages += self.__load_histroy_messages(str_message=False)
+        ### Load User Input
+        messages += self.__load_user_message(str_message=False)
+        return messages
+
+    def __load_system_message(self, str_message: bool = True):
         system_convs = self.current_message.get_system_conv()
         system_text = ""
+        system_messages = []
         for system_conv in system_convs:
             system_text += (
                 system_conv.type + ":" + system_conv.content + self.prompt_template.sep
             )
-        return system_text
+            system_messages.append(
+                ModelMessage(role=system_conv.type, content=system_conv.content)
+            )
+        return system_text if str_message else system_messages

-    def __load_user_message(self):
+    def __load_user_message(self, str_message: bool = True):
         user_conv = self.current_message.get_user_conv()
+        user_messages = []
         if user_conv:
-            return user_conv.type + ":" + user_conv.content + self.prompt_template.sep
+            user_text = (
+                user_conv.type + ":" + user_conv.content + self.prompt_template.sep
+            )
+            user_messages.append(
+                ModelMessage(role=user_conv.type, content=user_conv.content)
+            )
+            return user_text if str_message else user_messages
         else:
             raise ValueError("Hi! What do you want to talk about")

-    def __load_example_messages(self):
+    def __load_example_messages(self, str_message: bool = True):
         example_text = ""
+        example_messages = []
         if self.prompt_template.example_selector:
             for round_conv in self.prompt_template.example_selector.examples():
                 for round_message in round_conv["messages"]:
@@ -269,16 +312,22 @@ class BaseChat(ABC):
                         SystemMessage.type,
                         ViewMessage.type,
                     ]:
+                        message_type = round_message["type"]
+                        message_content = round_message["data"]["content"]
                         example_text += (
-                            round_message["type"]
+                            message_type
                             + ":"
-                            + round_message["data"]["content"]
+                            + message_content
                             + self.prompt_template.sep
                         )
-        return example_text
+                        example_messages.append(
+                            ModelMessage(role=message_type, content=message_content)
+                        )
+        return example_text if str_message else example_messages

-    def __load_histroy_messages(self):
+    def __load_histroy_messages(self, str_message: bool = True):
         history_text = ""
+        history_messages = []
         if self.prompt_template.need_historical_messages:
             if self.history_message:
                 logger.info(
@@ -290,12 +339,17 @@ class BaseChat(ABC):
                         ViewMessage.type,
                         SystemMessage.type,
                     ]:
+                        message_type = first_message["type"]
+                        message_content = first_message["data"]["content"]
                         history_text += (
-                            first_message["type"]
+                            message_type
                             + ":"
-                            + first_message["data"]["content"]
+                            + message_content
                             + self.prompt_template.sep
                         )
+                        history_messages.append(
+                            ModelMessage(role=message_type, content=message_content)
+                        )

                 index = self.chat_retention_rounds - 1
                 for round_conv in self.history_message[-index:]:
@@ -304,12 +358,17 @@ class BaseChat(ABC):
                             SystemMessage.type,
                             ViewMessage.type,
                         ]:
+                            message_type = round_message["type"]
+                            message_content = round_message["data"]["content"]
                             history_text += (
-                                round_message["type"]
+                                message_type
                                 + ":"
-                                + round_message["data"]["content"]
+                                + message_content
                                 + self.prompt_template.sep
                             )
+                            history_messages.append(
+                                ModelMessage(role=message_type, content=message_content)
+                            )

             else:
                 ### user all history
@@ -320,14 +379,19 @@ class BaseChat(ABC):
                         SystemMessage.type,
                         ViewMessage.type,
                     ]:
+                        message_type = message["type"]
+                        message_content = message["data"]["content"]
                         history_text += (
-                            message["type"]
+                            message_type
                             + ":"
-                            + message["data"]["content"]
+                            + message_content
                             + self.prompt_template.sep
                         )
+                        history_messages.append(
+                            ModelMessage(role=message_type, content=message_content)
+                        )

-        return history_text
+        return history_text if str_message else history_messages

     def current_ai_response(self) -> str:
         for message in self.current_message.messages:
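
The net effect is that `generate_llm_messages` produces a role-tagged list mirroring what `generate_llm_text` used to flatten into one string. An invented example of the resulting payload field:

# Invented example of the shape generate_llm_messages returns.
from pilot.scene.base_message import ModelMessage

messages = [
    ModelMessage(role="system", content="You are a database assistant."),
    ModelMessage(role="human", content="Which tables are largest?"),
]
payload_fragment = {"messages": [m.dict() for m in messages]}
# {'messages': [{'role': 'system', 'content': ...}, {'role': 'human', 'content': ...}]}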

@@ -6,6 +6,7 @@ from typing import (
     Dict,
     Generic,
     List,
+    Tuple,
     NamedTuple,
     Optional,
     Sequence,
@@ -80,6 +81,22 @@ class SystemMessage(BaseMessage):
         return "system"

+class ModelMessage(BaseModel):
+    """Type of message used in the interaction between dbgpt-server and llm-server,
+    similar to OpenAI's message format."""
+
+    role: str
+    content: str
+
+class ModelMessageRoleType:
+    """Type of ModelMessage role"""
+
+    SYSTEM = "system"
+    HUMAN = "human"
+    AI = "ai"
+
 class Generation(BaseModel):
     """Output of a single generation."""
@@ -146,3 +163,35 @@ def _message_from_dict(message: dict) -> BaseMessage:
 def messages_from_dict(messages: List[dict]) -> List[BaseMessage]:
     return [_message_from_dict(m) for m in messages]
+
+def _parse_model_messages(
+    messages: List[ModelMessage],
+) -> Tuple[str, List[str], List[List[str]]]:
+    """
+    Parameters:
+        messages: List of messages from base chat.
+    Returns:
+        A tuple containing the user prompt, system message list and history message list:
+        str: user prompt
+        List[str]: system messages
+        List[List[str]]: history messages as [user, assistant] pairs
+    """
+    user_prompt = ""
+    system_messages: List[str] = []
+    history_messages: List[List[str]] = [[]]
+
+    for message in messages[:-1]:
+        if message.role == "human":
+            history_messages[-1].append(message.content)
+        elif message.role == "system":
+            system_messages.append(message.content)
+        elif message.role == "ai":
+            history_messages[-1].append(message.content)
+            history_messages.append([])
+    if messages[-1].role != "human":
+        raise ValueError("Hi! What do you want to talk about")
+    # Keep only complete [user message, assistant message] pairs
+    history_messages = list(filter(lambda x: len(x) == 2, history_messages))
+    user_prompt = messages[-1].content
+    return user_prompt, system_messages, history_messages
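
A hedged example of `_parse_model_messages` on an invented exchange (it is a private helper, imported here only to show the contract):

# Invented multi-turn exchange to show the parsing contract.
from pilot.scene.base_message import ModelMessage, _parse_model_messages

messages = [
    ModelMessage(role="system", content="You are a SQL expert."),
    ModelMessage(role="human", content="List all tables."),
    ModelMessage(role="ai", content="SHOW TABLES;"),
    ModelMessage(role="human", content="Now count the users."),
]
query, system_messages, hist = _parse_model_messages(messages)
print(query)            # "Now count the users."
print(system_messages)  # ["You are a SQL expert."]
print(hist)             # [["List all tables.", "SHOW TABLES;"]]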

@@ -2,8 +2,10 @@
 # -*- coding: utf-8 -*-

 from functools import cache
-from typing import List
+from typing import List, Dict, Tuple

 from pilot.model.llm_out.vicuna_base_llm import generate_stream
+from pilot.model.conversation import Conversation, get_conv_template
+from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

 class BaseChatAdpter:
@@ -17,6 +19,52 @@ class BaseChatAdpter:
         """Return the generate stream handler func"""
         pass

+    def get_conv_template(self) -> Conversation:
+        return None
+
+    def model_adaptation(self, params: Dict) -> Tuple[Dict, Dict]:
+        """Params adaptation"""
+        conv = self.get_conv_template()
+        messages = params.get("messages")
+        # Some model context for the dbgpt server
+        model_context = {"prompt_echo_len_char": -1}
+        if not conv or not messages:
+            # Nothing to do
+            return params, model_context
+        conv = conv.copy()
+        system_messages = []
+        for message in messages:
+            role, content = None, None
+            if isinstance(message, ModelMessage):
+                role = message.role
+                content = message.content
+            elif isinstance(message, dict):
+                role = message["role"]
+                content = message["content"]
+            else:
+                raise ValueError(f"Invalid message type: {message}")
+
+            if role == ModelMessageRoleType.SYSTEM:
+                # Support for multiple system messages
+                system_messages.append(content)
+            elif role == ModelMessageRoleType.HUMAN:
+                conv.append_message(conv.roles[0], content)
+            elif role == ModelMessageRoleType.AI:
+                conv.append_message(conv.roles[1], content)
+            else:
+                raise ValueError(f"Unknown role: {role}")
+
+        if system_messages:
+            conv.update_system_message("".join(system_messages))
+
+        # Add a blank message for the assistant.
+        conv.append_message(conv.roles[1], None)
+        new_prompt = conv.get_prompt()
+        # Overwrite the original prompt
+        # TODO remove bos token and eos token from tokenizer_config.json of model
+        prompt_echo_len_char = len(new_prompt.replace("</s>", "").replace("<s>", ""))
+        model_context["prompt_echo_len_char"] = prompt_echo_len_char
+        params["prompt"] = new_prompt
+        return params, model_context
+
 llm_model_chat_adapters: List[BaseChatAdpter] = []
@@ -134,12 +182,26 @@ class GPT4AllChatAdapter(BaseChatAdpter):
         return gpt4all_generate_stream

+class Llama2ChatAdapter(BaseChatAdpter):
+    def match(self, model_path: str):
+        return "llama-2" in model_path.lower()
+
+    def get_conv_template(self) -> Conversation:
+        return get_conv_template("llama-2")
+
+    def get_generate_stream_func(self):
+        from pilot.model.inference import generate_stream
+
+        return generate_stream
+
 register_llm_model_chat_adapter(VicunaChatAdapter)
 register_llm_model_chat_adapter(ChatGLMChatAdapter)
 register_llm_model_chat_adapter(GuanacoChatAdapter)
 register_llm_model_chat_adapter(FalconChatAdapter)
 register_llm_model_chat_adapter(GorillaChatAdapter)
 register_llm_model_chat_adapter(GPT4AllChatAdapter)
+register_llm_model_chat_adapter(Llama2ChatAdapter)

 # Proxy model for test and develop, it's cheap for us now.
 register_llm_model_chat_adapter(ProxyllmChatAdapter)
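
Putting the pieces together, a sketch of what `model_adaptation` does for a Llama-2 worker before generation. The model path and message contents are invented, and `get_llm_chat_adapter` is assumed to select an adapter by `match()` on the model path, as the server code below suggests.

# Sketch of params adaptation; model path and messages are invented.
from pilot.server.chat_adapter import get_llm_chat_adapter

adapter = get_llm_chat_adapter("/models/Llama-2-7b-chat-hf")  # hypothetical path
params = {
    "messages": [
        {"role": "system", "content": "You are concise."},
        {"role": "human", "content": "Ping?"},
    ],
    "temperature": 0.7,
}
params, model_context = adapter.model_adaptation(params)
print(params["prompt"])  # roughly "<s>[INST] <<SYS>>\nYou are concise.\n<</SYS>>\n\nPing? [/INST]"
print(model_context["prompt_echo_len_char"])  # prompt length with <s>/</s> markers stripped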

@@ -5,6 +5,7 @@ import asyncio
 import json
 import os
 import sys
+from typing import List

 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, Request
@@ -24,6 +25,7 @@ from pilot.configs.model_config import *
 from pilot.model.llm_out.vicuna_base_llm import get_embeddings
 from pilot.model.loader import ModelLoader
 from pilot.server.chat_adapter import get_llm_chat_adapter
+from pilot.scene.base_message import ModelMessage

 CFG = Config()
@@ -75,6 +77,8 @@ class ModelWorker:

     def generate_stream_gate(self, params):
         try:
+            # params adaptation
+            params, model_context = self.llm_chat_adapter.model_adaptation(params)
             for output in self.generate_stream_func(
                 self.model, self.tokenizer, params, DEVICE, CFG.MAX_POSITION_EMBEDDINGS
             ):
@@ -82,10 +86,8 @@ class ModelWorker:
                 # The gpt4all thread shares stdout with the parent process,
                 # and opening it may affect the frontend output.
                 print("output: ", output)
-                ret = {
-                    "text": output,
-                    "error_code": 0,
-                }
+                # Return some model context to the dbgpt-server
+                ret = {"text": output, "error_code": 0, "model_context": model_context}
                 yield json.dumps(ret).encode() + b"\0"

         except torch.cuda.CudaError:
@@ -128,6 +130,7 @@ app = FastAPI()

 class PromptRequest(BaseModel):
+    messages: List[ModelMessage]
     prompt: str
     temperature: float
     max_new_tokens: int
@@ -170,6 +173,7 @@ async def api_generate_stream(request: Request):
 @app.post("/generate")
 def generate(prompt_request: PromptRequest) -> str:
     params = {
+        "messages": prompt_request.messages,
         "prompt": prompt_request.prompt,
         "temperature": prompt_request.temperature,
         "max_new_tokens": prompt_request.max_new_tokens,