Merge branch 'main' of https://github.com/csunny/DB-GPT into tt_dev

This commit is contained in:
csunny 2023-07-21 14:33:34 +08:00
commit fe522ac067
25 changed files with 1182 additions and 185 deletions

View File

@ -124,10 +124,17 @@ The core capabilities mainly consist of the following parts:
- [DB-GPT-Plugins](https://github.com/csunny/DB-GPT-Plugins) DB-GPT plugins; can run Auto-GPT plugins directly
- [DB-GPT-Web](https://github.com/csunny/DB-GPT-Web) ChatUI for DB-GPT
## Image
🌐 [AutoDL Image](https://www.codewithgpu.com/i/csunny/DB-GPT/dbgpt-0.3.1-v2)
## Install
[Quickstart](https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html)
### Language Switching
In the .env configuration file, modify the LANGUAGE parameter to switch between languages. The default is English (zh for Chinese, en for English; more languages will be added later).
### Platform Deployment
- autodl
[autodl image](https://www.codewithgpu.com/i/csunny/DB-GPT/csunny-db-gpt). You can build from scratch by following the image instructions, or pull the shared image with `docker pull` and follow the instructions in the documentation. If you have any questions, please leave a comment.
## Usage Instructions
- [Multi LLMs Usage](https://db-gpt.readthedocs.io/en/latest/modules/llms.html)

View File

@ -118,12 +118,19 @@ DB-GPT基于 [FastChat](https://github.com/lm-sys/FastChat) 构建大模型运
- [DB-GPT-Plugins](https://github.com/csunny/DB-GPT-Plugins) DB-GPT 插件仓库, 兼容Auto-GPT
- [DB-GPT-Web](https://github.com/csunny/DB-GPT-Web) 多端交互前端界面
## Image
🌐 [AutoDL镜像](https://www.codewithgpu.com/i/csunny/DB-GPT/dbgpt-0.3.1-v2)
## 安装
[快速开始](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/getting_started.html)
### 多语言切换
在.env 配置文件当中修改LANGUAGE参数来切换使用不同的语言默认是英文(中文zh, 英文en, 其他语言待补充)
### 平台部署
- autodl
[autodl镜像](https://www.codewithgpu.com/i/csunny/DB-GPT/csunny-db-gpt),从头搭建可参考镜像说明,或通过`docker pull`获取共享镜像,按照文档中的说明操作即可,若有问题,欢迎评论。
## 使用说明
### 多模型使用

Binary files added (images not shown in the diff): two new PNG assets (45 KiB and 16 KiB) and assets/faq/proxyerror.png (148 KiB).

View File

@ -1,3 +1,5 @@
CREATE DATABASE knowledge_management;
use knowledge_management;
CREATE TABLE `knowledge_space` (
`id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
`name` varchar(100) NOT NULL COMMENT 'knowledge space name',
@ -38,4 +40,37 @@ CREATE TABLE `document_chunk` (
`gmt_modified` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
PRIMARY KEY (`id`),
KEY `idx_document_id` (`document_id`) COMMENT 'index:document_id'
) ENGINE=InnoDB AUTO_INCREMENT=100001 DEFAULT CHARSET=utf8mb4 COMMENT='knowledge document chunk detail';
CREATE DATABASE EXAMPLE_1;
use EXAMPLE_1;
CREATE TABLE `users` (
`id` int NOT NULL AUTO_INCREMENT,
`username` varchar(50) NOT NULL COMMENT '用户名',
`password` varchar(50) NOT NULL COMMENT '密码',
`email` varchar(50) NOT NULL COMMENT '邮箱',
`phone` varchar(20) DEFAULT NULL COMMENT '电话',
PRIMARY KEY (`id`),
KEY `idx_username` (`username`) COMMENT '索引:按用户名查询'
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COMMENT='聊天用户表';
INSERT INTO users (username, password, email, phone) VALUES ('user_1', 'password_1', 'user_1@example.com', '12345678901');
INSERT INTO users (username, password, email, phone) VALUES ('user_2', 'password_2', 'user_2@example.com', '12345678902');
INSERT INTO users (username, password, email, phone) VALUES ('user_3', 'password_3', 'user_3@example.com', '12345678903');
INSERT INTO users (username, password, email, phone) VALUES ('user_4', 'password_4', 'user_4@example.com', '12345678904');
INSERT INTO users (username, password, email, phone) VALUES ('user_5', 'password_5', 'user_5@example.com', '12345678905');
INSERT INTO users (username, password, email, phone) VALUES ('user_6', 'password_6', 'user_6@example.com', '12345678906');
INSERT INTO users (username, password, email, phone) VALUES ('user_7', 'password_7', 'user_7@example.com', '12345678907');
INSERT INTO users (username, password, email, phone) VALUES ('user_8', 'password_8', 'user_8@example.com', '12345678908');
INSERT INTO users (username, password, email, phone) VALUES ('user_9', 'password_9', 'user_9@example.com', '12345678909');
INSERT INTO users (username, password, email, phone) VALUES ('user_10', 'password_10', 'user_10@example.com', '12345678900');
INSERT INTO users (username, password, email, phone) VALUES ('user_11', 'password_11', 'user_11@example.com', '12345678901');
INSERT INTO users (username, password, email, phone) VALUES ('user_12', 'password_12', 'user_12@example.com', '12345678902');
INSERT INTO users (username, password, email, phone) VALUES ('user_13', 'password_13', 'user_13@example.com', '12345678903');
INSERT INTO users (username, password, email, phone) VALUES ('user_14', 'password_14', 'user_14@example.com', '12345678904');
INSERT INTO users (username, password, email, phone) VALUES ('user_15', 'password_15', 'user_15@example.com', '12345678905');
INSERT INTO users (username, password, email, phone) VALUES ('user_16', 'password_16', 'user_16@example.com', '12345678906');
INSERT INTO users (username, password, email, phone) VALUES ('user_17', 'password_17', 'user_17@example.com', '12345678907');
INSERT INTO users (username, password, email, phone) VALUES ('user_18', 'password_18', 'user_18@example.com', '12345678908');
INSERT INTO users (username, password, email, phone) VALUES ('user_19', 'password_19', 'user_19@example.com', '12345678909');
INSERT INTO users (username, password, email, phone) VALUES ('user_20', 'password_20', 'user_20@example.com', '12345678900');
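A quick way to sanity-check the seed data (an illustrative query; it assumes the `pymysql` package is installed and the script above has been executed against the Docker MySQL instance from the quickstart):

```python
# Illustrative: count the demo rows inserted into EXAMPLE_1.users.
import pymysql

conn = pymysql.connect(host="127.0.0.1", port=3306, user="root",
                       password="aa12345678", database="EXAMPLE_1")
with conn.cursor() as cur:
    cur.execute("SELECT COUNT(*) FROM users")
    print(cur.fetchone())  # (20,) after the INSERT statements above
conn.close()
```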

97
docs/faq.md Normal file
View File

@ -0,0 +1,97 @@
# FAQ
##### Q1: text2vec-large-chinese not found
##### A1: make sure you have downloaded the text2vec-large-chinese embedding model correctly
```bash
# CentOS
yum install git-lfs
# Ubuntu
apt-get install git-lfs -y
# macOS
brew install git-lfs
```
```bash
cd models
git lfs clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
```
##### Q2: `pip install -r requirements.txt` fails because some packages cannot be found at the required version.
##### A2: change the pip source to another mirror.
```bash
# pypi
$ pip install -r requirements.txt -i https://pypi.python.org/simple
```
or
```bash
# tsinghua
$ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
```
or
```bash
# aliyun
$ pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/
```
##### Q3: Access denied for user 'root'@'localhost' (using password: NO)
##### A3: make sure your MySQL instance is installed and running correctly
Docker:
```bash
docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
```
Normal:
[download mysql instance](https://dev.mysql.com/downloads/mysql/)
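To quickly verify that the instance is reachable (an illustrative check; it assumes the `pymysql` package is installed):

```python
# Illustrative connectivity check against the Docker instance above.
import pymysql

conn = pymysql.connect(host="127.0.0.1", port=3306,
                       user="root", password="aa12345678")
with conn.cursor() as cur:
    cur.execute("SELECT VERSION()")
    print(cur.fetchone())  # e.g. ('8.0.33',)
conn.close()
```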
##### Q4: When I use OpenAI (MODEL_SERVER=proxyllm) to chat, I see the following error
<p align="left">
<img src="../assets/faq/proxyerror.png" width="800px" />
</p>
##### A4: make sure your OpenAI API_KEY is valid
##### Q5: When I use Chat Data or Chat Meta Data, I see the following error
<p align="left">
<img src="../assets/faq/chatdataerror.png" width="800px" />
</p>
##### A5: you have not created your database and tables yet
1. Create your database:
```bash
mysql> create database {$your_name};
mysql> use {$your_name};
```
2. Create table {$your_table} and insert your data, e.g.:
```bash
mysql>CREATE TABLE `users` (
`id` int NOT NULL AUTO_INCREMENT,
`username` varchar(50) NOT NULL COMMENT '用户名',
`password` varchar(50) NOT NULL COMMENT '密码',
`email` varchar(50) NOT NULL COMMENT '邮箱',
`phone` varchar(20) DEFAULT NULL COMMENT '电话',
PRIMARY KEY (`id`),
KEY `idx_username` (`username`) COMMENT '索引:按用户名查询'
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COMMENT='聊天用户表';
```
##### Q6: When I use vicuna-13b, I see illegal characters like this:
<p align="left">
<img src="../assets/faq/illegal_character.png" width="800px" />
</p>
##### A6: set KNOWLEDGE_SEARCH_TOP_SIZE or KNOWLEDGE_CHUNK_SIZE to a smaller value and restart the server.

View File

@ -17,11 +17,15 @@ As our project has the ability to achieve ChatGPT performance of over 85%, there
### 2. Install
1. This project relies on a local MySQL database service, which you need to install locally. We recommend using Docker for installation.
```bash
$ docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
```
2. prepare server sql script
```bash
$ mysql -h127.0.0.1 -uroot -paa12345678 < ./assets/schema/knowledge_management.sql
```
We use [Chroma embedding database](https://github.com/chroma-core/chroma) as the default for our vector database, so there is no need for special installation. If you choose to connect to other databases, you can follow our tutorial for installation and configuration.
For the entire installation process of DB-GPT, we use the miniconda3 virtual environment. Create a virtual environment and install the Python dependencies.
@ -63,16 +67,10 @@ You can refer to this document to obtain the Vicuna weights: [Vicuna](https://gi
If you have difficulty with this step, you can also directly use the model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a replacement.
1. prepare server sql script
```bash
mysql> CREATE DATABASE knowledge_management;
mysql> use knowledge_management;
mysql> source ./assets/schema/knowledge_management.sql
```
set .env configuration set your vector store type, eg:VECTOR_STORE_TYPE=Chroma, now we support Chroma and Milvus(version > 2.1)
1. Run db-gpt server
```bash
$ python pilot/server/dbgpt_server.py

View File

@ -130,6 +130,18 @@ Reference
./reference.md
FAQ
-----------
| DB-GPT FAQ.
.. toctree::
:maxdepth: 1
:caption: FAQ
:name: FAQ
:hidden:
./faq.md
Ecosystem
----------

View File

@ -0,0 +1,100 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) 2023, csunny
# This file is distributed under the same license as the DB-GPT package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-07-20 10:53+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
"Language-Team: zh_CN <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n"
#: ../../faq.md:1 81cedd396b274db9b2a69448df98a28d
msgid "FAQ"
msgstr "FAQ"
#: ../../faq.md:2 1a3f8c7661e34721a4465a34281416b1
msgid "Q1: text2vec-large-chinese not found"
msgstr "Q1: text2vec-large-chinese not found"
#: ../../faq.md:4 5d2844bcbcc843fc97de41a491f914fe
msgid ""
"A1: make sure you have download text2vec-large-chinese embedding model in"
" right way"
msgstr "按照正确的姿势下载text2vec-large-chinese模型"
#: ../../faq.md:16 82ade01884534030b81be7c3e06f1504
msgid ""
"Q2: execute `pip install -r requirements.txt` error, found some package "
"cannot find correct version."
msgstr "执行`pip install -r requirements.txt`报错"
#: ../../faq.md:19 451ede833da642788e5224811a71ba0f
msgid "A2: change the pip source."
msgstr "修改pip源"
#: ../../faq.md:26 ../../faq.md:33 0142abf1050d4fee9caf056322b52247
#: 0adb87dd67fa4122997871f7ab064637
msgid "or"
msgstr "或"
#: ../../faq.md:41 d6981160ecc6491284fd32e1098fc10e
msgid "Q3:Access denied for user 'root@localhost'(using password :NO)"
msgstr "或"
#: ../../faq.md:43 e50b195057804f28b84d0d10859e4f1b
msgid "A3: make sure you have installed mysql instance in right way"
msgstr "按照正确姿势安装mysql"
#: ../../faq.md:45 03ba25aa7bd241d3b32cc1916f858a3e
msgid "Docker:"
msgstr "Docker:"
#: ../../faq.md:49 dd5336b44673459c93a1408097cb76f9
msgid "Normal: [download mysql instance](https://dev.mysql.com/downloads/mysql/)"
msgstr "[download mysql instance](https://dev.mysql.com/downloads/mysql/)"
#: ../../faq.md:52 2b290c4653a2410c8d330ed5b0e9a821
msgid "Q4:When I use openai(MODEL_SERVER=proxyllm) to chat"
msgstr "使用openai-chatgpt模型时(MODEL_SERVER=proxyllm)"
#: ../../faq.md:57 f4d0e8e8113f4ca4bc55f167b661fd6a
msgid "A4: make sure your openapi API_KEY is available"
msgstr "确认openapi API_KEY是否可用"
#: ../../faq.md:59 092ca3dea0c5466ab6e22ab0049f166e
msgid "Q5:When I Chat Data and Chat Meta Data, I found the error"
msgstr "Chat Data and Chat Meta Data报如下错"
#: ../../faq.md:64 dbf61e6ea2c64ecebfdbbde83cb74e3e
msgid "A5: you have not create your database and table"
msgstr "需要创建自己的数据库"
#: ../../faq.md:65 0505bb716e6445c2a7960436d93cb407
msgid "1.create your database."
msgstr "1.先创建数据库"
#: ../../faq.md:71 fd689b541ee549bd85385647c219b4cb
msgid "2.create table {$your_table} and insert your data. eg:"
msgstr "然后创建数据表,模拟数据"
#: ../../faq.md:85 de2d78db5fb6450cb08b0f15385ed525
msgid "Q6:When I use vicuna-13b, found some illegal character like this."
msgstr "使用vicuna-13b知识库问答出现乱码"
#: ../../faq.md:90 0cb1d0c2ec434763ae80e6f87d4a1665
msgid ""
"A6: set KNOWLEDGE_SEARCH_TOP_SIZE smaller or set KNOWLEDGE_CHUNK_SIZE "
"smaller, and reboot server."
msgstr "将KNOWLEDGE_SEARCH_TOP_SIZE和KNOWLEDGE_CHUNK_SIZE设置小点然后重启"

View File

@ -8,7 +8,7 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: DB-GPT 0.3.0\n" "Project-Id-Version: DB-GPT 0.3.0\n"
"Report-Msgid-Bugs-To: \n" "Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-07-13 15:39+0800\n" "POT-Creation-Date: 2023-07-20 10:53+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n" "Language: zh_CN\n"
@ -19,29 +19,29 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n" "Generated-By: Babel 2.12.1\n"
#: ../../getting_started/getting_started.md:1 0b2e795438a3413c875fd80191e85bad #: ../../getting_started/getting_started.md:1 7c12b6d7d5be4528be005cdadec568db
msgid "Quickstart Guide" msgid "Quickstart Guide"
msgstr "使用指南" msgstr "使用指南"
#: ../../getting_started/getting_started.md:3 7b84c9776f8a4f9fb55afc640f37f45c #: ../../getting_started/getting_started.md:3 ba312f5a132541be89dcc09012076784
msgid "" msgid ""
"This tutorial gives you a quick walkthrough about use DB-GPT with you " "This tutorial gives you a quick walkthrough about use DB-GPT with you "
"environment and data." "environment and data."
msgstr "本教程为您提供了关于如何使用DB-GPT的使用指南。" msgstr "本教程为您提供了关于如何使用DB-GPT的使用指南。"
#: ../../getting_started/getting_started.md:5 1b2880e1ef674bfdbf39ac9f330aeec9 #: ../../getting_started/getting_started.md:5 8e56b3f0726740abaaafa57415b10bea
msgid "Installation" msgid "Installation"
msgstr "安装" msgstr "安装"
#: ../../getting_started/getting_started.md:7 d0a8c6654bfe4bbdb0eb40ceb2ea3388 #: ../../getting_started/getting_started.md:7 3af8fe74db1043349e8f784c109b0417
msgid "To get started, install DB-GPT with the following steps." msgid "To get started, install DB-GPT with the following steps."
msgstr "请按照以下步骤安装DB-GPT" msgstr "请按照以下步骤安装DB-GPT"
#: ../../getting_started/getting_started.md:9 0a4e0b06c7fe49a9b2ca56ba2eb7b8ba #: ../../getting_started/getting_started.md:9 0b33cf4604f846e781a63d857dde72b2
msgid "1. Hardware Requirements" msgid "1. Hardware Requirements"
msgstr "1. 硬件要求" msgstr "1. 硬件要求"
#: ../../getting_started/getting_started.md:10 2b42f6546ef141f696943ba2120584e5 #: ../../getting_started/getting_started.md:10 f1d4abb176494bcb85cead7f3f8b719d
msgid "" msgid ""
"As our project has the ability to achieve ChatGPT performance of over " "As our project has the ability to achieve ChatGPT performance of over "
"85%, there are certain hardware requirements. However, overall, the " "85%, there are certain hardware requirements. However, overall, the "
@ -49,62 +49,67 @@ msgid ""
"specific hardware requirements for deployment are as follows:" "specific hardware requirements for deployment are as follows:"
msgstr "由于我们的项目有能力达到85%以上的ChatGPT性能所以对硬件有一定的要求。但总体来说我们在消费级的显卡上即可完成项目的部署使用具体部署的硬件说明如下:" msgstr "由于我们的项目有能力达到85%以上的ChatGPT性能所以对硬件有一定的要求。但总体来说我们在消费级的显卡上即可完成项目的部署使用具体部署的硬件说明如下:"
#: ../../getting_started/getting_started.md 4df0c44eff8741f39ca0fdeff222f90c #: ../../getting_started/getting_started.md e8516902f29d4ca2bb46f19b5e3deb81
msgid "GPU" msgid "GPU"
msgstr "GPU" msgstr "GPU"
#: ../../getting_started/getting_started.md b740a2991ce546cca43a426b760e9901 #: ../../getting_started/getting_started.md a951ccca67364cf7ad5f0af2ec0ece8d
msgid "VRAM Size" msgid "VRAM Size"
msgstr "显存大小" msgstr "显存大小"
#: ../../getting_started/getting_started.md 222b91ff82f14d12acaac5aa238758c8 #: ../../getting_started/getting_started.md 01b7e055ee4543bdb619fbc14fea4d86
msgid "Performance" msgid "Performance"
msgstr "显存大小" msgstr "显存大小"
#: ../../getting_started/getting_started.md c2d2ae6a4c964c4f90a9009160754782 #: ../../getting_started/getting_started.md 0b20b224ff8a4e2c890a8b4ff43b6045
msgid "RTX 4090" msgid "RTX 4090"
msgstr "RTX 4090" msgstr "RTX 4090"
#: ../../getting_started/getting_started.md 529220ec6a294e449dc460ba2e8829a1 #: ../../getting_started/getting_started.md 17a343c4359d45c987f29de1c73760b4
#: 5e0c5900842e4d66b2064b13cc31a3ad #: c9daaf0578434a7e812a8d3f3edde3f0
msgid "24 GB" msgid "24 GB"
msgstr "24 GB" msgstr "24 GB"
#: ../../getting_started/getting_started.md 84d29eef342f4d6282295c0e32487548 #: ../../getting_started/getting_started.md 6a35be039a0a43eaaf7d1aa40aece6f7
msgid "Smooth conversation inference" msgid "Smooth conversation inference"
msgstr "可以流畅的进行对话推理,无卡顿" msgstr "可以流畅的进行对话推理,无卡顿"
#: ../../getting_started/getting_started.md 5a10effe322e4afb8315415c04dc05a4 #: ../../getting_started/getting_started.md a414f5ae7bdd450f8d171d1f075d6b66
msgid "RTX 3090" msgid "RTX 3090"
msgstr "RTX 3090" msgstr "RTX 3090"
#: ../../getting_started/getting_started.md 8924059525ab43329a8bb6659e034d5e #: ../../getting_started/getting_started.md 109ed9bda6e541b88eb3400a44e15df7
msgid "Smooth conversation inference, better than V100" msgid "Smooth conversation inference, better than V100"
msgstr "可以流畅进行对话推理有卡顿感但好于V100" msgstr "可以流畅进行对话推理有卡顿感但好于V100"
#: ../../getting_started/getting_started.md 10f5bc076f524127a956d7a23f3666ba #: ../../getting_started/getting_started.md 3f2a05dc610f461faa989f3c12750d00
msgid "V100" msgid "V100"
msgstr "V100" msgstr "V100"
#: ../../getting_started/getting_started.md 7d664e81984847c7accd08db93fad404 #: ../../getting_started/getting_started.md 90e24795876546ecbb4796ca5d313514
msgid "16 GB" msgid "16 GB"
msgstr "16 GB" msgstr "16 GB"
#: ../../getting_started/getting_started.md 86765bc9ab01409fb7f5edf04f9b32a5 #: ../../getting_started/getting_started.md 34061757e69e4691b7a5ff3c2953f1e3
msgid "Conversation inference possible, noticeable stutter" msgid "Conversation inference possible, noticeable stutter"
msgstr "可以进行对话推理,有明显卡顿" msgstr "可以进行对话推理,有明显卡顿"
#: ../../getting_started/getting_started.md:18 a0ac5591c0ac4ac6a385e562353daf22 #: ../../getting_started/getting_started.md:18 61d59cb27daf43eb9aa2775fa6dac820
msgid "2. Install" msgid "2. Install"
msgstr "2. 安装" msgstr "2. 安装"
#: ../../getting_started/getting_started.md:20 a64a9a5945074ece872509f8cb425da9 #: ../../getting_started/getting_started.md:20 28a6b66c92464929bed8f04d5b841e8c
#, fuzzy
msgid "" msgid ""
"This project relies on a local MySQL database service, which you need to " "1.This project relies on a local MySQL database service, which you need "
"install locally. We recommend using Docker for installation." "to install locally. We recommend using Docker for installation."
msgstr "本项目依赖一个本地的 MySQL 数据库服务,你需要本地安装,推荐直接使用 Docker 安装。" msgstr "本项目依赖一个本地的 MySQL 数据库服务,你需要本地安装,推荐直接使用 Docker 安装。"
#: ../../getting_started/getting_started.md:25 11e799a372ab4d0f8269cd7be98bebc6 #: ../../getting_started/getting_started.md:24 7793b19e96b44ba481b527a877c81170
msgid "prepare server sql script"
msgstr "准备db-gpt server sql脚本"
#: ../../getting_started/getting_started.md:29 e3743699e95e45eab3e140df266bb3b5
msgid "" msgid ""
"We use [Chroma embedding database](https://github.com/chroma-core/chroma)" "We use [Chroma embedding database](https://github.com/chroma-core/chroma)"
" as the default for our vector database, so there is no need for special " " as the default for our vector database, so there is no need for special "
@ -117,11 +122,11 @@ msgstr ""
"向量数据库我们默认使用的是Chroma内存数据库所以无需特殊安装如果有需要连接其他的同学可以按照我们的教程进行安装配置。整个DB-" "向量数据库我们默认使用的是Chroma内存数据库所以无需特殊安装如果有需要连接其他的同学可以按照我们的教程进行安装配置。整个DB-"
"GPT的安装过程我们使用的是miniconda3的虚拟环境。创建虚拟环境并安装python依赖包" "GPT的安装过程我们使用的是miniconda3的虚拟环境。创建虚拟环境并安装python依赖包"
#: ../../getting_started/getting_started.md:34 dcab69c83d4c48b9bb19c4336ee74a66 #: ../../getting_started/getting_started.md:38 be9f388e255c4bc7837029ce8237ef0b
msgid "Before use DB-GPT Knowledge Management" msgid "Before use DB-GPT Knowledge Management"
msgstr "使用知识库管理功能之前" msgstr "使用知识库管理功能之前"
#: ../../getting_started/getting_started.md:40 735aeb6ae8aa4344b7ff679548279acc #: ../../getting_started/getting_started.md:44 9175b34ed78c4310b048f53ac07b13cb
msgid "" msgid ""
"Once the environment is installed, we have to create a new folder " "Once the environment is installed, we have to create a new folder "
"\"models\" in the DB-GPT project, and then we can put all the models " "\"models\" in the DB-GPT project, and then we can put all the models "
@ -130,33 +135,33 @@ msgstr ""
"环境安装完成后我们必须在DB-" "环境安装完成后我们必须在DB-"
"GPT项目中创建一个新文件夹\"models\"然后我们可以把从huggingface下载的所有模型放到这个目录下。" "GPT项目中创建一个新文件夹\"models\"然后我们可以把从huggingface下载的所有模型放到这个目录下。"
#: ../../getting_started/getting_started.md:43 7cbefe131b24488b9be39b3e8ed4f563 #: ../../getting_started/getting_started.md:47 40715051bd844b0187265d425debfbee
#, fuzzy #, fuzzy
msgid "Notice make sure you have install git-lfs" msgid "Notice make sure you have install git-lfs"
msgstr "确保你已经安装了git-lfs" msgstr "确保你已经安装了git-lfs"
#: ../../getting_started/getting_started.md:53 54ec90ebb969475988451cd66e6ff412 #: ../../getting_started/getting_started.md:57 7d7e7b1fe72b4e0c96eba721a8aa2113
msgid "" msgid ""
"The model files are large and will take a long time to download. During " "The model files are large and will take a long time to download. During "
"the download, let's configure the .env file, which needs to be copied and" "the download, let's configure the .env file, which needs to be copied and"
" created from the .env.template" " created from the .env.template"
msgstr "模型文件很大,需要很长时间才能下载。在下载过程中,让我们配置.env文件它需要从。env.template中复制和创建。" msgstr "模型文件很大,需要很长时间才能下载。在下载过程中,让我们配置.env文件它需要从。env.template中复制和创建。"
#: ../../getting_started/getting_started.md:56 9bdadbee88af4683a4eb7b4f221fb4b8 #: ../../getting_started/getting_started.md:60 6a25e5307bdb49a0afc69b9d17395a5a
msgid "cp .env.template .env" msgid "cp .env.template .env"
msgstr "cp .env.template .env" msgstr "cp .env.template .env"
#: ../../getting_started/getting_started.md:59 6357c4a0154b4f08a079419ac408442d #: ../../getting_started/getting_started.md:63 14567647544f4036beaae158b59833f6
msgid "" msgid ""
"You can configure basic parameters in the .env file, for example setting " "You can configure basic parameters in the .env file, for example setting "
"LLM_MODEL to the model to be used" "LLM_MODEL to the model to be used"
msgstr "您可以在.env文件中配置基本参数例如将LLM_MODEL设置为要使用的模型。" msgstr "您可以在.env文件中配置基本参数例如将LLM_MODEL设置为要使用的模型。"
#: ../../getting_started/getting_started.md:61 2f349f3ed3184b849ade2a15d5bf0c6c #: ../../getting_started/getting_started.md:65 1b459d413a4d4b7e883d1ec17384ca30
msgid "3. Run" msgid "3. Run"
msgstr "3. 运行" msgstr "3. 运行"
#: ../../getting_started/getting_started.md:62 fe408e4405bd48288e2e746386615925 #: ../../getting_started/getting_started.md:66 ed15ee15450e4a028bf5aa05a9309697
msgid "" msgid ""
"You can refer to this document to obtain the Vicuna weights: " "You can refer to this document to obtain the Vicuna weights: "
"[Vicuna](https://github.com/lm-sys/FastChat/blob/main/README.md#model-" "[Vicuna](https://github.com/lm-sys/FastChat/blob/main/README.md#model-"
@ -165,7 +170,7 @@ msgstr ""
"关于基础模型, 可以根据[Vicuna](https://github.com/lm-" "关于基础模型, 可以根据[Vicuna](https://github.com/lm-"
"sys/FastChat/blob/main/README.md#model-weights) 合成教程进行合成。" "sys/FastChat/blob/main/README.md#model-weights) 合成教程进行合成。"
#: ../../getting_started/getting_started.md:64 c0acfe28007f459ca21174f968763fa3 #: ../../getting_started/getting_started.md:68 de15a70920a94192a1f2017cbe3cdb55
msgid "" msgid ""
"If you have difficulty with this step, you can also directly use the " "If you have difficulty with this step, you can also directly use the "
"model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a " "model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a "
@ -174,11 +179,7 @@ msgstr ""
"如果此步有困难的同学,也可以直接使用[此链接](https://huggingface.co/Tribbiani/vicuna-" "如果此步有困难的同学,也可以直接使用[此链接](https://huggingface.co/Tribbiani/vicuna-"
"7b)上的模型进行替代。" "7b)上的模型进行替代。"
#: ../../getting_started/getting_started.md:66 cc0f4c4e43f24b679f857a8d937528ee #: ../../getting_started/getting_started.md:70 763aaed45fd948fab761552a7e06061a
msgid "prepare server sql script"
msgstr "准备db-gpt server sql脚本"
#: ../../getting_started/getting_started.md:72 386948064fe646f2b9f51a262dd64bf2
msgid "" msgid ""
"set .env configuration set your vector store type, " "set .env configuration set your vector store type, "
"eg:VECTOR_STORE_TYPE=Chroma, now we support Chroma and Milvus(version > " "eg:VECTOR_STORE_TYPE=Chroma, now we support Chroma and Milvus(version > "
@ -187,17 +188,17 @@ msgstr ""
"在.env文件设置向量数据库环境变量eg:VECTOR_STORE_TYPE=Chroma, 目前我们支持了 Chroma and " "在.env文件设置向量数据库环境变量eg:VECTOR_STORE_TYPE=Chroma, 目前我们支持了 Chroma and "
"Milvus(version >2.1) " "Milvus(version >2.1) "
#: ../../getting_started/getting_started.md:75 e6f6b06459944f2d8509703af365c664 #: ../../getting_started/getting_started.md:73 a8f0dc3546c54a1098ff10157f980cef
#, fuzzy #, fuzzy
msgid "Run db-gpt server" msgid "1.Run db-gpt server"
msgstr "运行模型服务" msgstr "运行模型服务"
#: ../../getting_started/getting_started.md:80 489b595dc08a459ca2fd83b1389d3bbd #: ../../getting_started/getting_started.md:78 1715948545154c10af585de8960bf853
#, fuzzy #, fuzzy
msgid "Open http://localhost:5000 with your browser to see the product." msgid "Open http://localhost:5000 with your browser to see the product."
msgstr "打开浏览器访问http://localhost:5000" msgstr "打开浏览器访问http://localhost:5000"
#: ../../getting_started/getting_started.md:82 699afb01c9f243ab837cdc73252f624c #: ../../getting_started/getting_started.md:80 8ea9964df477473e866fe844dcf4be54
msgid "" msgid ""
"If you want to access an external LLM service, you need to 1.set the " "If you want to access an external LLM service, you need to 1.set the "
"variables LLM_MODEL=YOUR_MODEL_NAME " "variables LLM_MODEL=YOUR_MODEL_NAME "
@ -205,7 +206,7 @@ msgid ""
"file. 2.execute dbgpt_server.py in light mode" "file. 2.execute dbgpt_server.py in light mode"
msgstr "如果你想访问外部的大模型服务1.需要在.env文件设置模型名和外部模型服务地址。2.使用light模式启动服务" msgstr "如果你想访问外部的大模型服务1.需要在.env文件设置模型名和外部模型服务地址。2.使用light模式启动服务"
#: ../../getting_started/getting_started.md:89 7df7f3870e1140d3a17dc322a46d6476 #: ../../getting_started/getting_started.md:87 4c409a0e2a994f428712ab94b475e9bd
msgid "" msgid ""
"If you want to learn about dbgpt-webui, read https://github.com/csunny" "If you want to learn about dbgpt-webui, read https://github.com/csunny"
"/DB-GPT/tree/new-page-framework/datacenter" "/DB-GPT/tree/new-page-framework/datacenter"

View File

@ -8,7 +8,7 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: DB-GPT 0.3.0\n" "Project-Id-Version: DB-GPT 0.3.0\n"
"Report-Msgid-Bugs-To: \n" "Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-06-30 17:16+0800\n" "POT-Creation-Date: 2023-07-20 10:53+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n" "Language: zh_CN\n"
@ -19,31 +19,35 @@ msgstr ""
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.12.1\n" "Generated-By: Babel 2.12.1\n"
#: ../../index.rst:34 ../../index.rst:45 00f3369727374a2da4b3e02c35c63363 #: ../../index.rst:34 ../../index.rst:45 5bab1511780c442e9dd9e18519ad7ef3
msgid "Getting Started" msgid "Getting Started"
msgstr "开始" msgstr "开始"
#: ../../index.rst:56 ../../index.rst:77 f3b85d983f0141b5872a46451d20252a #: ../../index.rst:57 ../../index.rst:78 dbdeae6c9a344e7889d399a5dbbca872
msgid "Modules" msgid "Modules"
msgstr "模块" msgstr "模块"
#: ../../index.rst:91 ../../index.rst:107 614a2220f0004aadab14402dab5c926d #: ../../index.rst:92 ../../index.rst:108 43594aec45bf49609d7c7435c6472a9b
msgid "Use Cases" msgid "Use Cases"
msgstr "示例" msgstr "示例"
#: ../../index.rst:121 ../../index.rst:124 fae0a63e09f049a79ffbd8c1fd5b8f12 #: ../../index.rst:122 ../../index.rst:125 013a0e5d909e4332ac49f6200924043e
msgid "Reference" msgid "Reference"
msgstr "参考" msgstr "参考"
#: ../../index.rst:148 ../../index.rst:154 d4113b3e7b2f4a1ba462e6a54062adff #: ../../index.rst:134 ../../index.rst:137 42907cf1d1bb491bae2fcd7ede06e421
msgid "FAQ"
msgstr ""
#: ../../index.rst:161 ../../index.rst:167 64f4a587bc1f4ca9a456b97fac5d6def
msgid "Resources" msgid "Resources"
msgstr "资源" msgstr "资源"
#: ../../index.rst:7 6548011d66b14e71bf3f028b82bb58d6 #: ../../index.rst:7 df1ad6da308f49debd4053e5899a4c8d
msgid "Welcome to DB-GPT!" msgid "Welcome to DB-GPT!"
msgstr "欢迎来到DB-GPT中文文档" msgstr "欢迎来到DB-GPT中文文档"
#: ../../index.rst:8 da9fdd494e664e7e83448d4993c30f60 #: ../../index.rst:8 cc5b388ea2924968b2769e4b4a159761
msgid "" msgid ""
"As large models are released and iterated upon, they are becoming " "As large models are released and iterated upon, they are becoming "
"increasingly intelligent. However, in the process of using large models, " "increasingly intelligent. However, in the process of using large models, "
@ -61,7 +65,7 @@ msgstr ""
"我们启动了DB-" "我们启动了DB-"
"GPT项目为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署既可应用于“独立私有环境”又可根据业务模块进行“独立部署”和“隔离”确保“大模型”的能力绝对私有、安全、可控。" "GPT项目为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署既可应用于“独立私有环境”又可根据业务模块进行“独立部署”和“隔离”确保“大模型”的能力绝对私有、安全、可控。"
#: ../../index.rst:10 7650af0ff7bc4a93ad82c930c81cb7f5 #: ../../index.rst:10 b831cd62a1ba46108dcf343792e8d67a
msgid "" msgid ""
"**DB-GPT** is an experimental open-source project that uses localized GPT" "**DB-GPT** is an experimental open-source project that uses localized GPT"
" large models to interact with your data and environment. With this " " large models to interact with your data and environment. With this "
@ -71,102 +75,102 @@ msgstr ""
"DB-GPT 是一个开源的以数据库为基础的GPT实验项目使用本地化的GPT大模型与您的数据和环境进行交互无数据泄露风险100% 私密100%" "DB-GPT 是一个开源的以数据库为基础的GPT实验项目使用本地化的GPT大模型与您的数据和环境进行交互无数据泄露风险100% 私密100%"
" 安全。" " 安全。"
#: ../../index.rst:12 18249811cea0456096f06da05638d474 #: ../../index.rst:12 3baac7e36a824395a57c246e05560c67
msgid "**Features**" msgid "**Features**"
msgstr "特性" msgstr "特性"
#: ../../index.rst:13 367c092acf1a49a0906f78433e6d9926 #: ../../index.rst:13 6638568ff94e47f6a0b04a6775ca45d1
msgid "" msgid ""
"Currently, we have released multiple key features, which are listed below" "Currently, we have released multiple key features, which are listed below"
" to demonstrate our current capabilities:" " to demonstrate our current capabilities:"
msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。" msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。"
#: ../../index.rst:15 01334c212ef44244a306f8cfb584c48f #: ../../index.rst:15 b49710ac36ed4a188280aeb3589f11fe
msgid "SQL language capabilities - SQL generation - SQL diagnosis" msgid "SQL language capabilities - SQL generation - SQL diagnosis"
msgstr "SQL语言能力 - SQL生成 - SQL诊断" msgstr "SQL语言能力 - SQL生成 - SQL诊断"
#: ../../index.rst:19 fe7396137ed84517bd99c93500f21bb6 #: ../../index.rst:19 6bd495632b39477e8948677aeb4ab1ee
msgid "" msgid ""
"Private domain Q&A and data processing - Database knowledge Q&A - Data " "Private domain Q&A and data processing - Database knowledge Q&A - Data "
"processing" "processing"
msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理" msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理"
#: ../../index.rst:23 c12af975ed2d451496a476d91b6bb4c5 #: ../../index.rst:23 d1d3665a412246b9814139a938a29d1b
msgid "" msgid ""
"Plugins - Support custom plugin execution tasks and natively support the " "Plugins - Support custom plugin execution tasks and natively support the "
"Auto-GPT plugin, such as:" "Auto-GPT plugin, such as:"
msgstr "插件模型 - 支持自定义插件执行任务并原生支持Auto-GPT插件例如:* SQL自动执行获取查询结果 * 自动爬取学习知识" msgstr "插件模型 - 支持自定义插件执行任务并原生支持Auto-GPT插件例如:* SQL自动执行获取查询结果 * 自动爬取学习知识"
#: ../../index.rst:26 97507eb30ab548fb9443b252e481d6be #: ../../index.rst:26 41e37b63a8f44fc09a4da3574b25fcd3
msgid "" msgid ""
"Unified vector storage/indexing of knowledge base - Support for " "Unified vector storage/indexing of knowledge base - Support for "
"unstructured data such as PDF, Markdown, CSV, and WebURL" "unstructured data such as PDF, Markdown, CSV, and WebURL"
msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL" msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL"
#: ../../index.rst:29 b2ff661c13d54cd79f27923163079706 #: ../../index.rst:29 1acac8a6cbe54f7b8c31135189d726d6
msgid "" msgid ""
"Milti LLMs Support - Supports multiple large language models, currently " "Milti LLMs Support - Supports multiple large language models, currently "
"supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8) - TODO: codegen2, " "supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8) - TODO: codegen2, "
"codet5p" "codet5p"
msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)" msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)"
#: ../../index.rst:35 15178a36df624fa9b4a5acdab5060752 #: ../../index.rst:35 481edf63bcd348b4b3e14353dc5be952
msgid "" msgid ""
"How to get started using DB-GPT to interact with your data and " "How to get started using DB-GPT to interact with your data and "
"environment." "environment."
msgstr "开始使用DB-GPT与您的数据环境进行交互。" msgstr "开始使用DB-GPT与您的数据环境进行交互。"
#: ../../index.rst:36 8aa3a44286b244a4b9b92db285e5382b #: ../../index.rst:36 6b26a04661f7445198b3797442d3f178
#, fuzzy #, fuzzy
msgid "`Quickstart Guide <./getting_started/getting_started.html>`_" msgid "`Quickstart Guide <./getting_started/getting_started.html>`_"
msgstr "`使用指南 <./getting_started/getting_started.html>`_" msgstr "`使用指南 <./getting_started/getting_started.html>`_"
#: ../../index.rst:38 3f4ec91e5df44629aff6c94ffbaa37d7 #: ../../index.rst:38 ff435ef9964d44508ad840de2950c1c8
msgid "Concepts and terminology" msgid "Concepts and terminology"
msgstr "相关概念" msgstr "相关概念"
#: ../../index.rst:40 d0126674e0e24aefbf51d610b2fcf5da #: ../../index.rst:40 db9dfacbe93e4f858843c4d2a0e2dd25
#, fuzzy #, fuzzy
msgid "`Concepts and Terminology <./getting_started/concepts.html>`_" msgid "`Concepts and Terminology <./getting_started/concepts.html>`_"
msgstr "`相关概念 <./getting_started/concepts.html>`_" msgstr "`相关概念 <./getting_started/concepts.html>`_"
#: ../../index.rst:42 c6d67e4c1b8346c18f2b99c8f5795627 #: ../../index.rst:42 50faf8302ba140ce84897daef2c90ca2
msgid "Coming soon..." msgid "Coming soon..."
msgstr "" msgstr ""
#: ../../index.rst:44 18d63d2d729246648743d1b7470029e0 #: ../../index.rst:44 547bb007f90e43c096f25647cebefd7b
msgid "`Tutorials <.getting_started/tutorials.html>`_" msgid "`Tutorials <.getting_started/tutorials.html>`_"
msgstr "`教程 <.getting_started/tutorials.html>`_" msgstr "`教程 <.getting_started/tutorials.html>`_"
#: ../../index.rst:58 5f93833b5e8f42b7b8728587f5054d8b #: ../../index.rst:59 89bc50c752b84e3fb789ce5da1b654dc
msgid "" msgid ""
"These modules are the core abstractions with which we can interact with " "These modules are the core abstractions with which we can interact with "
"data and environment smoothly." "data and environment smoothly."
msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。" msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。"
#: ../../index.rst:59 d68173b40df146818ddf68b309bbd27d #: ../../index.rst:60 81f3860920964720958c62b3a6769f12
msgid "" msgid ""
"It's very important for DB-GPT, DB-GPT also provide standard, extendable " "It's very important for DB-GPT, DB-GPT also provide standard, extendable "
"interfaces." "interfaces."
msgstr "DB-GPT还提供了标准的、可扩展的接口。" msgstr "DB-GPT还提供了标准的、可扩展的接口。"
#: ../../index.rst:61 00257e3907b346d5bd007f2cfe52bac9 #: ../../index.rst:62 15faf914d05544df86359e50fdc70483
msgid "" msgid ""
"The docs for each module contain quickstart examples, how to guides, " "The docs for each module contain quickstart examples, how to guides, "
"reference docs, and conceptual guides." "reference docs, and conceptual guides."
msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。" msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。"
#: ../../index.rst:63 e04ba16ec943405080a26ec874e67823 #: ../../index.rst:64 18a673690afa47df8e6dd3d065b2580f
msgid "The modules are as follows" msgid "The modules are as follows"
msgstr "组成模块如下:" msgstr "组成模块如下:"
#: ../../index.rst:65 12d9ce3eda484316b3fcbbb2adb48b6e #: ../../index.rst:66 d2c20ed1b6ba4aa697090453b0f775a1
msgid "" msgid ""
"`LLMs <./modules/llms.html>`_: Supported multi models management and " "`LLMs <./modules/llms.html>`_: Supported multi models management and "
"integrations." "integrations."
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 " msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
#: ../../index.rst:67 9d7ce6fea46c41c0940198499336430e #: ../../index.rst:68 d6c72e6fee3348e89422f9ff45804e3a
msgid "" msgid ""
"`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and" "`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and"
" serialization for multi database." " serialization for multi database."
@ -174,59 +178,59 @@ msgstr ""
"`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt " "`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt "
",并进行优化,提高系统的响应效率" ",并进行优化,提高系统的响应效率"
#: ../../index.rst:69 0847158883424a98a02e58fd9c2d6744 #: ../../index.rst:70 1756de47335d4eb7916a6c8b8b5ab70b
msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler." msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler."
msgstr "`Agent与插件 <./modules/plugins.html>`_:提供Agent和插件机制使得用户可以自定义并增强系统的行为。" msgstr "`Agent与插件 <./modules/plugins.html>`_:提供Agent和插件机制使得用户可以自定义并增强系统的行为。"
#: ../../index.rst:71 6b2a8143adff4395b6a86a6d22078a87 #: ../../index.rst:72 542efb88411f4cc192a08e7d28c84863
#, fuzzy #, fuzzy
msgid "" msgid ""
"`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding," "`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding,"
" and search." " and search."
msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, " msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, "
#: ../../index.rst:73 70e831c592ca431791a614934061c148 #: ../../index.rst:74 0204630cd5f14c68ada3bf11e0b0fbf5
msgid "" msgid ""
"`Connections <./modules/connections.html>`_: Supported multi databases " "`Connections <./modules/connections.html>`_: Supported multi databases "
"connection. management connections and interact with this." "connection. management connections and interact with this."
msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 " msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 "
#: ../../index.rst:75 1a5eaebe25174828b60c17475dae7928 #: ../../index.rst:76 f9363a963be44d0ea01bb5d65b69d0f8
#, fuzzy #, fuzzy
msgid "`Vector <./modules/vector.html>`_: Supported multi vector database." msgid "`Vector <./modules/vector.html>`_: Supported multi vector database."
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 " msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
#: ../../index.rst:93 8a2ea497d36449febd1560dbecd5ec44 #: ../../index.rst:94 7223c3b95e9446bcae40a355e6b02324
msgid "Best Practices and built-in implementations for common DB-GPT use cases:" msgid "Best Practices and built-in implementations for common DB-GPT use cases:"
msgstr "DB-GPT用例的最佳实践和内置方法:" msgstr "DB-GPT用例的最佳实践和内置方法:"
#: ../../index.rst:95 fcc65a470c5643619688488cfe010f61 #: ../../index.rst:96 c088f2ab9f2247ac9a8c9af31d0da7a6
msgid "" msgid ""
"`Sql generation and diagnosis " "`Sql generation and diagnosis "
"<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and " "<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and "
"diagnosis." "diagnosis."
msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。" msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。"
#: ../../index.rst:97 26b983fbb9394f94bf31636175cc6f95 #: ../../index.rst:98 23973b07c6ba42088a714b048d4b43c4
msgid "" msgid ""
"`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A " "`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A "
"important scene for user to chat with database documents, codes, bugs and" "important scene for user to chat with database documents, codes, bugs and"
" schemas." " schemas."
msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景\"" msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景\""
#: ../../index.rst:99 aa4cfc95a3bf463682952bf40f38c99b #: ../../index.rst:100 6a1d77bc56a14803b66ebc9f2b6c4b7b
msgid "" msgid ""
"`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use" "`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use"
" multi models to chat." " multi models to chat."
msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话" msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话"
#: ../../index.rst:101 e9b540aab91c44ae911bf96aa18c0f36 #: ../../index.rst:102 621774a68cba46daa7112f865e6e3af9
msgid "" msgid ""
"`Querying Database Data <./use_cases/query_database_data.html>`_: Query " "`Querying Database Data <./use_cases/query_database_data.html>`_: Query "
"and Analysis data from databases and give charts." "and Analysis data from databases and give charts."
msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。" msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。"
#: ../../index.rst:103 88a99b638b4a42abae39fe9de4f16927 #: ../../index.rst:104 9f379fbe8aac47f4a1a53d84ab2a2f51
msgid "" msgid ""
"`Interacting with apis <./use_cases/interacting_with_api.html>`_: " "`Interacting with apis <./use_cases/interacting_with_api.html>`_: "
"Interact with apis, such as create a table, deploy a database cluster, " "Interact with apis, such as create a table, deploy a database cluster, "
@ -235,33 +239,37 @@ msgstr ""
"`API交互 <./use_cases/interacting_with_api.html>`_: " "`API交互 <./use_cases/interacting_with_api.html>`_: "
"与API交互例如创建表、部署数据库集群、创建数据库等。" "与API交互例如创建表、部署数据库集群、创建数据库等。"
#: ../../index.rst:105 53cb38b16ed04420934233aec02fd44b #: ../../index.rst:106 3dbb717e1b024a20bb6049facb616b1b
msgid "" msgid ""
"`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to" "`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to"
" Plugin use tools to manage databases autonomoly." " Plugin use tools to manage databases autonomoly."
msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。" msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。"
#: ../../index.rst:122 fbc2fe1374ac4909b1c66ea11bbfafec #: ../../index.rst:123 4acedc39ccf34e79b805189a11285a3a
msgid "" msgid ""
"Full documentation on all methods, classes, installation methods, and " "Full documentation on all methods, classes, installation methods, and "
"integration setups for DB-GPT." "integration setups for DB-GPT."
msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。" msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。"
#: ../../index.rst:133 c45530a6c3a34a42a0fd8bca6efe3b07 #: ../../index.rst:135 622112f3cce34461ba7e0d52fa81d438
msgid "DB-GPT FAQ."
msgstr ""
#: ../../index.rst:146 a989256fd69f4bbfae73191b505c59fa
msgid "Ecosystem" msgid "Ecosystem"
msgstr "环境系统" msgstr "环境系统"
#: ../../index.rst:135 e9f68f4abbdc4832889d0e343ce43d27 #: ../../index.rst:148 1b5c410dd94842f2801f08540dd57647
msgid "Guides for how other companies/products can be used with DB-GPT" msgid "Guides for how other companies/products can be used with DB-GPT"
msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南" msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南"
#: ../../index.rst:150 c33686daaa7c44d28c96dc22c0d9480c #: ../../index.rst:163 8368b4c62ebe41ad91551e241dbcc4df
msgid "" msgid ""
"Additional resources we think may be useful as you develop your " "Additional resources we think may be useful as you develop your "
"application!" "application!"
msgstr "“我们认为在您开发应用程序时可能有用的其他资源!”" msgstr "“我们认为在您开发应用程序时可能有用的其他资源!”"
#: ../../index.rst:152 f6d108be181f47af80c4b8a5931ff172 #: ../../index.rst:165 2a4cd897b20c4683979b44fb9d7470e6
msgid "" msgid ""
"`Discord <https://discord.com/invite/twmZk3vv>`_: if your have some " "`Discord <https://discord.com/invite/twmZk3vv>`_: if your have some "
"problem or ideas, you can talk from discord." "problem or ideas, you can talk from discord."

View File

@ -11,7 +11,7 @@ cp .env.template .env
LLM_MODEL=vicuna-13b
MODEL_SERVER=http://127.0.0.1:8000
```
now we support models vicuna-13b, vicuna-7b, chatglm-6b, flan-t5-base, guanaco-33b-merged, falcon-40b, gorilla-7b, llama-2-7b, llama-2-13b.
if you want to use another model, such as chatglm-6b, you just need to update the .env config file.
```

View File

@ -47,6 +47,9 @@ LLM_MODEL_CONFIG = {
"gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"), "gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
"gptj-6b": os.path.join(MODEL_PATH, "ggml-gpt4all-j-v1.3-groovy.bin"), "gptj-6b": os.path.join(MODEL_PATH, "ggml-gpt4all-j-v1.3-groovy.bin"),
"proxyllm": "proxyllm", "proxyllm": "proxyllm",
"llama-2-7b": os.path.join(MODEL_PATH, "Llama-2-7b-chat-hf"),
"llama-2-13b": os.path.join(MODEL_PATH, "Llama-2-13b-chat-hf"),
"llama-2-70b": os.path.join(MODEL_PATH, "Llama-2-70b-chat-hf"),
}
# Load model config

View File

@ -263,12 +263,26 @@ class ProxyllmAdapter(BaseLLMAdaper):
return "proxyllm", None return "proxyllm", None
class Llama2Adapter(BaseLLMAdaper):
"""The model adapter for llama-2"""
def match(self, model_path: str):
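# Case-insensitive substring check: model paths such as
# "models/Llama-2-7b-chat-hf" (see LLM_MODEL_CONFIG) will match.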
return "llama-2" in model_path.lower()
def loader(self, model_path: str, from_pretrained_kwargs: dict):
model, tokenizer = super().loader(model_path, from_pretrained_kwargs)
model.config.eos_token_id = tokenizer.eos_token_id
model.config.pad_token_id = tokenizer.pad_token_id
return model, tokenizer
register_llm_model_adapters(VicunaLLMAdapater)
register_llm_model_adapters(ChatGLMAdapater)
register_llm_model_adapters(GuanacoAdapter)
register_llm_model_adapters(FalconAdapater)
register_llm_model_adapters(GorillaAdapter)
register_llm_model_adapters(GPT4AllAdapter)
register_llm_model_adapters(Llama2Adapter)
# TODO Default support vicuna, other model need to tests and Evaluate
# just for test_py, remove this later
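A minimal usage sketch for the new adapter (illustrative; it assumes only the classes shown in this diff):

```python
# Illustrative: how Llama2Adapter decides whether it handles a checkpoint.
adapter = Llama2Adapter()
print(adapter.match("models/Llama-2-7b-chat-hf"))  # True, match is case-insensitive
print(adapter.match("models/vicuna-13b"))          # False

# loader() reuses BaseLLMAdaper.loader() and then copies the tokenizer's
# eos/pad token ids onto the model config, which llama-2 checkpoints need
# so generation terminates cleanly.
```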

308
pilot/model/conversation.py Normal file
View File

@ -0,0 +1,308 @@
"""
Fork from fastchat: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
Conversation prompt templates.
"""
import dataclasses
from enum import auto, IntEnum
from typing import List, Any, Dict, Callable
class SeparatorStyle(IntEnum):
"""Separator styles."""
ADD_COLON_SINGLE = auto()
ADD_COLON_TWO = auto()
ADD_COLON_SPACE_SINGLE = auto()
NO_COLON_SINGLE = auto()
NO_COLON_TWO = auto()
ADD_NEW_LINE_SINGLE = auto()
LLAMA2 = auto()
CHATGLM = auto()
CHATML = auto()
CHATINTERN = auto()
DOLLY = auto()
RWKV = auto()
PHOENIX = auto()
ROBIN = auto()
@dataclasses.dataclass
class Conversation:
"""A class that manages prompt templates and keeps all conversation history."""
# The name of this template
name: str
# The system prompt
system: str
# Two roles
roles: List[str]
# All messages. Each item is (role, message).
messages: List[List[str]]
# The number of few shot examples
offset: int
# Separators
sep_style: SeparatorStyle
sep: str
sep2: str = None
# Stop criteria (the default one is EOS token)
stop_str: str = None
# Stops generation if meeting any token in this list
stop_token_ids: List[int] = None
# format system message
system_formatter: Callable = None
def get_prompt(self) -> str:
"""Get the prompt for generation."""
if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
ret = self.system + self.sep
for role, message in self.messages:
if message:
ret += role + ": " + message + self.sep
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.ADD_COLON_TWO:
seps = [self.sep, self.sep2]
ret = self.system + seps[0]
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + ": " + message + seps[i % 2]
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
ret = self.system + self.sep
for role, message in self.messages:
if message:
ret += role + ": " + message + self.sep
else:
ret += role + ": " # must be end with a space
return ret
elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
ret = "" if self.system == "" else self.system + self.sep
for role, message in self.messages:
if message:
ret += role + "\n" + message + self.sep
else:
ret += role + "\n"
return ret
elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
ret = self.system
for role, message in self.messages:
if message:
ret += role + message + self.sep
else:
ret += role
return ret
elif self.sep_style == SeparatorStyle.NO_COLON_TWO:
seps = [self.sep, self.sep2]
ret = self.system
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + message + seps[i % 2]
else:
ret += role
return ret
elif self.sep_style == SeparatorStyle.RWKV:
ret = self.system
for i, (role, message) in enumerate(self.messages):
if message:
ret += (
role
+ ": "
+ message.replace("\r\n", "\n").replace("\n\n", "\n")
)
ret += "\n\n"
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.LLAMA2:
seps = [self.sep, self.sep2]
ret = ""
for i, (role, message) in enumerate(self.messages):
if message:
if i == 0:
ret += self.system + message
else:
ret += role + " " + message + seps[i % 2]
else:
ret += role
return ret
elif self.sep_style == SeparatorStyle.CHATGLM:
# source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
# source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
round_add_n = 1 if self.name == "chatglm2" else 0
if self.system:
ret = self.system + self.sep
else:
ret = ""
for i, (role, message) in enumerate(self.messages):
if i % 2 == 0:
ret += f"[Round {i//2 + round_add_n}]{self.sep}"
if message:
ret += f"{role}{message}{self.sep}"
else:
ret += f"{role}"
return ret
elif self.sep_style == SeparatorStyle.CHATML:
ret = "" if self.system == "" else self.system + self.sep + "\n"
for role, message in self.messages:
if message:
ret += role + "\n" + message + self.sep + "\n"
else:
ret += role + "\n"
return ret
elif self.sep_style == SeparatorStyle.CHATINTERN:
# source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
seps = [self.sep, self.sep2]
ret = self.system
for i, (role, message) in enumerate(self.messages):
if i % 2 == 0:
ret += "<s>"
if message:
ret += role + ":" + message + seps[i % 2] + "\n"
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.DOLLY:
seps = [self.sep, self.sep2]
ret = self.system
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + ":\n" + message + seps[i % 2]
if i % 2 == 1:
ret += "\n\n"
else:
ret += role + ":\n"
return ret
elif self.sep_style == SeparatorStyle.PHOENIX:
ret = self.system
for role, message in self.messages:
if message:
ret += role + ": " + "<s>" + message + "</s>"
else:
ret += role + ": " + "<s>"
return ret
elif self.sep_style == SeparatorStyle.ROBIN:
ret = self.system + self.sep
for role, message in self.messages:
if message:
ret += role + ":\n" + message + self.sep
else:
ret += role + ":\n"
return ret
else:
raise ValueError(f"Invalid style: {self.sep_style}")
def append_message(self, role: str, message: str):
"""Append a new message."""
self.messages.append([role, message])
def update_last_message(self, message: str):
"""Update the last output.
The last message is typically set to be None when constructing the prompt,
so we need to update it in-place after getting the response from a model.
"""
self.messages[-1][1] = message
def update_system_message(self, system_message: str):
"""Update system message"""
if self.system_formatter:
self.system = self.system_formatter(system_message)
else:
self.system = system_message
def to_gradio_chatbot(self):
"""Convert the conversation to gradio chatbot format."""
ret = []
for i, (role, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
ret.append([msg, None])
else:
ret[-1][-1] = msg
return ret
def to_openai_api_messages(self):
"""Convert the conversation to OpenAI chat completion format."""
ret = [{"role": "system", "content": self.system}]
for i, (_, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
ret.append({"role": "user", "content": msg})
else:
if msg is not None:
ret.append({"role": "assistant", "content": msg})
return ret
def copy(self):
return Conversation(
name=self.name,
system=self.system,
roles=self.roles,
messages=[[x, y] for x, y in self.messages],
offset=self.offset,
sep_style=self.sep_style,
sep=self.sep,
sep2=self.sep2,
stop_str=self.stop_str,
stop_token_ids=self.stop_token_ids,
system_formatter=self.system_formatter,
)
def dict(self):
return {
"template_name": self.name,
"system": self.system,
"roles": self.roles,
"messages": self.messages,
"offset": self.offset,
}
# A global registry for all conversation templates
conv_templates: Dict[str, Conversation] = {}
def register_conv_template(template: Conversation, override: bool = False):
"""Register a new conversation template."""
if not override:
assert (
template.name not in conv_templates
), f"{template.name} has been registered."
conv_templates[template.name] = template
def get_conv_template(name: str) -> Conversation:
"""Get a conversation template."""
return conv_templates[name].copy()
# llama2 template
# reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212
register_conv_template(
Conversation(
name="llama-2",
system="<s>[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. "
"Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. "
"Please ensure that your responses are socially unbiased and positive in nature.\n\n"
"If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. "
"If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n\n",
roles=("[INST]", "[/INST]"),
messages=(),
offset=0,
sep_style=SeparatorStyle.LLAMA2,
sep=" ",
sep2=" </s><s>",
stop_token_ids=[2],
system_formatter=lambda msg: f"<s>[INST] <<SYS>>\n{msg}\n<</SYS>>\n\n",
)
)
# TODO: Support conversation templates for other models
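
As a quick illustration of how these templates are consumed, the sketch below builds a Llama-2 prompt from the registry defined above; the user message is invented for the example, while `get_conv_template`, `append_message`, and `get_prompt` come from this module.

# A minimal sketch, assuming only the registry defined above; the message is invented.
conv = get_conv_template("llama-2")
conv.append_message(conv.roles[0], "What is DB-GPT?")  # "[INST]" user turn
conv.append_message(conv.roles[1], None)  # blank assistant slot to be filled by the model
prompt = conv.get_prompt()
# prompt is roughly "<s>[INST] <<SYS>>\n...\n<</SYS>>\n\nWhat is DB-GPT? [/INST]"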

pilot/model/inference.py (new file, 242 lines)

@@ -0,0 +1,242 @@
"""
Fork from fastchat: https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/inference.py
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import gc
from typing import Iterable, Dict
import torch
from transformers.generation.logits_process import (
LogitsProcessorList,
RepetitionPenaltyLogitsProcessor,
TemperatureLogitsWarper,
TopKLogitsWarper,
TopPLogitsWarper,
)
from pilot.model.llm_utils import is_sentence_complete, is_partial_stop
def prepare_logits_processor(
temperature: float, repetition_penalty: float, top_p: float, top_k: int
) -> LogitsProcessorList:
processor_list = LogitsProcessorList()
# TemperatureLogitsWarper doesn't accept 0.0, 1.0 makes it a no-op so we skip two cases.
if temperature >= 1e-5 and temperature != 1.0:
processor_list.append(TemperatureLogitsWarper(temperature))
if repetition_penalty > 1.0:
processor_list.append(RepetitionPenaltyLogitsProcessor(repetition_penalty))
if 1e-8 <= top_p < 1.0:
processor_list.append(TopPLogitsWarper(top_p))
if top_k > 0:
processor_list.append(TopKLogitsWarper(top_k))
return processor_list
@torch.inference_mode()
def generate_stream(
model,
tokenizer,
params: Dict,
device: str,
context_len: int,
stream_interval: int = 2,
judge_sent_end: bool = False,
):
# Read parameters
prompt = params["prompt"]
print(f"Prompt of model: \n{prompt}")
len_prompt = len(prompt)
temperature = float(params.get("temperature", 1.0))
repetition_penalty = float(params.get("repetition_penalty", 1.0))
top_p = float(params.get("top_p", 1.0))
top_k = int(params.get("top_k", -1)) # -1 means disable
max_new_tokens = int(params.get("max_new_tokens", 2048))
echo = bool(params.get("echo", True))
stop_str = params.get("stop", None)
stop_token_ids = params.get("stop_token_ids", None) or []
stop_token_ids.append(tokenizer.eos_token_id)
logits_processor = prepare_logits_processor(
temperature, repetition_penalty, top_p, top_k
)
input_ids = tokenizer(prompt).input_ids
if model.config.is_encoder_decoder:
max_src_len = context_len
else: # truncate
max_src_len = context_len - max_new_tokens - 1
input_ids = input_ids[-max_src_len:]
output_ids = list(input_ids)
input_echo_len = len(input_ids)
if model.config.is_encoder_decoder:
encoder_output = model.encoder(
input_ids=torch.as_tensor([input_ids], device=device)
)[0]
start_ids = torch.as_tensor(
[[model.generation_config.decoder_start_token_id]],
dtype=torch.int64,
device=device,
)
past_key_values = out = None
sent_interrupt = False
for i in range(max_new_tokens):
if i == 0: # prefill
if model.config.is_encoder_decoder:
out = model.decoder(
input_ids=start_ids,
encoder_hidden_states=encoder_output,
use_cache=True,
)
logits = model.lm_head(out[0])
else:
out = model(torch.as_tensor([input_ids], device=device), use_cache=True)
logits = out.logits
past_key_values = out.past_key_values
else: # decoding
if model.config.is_encoder_decoder:
out = model.decoder(
input_ids=torch.as_tensor(
[[token] if not sent_interrupt else output_ids], device=device
),
encoder_hidden_states=encoder_output,
use_cache=True,
past_key_values=past_key_values if not sent_interrupt else None,
)
sent_interrupt = False
logits = model.lm_head(out[0])
else:
out = model(
input_ids=torch.as_tensor(
[[token] if not sent_interrupt else output_ids], device=device
),
use_cache=True,
past_key_values=past_key_values if not sent_interrupt else None,
)
sent_interrupt = False
logits = out.logits
past_key_values = out.past_key_values
if logits_processor:
if repetition_penalty > 1.0:
tmp_output_ids = torch.as_tensor([output_ids], device=logits.device)
else:
tmp_output_ids = None
last_token_logits = logits_processor(tmp_output_ids, logits[:, -1, :])[0]
else:
last_token_logits = logits[0, -1, :]
if device == "mps":
# Switch to CPU to avoid some bugs in the mps backend.
last_token_logits = last_token_logits.float().to("cpu")
if temperature < 1e-5 or top_p < 1e-8: # greedy
_, indices = torch.topk(last_token_logits, 2)
tokens = [int(index) for index in indices.tolist()]
else:
probs = torch.softmax(last_token_logits, dim=-1)
indices = torch.multinomial(probs, num_samples=2)
tokens = [int(token) for token in indices.tolist()]
token = tokens[0]
output_ids.append(token)
if token in stop_token_ids:
stopped = True
else:
stopped = False
# Yield the output tokens
if i % stream_interval == 0 or i == max_new_tokens - 1 or stopped:
if echo:
tmp_output_ids = output_ids
rfind_start = len_prompt
else:
tmp_output_ids = output_ids[input_echo_len:]
rfind_start = 0
output = tokenizer.decode(
tmp_output_ids,
skip_special_tokens=True,
spaces_between_special_tokens=False,
clean_up_tokenization_spaces=True,
)
# TODO: Patch for incomplete sentences interrupting output; this can be reworked more elegantly.
if judge_sent_end and stopped and not is_sentence_complete(output):
if len(tokens) > 1:
token = tokens[1]
output_ids[-1] = token
else:
output_ids.pop()
stopped = False
sent_interrupt = True
partially_stopped = False
if stop_str:
if isinstance(stop_str, str):
pos = output.rfind(stop_str, rfind_start)
if pos != -1:
output = output[:pos]
stopped = True
else:
partially_stopped = is_partial_stop(output, stop_str)
elif isinstance(stop_str, Iterable):
for each_stop in stop_str:
pos = output.rfind(each_stop, rfind_start)
if pos != -1:
output = output[:pos]
stopped = True
break
else:
partially_stopped = is_partial_stop(output, each_stop)
if partially_stopped:
break
else:
raise ValueError("Invalid stop field type.")
# Prevent yielding partial stop sequence
if not partially_stopped:
yield output
# yield {
# "text": output,
# "usage": {
# "prompt_tokens": input_echo_len,
# "completion_tokens": i,
# "total_tokens": input_echo_len + i,
# },
# "finish_reason": None,
# }
if stopped:
break
# Finish stream event, which contains finish reason
if i == max_new_tokens - 1:
finish_reason = "length"
elif stopped:
finish_reason = "stop"
else:
finish_reason = None
yield output
# yield {
# "text": output,
# "usage": {
# "prompt_tokens": input_echo_len,
# "completion_tokens": i,
# "total_tokens": input_echo_len + i,
# },
# "finish_reason": finish_reason,
# }
# Clean
del past_key_values, out
gc.collect()
torch.cuda.empty_cache()
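
For orientation, here is a hedged sketch of driving this generator directly. The checkpoint path, device, and sampling values are assumptions; only `generate_stream` and the `params` keys come from the file above.

# Illustrative driver; model path, device and sampling values are assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer

from pilot.model.inference import generate_stream

model_path = "lmsys/vicuna-7b-v1.5"  # hypothetical checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_path).to("cuda")

params = {
    "prompt": "USER: Hello! ASSISTANT:",
    "temperature": 0.7,
    "max_new_tokens": 64,
    "echo": False,
}
for output in generate_stream(model, tokenizer, params, "cuda", context_len=2048):
    print(output)  # each yield is the full decoded completion so far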

@@ -8,6 +8,11 @@ import copy

 import torch
 from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
+from pilot.scene.base_message import ModelMessage, _parse_model_messages
+
+# TODO move sep to scene prompt of model
+_CHATGLM_SEP = "\n"
+_CHATGLM2_SEP = "\n\n"

 @torch.inference_mode()
@@ -32,42 +37,20 @@ def chatglm_generate_stream(
     generate_kwargs["temperature"] = temperature

     # TODO, Fix this
-    print(prompt)
-    messages = prompt.split(stop)
-
-    # Add history conversation
-    hist = [HistoryEntry()]
-    system_messages = []
-    for message in messages[:-2]:
-        if len(message) <= 0:
-            continue
-        if "human:" in message:
-            hist[-1].add_question(message.split("human:")[1])
-        elif "system:" in message:
-            msg = message.split("system:")[1]
-            hist[-1].add_question(msg)
-            system_messages.append(msg)
-        elif "ai:" in message:
-            hist[-1].add_answer(message.split("ai:")[1])
-            hist.append(HistoryEntry())
-        else:
-            # TODO
-            # hist[-1].add_question(message.split("system:")[1])
-            # once_conversation.append(f"""###system:{message} """)
-            pass
-    try:
-        query = messages[-2].split("human:")[1]
-    except IndexError:
-        query = messages[-3].split("human:")[1]
-    hist = build_history(hist)
+    # print(prompt)
+    # messages = prompt.split(stop)
+    messages: List[ModelMessage] = params["messages"]
+    query, system_messages, hist = _parse_model_messages(messages)
+    system_messages_str = "".join(system_messages)

     if not hist:
         # No history conversation, but has system messages, merge to user`s query
-        query = prompt_adaptation(system_messages, query)
+        query = prompt_adaptation(system_messages_str, query)
+    else:
+        # history exist, add system message to head of history
+        hist[0][0] = system_messages_str + _CHATGLM2_SEP + hist[0][0]

     print("Query Message: ", query)
     print("hist: ", hist)
-    # output = ""
-    # i = 0

     for i, (response, new_hist) in enumerate(
         model.stream_chat(tokenizer, query, hist, **generate_kwargs)
@@ -103,10 +86,10 @@ def build_history(hist: List[HistoryEntry]) -> List[List[str]]:
     return list(filter(lambda hl: hl is not None, map(lambda h: h.to_list(), hist)))

-def prompt_adaptation(system_messages: List[str], human_message: str) -> str:
-    if not system_messages:
+def prompt_adaptation(system_messages_str: str, human_message: str) -> str:
+    if not system_messages_str or system_messages_str == "":
         return human_message
-    system_messages_str = " ".join(system_messages)
+    # TODO Multi-model prompt adaptation
     adaptation_rules = [
         r"Question:\s*{}\s*",  # chat_db scene
         r"Goals:\s*{}\s*",  # chat_execution
@@ -119,4 +102,4 @@ def prompt_adaptation(system_messages_str: str, human_message: str) -> str:
         if re.search(pattern, system_messages_str):
             return system_messages_str
     # https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
-    return f"{system_messages_str}\n\n问:{human_message}\n\n答:"
+    return system_messages_str + _CHATGLM2_SEP + human_message

@@ -3,8 +3,10 @@

 import json
 import requests
+from typing import List

 from pilot.configs.config import Config
 from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
+from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

 CFG = Config()
@@ -20,36 +22,17 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048):
         "Token": CFG.proxy_api_key,
     }

-    messages = prompt.split(stop)
+    messages: List[ModelMessage] = params["messages"]

     # Add history conversation
     for message in messages:
-        if len(message) <= 0:
-            continue
-        if "human:" in message:
-            history.append(
-                {"role": "user", "content": message.split("human:")[1]},
-            )
-        elif "system:" in message:
-            history.append(
-                {
-                    "role": "system",
-                    "content": message.split("system:")[1],
-                }
-            )
-        elif "ai:" in message:
-            history.append(
-                {
-                    "role": "assistant",
-                    "content": message.split("ai:")[1],
-                }
-            )
+        if message.role == ModelMessageRoleType.HUMAN:
+            history.append({"role": "user", "content": message.content})
+        elif message.role == ModelMessageRoleType.SYSTEM:
+            history.append({"role": "system", "content": message.content})
+        elif message.role == ModelMessageRoleType.AI:
+            history.append({"role": "assistant", "content": message.content})
         else:
-            history.append(
-                {
-                    "role": "system",
-                    "content": message,
-                }
-            )
+            pass

     # Move the last user's information to the end
     temp_his = history[::-1]

@@ -10,7 +10,6 @@ from typing import List, Optional

 from pilot.configs.config import Config
 from pilot.model.base import Message
-from pilot.server.llmserver import generate_output

 def create_chat_completion(
@@ -115,3 +114,17 @@ class Iteratorize:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.stop_now = True
+
+def is_sentence_complete(output: str):
+    """Check whether the output is a complete sentence."""
+    end_symbols = (".", "?", "!", "...", "。", "?", "!", "…", '"', "'", "”")
+    return output.endswith(end_symbols)
+
+def is_partial_stop(output: str, stop_str: str):
+    """Check whether the output contains a partial stop str."""
+    for i in range(0, min(len(output), len(stop_str))):
+        if stop_str.startswith(output[-i:]):
+            return True
+    return False
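
`is_partial_stop` reports whether the tail of the current output could be the start of a stop string, which is what lets the streamer hold back a half-generated stop sequence. A small demo with invented strings:

# Demo of the partial-stop check above (strings invented).
from pilot.model.llm_utils import is_partial_stop

print(is_partial_stop("The answer is ##", "###"))  # True: output ends with a prefix of "###"
print(is_partial_stop("The answer is 42", "###"))  # False: no overlap with the stop string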

@@ -53,8 +53,15 @@ class BaseOutputParser(ABC):
         """ TODO Multi mode output handler, rewrite this for multi model, use adapter mode.
         """
+        model_context = data.get("model_context")
+        if model_context and "prompt_echo_len_char" in model_context:
+            prompt_echo_len_char = int(model_context.get("prompt_echo_len_char", -1))
+            if prompt_echo_len_char != -1:
+                skip_echo_len = prompt_echo_len_char
+
         if data.get("error_code", 0) == 0:
-            if "vicuna" in CFG.LLM_MODEL:
+            if "vicuna" in CFG.LLM_MODEL or "llama-2" in CFG.LLM_MODEL:
+                # TODO Judging from model_context
                 # output = data["text"][skip_echo_len + 11:].strip()
                 output = data["text"][skip_echo_len:].strip()
             elif "guanaco" in CFG.LLM_MODEL:

@@ -2,6 +2,7 @@ import time
 from abc import ABC, abstractmethod
 import datetime
 import traceback
+import warnings
 import json
 from pydantic import BaseModel, Field, root_validator, validator, Extra
 from typing import (
@@ -37,6 +38,8 @@ from pilot.scene.base_message import (
     HumanMessage,
     AIMessage,
     ViewMessage,
+    ModelMessage,
+    ModelMessageRoleType,
 )

 from pilot.configs.config import Config
@@ -116,6 +119,7 @@ class BaseChat(ABC):
         payload = {
             "model": self.llm_model,
             "prompt": self.generate_llm_text(),
+            "messages": self.generate_llm_messages(),
             "temperature": float(self.prompt_template.temperature),
             "max_new_tokens": int(self.prompt_template.max_new_tokens),
             "stop": self.prompt_template.sep,
@@ -227,6 +231,7 @@ class BaseChat(ABC):
             return self.nostream_call()

     def generate_llm_text(self) -> str:
+        warnings.warn("This method is deprecated - please use `generate_llm_messages`.")
         text = ""
         ### Load scene setting or character definition
         if self.prompt_template.template_define:
@@ -244,24 +249,62 @@ class BaseChat(ABC):
         text += self.__load_user_message()
         return text

-    def __load_system_message(self):
+    def generate_llm_messages(self) -> List[ModelMessage]:
+        """
+        Structured prompt messages interaction between dbgpt-server and llm-server
+        See https://github.com/csunny/DB-GPT/issues/328
+        """
+        messages = []
+        ### Load scene setting or character definition as system message
+        if self.prompt_template.template_define:
+            messages.append(
+                ModelMessage(
+                    role=ModelMessageRoleType.SYSTEM,
+                    content=self.prompt_template.template_define,
+                )
+            )
+        ### Load prompt
+        messages += self.__load_system_message(str_message=False)
+        ### Load examples
+        messages += self.__load_example_messages(str_message=False)
+        ### Load History
+        messages += self.__load_histroy_messages(str_message=False)
+        ### Load User Input
+        messages += self.__load_user_message(str_message=False)
+        return messages
+
+    def __load_system_message(self, str_message: bool = True):
         system_convs = self.current_message.get_system_conv()
         system_text = ""
+        system_messages = []
         for system_conv in system_convs:
             system_text += (
                 system_conv.type + ":" + system_conv.content + self.prompt_template.sep
             )
-        return system_text
+            system_messages.append(
+                ModelMessage(role=system_conv.type, content=system_conv.content)
+            )
+        return system_text if str_message else system_messages

-    def __load_user_message(self):
+    def __load_user_message(self, str_message: bool = True):
         user_conv = self.current_message.get_user_conv()
+        user_messages = []
         if user_conv:
-            return user_conv.type + ":" + user_conv.content + self.prompt_template.sep
+            user_text = (
+                user_conv.type + ":" + user_conv.content + self.prompt_template.sep
+            )
+            user_messages.append(
+                ModelMessage(role=user_conv.type, content=user_conv.content)
+            )
+            return user_text if str_message else user_messages
         else:
             raise ValueError("Hi! What do you want to talk about")

-    def __load_example_messages(self):
+    def __load_example_messages(self, str_message: bool = True):
         example_text = ""
+        example_messages = []
         if self.prompt_template.example_selector:
             for round_conv in self.prompt_template.example_selector.examples():
                 for round_message in round_conv["messages"]:
@@ -269,16 +312,22 @@ class BaseChat(ABC):
                         SystemMessage.type,
                         ViewMessage.type,
                     ]:
+                        message_type = round_message["type"]
+                        message_content = round_message["data"]["content"]
                         example_text += (
-                            round_message["type"]
+                            message_type
                             + ":"
-                            + round_message["data"]["content"]
+                            + message_content
                             + self.prompt_template.sep
                         )
-        return example_text
+                        example_messages.append(
+                            ModelMessage(role=message_type, content=message_content)
+                        )
+        return example_text if str_message else example_messages

-    def __load_histroy_messages(self):
+    def __load_histroy_messages(self, str_message: bool = True):
         history_text = ""
+        history_messages = []
         if self.prompt_template.need_historical_messages:
             if self.history_message:
                 logger.info(
@@ -290,12 +339,17 @@ class BaseChat(ABC):
                         ViewMessage.type,
                         SystemMessage.type,
                     ]:
+                        message_type = first_message["type"]
+                        message_content = first_message["data"]["content"]
                         history_text += (
-                            first_message["type"]
+                            message_type
                             + ":"
-                            + first_message["data"]["content"]
+                            + message_content
                             + self.prompt_template.sep
                         )
+                        history_messages.append(
+                            ModelMessage(role=message_type, content=message_content)
+                        )

                 index = self.chat_retention_rounds - 1
                 for round_conv in self.history_message[-index:]:
@@ -304,12 +358,17 @@ class BaseChat(ABC):
                             SystemMessage.type,
                             ViewMessage.type,
                         ]:
+                            message_type = round_message["type"]
+                            message_content = round_message["data"]["content"]
                             history_text += (
-                                round_message["type"]
+                                message_type
                                 + ":"
-                                + round_message["data"]["content"]
+                                + message_content
                                 + self.prompt_template.sep
                             )
+                            history_messages.append(
+                                ModelMessage(role=message_type, content=message_content)
+                            )

             else:
                 ### user all history
@@ -320,14 +379,19 @@ class BaseChat(ABC):
                         SystemMessage.type,
                         ViewMessage.type,
                     ]:
+                        message_type = message["type"]
+                        message_content = message["data"]["content"]
                         history_text += (
-                            message["type"]
+                            message_type
                             + ":"
-                            + message["data"]["content"]
+                            + message_content
                             + self.prompt_template.sep
                         )
+                        history_messages.append(
+                            ModelMessage(role=message_type, content=message_content)
+                        )

-        return history_text
+        return history_text if str_message else history_messages

     def current_ai_response(self) -> str:
         for message in self.current_message.messages:
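
The net effect is that `generate_llm_messages` produces a role-tagged list mirroring what `generate_llm_text` used to flatten into one string. An invented example of the resulting payload field:

# Invented example of the shape generate_llm_messages returns.
from pilot.scene.base_message import ModelMessage

messages = [
    ModelMessage(role="system", content="You are a database assistant."),
    ModelMessage(role="human", content="Which tables are largest?"),
]
payload_fragment = {"messages": [m.dict() for m in messages]}
# {'messages': [{'role': 'system', 'content': ...}, {'role': 'human', 'content': ...}]}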

@@ -6,6 +6,7 @@ from typing import (
     Dict,
     Generic,
     List,
+    Tuple,
     NamedTuple,
     Optional,
     Sequence,
@@ -80,6 +81,22 @@ class SystemMessage(BaseMessage):
         return "system"

+class ModelMessage(BaseModel):
+    """Type of message used in the interaction between dbgpt-server and llm-server,
+    similar to OpenAI's message format."""
+
+    role: str
+    content: str
+
+class ModelMessageRoleType:
+    """Type of ModelMessage role"""
+
+    SYSTEM = "system"
+    HUMAN = "human"
+    AI = "ai"
+
 class Generation(BaseModel):
     """Output of a single generation."""
@@ -146,3 +163,35 @@ def _message_from_dict(message: dict) -> BaseMessage:
 def messages_from_dict(messages: List[dict]) -> List[BaseMessage]:
     return [_message_from_dict(m) for m in messages]
+
+def _parse_model_messages(
+    messages: List[ModelMessage],
+) -> Tuple[str, List[str], List[List[str]]]:
+    """
+    Parameters:
+        messages: List of messages from base chat.
+    Returns:
+        A tuple containing the user prompt, system message list and history message list:
+        str: user prompt
+        List[str]: system messages
+        List[List[str]]: history messages as [user, assistant] pairs
+    """
+    user_prompt = ""
+    system_messages: List[str] = []
+    history_messages: List[List[str]] = [[]]
+
+    for message in messages[:-1]:
+        if message.role == "human":
+            history_messages[-1].append(message.content)
+        elif message.role == "system":
+            system_messages.append(message.content)
+        elif message.role == "ai":
+            history_messages[-1].append(message.content)
+            history_messages.append([])
+    if messages[-1].role != "human":
+        raise ValueError("Hi! What do you want to talk about")
+    # Keep only complete [user message, assistant message] pairs
+    history_messages = list(filter(lambda x: len(x) == 2, history_messages))
+    user_prompt = messages[-1].content
+    return user_prompt, system_messages, history_messages
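
A hedged example of `_parse_model_messages` on an invented exchange (it is a private helper, imported here only to show the contract):

# Invented multi-turn exchange to show the parsing contract.
from pilot.scene.base_message import ModelMessage, _parse_model_messages

messages = [
    ModelMessage(role="system", content="You are a SQL expert."),
    ModelMessage(role="human", content="List all tables."),
    ModelMessage(role="ai", content="SHOW TABLES;"),
    ModelMessage(role="human", content="Now count the users."),
]
query, system_messages, hist = _parse_model_messages(messages)
print(query)            # "Now count the users."
print(system_messages)  # ["You are a SQL expert."]
print(hist)             # [["List all tables.", "SHOW TABLES;"]]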

@@ -2,8 +2,10 @@
 # -*- coding: utf-8 -*-

 from functools import cache
-from typing import List
+from typing import List, Dict, Tuple

 from pilot.model.llm_out.vicuna_base_llm import generate_stream
+from pilot.model.conversation import Conversation, get_conv_template
+from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

 class BaseChatAdpter:
@@ -17,6 +19,52 @@ class BaseChatAdpter:
         """Return the generate stream handler func"""
         pass

+    def get_conv_template(self) -> Conversation:
+        return None
+
+    def model_adaptation(self, params: Dict) -> Tuple[Dict, Dict]:
+        """Params adaptation"""
+        conv = self.get_conv_template()
+        messages = params.get("messages")
+        # Some model context for the dbgpt server
+        model_context = {"prompt_echo_len_char": -1}
+        if not conv or not messages:
+            # Nothing to do
+            return params, model_context
+        conv = conv.copy()
+        system_messages = []
+        for message in messages:
+            role, content = None, None
+            if isinstance(message, ModelMessage):
+                role = message.role
+                content = message.content
+            elif isinstance(message, dict):
+                role = message["role"]
+                content = message["content"]
+            else:
+                raise ValueError(f"Invalid message type: {message}")
+
+            if role == ModelMessageRoleType.SYSTEM:
+                # Support for multiple system messages
+                system_messages.append(content)
+            elif role == ModelMessageRoleType.HUMAN:
+                conv.append_message(conv.roles[0], content)
+            elif role == ModelMessageRoleType.AI:
+                conv.append_message(conv.roles[1], content)
+            else:
+                raise ValueError(f"Unknown role: {role}")
+
+        if system_messages:
+            conv.update_system_message("".join(system_messages))
+
+        # Add a blank message for the assistant.
+        conv.append_message(conv.roles[1], None)
+        new_prompt = conv.get_prompt()
+        # Overwrite the original prompt
+        # TODO remove bos token and eos token from tokenizer_config.json of model
+        prompt_echo_len_char = len(new_prompt.replace("</s>", "").replace("<s>", ""))
+        model_context["prompt_echo_len_char"] = prompt_echo_len_char
+        params["prompt"] = new_prompt
+        return params, model_context
+
 llm_model_chat_adapters: List[BaseChatAdpter] = []
@@ -134,12 +182,26 @@ class GPT4AllChatAdapter(BaseChatAdpter):
         return gpt4all_generate_stream

+class Llama2ChatAdapter(BaseChatAdpter):
+    def match(self, model_path: str):
+        return "llama-2" in model_path.lower()
+
+    def get_conv_template(self) -> Conversation:
+        return get_conv_template("llama-2")
+
+    def get_generate_stream_func(self):
+        from pilot.model.inference import generate_stream
+
+        return generate_stream
+
 register_llm_model_chat_adapter(VicunaChatAdapter)
 register_llm_model_chat_adapter(ChatGLMChatAdapter)
 register_llm_model_chat_adapter(GuanacoChatAdapter)
 register_llm_model_chat_adapter(FalconChatAdapter)
 register_llm_model_chat_adapter(GorillaChatAdapter)
 register_llm_model_chat_adapter(GPT4AllChatAdapter)
+register_llm_model_chat_adapter(Llama2ChatAdapter)

 # Proxy model for test and develop, it's cheap for us now.
 register_llm_model_chat_adapter(ProxyllmChatAdapter)
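
Putting the pieces together, a sketch of what `model_adaptation` does for a Llama-2 worker before generation. The model path and message contents are invented, and `get_llm_chat_adapter` is assumed to select an adapter by `match()` on the model path, as the server code below suggests.

# Sketch of params adaptation; model path and messages are invented.
from pilot.server.chat_adapter import get_llm_chat_adapter

adapter = get_llm_chat_adapter("/models/Llama-2-7b-chat-hf")  # hypothetical path
params = {
    "messages": [
        {"role": "system", "content": "You are concise."},
        {"role": "human", "content": "Ping?"},
    ],
    "temperature": 0.7,
}
params, model_context = adapter.model_adaptation(params)
print(params["prompt"])  # roughly "<s>[INST] <<SYS>>\nYou are concise.\n<</SYS>>\n\nPing? [/INST]"
print(model_context["prompt_echo_len_char"])  # prompt length with <s>/</s> markers stripped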

@@ -5,6 +5,7 @@ import asyncio
 import json
 import os
 import sys
+from typing import List

 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, Request
@@ -24,6 +25,7 @@ from pilot.configs.model_config import *
 from pilot.model.llm_out.vicuna_base_llm import get_embeddings
 from pilot.model.loader import ModelLoader
 from pilot.server.chat_adapter import get_llm_chat_adapter
+from pilot.scene.base_message import ModelMessage

 CFG = Config()
@@ -75,6 +77,8 @@ class ModelWorker:

     def generate_stream_gate(self, params):
         try:
+            # params adaptation
+            params, model_context = self.llm_chat_adapter.model_adaptation(params)
             for output in self.generate_stream_func(
                 self.model, self.tokenizer, params, DEVICE, CFG.MAX_POSITION_EMBEDDINGS
             ):
@@ -82,10 +86,8 @@ class ModelWorker:
                 # The gpt4all thread shares stdout with the parent process,
                 # and opening it may affect the frontend output.
                 print("output: ", output)
-                ret = {
-                    "text": output,
-                    "error_code": 0,
-                }
+                # Return some model context to the dbgpt-server
+                ret = {"text": output, "error_code": 0, "model_context": model_context}
                 yield json.dumps(ret).encode() + b"\0"

         except torch.cuda.CudaError:
@@ -128,6 +130,7 @@ app = FastAPI()

 class PromptRequest(BaseModel):
+    messages: List[ModelMessage]
     prompt: str
     temperature: float
     max_new_tokens: int
@@ -170,6 +173,7 @@ async def api_generate_stream(request: Request):
 @app.post("/generate")
 def generate(prompt_request: PromptRequest) -> str:
     params = {
+        "messages": prompt_request.messages,
         "prompt": prompt_request.prompt,
         "temperature": prompt_request.temperature,
         "max_new_tokens": prompt_request.max_new_tokens,