Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-08-01 00:03:29 +00:00)

Merge branch 'main' of https://github.com/csunny/DB-GPT into tt_dev

This commit is contained in commit fe522ac067.
@ -124,10 +124,17 @@ The core capabilities mainly consist of the following parts:

- [DB-GPT-Plugins](https://github.com/csunny/DB-GPT-Plugins) DB-GPT plugins; can run Auto-GPT plugins directly
- [DB-GPT-Web](https://github.com/csunny/DB-GPT-Web) ChatUI for DB-GPT

## Image

🌐 [AutoDL Image](https://www.codewithgpu.com/i/csunny/DB-GPT/dbgpt-0.3.1-v2)

## Install

[Quickstart](https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html)

### Language Switching
In the .env configuration file, modify the LANGUAGE parameter to switch between languages. The default is English (Chinese: zh, English: en; other languages will be added later).
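For example, a minimal .env sketch for switching to Chinese (the value shown is illustrative):

```
LANGUAGE=zh
```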
### Platform Deployment

- autodl
[autodl image](https://www.codewithgpu.com/i/csunny/DB-GPT/csunny-db-gpt). You can refer to the image instructions to build from scratch, or use `docker pull` to obtain the shared image and then follow the instructions in the document. If you have any questions, please leave a comment.
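As a rough sketch of the `docker pull` route (the image reference below is a placeholder; use the image name shown on the AutoDL/CodeWithGPU page):

```bash
# <shared-image> is a placeholder for the image name from the AutoDL page
docker pull <shared-image>
docker run -it --gpus all <shared-image> bash
```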
## Usage Instructions

- [Multi LLMs Usage](https://db-gpt.readthedocs.io/en/latest/modules/llms.html)
@ -118,12 +118,19 @@ DB-GPT基于 [FastChat](https://github.com/lm-sys/FastChat) 构建大模型运

- [DB-GPT-Plugins](https://github.com/csunny/DB-GPT-Plugins) DB-GPT 插件仓库, 兼容Auto-GPT
- [DB-GPT-Web](https://github.com/csunny/DB-GPT-Web) 多端交互前端界面

## Image

🌐 [AutoDL镜像](https://www.codewithgpu.com/i/csunny/DB-GPT/dbgpt-0.3.1-v2)

## 安装

[快速开始](https://db-gpt.readthedocs.io/projects/db-gpt-docs-zh-cn/zh_CN/latest/getting_started/getting_started.html)

### 多语言切换

在.env 配置文件当中,修改LANGUAGE参数来切换使用不同的语言,默认是英文(中文zh, 英文en, 其他语言待补充)

### 平台部署

- autodl

[autodl镜像](https://www.codewithgpu.com/i/csunny/DB-GPT/csunny-db-gpt),从头搭建可参考镜像说明,或通过`docker pull`获取共享镜像,按照文档中的说明操作即可,若有问题,欢迎评论。

## 使用说明

### 多模型使用
BIN assets/faq/chatdataerror.png (new file, 45 KiB; binary file not shown)
BIN assets/faq/illegal_character.png (new file, 16 KiB; binary file not shown)
BIN assets/faq/proxyerror.png (new file, 148 KiB; binary file not shown)
@ -1,3 +1,5 @@
CREATE DATABASE knowledge_management;
use knowledge_management;
CREATE TABLE `knowledge_space` (
  `id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
  `name` varchar(100) NOT NULL COMMENT 'knowledge space name',
@ -38,4 +40,37 @@ CREATE TABLE `document_chunk` (
  `gmt_modified` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
  PRIMARY KEY (`id`),
  KEY `idx_document_id` (`document_id`) COMMENT 'index:document_id'
) ENGINE=InnoDB AUTO_INCREMENT=100001 DEFAULT CHARSET=utf8mb4 COMMENT='knowledge document chunk detail'
) ENGINE=InnoDB AUTO_INCREMENT=100001 DEFAULT CHARSET=utf8mb4 COMMENT='knowledge document chunk detail';

CREATE DATABASE EXAMPLE_1;
use EXAMPLE_1;
CREATE TABLE `users` (
  `id` int NOT NULL AUTO_INCREMENT,
  `username` varchar(50) NOT NULL COMMENT '用户名',
  `password` varchar(50) NOT NULL COMMENT '密码',
  `email` varchar(50) NOT NULL COMMENT '邮箱',
  `phone` varchar(20) DEFAULT NULL COMMENT '电话',
  PRIMARY KEY (`id`),
  KEY `idx_username` (`username`) COMMENT '索引:按用户名查询'
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COMMENT='聊天用户表';

INSERT INTO users (username, password, email, phone) VALUES ('user_1', 'password_1', 'user_1@example.com', '12345678901');
INSERT INTO users (username, password, email, phone) VALUES ('user_2', 'password_2', 'user_2@example.com', '12345678902');
INSERT INTO users (username, password, email, phone) VALUES ('user_3', 'password_3', 'user_3@example.com', '12345678903');
INSERT INTO users (username, password, email, phone) VALUES ('user_4', 'password_4', 'user_4@example.com', '12345678904');
INSERT INTO users (username, password, email, phone) VALUES ('user_5', 'password_5', 'user_5@example.com', '12345678905');
INSERT INTO users (username, password, email, phone) VALUES ('user_6', 'password_6', 'user_6@example.com', '12345678906');
INSERT INTO users (username, password, email, phone) VALUES ('user_7', 'password_7', 'user_7@example.com', '12345678907');
INSERT INTO users (username, password, email, phone) VALUES ('user_8', 'password_8', 'user_8@example.com', '12345678908');
INSERT INTO users (username, password, email, phone) VALUES ('user_9', 'password_9', 'user_9@example.com', '12345678909');
INSERT INTO users (username, password, email, phone) VALUES ('user_10', 'password_10', 'user_10@example.com', '12345678900');
INSERT INTO users (username, password, email, phone) VALUES ('user_11', 'password_11', 'user_11@example.com', '12345678901');
INSERT INTO users (username, password, email, phone) VALUES ('user_12', 'password_12', 'user_12@example.com', '12345678902');
INSERT INTO users (username, password, email, phone) VALUES ('user_13', 'password_13', 'user_13@example.com', '12345678903');
INSERT INTO users (username, password, email, phone) VALUES ('user_14', 'password_14', 'user_14@example.com', '12345678904');
INSERT INTO users (username, password, email, phone) VALUES ('user_15', 'password_15', 'user_15@example.com', '12345678905');
INSERT INTO users (username, password, email, phone) VALUES ('user_16', 'password_16', 'user_16@example.com', '12345678906');
INSERT INTO users (username, password, email, phone) VALUES ('user_17', 'password_17', 'user_17@example.com', '12345678907');
INSERT INTO users (username, password, email, phone) VALUES ('user_18', 'password_18', 'user_18@example.com', '12345678908');
INSERT INTO users (username, password, email, phone) VALUES ('user_19', 'password_19', 'user_19@example.com', '12345678909');
INSERT INTO users (username, password, email, phone) VALUES ('user_20', 'password_20', 'user_20@example.com', '12345678900');
docs/faq.md (new file, 97 lines)
@ -0,0 +1,97 @@
# FAQ

##### Q1: text2vec-large-chinese not found

##### A1: Make sure you have downloaded the text2vec-large-chinese embedding model correctly.

```tip
centos: yum install git-lfs
ubuntu: apt-get install git-lfs -y
macos:  brew install git-lfs
```
```bash
cd models
git lfs clone https://huggingface.co/GanymedeNil/text2vec-large-chinese
```

##### Q2: `pip install -r requirements.txt` fails because some packages cannot be resolved to a correct version.

##### A2: Change the pip index.

```bash
# pypi
$ pip install -r requirements.txt -i https://pypi.python.org/simple
```

or

```bash
# tsinghua
$ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
```

or

```bash
# aliyun
$ pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/
```

##### Q3: Access denied for user 'root'@'localhost' (using password: NO)

##### A3: Make sure you have installed the MySQL instance correctly.

Docker:
```bash
docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
```
Normal:
[download mysql instance](https://dev.mysql.com/downloads/mysql/)

##### Q4: When I use OpenAI (MODEL_SERVER=proxyllm) to chat
<p align="left">
  <img src="../assets/faq/proxyerror.png" width="800px" />
</p>
##### A4: Make sure your OpenAI API_KEY is valid.
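For reference, a minimal .env sketch for the proxy setup. The MODEL_SERVER value comes from Q4 above; the PROXY_API_KEY and PROXY_SERVER_URL names are assumptions based on .env.template and may differ in your version:

```bash
# variable names below are assumptions from .env.template; the key is a placeholder
MODEL_SERVER=proxyllm
PROXY_API_KEY=sk-xxxx
PROXY_SERVER_URL=https://api.openai.com/v1/chat/completions
```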
##### Q5: When I use Chat Data or Chat Meta Data, I see the following error
<p align="left">
  <img src="../assets/faq/chatdataerror.png" width="800px" />
</p>

##### A5: You have not created your database and table yet.

1. Create your database.
```bash
mysql> create database {$your_name};
mysql> use {$your_name};
```

2. Create table {$your_table} and insert your data, e.g.:
```bash
mysql> CREATE TABLE `users` (
  `id` int NOT NULL AUTO_INCREMENT,
  `username` varchar(50) NOT NULL COMMENT '用户名',
  `password` varchar(50) NOT NULL COMMENT '密码',
  `email` varchar(50) NOT NULL COMMENT '邮箱',
  `phone` varchar(20) DEFAULT NULL COMMENT '电话',
  PRIMARY KEY (`id`),
  KEY `idx_username` (`username`) COMMENT '索引:按用户名查询'
) ENGINE=InnoDB AUTO_INCREMENT=101 DEFAULT CHARSET=utf8mb4 COMMENT='聊天用户表';
```
##### Q6: When I use vicuna-13b, the knowledge-base answers contain illegal characters like this.
<p align="left">
  <img src="../assets/faq/illegal_character.png" width="800px" />
</p>
##### A6: Set KNOWLEDGE_SEARCH_TOP_SIZE or KNOWLEDGE_CHUNK_SIZE to a smaller value and restart the server.
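As a rough sketch, both parameters live in the .env file; the values below are illustrative, not recommendations:

```bash
# illustrative values; tune them for your documents
KNOWLEDGE_CHUNK_SIZE=200
KNOWLEDGE_SEARCH_TOP_SIZE=3
```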
@ -17,11 +17,15 @@ As our project has the ability to achieve ChatGPT performance of over 85%, there

### 2. Install

This project relies on a local MySQL database service, which you need to install locally. We recommend using Docker for installation.

1. This project relies on a local MySQL database service, which you need to install locally. We recommend using Docker for installation.
```bash
$ docker run --name=mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=aa12345678 -dit mysql:latest
```
2. Prepare the server SQL script:
```bash
$ mysql -h127.0.0.1 -uroot -paa12345678 < ./assets/schema/knowledge_management.sql
```
We use [Chroma embedding database](https://github.com/chroma-core/chroma) as the default vector database, so no special installation is needed. If you choose to connect to another database, you can follow our tutorial for installation and configuration.
For the entire installation process of DB-GPT, we use a miniconda3 virtual environment. Create the virtual environment and install the Python dependencies, as sketched below.
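A minimal sketch of that step; the environment name and Python version are illustrative:

```bash
# environment name and Python version are illustrative
conda create -n dbgpt_env python=3.10
conda activate dbgpt_env
pip install -r requirements.txt
```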
@ -63,16 +67,10 @@ You can refer to this document to obtain the Vicuna weights: [Vicuna](https://gi

If you have difficulty with this step, you can also directly use the model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a replacement.

1. prepare server sql script
```bash
mysql> CREATE DATABASE knowledge_management;
mysql> use knowledge_management;
mysql> source ./assets/schema/knowledge_management.sql
```
In the .env configuration, set your vector store type, e.g. VECTOR_STORE_TYPE=Chroma. We currently support Chroma and Milvus (version > 2.1); see the sketch below.
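A minimal .env sketch for this setting; the Milvus connection variables are assumptions and may be named differently in your .env.template:

```bash
VECTOR_STORE_TYPE=Chroma
# For Milvus (version > 2.1), something like the following (names are assumptions):
# VECTOR_STORE_TYPE=Milvus
# MILVUS_URL=127.0.0.1
# MILVUS_PORT=19530
```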
2. Run db-gpt server
1. Run db-gpt server

```bash
$ python pilot/server/dbgpt_server.py
@ -130,6 +130,18 @@ Reference

   ./reference.md

FAQ
-----------
| DB-GPT FAQ.

.. toctree::
   :maxdepth: 1
   :caption: FAQ
   :name: FAQ
   :hidden:

   ./faq.md

Ecosystem
----------
docs/locales/zh_CN/LC_MESSAGES/faq.po (new file, 100 lines)
@ -0,0 +1,100 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 👏👏 0.3.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-07-20 10:53+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../faq.md:1 81cedd396b274db9b2a69448df98a28d
|
||||
msgid "FAQ"
|
||||
msgstr "FAQ"
|
||||
|
||||
#: ../../faq.md:2 1a3f8c7661e34721a4465a34281416b1
|
||||
msgid "Q1: text2vec-large-chinese not found"
|
||||
msgstr "Q1: text2vec-large-chinese not found"
|
||||
|
||||
#: ../../faq.md:4 5d2844bcbcc843fc97de41a491f914fe
|
||||
msgid ""
|
||||
"A1: make sure you have download text2vec-large-chinese embedding model in"
|
||||
" right way"
|
||||
msgstr "按照正确的姿势下载text2vec-large-chinese模型"
|
||||
|
||||
#: ../../faq.md:16 82ade01884534030b81be7c3e06f1504
|
||||
msgid ""
|
||||
"Q2: execute `pip install -r requirements.txt` error, found some package "
|
||||
"cannot find correct version."
|
||||
msgstr "执行`pip install -r requirements.txt`报错"
|
||||
|
||||
#: ../../faq.md:19 451ede833da642788e5224811a71ba0f
|
||||
msgid "A2: change the pip source."
|
||||
msgstr "修改pip源"
|
||||
|
||||
#: ../../faq.md:26 ../../faq.md:33 0142abf1050d4fee9caf056322b52247
|
||||
#: 0adb87dd67fa4122997871f7ab064637
|
||||
msgid "or"
|
||||
msgstr "或"
|
||||
|
||||
#: ../../faq.md:41 d6981160ecc6491284fd32e1098fc10e
|
||||
msgid "Q3:Access denied for user 'root@localhost'(using password :NO)"
|
||||
msgstr "Q3:Access denied for user 'root@localhost'(using password :NO)"
|
||||
|
||||
#: ../../faq.md:43 e50b195057804f28b84d0d10859e4f1b
|
||||
msgid "A3: make sure you have installed mysql instance in right way"
|
||||
msgstr "按照正确姿势安装mysql"
|
||||
|
||||
#: ../../faq.md:45 03ba25aa7bd241d3b32cc1916f858a3e
|
||||
msgid "Docker:"
|
||||
msgstr "Docker:"
|
||||
|
||||
#: ../../faq.md:49 dd5336b44673459c93a1408097cb76f9
|
||||
msgid "Normal: [download mysql instance](https://dev.mysql.com/downloads/mysql/)"
|
||||
msgstr "[download mysql instance](https://dev.mysql.com/downloads/mysql/)"
|
||||
|
||||
#: ../../faq.md:52 2b290c4653a2410c8d330ed5b0e9a821
|
||||
msgid "Q4:When I use openai(MODEL_SERVER=proxyllm) to chat"
|
||||
msgstr "使用openai-chatgpt模型时(MODEL_SERVER=proxyllm)"
|
||||
|
||||
#: ../../faq.md:57 f4d0e8e8113f4ca4bc55f167b661fd6a
|
||||
msgid "A4: make sure your openapi API_KEY is available"
|
||||
msgstr "确认openapi API_KEY是否可用"
|
||||
|
||||
#: ../../faq.md:59 092ca3dea0c5466ab6e22ab0049f166e
|
||||
msgid "Q5:When I Chat Data and Chat Meta Data, I found the error"
|
||||
msgstr "Chat Data and Chat Meta Data报如下错"
|
||||
|
||||
#: ../../faq.md:64 dbf61e6ea2c64ecebfdbbde83cb74e3e
|
||||
msgid "A5: you have not create your database and table"
|
||||
msgstr "需要创建自己的数据库"
|
||||
|
||||
#: ../../faq.md:65 0505bb716e6445c2a7960436d93cb407
|
||||
msgid "1.create your database."
|
||||
msgstr "1.先创建数据库"
|
||||
|
||||
#: ../../faq.md:71 fd689b541ee549bd85385647c219b4cb
|
||||
msgid "2.create table {$your_table} and insert your data. eg:"
|
||||
msgstr "然后创建数据表,模拟数据"
|
||||
|
||||
#: ../../faq.md:85 de2d78db5fb6450cb08b0f15385ed525
|
||||
msgid "Q6:When I use vicuna-13b, found some illegal character like this."
|
||||
msgstr "使用vicuna-13b,知识库问答出现乱码"
|
||||
|
||||
#: ../../faq.md:90 0cb1d0c2ec434763ae80e6f87d4a1665
|
||||
msgid ""
|
||||
"A6: set KNOWLEDGE_SEARCH_TOP_SIZE smaller or set KNOWLEDGE_CHUNK_SIZE "
|
||||
"smaller, and reboot server."
|
||||
msgstr "将KNOWLEDGE_SEARCH_TOP_SIZE和KNOWLEDGE_CHUNK_SIZE设置小点然后重启"
|
||||
|
@ -8,7 +8,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.3.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-07-13 15:39+0800\n"
|
||||
"POT-Creation-Date: 2023-07-20 10:53+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
@ -19,29 +19,29 @@ msgstr ""
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../getting_started/getting_started.md:1 0b2e795438a3413c875fd80191e85bad
|
||||
#: ../../getting_started/getting_started.md:1 7c12b6d7d5be4528be005cdadec568db
|
||||
msgid "Quickstart Guide"
|
||||
msgstr "使用指南"
|
||||
|
||||
#: ../../getting_started/getting_started.md:3 7b84c9776f8a4f9fb55afc640f37f45c
|
||||
#: ../../getting_started/getting_started.md:3 ba312f5a132541be89dcc09012076784
|
||||
msgid ""
|
||||
"This tutorial gives you a quick walkthrough about use DB-GPT with you "
|
||||
"environment and data."
|
||||
msgstr "本教程为您提供了关于如何使用DB-GPT的使用指南。"
|
||||
|
||||
#: ../../getting_started/getting_started.md:5 1b2880e1ef674bfdbf39ac9f330aeec9
|
||||
#: ../../getting_started/getting_started.md:5 8e56b3f0726740abaaafa57415b10bea
|
||||
msgid "Installation"
|
||||
msgstr "安装"
|
||||
|
||||
#: ../../getting_started/getting_started.md:7 d0a8c6654bfe4bbdb0eb40ceb2ea3388
|
||||
#: ../../getting_started/getting_started.md:7 3af8fe74db1043349e8f784c109b0417
|
||||
msgid "To get started, install DB-GPT with the following steps."
|
||||
msgstr "请按照以下步骤安装DB-GPT"
|
||||
|
||||
#: ../../getting_started/getting_started.md:9 0a4e0b06c7fe49a9b2ca56ba2eb7b8ba
|
||||
#: ../../getting_started/getting_started.md:9 0b33cf4604f846e781a63d857dde72b2
|
||||
msgid "1. Hardware Requirements"
|
||||
msgstr "1. 硬件要求"
|
||||
|
||||
#: ../../getting_started/getting_started.md:10 2b42f6546ef141f696943ba2120584e5
|
||||
#: ../../getting_started/getting_started.md:10 f1d4abb176494bcb85cead7f3f8b719d
|
||||
msgid ""
|
||||
"As our project has the ability to achieve ChatGPT performance of over "
|
||||
"85%, there are certain hardware requirements. However, overall, the "
|
||||
@ -49,62 +49,67 @@ msgid ""
|
||||
"specific hardware requirements for deployment are as follows:"
|
||||
msgstr "由于我们的项目有能力达到85%以上的ChatGPT性能,所以对硬件有一定的要求。但总体来说,我们在消费级的显卡上即可完成项目的部署使用,具体部署的硬件说明如下:"
|
||||
|
||||
#: ../../getting_started/getting_started.md 4df0c44eff8741f39ca0fdeff222f90c
|
||||
#: ../../getting_started/getting_started.md e8516902f29d4ca2bb46f19b5e3deb81
|
||||
msgid "GPU"
|
||||
msgstr "GPU"
|
||||
|
||||
#: ../../getting_started/getting_started.md b740a2991ce546cca43a426b760e9901
|
||||
#: ../../getting_started/getting_started.md a951ccca67364cf7ad5f0af2ec0ece8d
|
||||
msgid "VRAM Size"
|
||||
msgstr "显存大小"
|
||||
|
||||
#: ../../getting_started/getting_started.md 222b91ff82f14d12acaac5aa238758c8
|
||||
#: ../../getting_started/getting_started.md 01b7e055ee4543bdb619fbc14fea4d86
|
||||
msgid "Performance"
|
||||
msgstr "性能"
|
||||
|
||||
#: ../../getting_started/getting_started.md c2d2ae6a4c964c4f90a9009160754782
|
||||
#: ../../getting_started/getting_started.md 0b20b224ff8a4e2c890a8b4ff43b6045
|
||||
msgid "RTX 4090"
|
||||
msgstr "RTX 4090"
|
||||
|
||||
#: ../../getting_started/getting_started.md 529220ec6a294e449dc460ba2e8829a1
|
||||
#: 5e0c5900842e4d66b2064b13cc31a3ad
|
||||
#: ../../getting_started/getting_started.md 17a343c4359d45c987f29de1c73760b4
|
||||
#: c9daaf0578434a7e812a8d3f3edde3f0
|
||||
msgid "24 GB"
|
||||
msgstr "24 GB"
|
||||
|
||||
#: ../../getting_started/getting_started.md 84d29eef342f4d6282295c0e32487548
|
||||
#: ../../getting_started/getting_started.md 6a35be039a0a43eaaf7d1aa40aece6f7
|
||||
msgid "Smooth conversation inference"
|
||||
msgstr "可以流畅的进行对话推理,无卡顿"
|
||||
|
||||
#: ../../getting_started/getting_started.md 5a10effe322e4afb8315415c04dc05a4
|
||||
#: ../../getting_started/getting_started.md a414f5ae7bdd450f8d171d1f075d6b66
|
||||
msgid "RTX 3090"
|
||||
msgstr "RTX 3090"
|
||||
|
||||
#: ../../getting_started/getting_started.md 8924059525ab43329a8bb6659e034d5e
|
||||
#: ../../getting_started/getting_started.md 109ed9bda6e541b88eb3400a44e15df7
|
||||
msgid "Smooth conversation inference, better than V100"
|
||||
msgstr "可以流畅进行对话推理,有卡顿感,但好于V100"
|
||||
|
||||
#: ../../getting_started/getting_started.md 10f5bc076f524127a956d7a23f3666ba
|
||||
#: ../../getting_started/getting_started.md 3f2a05dc610f461faa989f3c12750d00
|
||||
msgid "V100"
|
||||
msgstr "V100"
|
||||
|
||||
#: ../../getting_started/getting_started.md 7d664e81984847c7accd08db93fad404
|
||||
#: ../../getting_started/getting_started.md 90e24795876546ecbb4796ca5d313514
|
||||
msgid "16 GB"
|
||||
msgstr "16 GB"
|
||||
|
||||
#: ../../getting_started/getting_started.md 86765bc9ab01409fb7f5edf04f9b32a5
|
||||
#: ../../getting_started/getting_started.md 34061757e69e4691b7a5ff3c2953f1e3
|
||||
msgid "Conversation inference possible, noticeable stutter"
|
||||
msgstr "可以进行对话推理,有明显卡顿"
|
||||
|
||||
#: ../../getting_started/getting_started.md:18 a0ac5591c0ac4ac6a385e562353daf22
|
||||
#: ../../getting_started/getting_started.md:18 61d59cb27daf43eb9aa2775fa6dac820
|
||||
msgid "2. Install"
|
||||
msgstr "2. 安装"
|
||||
|
||||
#: ../../getting_started/getting_started.md:20 a64a9a5945074ece872509f8cb425da9
|
||||
#: ../../getting_started/getting_started.md:20 28a6b66c92464929bed8f04d5b841e8c
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"This project relies on a local MySQL database service, which you need to "
|
||||
"install locally. We recommend using Docker for installation."
|
||||
"1.This project relies on a local MySQL database service, which you need "
|
||||
"to install locally. We recommend using Docker for installation."
|
||||
msgstr "本项目依赖一个本地的 MySQL 数据库服务,你需要本地安装,推荐直接使用 Docker 安装。"
|
||||
|
||||
#: ../../getting_started/getting_started.md:25 11e799a372ab4d0f8269cd7be98bebc6
|
||||
#: ../../getting_started/getting_started.md:24 7793b19e96b44ba481b527a877c81170
|
||||
msgid "prepare server sql script"
|
||||
msgstr "准备db-gpt server sql脚本"
|
||||
|
||||
#: ../../getting_started/getting_started.md:29 e3743699e95e45eab3e140df266bb3b5
|
||||
msgid ""
|
||||
"We use [Chroma embedding database](https://github.com/chroma-core/chroma)"
|
||||
" as the default for our vector database, so there is no need for special "
|
||||
@ -117,11 +122,11 @@ msgstr ""
|
||||
"向量数据库我们默认使用的是Chroma内存数据库,所以无需特殊安装,如果有需要连接其他的同学,可以按照我们的教程进行安装配置。整个DB-"
|
||||
"GPT的安装过程,我们使用的是miniconda3的虚拟环境。创建虚拟环境,并安装python依赖包"
|
||||
|
||||
#: ../../getting_started/getting_started.md:34 dcab69c83d4c48b9bb19c4336ee74a66
|
||||
#: ../../getting_started/getting_started.md:38 be9f388e255c4bc7837029ce8237ef0b
|
||||
msgid "Before use DB-GPT Knowledge Management"
|
||||
msgstr "使用知识库管理功能之前"
|
||||
|
||||
#: ../../getting_started/getting_started.md:40 735aeb6ae8aa4344b7ff679548279acc
|
||||
#: ../../getting_started/getting_started.md:44 9175b34ed78c4310b048f53ac07b13cb
|
||||
msgid ""
|
||||
"Once the environment is installed, we have to create a new folder "
|
||||
"\"models\" in the DB-GPT project, and then we can put all the models "
|
||||
@ -130,33 +135,33 @@ msgstr ""
|
||||
"环境安装完成后,我们必须在DB-"
|
||||
"GPT项目中创建一个新文件夹\"models\",然后我们可以把从huggingface下载的所有模型放到这个目录下。"
|
||||
|
||||
#: ../../getting_started/getting_started.md:43 7cbefe131b24488b9be39b3e8ed4f563
|
||||
#: ../../getting_started/getting_started.md:47 40715051bd844b0187265d425debfbee
|
||||
#, fuzzy
|
||||
msgid "Notice make sure you have install git-lfs"
|
||||
msgstr "确保你已经安装了git-lfs"
|
||||
|
||||
#: ../../getting_started/getting_started.md:53 54ec90ebb969475988451cd66e6ff412
|
||||
#: ../../getting_started/getting_started.md:57 7d7e7b1fe72b4e0c96eba721a8aa2113
|
||||
msgid ""
|
||||
"The model files are large and will take a long time to download. During "
|
||||
"the download, let's configure the .env file, which needs to be copied and"
|
||||
" created from the .env.template"
|
||||
msgstr "模型文件很大,需要很长时间才能下载。在下载过程中,让我们配置.env文件,它需要从。env.template中复制和创建。"
|
||||
|
||||
#: ../../getting_started/getting_started.md:56 9bdadbee88af4683a4eb7b4f221fb4b8
|
||||
#: ../../getting_started/getting_started.md:60 6a25e5307bdb49a0afc69b9d17395a5a
|
||||
msgid "cp .env.template .env"
|
||||
msgstr "cp .env.template .env"
|
||||
|
||||
#: ../../getting_started/getting_started.md:59 6357c4a0154b4f08a079419ac408442d
|
||||
#: ../../getting_started/getting_started.md:63 14567647544f4036beaae158b59833f6
|
||||
msgid ""
|
||||
"You can configure basic parameters in the .env file, for example setting "
|
||||
"LLM_MODEL to the model to be used"
|
||||
msgstr "您可以在.env文件中配置基本参数,例如将LLM_MODEL设置为要使用的模型。"
|
||||
|
||||
#: ../../getting_started/getting_started.md:61 2f349f3ed3184b849ade2a15d5bf0c6c
|
||||
#: ../../getting_started/getting_started.md:65 1b459d413a4d4b7e883d1ec17384ca30
|
||||
msgid "3. Run"
|
||||
msgstr "3. 运行"
|
||||
|
||||
#: ../../getting_started/getting_started.md:62 fe408e4405bd48288e2e746386615925
|
||||
#: ../../getting_started/getting_started.md:66 ed15ee15450e4a028bf5aa05a9309697
|
||||
msgid ""
|
||||
"You can refer to this document to obtain the Vicuna weights: "
|
||||
"[Vicuna](https://github.com/lm-sys/FastChat/blob/main/README.md#model-"
|
||||
@ -165,7 +170,7 @@ msgstr ""
|
||||
"关于基础模型, 可以根据[Vicuna](https://github.com/lm-"
|
||||
"sys/FastChat/blob/main/README.md#model-weights) 合成教程进行合成。"
|
||||
|
||||
#: ../../getting_started/getting_started.md:64 c0acfe28007f459ca21174f968763fa3
|
||||
#: ../../getting_started/getting_started.md:68 de15a70920a94192a1f2017cbe3cdb55
|
||||
msgid ""
|
||||
"If you have difficulty with this step, you can also directly use the "
|
||||
"model from [this link](https://huggingface.co/Tribbiani/vicuna-7b) as a "
|
||||
@ -174,11 +179,7 @@ msgstr ""
|
||||
"如果此步有困难的同学,也可以直接使用[此链接](https://huggingface.co/Tribbiani/vicuna-"
|
||||
"7b)上的模型进行替代。"
|
||||
|
||||
#: ../../getting_started/getting_started.md:66 cc0f4c4e43f24b679f857a8d937528ee
|
||||
msgid "prepare server sql script"
|
||||
msgstr "准备db-gpt server sql脚本"
|
||||
|
||||
#: ../../getting_started/getting_started.md:72 386948064fe646f2b9f51a262dd64bf2
|
||||
#: ../../getting_started/getting_started.md:70 763aaed45fd948fab761552a7e06061a
|
||||
msgid ""
|
||||
"set .env configuration set your vector store type, "
|
||||
"eg:VECTOR_STORE_TYPE=Chroma, now we support Chroma and Milvus(version > "
|
||||
@ -187,17 +188,17 @@ msgstr ""
|
||||
"在.env文件设置向量数据库环境变量,eg:VECTOR_STORE_TYPE=Chroma, 目前我们支持了 Chroma and "
|
||||
"Milvus(version >2.1) "
|
||||
|
||||
#: ../../getting_started/getting_started.md:75 e6f6b06459944f2d8509703af365c664
|
||||
#: ../../getting_started/getting_started.md:73 a8f0dc3546c54a1098ff10157f980cef
|
||||
#, fuzzy
|
||||
msgid "Run db-gpt server"
|
||||
msgid "1.Run db-gpt server"
|
||||
msgstr "运行模型服务"
|
||||
|
||||
#: ../../getting_started/getting_started.md:80 489b595dc08a459ca2fd83b1389d3bbd
|
||||
#: ../../getting_started/getting_started.md:78 1715948545154c10af585de8960bf853
|
||||
#, fuzzy
|
||||
msgid "Open http://localhost:5000 with your browser to see the product."
|
||||
msgstr "打开浏览器访问http://localhost:5000"
|
||||
|
||||
#: ../../getting_started/getting_started.md:82 699afb01c9f243ab837cdc73252f624c
|
||||
#: ../../getting_started/getting_started.md:80 8ea9964df477473e866fe844dcf4be54
|
||||
msgid ""
|
||||
"If you want to access an external LLM service, you need to 1.set the "
|
||||
"variables LLM_MODEL=YOUR_MODEL_NAME "
|
||||
@ -205,7 +206,7 @@ msgid ""
|
||||
"file. 2.execute dbgpt_server.py in light mode"
|
||||
msgstr "如果你想访问外部的大模型服务,1.需要在.env文件设置模型名和外部模型服务地址。2.使用light模式启动服务"
|
||||
|
||||
#: ../../getting_started/getting_started.md:89 7df7f3870e1140d3a17dc322a46d6476
|
||||
#: ../../getting_started/getting_started.md:87 4c409a0e2a994f428712ab94b475e9bd
|
||||
msgid ""
|
||||
"If you want to learn about dbgpt-webui, read https://github.com/csunny"
|
||||
"/DB-GPT/tree/new-page-framework/datacenter"
|
||||
|
@ -8,7 +8,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.3.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-30 17:16+0800\n"
|
||||
"POT-Creation-Date: 2023-07-20 10:53+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
@ -19,31 +19,35 @@ msgstr ""
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../index.rst:34 ../../index.rst:45 00f3369727374a2da4b3e02c35c63363
|
||||
#: ../../index.rst:34 ../../index.rst:45 5bab1511780c442e9dd9e18519ad7ef3
|
||||
msgid "Getting Started"
|
||||
msgstr "开始"
|
||||
|
||||
#: ../../index.rst:56 ../../index.rst:77 f3b85d983f0141b5872a46451d20252a
|
||||
#: ../../index.rst:57 ../../index.rst:78 dbdeae6c9a344e7889d399a5dbbca872
|
||||
msgid "Modules"
|
||||
msgstr "模块"
|
||||
|
||||
#: ../../index.rst:91 ../../index.rst:107 614a2220f0004aadab14402dab5c926d
|
||||
#: ../../index.rst:92 ../../index.rst:108 43594aec45bf49609d7c7435c6472a9b
|
||||
msgid "Use Cases"
|
||||
msgstr "示例"
|
||||
|
||||
#: ../../index.rst:121 ../../index.rst:124 fae0a63e09f049a79ffbd8c1fd5b8f12
|
||||
#: ../../index.rst:122 ../../index.rst:125 013a0e5d909e4332ac49f6200924043e
|
||||
msgid "Reference"
|
||||
msgstr "参考"
|
||||
|
||||
#: ../../index.rst:148 ../../index.rst:154 d4113b3e7b2f4a1ba462e6a54062adff
|
||||
#: ../../index.rst:134 ../../index.rst:137 42907cf1d1bb491bae2fcd7ede06e421
|
||||
msgid "FAQ"
|
||||
msgstr ""
|
||||
|
||||
#: ../../index.rst:161 ../../index.rst:167 64f4a587bc1f4ca9a456b97fac5d6def
|
||||
msgid "Resources"
|
||||
msgstr "资源"
|
||||
|
||||
#: ../../index.rst:7 6548011d66b14e71bf3f028b82bb58d6
|
||||
#: ../../index.rst:7 df1ad6da308f49debd4053e5899a4c8d
|
||||
msgid "Welcome to DB-GPT!"
|
||||
msgstr "欢迎来到DB-GPT中文文档"
|
||||
|
||||
#: ../../index.rst:8 da9fdd494e664e7e83448d4993c30f60
|
||||
#: ../../index.rst:8 cc5b388ea2924968b2769e4b4a159761
|
||||
msgid ""
|
||||
"As large models are released and iterated upon, they are becoming "
|
||||
"increasingly intelligent. However, in the process of using large models, "
|
||||
@ -61,7 +65,7 @@ msgstr ""
|
||||
",我们启动了DB-"
|
||||
"GPT项目,为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署,既可应用于“独立私有环境”,又可根据业务模块进行“独立部署”和“隔离”,确保“大模型”的能力绝对私有、安全、可控。"
|
||||
|
||||
#: ../../index.rst:10 7650af0ff7bc4a93ad82c930c81cb7f5
|
||||
#: ../../index.rst:10 b831cd62a1ba46108dcf343792e8d67a
|
||||
msgid ""
|
||||
"**DB-GPT** is an experimental open-source project that uses localized GPT"
|
||||
" large models to interact with your data and environment. With this "
|
||||
@ -71,102 +75,102 @@ msgstr ""
|
||||
"DB-GPT 是一个开源的以数据库为基础的GPT实验项目,使用本地化的GPT大模型与您的数据和环境进行交互,无数据泄露风险100% 私密,100%"
|
||||
" 安全。"
|
||||
|
||||
#: ../../index.rst:12 18249811cea0456096f06da05638d474
|
||||
#: ../../index.rst:12 3baac7e36a824395a57c246e05560c67
|
||||
msgid "**Features**"
|
||||
msgstr "特性"
|
||||
|
||||
#: ../../index.rst:13 367c092acf1a49a0906f78433e6d9926
|
||||
#: ../../index.rst:13 6638568ff94e47f6a0b04a6775ca45d1
|
||||
msgid ""
|
||||
"Currently, we have released multiple key features, which are listed below"
|
||||
" to demonstrate our current capabilities:"
|
||||
msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。"
|
||||
|
||||
#: ../../index.rst:15 01334c212ef44244a306f8cfb584c48f
|
||||
#: ../../index.rst:15 b49710ac36ed4a188280aeb3589f11fe
|
||||
msgid "SQL language capabilities - SQL generation - SQL diagnosis"
|
||||
msgstr "SQL语言能力 - SQL生成 - SQL诊断"
|
||||
|
||||
#: ../../index.rst:19 fe7396137ed84517bd99c93500f21bb6
|
||||
#: ../../index.rst:19 6bd495632b39477e8948677aeb4ab1ee
|
||||
msgid ""
|
||||
"Private domain Q&A and data processing - Database knowledge Q&A - Data "
|
||||
"processing"
|
||||
msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理"
|
||||
|
||||
#: ../../index.rst:23 c12af975ed2d451496a476d91b6bb4c5
|
||||
#: ../../index.rst:23 d1d3665a412246b9814139a938a29d1b
|
||||
msgid ""
|
||||
"Plugins - Support custom plugin execution tasks and natively support the "
|
||||
"Auto-GPT plugin, such as:"
|
||||
msgstr "插件模型 - 支持自定义插件执行任务,并原生支持Auto-GPT插件,例如:* SQL自动执行,获取查询结果 * 自动爬取学习知识"
|
||||
|
||||
#: ../../index.rst:26 97507eb30ab548fb9443b252e481d6be
|
||||
#: ../../index.rst:26 41e37b63a8f44fc09a4da3574b25fcd3
|
||||
msgid ""
|
||||
"Unified vector storage/indexing of knowledge base - Support for "
|
||||
"unstructured data such as PDF, Markdown, CSV, and WebURL"
|
||||
msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL"
|
||||
|
||||
#: ../../index.rst:29 b2ff661c13d54cd79f27923163079706
|
||||
#: ../../index.rst:29 1acac8a6cbe54f7b8c31135189d726d6
|
||||
msgid ""
|
||||
"Milti LLMs Support - Supports multiple large language models, currently "
|
||||
"supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8) - TODO: codegen2, "
|
||||
"codet5p"
|
||||
msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)"
|
||||
|
||||
#: ../../index.rst:35 15178a36df624fa9b4a5acdab5060752
|
||||
#: ../../index.rst:35 481edf63bcd348b4b3e14353dc5be952
|
||||
msgid ""
|
||||
"How to get started using DB-GPT to interact with your data and "
|
||||
"environment."
|
||||
msgstr "开始使用DB-GPT与您的数据环境进行交互。"
|
||||
|
||||
#: ../../index.rst:36 8aa3a44286b244a4b9b92db285e5382b
|
||||
#: ../../index.rst:36 6b26a04661f7445198b3797442d3f178
|
||||
#, fuzzy
|
||||
msgid "`Quickstart Guide <./getting_started/getting_started.html>`_"
|
||||
msgstr "`使用指南 <./getting_started/getting_started.html>`_"
|
||||
|
||||
#: ../../index.rst:38 3f4ec91e5df44629aff6c94ffbaa37d7
|
||||
#: ../../index.rst:38 ff435ef9964d44508ad840de2950c1c8
|
||||
msgid "Concepts and terminology"
|
||||
msgstr "相关概念"
|
||||
|
||||
#: ../../index.rst:40 d0126674e0e24aefbf51d610b2fcf5da
|
||||
#: ../../index.rst:40 db9dfacbe93e4f858843c4d2a0e2dd25
|
||||
#, fuzzy
|
||||
msgid "`Concepts and Terminology <./getting_started/concepts.html>`_"
|
||||
msgstr "`相关概念 <./getting_started/concepts.html>`_"
|
||||
|
||||
#: ../../index.rst:42 c6d67e4c1b8346c18f2b99c8f5795627
|
||||
#: ../../index.rst:42 50faf8302ba140ce84897daef2c90ca2
|
||||
msgid "Coming soon..."
|
||||
msgstr ""
|
||||
|
||||
#: ../../index.rst:44 18d63d2d729246648743d1b7470029e0
|
||||
#: ../../index.rst:44 547bb007f90e43c096f25647cebefd7b
|
||||
msgid "`Tutorials <.getting_started/tutorials.html>`_"
|
||||
msgstr "`教程 <.getting_started/tutorials.html>`_"
|
||||
|
||||
#: ../../index.rst:58 5f93833b5e8f42b7b8728587f5054d8b
|
||||
#: ../../index.rst:59 89bc50c752b84e3fb789ce5da1b654dc
|
||||
msgid ""
|
||||
"These modules are the core abstractions with which we can interact with "
|
||||
"data and environment smoothly."
|
||||
msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。"
|
||||
|
||||
#: ../../index.rst:59 d68173b40df146818ddf68b309bbd27d
|
||||
#: ../../index.rst:60 81f3860920964720958c62b3a6769f12
|
||||
msgid ""
|
||||
"It's very important for DB-GPT, DB-GPT also provide standard, extendable "
|
||||
"interfaces."
|
||||
msgstr "DB-GPT还提供了标准的、可扩展的接口。"
|
||||
|
||||
#: ../../index.rst:61 00257e3907b346d5bd007f2cfe52bac9
|
||||
#: ../../index.rst:62 15faf914d05544df86359e50fdc70483
|
||||
msgid ""
|
||||
"The docs for each module contain quickstart examples, how to guides, "
|
||||
"reference docs, and conceptual guides."
|
||||
msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。"
|
||||
|
||||
#: ../../index.rst:63 e04ba16ec943405080a26ec874e67823
|
||||
#: ../../index.rst:64 18a673690afa47df8e6dd3d065b2580f
|
||||
msgid "The modules are as follows"
|
||||
msgstr "组成模块如下:"
|
||||
|
||||
#: ../../index.rst:65 12d9ce3eda484316b3fcbbb2adb48b6e
|
||||
#: ../../index.rst:66 d2c20ed1b6ba4aa697090453b0f775a1
|
||||
msgid ""
|
||||
"`LLMs <./modules/llms.html>`_: Supported multi models management and "
|
||||
"integrations."
|
||||
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
|
||||
|
||||
#: ../../index.rst:67 9d7ce6fea46c41c0940198499336430e
|
||||
#: ../../index.rst:68 d6c72e6fee3348e89422f9ff45804e3a
|
||||
msgid ""
|
||||
"`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and"
|
||||
" serialization for multi database."
|
||||
@ -174,59 +178,59 @@ msgstr ""
|
||||
"`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt "
|
||||
",并进行优化,提高系统的响应效率"
|
||||
|
||||
#: ../../index.rst:69 0847158883424a98a02e58fd9c2d6744
|
||||
#: ../../index.rst:70 1756de47335d4eb7916a6c8b8b5ab70b
|
||||
msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler."
|
||||
msgstr "`Agent与插件: <./modules/plugins.html>`_:提供Agent和插件机制,使得用户可以自定义并增强系统的行为。"
|
||||
|
||||
#: ../../index.rst:71 6b2a8143adff4395b6a86a6d22078a87
|
||||
#: ../../index.rst:72 542efb88411f4cc192a08e7d28c84863
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding,"
|
||||
" and search."
|
||||
msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, "
|
||||
|
||||
#: ../../index.rst:73 70e831c592ca431791a614934061c148
|
||||
#: ../../index.rst:74 0204630cd5f14c68ada3bf11e0b0fbf5
|
||||
msgid ""
|
||||
"`Connections <./modules/connections.html>`_: Supported multi databases "
|
||||
"connection. management connections and interact with this."
|
||||
msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 "
|
||||
|
||||
#: ../../index.rst:75 1a5eaebe25174828b60c17475dae7928
|
||||
#: ../../index.rst:76 f9363a963be44d0ea01bb5d65b69d0f8
|
||||
#, fuzzy
|
||||
msgid "`Vector <./modules/vector.html>`_: Supported multi vector database."
|
||||
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
|
||||
|
||||
#: ../../index.rst:93 8a2ea497d36449febd1560dbecd5ec44
|
||||
#: ../../index.rst:94 7223c3b95e9446bcae40a355e6b02324
|
||||
msgid "Best Practices and built-in implementations for common DB-GPT use cases:"
|
||||
msgstr "DB-GPT用例的最佳实践和内置方法:"
|
||||
|
||||
#: ../../index.rst:95 fcc65a470c5643619688488cfe010f61
|
||||
#: ../../index.rst:96 c088f2ab9f2247ac9a8c9af31d0da7a6
|
||||
msgid ""
|
||||
"`Sql generation and diagnosis "
|
||||
"<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and "
|
||||
"diagnosis."
|
||||
msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。"
|
||||
|
||||
#: ../../index.rst:97 26b983fbb9394f94bf31636175cc6f95
|
||||
#: ../../index.rst:98 23973b07c6ba42088a714b048d4b43c4
|
||||
msgid ""
|
||||
"`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A "
|
||||
"important scene for user to chat with database documents, codes, bugs and"
|
||||
" schemas."
|
||||
msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景\""
|
||||
|
||||
#: ../../index.rst:99 aa4cfc95a3bf463682952bf40f38c99b
|
||||
#: ../../index.rst:100 6a1d77bc56a14803b66ebc9f2b6c4b7b
|
||||
msgid ""
|
||||
"`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use"
|
||||
" multi models to chat."
|
||||
msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话"
|
||||
|
||||
#: ../../index.rst:101 e9b540aab91c44ae911bf96aa18c0f36
|
||||
#: ../../index.rst:102 621774a68cba46daa7112f865e6e3af9
|
||||
msgid ""
|
||||
"`Querying Database Data <./use_cases/query_database_data.html>`_: Query "
|
||||
"and Analysis data from databases and give charts."
|
||||
msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。"
|
||||
|
||||
#: ../../index.rst:103 88a99b638b4a42abae39fe9de4f16927
|
||||
#: ../../index.rst:104 9f379fbe8aac47f4a1a53d84ab2a2f51
|
||||
msgid ""
|
||||
"`Interacting with apis <./use_cases/interacting_with_api.html>`_: "
|
||||
"Interact with apis, such as create a table, deploy a database cluster, "
|
||||
@ -235,33 +239,37 @@ msgstr ""
|
||||
"`API交互 <./use_cases/interacting_with_api.html>`_: "
|
||||
"与API交互,例如创建表、部署数据库集群、创建数据库等。"
|
||||
|
||||
#: ../../index.rst:105 53cb38b16ed04420934233aec02fd44b
|
||||
#: ../../index.rst:106 3dbb717e1b024a20bb6049facb616b1b
|
||||
msgid ""
|
||||
"`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to"
|
||||
" Plugin use tools to manage databases autonomoly."
|
||||
msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。"
|
||||
|
||||
#: ../../index.rst:122 fbc2fe1374ac4909b1c66ea11bbfafec
|
||||
#: ../../index.rst:123 4acedc39ccf34e79b805189a11285a3a
|
||||
msgid ""
|
||||
"Full documentation on all methods, classes, installation methods, and "
|
||||
"integration setups for DB-GPT."
|
||||
msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。"
|
||||
|
||||
#: ../../index.rst:133 c45530a6c3a34a42a0fd8bca6efe3b07
|
||||
#: ../../index.rst:135 622112f3cce34461ba7e0d52fa81d438
|
||||
msgid "DB-GPT FAQ."
|
||||
msgstr ""
|
||||
|
||||
#: ../../index.rst:146 a989256fd69f4bbfae73191b505c59fa
|
||||
msgid "Ecosystem"
|
||||
msgstr "环境系统"
|
||||
|
||||
#: ../../index.rst:135 e9f68f4abbdc4832889d0e343ce43d27
|
||||
#: ../../index.rst:148 1b5c410dd94842f2801f08540dd57647
|
||||
msgid "Guides for how other companies/products can be used with DB-GPT"
|
||||
msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南"
|
||||
|
||||
#: ../../index.rst:150 c33686daaa7c44d28c96dc22c0d9480c
|
||||
#: ../../index.rst:163 8368b4c62ebe41ad91551e241dbcc4df
|
||||
msgid ""
|
||||
"Additional resources we think may be useful as you develop your "
|
||||
"application!"
|
||||
msgstr "“我们认为在您开发应用程序时可能有用的其他资源!”"
|
||||
|
||||
#: ../../index.rst:152 f6d108be181f47af80c4b8a5931ff172
|
||||
#: ../../index.rst:165 2a4cd897b20c4683979b44fb9d7470e6
|
||||
msgid ""
|
||||
"`Discord <https://discord.com/invite/twmZk3vv>`_: if your have some "
|
||||
"problem or ideas, you can talk from discord."
|
||||
|
@ -11,7 +11,7 @@ cp .env.template .env
LLM_MODEL=vicuna-13b
MODEL_SERVER=http://127.0.0.1:8000
```
Now we support the models vicuna-13b, vicuna-7b, chatglm-6b, flan-t5-base, guanaco-33b-merged, falcon-40b and gorilla-7b.
Now we support the models vicuna-13b, vicuna-7b, chatglm-6b, flan-t5-base, guanaco-33b-merged, falcon-40b, gorilla-7b, llama-2-7b and llama-2-13b.
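For example, a minimal sketch of selecting one of the new Llama-2 models in .env; the model key matches the LLM_MODEL_CONFIG entries below, and the weights are expected under models/Llama-2-13b-chat-hf:

```
LLM_MODEL=llama-2-13b
MODEL_SERVER=http://127.0.0.1:8000
```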
If you want to use another model, such as chatglm-6b, you just need to update the .env config file.
```
@ -47,6 +47,9 @@ LLM_MODEL_CONFIG = {
    "gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
    "gptj-6b": os.path.join(MODEL_PATH, "ggml-gpt4all-j-v1.3-groovy.bin"),
    "proxyllm": "proxyllm",
    "llama-2-7b": os.path.join(MODEL_PATH, "Llama-2-7b-chat-hf"),
    "llama-2-13b": os.path.join(MODEL_PATH, "Llama-2-13b-chat-hf"),
    "llama-2-70b": os.path.join(MODEL_PATH, "Llama-2-70b-chat-hf"),
}

# Load model config
@ -263,12 +263,26 @@ class ProxyllmAdapter(BaseLLMAdaper):
        return "proxyllm", None


class Llama2Adapter(BaseLLMAdaper):
    """The model adapter for llama-2"""

    def match(self, model_path: str):
        return "llama-2" in model_path.lower()

    def loader(self, model_path: str, from_pretrained_kwargs: dict):
        model, tokenizer = super().loader(model_path, from_pretrained_kwargs)
        # keep the model config's eos/pad token ids in sync with the tokenizer
        model.config.eos_token_id = tokenizer.eos_token_id
        model.config.pad_token_id = tokenizer.pad_token_id
        return model, tokenizer


register_llm_model_adapters(VicunaLLMAdapater)
register_llm_model_adapters(ChatGLMAdapater)
register_llm_model_adapters(GuanacoAdapter)
register_llm_model_adapters(FalconAdapater)
register_llm_model_adapters(GorillaAdapter)
register_llm_model_adapters(GPT4AllAdapter)
register_llm_model_adapters(Llama2Adapter)
# TODO: vicuna is supported by default; other models still need testing and evaluation

# just for test_py, remove this later
pilot/model/conversation.py (new file, 308 lines)
@ -0,0 +1,308 @@
|
||||
"""
|
||||
Fork from fastchat: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||
|
||||
Conversation prompt templates.
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
from enum import auto, IntEnum
|
||||
from typing import List, Any, Dict, Callable
|
||||
|
||||
|
||||
class SeparatorStyle(IntEnum):
|
||||
"""Separator styles."""
|
||||
|
||||
ADD_COLON_SINGLE = auto()
|
||||
ADD_COLON_TWO = auto()
|
||||
ADD_COLON_SPACE_SINGLE = auto()
|
||||
NO_COLON_SINGLE = auto()
|
||||
NO_COLON_TWO = auto()
|
||||
ADD_NEW_LINE_SINGLE = auto()
|
||||
LLAMA2 = auto()
|
||||
CHATGLM = auto()
|
||||
CHATML = auto()
|
||||
CHATINTERN = auto()
|
||||
DOLLY = auto()
|
||||
RWKV = auto()
|
||||
PHOENIX = auto()
|
||||
ROBIN = auto()
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Conversation:
|
||||
"""A class that manages prompt templates and keeps all conversation history."""
|
||||
|
||||
# The name of this template
|
||||
name: str
|
||||
# The system prompt
|
||||
system: str
|
||||
# Two roles
|
||||
roles: List[str]
|
||||
# All messages. Each item is (role, message).
|
||||
messages: List[List[str]]
|
||||
# The number of few shot examples
|
||||
offset: int
|
||||
# Separators
|
||||
sep_style: SeparatorStyle
|
||||
sep: str
|
||||
sep2: str = None
|
||||
# Stop criteria (the default one is EOS token)
|
||||
stop_str: str = None
|
||||
# Stops generation if meeting any token in this list
|
||||
stop_token_ids: List[int] = None
|
||||
|
||||
# format system message
|
||||
system_formatter: Callable = None
|
||||
|
||||
def get_prompt(self) -> str:
|
||||
"""Get the prompt for generation."""
|
||||
if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
|
||||
ret = self.system + self.sep
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + ": " + message + self.sep
|
||||
else:
|
||||
ret += role + ":"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.ADD_COLON_TWO:
|
||||
seps = [self.sep, self.sep2]
|
||||
ret = self.system + seps[0]
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
ret += role + ": " + message + seps[i % 2]
|
||||
else:
|
||||
ret += role + ":"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
|
||||
ret = self.system + self.sep
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + ": " + message + self.sep
|
||||
else:
|
||||
ret += role + ": " # must be end with a space
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
|
||||
ret = "" if self.system == "" else self.system + self.sep
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + "\n" + message + self.sep
|
||||
else:
|
||||
ret += role + "\n"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
|
||||
ret = self.system
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + message + self.sep
|
||||
else:
|
||||
ret += role
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.NO_COLON_TWO:
|
||||
seps = [self.sep, self.sep2]
|
||||
ret = self.system
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
ret += role + message + seps[i % 2]
|
||||
else:
|
||||
ret += role
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.RWKV:
|
||||
ret = self.system
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
ret += (
|
||||
role
|
||||
+ ": "
|
||||
+ message.replace("\r\n", "\n").replace("\n\n", "\n")
|
||||
)
|
||||
ret += "\n\n"
|
||||
else:
|
||||
ret += role + ":"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.LLAMA2:
|
||||
seps = [self.sep, self.sep2]
|
||||
ret = ""
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
if i == 0:
|
||||
ret += self.system + message
|
||||
else:
|
||||
ret += role + " " + message + seps[i % 2]
|
||||
else:
|
||||
ret += role
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.CHATGLM:
|
||||
# source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
|
||||
# source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
|
||||
round_add_n = 1 if self.name == "chatglm2" else 0
|
||||
if self.system:
|
||||
ret = self.system + self.sep
|
||||
else:
|
||||
ret = ""
|
||||
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if i % 2 == 0:
|
||||
ret += f"[Round {i//2 + round_add_n}]{self.sep}"
|
||||
|
||||
if message:
|
||||
ret += f"{role}:{message}{self.sep}"
|
||||
else:
|
||||
ret += f"{role}:"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.CHATML:
|
||||
ret = "" if self.system == "" else self.system + self.sep + "\n"
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + "\n" + message + self.sep + "\n"
|
||||
else:
|
||||
ret += role + "\n"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.CHATINTERN:
|
||||
# source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
|
||||
seps = [self.sep, self.sep2]
|
||||
ret = self.system
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if i % 2 == 0:
|
||||
ret += "<s>"
|
||||
if message:
|
||||
ret += role + ":" + message + seps[i % 2] + "\n"
|
||||
else:
|
||||
ret += role + ":"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.DOLLY:
|
||||
seps = [self.sep, self.sep2]
|
||||
ret = self.system
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
ret += role + ":\n" + message + seps[i % 2]
|
||||
if i % 2 == 1:
|
||||
ret += "\n\n"
|
||||
else:
|
||||
ret += role + ":\n"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.PHOENIX:
|
||||
ret = self.system
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + ": " + "<s>" + message + "</s>"
|
||||
else:
|
||||
ret += role + ": " + "<s>"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.ROBIN:
|
||||
ret = self.system + self.sep
|
||||
for role, message in self.messages:
|
||||
if message:
|
||||
ret += role + ":\n" + message + self.sep
|
||||
else:
|
||||
ret += role + ":\n"
|
||||
return ret
|
||||
else:
|
||||
raise ValueError(f"Invalid style: {self.sep_style}")
|
||||
|
||||
def append_message(self, role: str, message: str):
|
||||
"""Append a new message."""
|
||||
self.messages.append([role, message])
|
||||
|
||||
def update_last_message(self, message: str):
|
||||
"""Update the last output.
|
||||
|
||||
The last message is typically set to be None when constructing the prompt,
|
||||
so we need to update it in-place after getting the response from a model.
|
||||
"""
|
||||
self.messages[-1][1] = message
|
||||
|
||||
def update_system_message(self, system_message: str):
|
||||
"""Update system message"""
|
||||
if self.system_formatter:
|
||||
self.system = self.system_formatter(system_message)
|
||||
else:
|
||||
self.system = system_message
|
||||
|
||||
def to_gradio_chatbot(self):
|
||||
"""Convert the conversation to gradio chatbot format."""
|
||||
ret = []
|
||||
for i, (role, msg) in enumerate(self.messages[self.offset :]):
|
||||
if i % 2 == 0:
|
||||
ret.append([msg, None])
|
||||
else:
|
||||
ret[-1][-1] = msg
|
||||
return ret
|
||||
|
||||
def to_openai_api_messages(self):
|
||||
"""Convert the conversation to OpenAI chat completion format."""
|
||||
ret = [{"role": "system", "content": self.system}]
|
||||
|
||||
for i, (_, msg) in enumerate(self.messages[self.offset :]):
|
||||
if i % 2 == 0:
|
||||
ret.append({"role": "user", "content": msg})
|
||||
else:
|
||||
if msg is not None:
|
||||
ret.append({"role": "assistant", "content": msg})
|
||||
return ret
|
||||
|
||||
def copy(self):
|
||||
return Conversation(
|
||||
name=self.name,
|
||||
system=self.system,
|
||||
roles=self.roles,
|
||||
messages=[[x, y] for x, y in self.messages],
|
||||
offset=self.offset,
|
||||
sep_style=self.sep_style,
|
||||
sep=self.sep,
|
||||
sep2=self.sep2,
|
||||
stop_str=self.stop_str,
|
||||
stop_token_ids=self.stop_token_ids,
|
||||
system_formatter=self.system_formatter,
|
||||
)
|
||||
|
||||
def dict(self):
|
||||
return {
|
||||
"template_name": self.name,
|
||||
"system": self.system,
|
||||
"roles": self.roles,
|
||||
"messages": self.messages,
|
||||
"offset": self.offset,
|
||||
}
|
||||
|
||||
|
||||
# A global registry for all conversation templates
|
||||
conv_templates: Dict[str, Conversation] = {}
|
||||
|
||||
|
||||
def register_conv_template(template: Conversation, override: bool = False):
|
||||
"""Register a new conversation template."""
|
||||
if not override:
|
||||
assert (
|
||||
template.name not in conv_templates
|
||||
), f"{template.name} has been registered."
|
||||
|
||||
conv_templates[template.name] = template
|
||||
|
||||
|
||||
def get_conv_template(name: str) -> Conversation:
|
||||
"""Get a conversation template."""
|
||||
return conv_templates[name].copy()
|
||||
|
||||
|
||||
# llama2 template
|
||||
# reference: https://github.com/facebookresearch/llama/blob/cfc3fc8c1968d390eb830e65c63865e980873a06/llama/generation.py#L212
|
||||
register_conv_template(
|
||||
Conversation(
|
||||
name="llama-2",
|
||||
system="<s>[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. "
|
||||
"Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. "
|
||||
"Please ensure that your responses are socially unbiased and positive in nature.\n\n"
|
||||
"If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. "
|
||||
"If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n\n",
|
||||
roles=("[INST]", "[/INST]"),
|
||||
messages=(),
|
||||
offset=0,
|
||||
sep_style=SeparatorStyle.LLAMA2,
|
||||
sep=" ",
|
||||
sep2=" </s><s>",
|
||||
stop_token_ids=[2],
|
||||
system_formatter=lambda msg: f"<s>[INST] <<SYS>>\n{msg}\n<</SYS>>\n\n",
|
||||
)
|
||||
)
|
||||
|
||||
# TODO Support other model conversation template
|
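To show how the registered template is meant to be used, here is a small usage sketch based only on the functions defined in this file; the role strings come from the llama-2 template registered above:

```python
# Usage sketch for the conversation template defined above (illustrative only).
from pilot.model.conversation import get_conv_template

conv = get_conv_template("llama-2")                      # copy of the registered template
conv.update_system_message("You are a concise assistant.")
conv.append_message(conv.roles[0], "What is DB-GPT?")    # "[INST]" role
conv.append_message(conv.roles[1], None)                 # "[/INST]" placeholder for the reply
prompt = conv.get_prompt()                               # prompt string fed to the model
conv.update_last_message("DB-GPT is ...")                # fill in after generation
```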
pilot/model/inference.py (new file, 242 lines)
@ -0,0 +1,242 @@
|
||||
"""
|
||||
Fork from fastchat: https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/inference.py
|
||||
|
||||
"""
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import gc
|
||||
from typing import Iterable, Dict
|
||||
|
||||
import torch
|
||||
|
||||
from transformers.generation.logits_process import (
|
||||
LogitsProcessorList,
|
||||
RepetitionPenaltyLogitsProcessor,
|
||||
TemperatureLogitsWarper,
|
||||
TopKLogitsWarper,
|
||||
TopPLogitsWarper,
|
||||
)
|
||||
|
||||
from pilot.model.llm_utils import is_sentence_complete, is_partial_stop
|
||||
|
||||
|
||||
def prepare_logits_processor(
|
||||
temperature: float, repetition_penalty: float, top_p: float, top_k: int
|
||||
) -> LogitsProcessorList:
|
||||
processor_list = LogitsProcessorList()
|
||||
# TemperatureLogitsWarper doesn't accept 0.0, 1.0 makes it a no-op so we skip two cases.
|
||||
if temperature >= 1e-5 and temperature != 1.0:
|
||||
processor_list.append(TemperatureLogitsWarper(temperature))
|
||||
if repetition_penalty > 1.0:
|
||||
processor_list.append(RepetitionPenaltyLogitsProcessor(repetition_penalty))
|
||||
if 1e-8 <= top_p < 1.0:
|
||||
processor_list.append(TopPLogitsWarper(top_p))
|
||||
if top_k > 0:
|
||||
processor_list.append(TopKLogitsWarper(top_k))
|
||||
return processor_list
|
||||
|
||||
|
||||
@torch.inference_mode()
|
||||
def generate_stream(
|
||||
model,
|
||||
tokenizer,
|
||||
params: Dict,
|
||||
device: str,
|
||||
context_len: int,
|
||||
stream_interval: int = 2,
|
||||
judge_sent_end: bool = False,
|
||||
):
|
||||
# Read parameters
|
||||
prompt = params["prompt"]
|
||||
print(f"Prompt of model: \n{prompt}")
|
||||
len_prompt = len(prompt)
|
||||
temperature = float(params.get("temperature", 1.0))
|
||||
repetition_penalty = float(params.get("repetition_penalty", 1.0))
|
||||
top_p = float(params.get("top_p", 1.0))
|
||||
top_k = int(params.get("top_k", -1)) # -1 means disable
|
||||
max_new_tokens = int(params.get("max_new_tokens", 2048))
|
||||
echo = bool(params.get("echo", True))
|
||||
stop_str = params.get("stop", None)
|
||||
stop_token_ids = params.get("stop_token_ids", None) or []
|
||||
stop_token_ids.append(tokenizer.eos_token_id)
|
||||
|
||||
logits_processor = prepare_logits_processor(
|
||||
temperature, repetition_penalty, top_p, top_k
|
||||
)
|
||||
input_ids = tokenizer(prompt).input_ids
|
||||
|
||||
if model.config.is_encoder_decoder:
|
||||
max_src_len = context_len
|
||||
else: # truncate
|
||||
max_src_len = context_len - max_new_tokens - 1
|
||||
|
||||
input_ids = input_ids[-max_src_len:]
|
||||
output_ids = list(input_ids)
|
||||
input_echo_len = len(input_ids)
|
||||
|
||||
if model.config.is_encoder_decoder:
|
||||
encoder_output = model.encoder(
|
||||
input_ids=torch.as_tensor([input_ids], device=device)
|
||||
)[0]
|
||||
start_ids = torch.as_tensor(
|
||||
[[model.generation_config.decoder_start_token_id]],
|
||||
dtype=torch.int64,
|
||||
device=device,
|
||||
)
|
||||
|
||||
past_key_values = out = None
|
||||
sent_interrupt = False
|
||||
for i in range(max_new_tokens):
|
||||
if i == 0: # prefill
|
||||
if model.config.is_encoder_decoder:
|
||||
out = model.decoder(
|
||||
input_ids=start_ids,
|
||||
encoder_hidden_states=encoder_output,
|
||||
use_cache=True,
|
||||
)
|
||||
logits = model.lm_head(out[0])
|
||||
else:
|
||||
out = model(torch.as_tensor([input_ids], device=device), use_cache=True)
|
||||
logits = out.logits
|
||||
past_key_values = out.past_key_values
|
||||
else: # decoding
|
||||
if model.config.is_encoder_decoder:
|
||||
out = model.decoder(
|
||||
input_ids=torch.as_tensor(
|
||||
[[token] if not sent_interrupt else output_ids], device=device
|
||||
),
|
||||
encoder_hidden_states=encoder_output,
|
||||
use_cache=True,
|
||||
past_key_values=past_key_values if not sent_interrupt else None,
|
||||
)
|
||||
sent_interrupt = False
|
||||
|
||||
logits = model.lm_head(out[0])
|
||||
else:
|
||||
out = model(
|
||||
input_ids=torch.as_tensor(
|
||||
[[token] if not sent_interrupt else output_ids], device=device
|
||||
),
|
||||
use_cache=True,
|
||||
past_key_values=past_key_values if not sent_interrupt else None,
|
||||
)
|
||||
sent_interrupt = False
|
||||
logits = out.logits
|
||||
past_key_values = out.past_key_values
|
||||
|
||||
if logits_processor:
|
||||
if repetition_penalty > 1.0:
|
||||
tmp_output_ids = torch.as_tensor([output_ids], device=logits.device)
|
||||
else:
|
||||
tmp_output_ids = None
|
||||
last_token_logits = logits_processor(tmp_output_ids, logits[:, -1, :])[0]
|
||||
else:
|
||||
last_token_logits = logits[0, -1, :]
|
||||
|
||||
if device == "mps":
|
||||
# Switch to CPU by avoiding some bugs in mps backend.
|
||||
last_token_logits = last_token_logits.float().to("cpu")
|
||||
|
||||
if temperature < 1e-5 or top_p < 1e-8: # greedy
|
||||
_, indices = torch.topk(last_token_logits, 2)
|
||||
tokens = [int(index) for index in indices.tolist()]
|
||||
else:
|
||||
probs = torch.softmax(last_token_logits, dim=-1)
|
||||
indices = torch.multinomial(probs, num_samples=2)
|
||||
tokens = [int(token) for token in indices.tolist()]
|
||||
token = tokens[0]
|
||||
output_ids.append(token)
|
||||
|
||||
if token in stop_token_ids:
|
||||
stopped = True
|
||||
else:
|
||||
stopped = False
|
||||
|
||||
# Yield the output tokens
|
||||
if i % stream_interval == 0 or i == max_new_tokens - 1 or stopped:
|
||||
if echo:
|
||||
tmp_output_ids = output_ids
|
||||
rfind_start = len_prompt
|
||||
else:
|
||||
tmp_output_ids = output_ids[input_echo_len:]
|
||||
rfind_start = 0
|
||||
|
||||
output = tokenizer.decode(
|
||||
tmp_output_ids,
|
||||
skip_special_tokens=True,
|
||||
spaces_between_special_tokens=False,
|
||||
clean_up_tokenization_spaces=True,
|
||||
)
|
||||
# TODO: For the issue of incomplete sentences interrupting output, apply a patch and others can also modify it to a more elegant way
|
||||
if judge_sent_end and stopped and not is_sentence_complete(output):
|
||||
if len(tokens) > 1:
|
||||
token = tokens[1]
|
||||
output_ids[-1] = token
|
||||
else:
|
||||
output_ids.pop()
|
||||
stopped = False
|
||||
sent_interrupt = True
|
||||
|
||||
partially_stopped = False
|
||||
if stop_str:
|
||||
if isinstance(stop_str, str):
|
||||
pos = output.rfind(stop_str, rfind_start)
|
||||
if pos != -1:
|
||||
output = output[:pos]
|
||||
stopped = True
|
||||
else:
|
||||
partially_stopped = is_partial_stop(output, stop_str)
|
||||
elif isinstance(stop_str, Iterable):
|
||||
for each_stop in stop_str:
|
||||
pos = output.rfind(each_stop, rfind_start)
|
||||
if pos != -1:
|
||||
output = output[:pos]
|
||||
stopped = True
|
||||
break
|
||||
else:
|
||||
partially_stopped = is_partial_stop(output, each_stop)
|
||||
if partially_stopped:
|
||||
break
|
||||
else:
|
||||
raise ValueError("Invalid stop field type.")
|
||||
|
||||
# Prevent yielding partial stop sequence
|
||||
if not partially_stopped:
|
||||
yield output
|
||||
# yield {
|
||||
# "text": output,
|
||||
# "usage": {
|
||||
# "prompt_tokens": input_echo_len,
|
||||
# "completion_tokens": i,
|
||||
# "total_tokens": input_echo_len + i,
|
||||
# },
|
||||
# "finish_reason": None,
|
||||
# }
|
||||
|
||||
if stopped:
|
||||
break
|
||||
|
||||
# Finish stream event, which contains finish reason
|
||||
if i == max_new_tokens - 1:
|
||||
finish_reason = "length"
|
||||
elif stopped:
|
||||
finish_reason = "stop"
|
||||
else:
|
||||
finish_reason = None
|
||||
yield output
|
||||
# yield {
|
||||
# "text": output,
|
||||
# "usage": {
|
||||
# "prompt_tokens": input_echo_len,
|
||||
# "completion_tokens": i,
|
||||
# "total_tokens": input_echo_len + i,
|
||||
# },
|
||||
# "finish_reason": finish_reason,
|
||||
# }
|
||||
|
||||
# Clean
|
||||
del past_key_values, out
|
||||
gc.collect()
|
||||
torch.cuda.empty_cache()
|
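A minimal driver sketch (not part of the commit) for the generate_stream function above. The checkpoint name and sampling values are illustrative assumptions; any causal LM loadable with transformers should work the same way.

# Hypothetical usage of pilot.model.inference.generate_stream
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from pilot.model.inference import generate_stream

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "lmsys/vicuna-7b-v1.5"  # example checkpoint, not mandated by the commit
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

params = {
    "prompt": "A user asks: what is a vector database?",
    "temperature": 0.7,
    "max_new_tokens": 128,
    "echo": False,
}
# The generator yields the decoded text repeatedly as it grows; keep the last value.
answer = ""
for answer in generate_stream(model, tokenizer, params, device, context_len=2048):
    pass
print(answer)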
@ -8,6 +8,11 @@ import copy
import torch

from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
from pilot.scene.base_message import ModelMessage, _parse_model_messages

# TODO move sep to scene prompt of model
_CHATGLM_SEP = "\n"
_CHATGLM2_SEP = "\n\n"


@torch.inference_mode()
@ -32,42 +37,20 @@ def chatglm_generate_stream(
    generate_kwargs["temperature"] = temperature

    # TODO, Fix this
    print(prompt)
    messages = prompt.split(stop)
    #
    # # Add history conversation
    hist = [HistoryEntry()]
    system_messages = []
    for message in messages[:-2]:
        if len(message) <= 0:
            continue
        if "human:" in message:
            hist[-1].add_question(message.split("human:")[1])
        elif "system:" in message:
            msg = message.split("system:")[1]
            hist[-1].add_question(msg)
            system_messages.append(msg)
        elif "ai:" in message:
            hist[-1].add_answer(message.split("ai:")[1])
            hist.append(HistoryEntry())
        else:
            # TODO
            # hist[-1].add_question(message.split("system:")[1])
            # once_conversation.append(f"""###system:{message} """)
            pass

    try:
        query = messages[-2].split("human:")[1]
    except IndexError:
        query = messages[-3].split("human:")[1]
    hist = build_history(hist)
    # print(prompt)
    # messages = prompt.split(stop)
    messages: List[ModelMessage] = params["messages"]
    query, system_messages, hist = _parse_model_messages(messages)
    system_messages_str = "".join(system_messages)
    if not hist:
        # No history conversation, but there are system messages: merge them into the user's query
        query = prompt_adaptation(system_messages, query)
        query = prompt_adaptation(system_messages_str, query)
    else:
        # History exists: add the system messages to the head of the history
        hist[0][0] = system_messages_str + _CHATGLM2_SEP + hist[0][0]

    print("Query Message: ", query)
    print("hist: ", hist)
    # output = ""
    # i = 0

    for i, (response, new_hist) in enumerate(
        model.stream_chat(tokenizer, query, hist, **generate_kwargs)
@ -103,10 +86,10 @@ def build_history(hist: List[HistoryEntry]) -> List[List[str]]:
    return list(filter(lambda hl: hl is not None, map(lambda h: h.to_list(), hist)))


def prompt_adaptation(system_messages: List[str], human_message: str) -> str:
    if not system_messages:
def prompt_adaptation(system_messages_str: str, human_message: str) -> str:
    if not system_messages_str or system_messages_str == "":
        return human_message
    system_messages_str = " ".join(system_messages)
    # TODO Multi-model prompt adaptation
    adaptation_rules = [
        r"Question:\s*{}\s*",  # chat_db scene
        r"Goals:\s*{}\s*",  # chat_execution
@ -119,4 +102,4 @@ def prompt_adaptation(system_messages: List[str], human_message: str) -> str:
        if re.search(pattern, system_messages_str):
            return system_messages_str
    # https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
    return f"{system_messages_str}\n\n问:{human_message}\n\n答:"
    return system_messages_str + _CHATGLM2_SEP + human_message
@ -3,8 +3,10 @@

import json
import requests
from typing import List
from pilot.configs.config import Config
from pilot.conversation import ROLE_ASSISTANT, ROLE_USER
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

CFG = Config()

@ -20,36 +22,17 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
        "Token": CFG.proxy_api_key,
    }

    messages = prompt.split(stop)
    messages: List[ModelMessage] = params["messages"]
    # Add history conversation
    for message in messages:
        if len(message) <= 0:
            continue
        if "human:" in message:
            history.append(
                {"role": "user", "content": message.split("human:")[1]},
            )
        elif "system:" in message:
            history.append(
                {
                    "role": "system",
                    "content": message.split("system:")[1],
                }
            )
        elif "ai:" in message:
            history.append(
                {
                    "role": "assistant",
                    "content": message.split("ai:")[1],
                }
            )
        if message.role == ModelMessageRoleType.HUMAN:
            history.append({"role": "user", "content": message.content})
        elif message.role == ModelMessageRoleType.SYSTEM:
            history.append({"role": "system", "content": message.content})
        elif message.role == ModelMessageRoleType.AI:
            history.append({"role": "assistant", "content": message.content})
        else:
            history.append(
                {
                    "role": "system",
                    "content": message,
                }
            )
            pass

    # Move the last user's information to the end
    temp_his = history[::-1]
@ -10,7 +10,6 @@ from typing import List, Optional

from pilot.configs.config import Config
from pilot.model.base import Message
from pilot.server.llmserver import generate_output


def create_chat_completion(
@ -115,3 +114,17 @@ class Iteratorize:

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop_now = True


def is_sentence_complete(output: str):
    """Check whether the output is a complete sentence."""
    end_symbols = (".", "?", "!", "...", "。", "?", "!", "…", '"', "'", "”")
    return output.endswith(end_symbols)


def is_partial_stop(output: str, stop_str: str):
    """Check whether the output contains a partial stop str."""
    for i in range(0, min(len(output), len(stop_str))):
        if stop_str.startswith(output[-i:]):
            return True
    return False
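A quick illustration (not from the commit) of what the two helpers above return; the strings are made up for illustration.

# Hypothetical checks for is_sentence_complete and is_partial_stop.
from pilot.model.llm_utils import is_sentence_complete, is_partial_stop

assert is_sentence_complete("The answer is 42.")              # ends with a sentence terminator
assert not is_sentence_complete("The answer is")              # cut off mid-sentence
assert is_partial_stop("some partial text ###", "### Human:")  # suffix "###" is a prefix of the stop string
assert not is_partial_stop("no overlap here", "### Human:")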
@ -53,8 +53,15 @@ class BaseOutputParser(ABC):

        """ TODO Multi mode output handler, rewrite this for multi model, use adapter mode.
        """
        model_context = data.get("model_context")
        if model_context and "prompt_echo_len_char" in model_context:
            prompt_echo_len_char = int(model_context.get("prompt_echo_len_char", -1))
            if prompt_echo_len_char != -1:
                skip_echo_len = prompt_echo_len_char

        if data.get("error_code", 0) == 0:
            if "vicuna" in CFG.LLM_MODEL:
            if "vicuna" in CFG.LLM_MODEL or "llama-2" in CFG.LLM_MODEL:
                # TODO Judging from model_context
                # output = data["text"][skip_echo_len + 11:].strip()
                output = data["text"][skip_echo_len:].strip()
            elif "guanaco" in CFG.LLM_MODEL:
@ -2,6 +2,7 @@ import time
from abc import ABC, abstractmethod
import datetime
import traceback
import warnings
import json
from pydantic import BaseModel, Field, root_validator, validator, Extra
from typing import (
@ -37,6 +38,8 @@ from pilot.scene.base_message import (
    HumanMessage,
    AIMessage,
    ViewMessage,
    ModelMessage,
    ModelMessageRoleType,
)
from pilot.configs.config import Config

@ -116,6 +119,7 @@ class BaseChat(ABC):
        payload = {
            "model": self.llm_model,
            "prompt": self.generate_llm_text(),
            "messages": self.generate_llm_messages(),
            "temperature": float(self.prompt_template.temperature),
            "max_new_tokens": int(self.prompt_template.max_new_tokens),
            "stop": self.prompt_template.sep,
@ -227,6 +231,7 @@ class BaseChat(ABC):
            return self.nostream_call()

    def generate_llm_text(self) -> str:
        warnings.warn("This method is deprecated - please use `generate_llm_messages`.")
        text = ""
        ### Load scene setting or character definition
        if self.prompt_template.template_define:
@ -244,24 +249,62 @@ class BaseChat(ABC):
        text += self.__load_user_message()
        return text

    def __load_system_message(self):
    def generate_llm_messages(self) -> List[ModelMessage]:
        """
        Structured prompt messages interaction between dbgpt-server and llm-server
        See https://github.com/csunny/DB-GPT/issues/328
        """
        messages = []
        ### Load scene setting or character definition as system message
        if self.prompt_template.template_define:
            messages.append(
                ModelMessage(
                    role=ModelMessageRoleType.SYSTEM,
                    content=self.prompt_template.template_define,
                )
            )
        ### Load prompt
        messages += self.__load_system_message(str_message=False)
        ### Load examples
        messages += self.__load_example_messages(str_message=False)

        ### Load History
        messages += self.__load_histroy_messages(str_message=False)

        ### Load User Input
        messages += self.__load_user_message(str_message=False)
        return messages

    def __load_system_message(self, str_message: bool = True):
        system_convs = self.current_message.get_system_conv()
        system_text = ""
        system_messages = []
        for system_conv in system_convs:
            system_text += (
                system_conv.type + ":" + system_conv.content + self.prompt_template.sep
            )
        return system_text
            system_messages.append(
                ModelMessage(role=system_conv.type, content=system_conv.content)
            )
        return system_text if str_message else system_messages

    def __load_user_message(self):
    def __load_user_message(self, str_message: bool = True):
        user_conv = self.current_message.get_user_conv()
        user_messages = []
        if user_conv:
            return user_conv.type + ":" + user_conv.content + self.prompt_template.sep
            user_text = (
                user_conv.type + ":" + user_conv.content + self.prompt_template.sep
            )
            user_messages.append(
                ModelMessage(role=user_conv.type, content=user_conv.content)
            )
            return user_text if str_message else user_messages
        else:
            raise ValueError("Hi! What do you want to talk about?")

    def __load_example_messages(self):
    def __load_example_messages(self, str_message: bool = True):
        example_text = ""
        example_messages = []
        if self.prompt_template.example_selector:
            for round_conv in self.prompt_template.example_selector.examples():
                for round_message in round_conv["messages"]:
@ -269,16 +312,22 @@ class BaseChat(ABC):
                        SystemMessage.type,
                        ViewMessage.type,
                    ]:
                        message_type = round_message["type"]
                        message_content = round_message["data"]["content"]
                        example_text += (
                            round_message["type"]
                            message_type
                            + ":"
                            + round_message["data"]["content"]
                            + message_content
                            + self.prompt_template.sep
                        )
        return example_text
                        example_messages.append(
                            ModelMessage(role=message_type, content=message_content)
                        )
        return example_text if str_message else example_messages

    def __load_histroy_messages(self):
    def __load_histroy_messages(self, str_message: bool = True):
        history_text = ""
        history_messages = []
        if self.prompt_template.need_historical_messages:
            if self.history_message:
                logger.info(
@ -290,12 +339,17 @@ class BaseChat(ABC):
                        ViewMessage.type,
                        SystemMessage.type,
                    ]:
                        message_type = first_message["type"]
                        message_content = first_message["data"]["content"]
                        history_text += (
                            first_message["type"]
                            message_type
                            + ":"
                            + first_message["data"]["content"]
                            + message_content
                            + self.prompt_template.sep
                        )
                        history_messages.append(
                            ModelMessage(role=message_type, content=message_content)
                        )

                index = self.chat_retention_rounds - 1
                for round_conv in self.history_message[-index:]:
@ -304,12 +358,17 @@ class BaseChat(ABC):
                            SystemMessage.type,
                            ViewMessage.type,
                        ]:
                            message_type = round_message["type"]
                            message_content = round_message["data"]["content"]
                            history_text += (
                                round_message["type"]
                                message_type
                                + ":"
                                + round_message["data"]["content"]
                                + message_content
                                + self.prompt_template.sep
                            )
                            history_messages.append(
                                ModelMessage(role=message_type, content=message_content)
                            )

            else:
                ### use all history
@ -320,14 +379,19 @@ class BaseChat(ABC):
                        SystemMessage.type,
                        ViewMessage.type,
                    ]:
                        message_type = message["type"]
                        message_content = message["data"]["content"]
                        history_text += (
                            message["type"]
                            message_type
                            + ":"
                            + message["data"]["content"]
                            + message_content
                            + self.prompt_template.sep
                        )
                        history_messages.append(
                            ModelMessage(role=message_type, content=message_content)
                        )

        return history_text
        return history_text if str_message else history_messages

    def current_ai_response(self) -> str:
        for message in self.current_message.messages:
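For illustration (not part of the commit), the list produced by generate_llm_messages above has roughly this shape before it is posted to the llm-server; the contents are made up.

# Hypothetical Chat DB round: system prompt first, prior human/ai turns, then the current input.
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

messages = [
    ModelMessage(role=ModelMessageRoleType.SYSTEM, content="You are a SQL assistant."),
    ModelMessage(role=ModelMessageRoleType.HUMAN, content="How many users signed up today?"),
    ModelMessage(role=ModelMessageRoleType.AI, content="SELECT COUNT(*) FROM users WHERE ..."),
    ModelMessage(role=ModelMessageRoleType.HUMAN, content="And yesterday?"),
]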
@ -6,6 +6,7 @@ from typing import (
    Dict,
    Generic,
    List,
    Tuple,
    NamedTuple,
    Optional,
    Sequence,
@ -80,6 +81,22 @@ class SystemMessage(BaseMessage):
        return "system"


class ModelMessage(BaseModel):
    """Type of message used in the interaction between dbgpt-server and llm-server"""

    """Similar to openai's message format"""
    role: str
    content: str


class ModelMessageRoleType:
    """Type of ModelMessage role"""

    SYSTEM = "system"
    HUMAN = "human"
    AI = "ai"


class Generation(BaseModel):
    """Output of a single generation."""

@ -146,3 +163,35 @@ def _message_from_dict(message: dict) -> BaseMessage:

def messages_from_dict(messages: List[dict]) -> List[BaseMessage]:
    return [_message_from_dict(m) for m in messages]


def _parse_model_messages(
    messages: List[ModelMessage],
) -> Tuple[str, List[str], List[List[str]]]:
    """
    Parameters:
        messages: List of messages from the base chat.
    Returns:
        A tuple containing the user prompt, the system message list and the history message list:
        str: user prompt
        List[str]: system messages
        List[List[str]]: history messages of user and assistant
    """
    user_prompt = ""
    system_messages: List[str] = []
    history_messages: List[List[str]] = [[]]

    for message in messages[:-1]:
        if message.role == "human":
            history_messages[-1].append(message.content)
        elif message.role == "system":
            system_messages.append(message.content)
        elif message.role == "ai":
            history_messages[-1].append(message.content)
            history_messages.append([])
    if messages[-1].role != "human":
        raise ValueError("Hi! What do you want to talk about?")
    # Keep only complete [user message, assistant message] pairs
    history_messages = list(filter(lambda x: len(x) == 2, history_messages))
    user_prompt = messages[-1].content
    return user_prompt, system_messages, history_messages
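A small usage sketch (not from the commit) of _parse_model_messages defined above; the message contents are made up.

# Hypothetical conversation parsed into (user prompt, system messages, history pairs).
from pilot.scene.base_message import ModelMessage, _parse_model_messages

messages = [
    ModelMessage(role="system", content="You are a helpful assistant."),
    ModelMessage(role="human", content="Hello"),
    ModelMessage(role="ai", content="Hi, how can I help?"),
    ModelMessage(role="human", content="Summarize this document."),
]
user_prompt, system_messages, history = _parse_model_messages(messages)
# user_prompt     -> "Summarize this document."
# system_messages -> ["You are a helpful assistant."]
# history         -> [["Hello", "Hi, how can I help?"]]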
@ -2,8 +2,10 @@
# -*- coding: utf-8 -*-

from functools import cache
from typing import List
from typing import List, Dict, Tuple
from pilot.model.llm_out.vicuna_base_llm import generate_stream
from pilot.model.conversation import Conversation, get_conv_template
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType


class BaseChatAdpter:
@ -17,6 +19,52 @@ class BaseChatAdpter:
        """Return the generate stream handler func"""
        pass

    def get_conv_template(self) -> Conversation:
        return None

    def model_adaptation(self, params: Dict) -> Tuple[Dict, Dict]:
        """Params adaptation"""
        conv = self.get_conv_template()
        messages = params.get("messages")
        # Some model context to return to the dbgpt server
        model_context = {"prompt_echo_len_char": -1}
        if not conv or not messages:
            # Nothing to do
            return params, model_context
        conv = conv.copy()
        system_messages = []
        for message in messages:
            role, content = None, None
            if isinstance(message, ModelMessage):
                role = message.role
                content = message.content
            elif isinstance(message, dict):
                role = message["role"]
                content = message["content"]
            else:
                raise ValueError(f"Invalid message type: {message}")

            if role == ModelMessageRoleType.SYSTEM:
                # Support for multiple system messages
                system_messages.append(content)
            elif role == ModelMessageRoleType.HUMAN:
                conv.append_message(conv.roles[0], content)
            elif role == ModelMessageRoleType.AI:
                conv.append_message(conv.roles[1], content)
            else:
                raise ValueError(f"Unknown role: {role}")
        if system_messages:
            conv.update_system_message("".join(system_messages))
        # Add a blank message for the assistant.
        conv.append_message(conv.roles[1], None)
        new_prompt = conv.get_prompt()
        # Overwrite the original prompt
        # TODO remove bos token and eos token from tokenizer_config.json of model
        prompt_echo_len_char = len(new_prompt.replace("</s>", "").replace("<s>", ""))
        model_context["prompt_echo_len_char"] = prompt_echo_len_char
        params["prompt"] = new_prompt
        return params, model_context


llm_model_chat_adapters: List[BaseChatAdpter] = []

@ -134,12 +182,26 @@ class GPT4AllChatAdapter(BaseChatAdpter):
        return gpt4all_generate_stream


class Llama2ChatAdapter(BaseChatAdpter):
    def match(self, model_path: str):
        return "llama-2" in model_path.lower()

    def get_conv_template(self) -> Conversation:
        return get_conv_template("llama-2")

    def get_generate_stream_func(self):
        from pilot.model.inference import generate_stream

        return generate_stream


register_llm_model_chat_adapter(VicunaChatAdapter)
register_llm_model_chat_adapter(ChatGLMChatAdapter)
register_llm_model_chat_adapter(GuanacoChatAdapter)
register_llm_model_chat_adapter(FalconChatAdapter)
register_llm_model_chat_adapter(GorillaChatAdapter)
register_llm_model_chat_adapter(GPT4AllChatAdapter)
register_llm_model_chat_adapter(Llama2ChatAdapter)

# Proxy model for testing and development; it's cheap for us now.
register_llm_model_chat_adapter(ProxyllmChatAdapter)
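A sketch (not part of the commit) of how the new model_adaptation hook turns structured messages into a single prompt, using the Llama2ChatAdapter registered above; the message contents are made up.

# Hypothetical adaptation of structured messages into a llama-2 prompt string.
from pilot.server.chat_adapter import Llama2ChatAdapter
from pilot.scene.base_message import ModelMessage, ModelMessageRoleType

adapter = Llama2ChatAdapter()
params = {
    "messages": [
        ModelMessage(role=ModelMessageRoleType.SYSTEM, content="Answer concisely."),
        ModelMessage(role=ModelMessageRoleType.HUMAN, content="What is DB-GPT?"),
    ],
}
params, model_context = adapter.model_adaptation(params)
# params["prompt"] now holds the rendered "<s>[INST] <<SYS>> ... [/INST]" string, and
# model_context["prompt_echo_len_char"] its length with the <s>/</s> markers stripped.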
@ -5,6 +5,7 @@ import asyncio
import json
import os
import sys
from typing import List

import uvicorn
from fastapi import BackgroundTasks, FastAPI, Request
@ -24,6 +25,7 @@ from pilot.configs.model_config import *
from pilot.model.llm_out.vicuna_base_llm import get_embeddings
from pilot.model.loader import ModelLoader
from pilot.server.chat_adapter import get_llm_chat_adapter
from pilot.scene.base_message import ModelMessage

CFG = Config()

@ -75,6 +77,8 @@ class ModelWorker:

    def generate_stream_gate(self, params):
        try:
            # params adaptation
            params, model_context = self.llm_chat_adapter.model_adaptation(params)
            for output in self.generate_stream_func(
                self.model, self.tokenizer, params, DEVICE, CFG.MAX_POSITION_EMBEDDINGS
            ):
@ -82,10 +86,8 @@ class ModelWorker:
                # The gpt4all thread shares stdout with the parent process,
                # and opening it may affect the frontend output.
                print("output: ", output)
                ret = {
                    "text": output,
                    "error_code": 0,
                }
                # return some model context to the dbgpt-server
                ret = {"text": output, "error_code": 0, "model_context": model_context}
                yield json.dumps(ret).encode() + b"\0"

        except torch.cuda.CudaError:
@ -128,6 +130,7 @@ app = FastAPI()


class PromptRequest(BaseModel):
    messages: List[ModelMessage]
    prompt: str
    temperature: float
    max_new_tokens: int
@ -170,6 +173,7 @@ async def api_generate_stream(request: Request):
@app.post("/generate")
def generate(prompt_request: PromptRequest) -> str:
    params = {
        "messages": prompt_request.messages,
        "prompt": prompt_request.prompt,
        "temperature": prompt_request.temperature,
        "max_new_tokens": prompt_request.max_new_tokens,
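For illustration (not part of the commit), a call to the new /generate endpoint could look like the snippet below; the host, port, and any request fields beyond those visible in the truncated hunk are assumptions.

# Hypothetical client request to the llm-server's /generate endpoint.
import requests

payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "human", "content": "Hello, who are you?"},
    ],
    "prompt": "",  # kept for compatibility; the chat adapter rebuilds it from messages
    "temperature": 0.7,
    "max_new_tokens": 256,
}
# Assumes the llm-server is listening locally on its configured port.
response = requests.post("http://127.0.0.1:8000/generate", json=payload)
print(response.text)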