mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-13 22:15:35 +00:00
Merge branch 'dbgpt_doc' into feature-xuyuan-openai-proxy
This commit is contained in:
commit
677bc30e18
Binary file not shown.
Before Width: | Height: | Size: 255 KiB After Width: | Height: | Size: 142 KiB |
@ -14,7 +14,7 @@ project = "DB-GPT"
|
||||
copyright = "2023, csunny"
|
||||
author = "csunny"
|
||||
|
||||
version = "0.1.0"
|
||||
version = "👏👏 0.2.2"
|
||||
html_title = project + " " + version
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
@ -5,21 +5,21 @@ This is a collection of DB-GPT tutorials on Medium.
|
||||
|
||||
DB-GPT is divided into several functions, including chat with knowledge base, execute SQL, chat with database, and execute plugins.
|
||||
|
||||
###Introduce
|
||||
[What is DB-GPT](https://www.youtube.com/watch?v=QszhVJerc0I) by csunny (https://github.com/csunny/DB-GPT):
|
||||
### Introduce
|
||||
[What is DB-GPT](https://www.youtube.com/watch?v=QszhVJerc0I)
|
||||
|
||||
### Knowledge
|
||||
|
||||
[How to Create your own knowledge repository](https://db-gpt.readthedocs.io/en/latest/modules/knownledge.html)
|
||||
[How to Create your own knowledge repository](https://db-gpt.readthedocs.io/en/latest/modules/knowledge.html)
|
||||
|
||||
[Add new Knowledge demonstration](../../assets/new_knownledge_en.gif)
|
||||
[Add new Knowledge demonstration](https://github.com/csunny/DB-GPT/blob/main/assets/new_knownledge_en.gif)
|
||||
|
||||
### SQL Generation
|
||||
[sql generation demonstration](../../assets/demo_en.gif)
|
||||
[sql generation demonstration](https://github.com/csunny/DB-GPT/blob/main/assets/demo_en.gif)
|
||||
|
||||
### SQL Execute
|
||||
[sql execute demonstration](../../assets/auto_sql_en.gif)
|
||||
[sql execute demonstration](https://github.com/csunny/DB-GPT/blob/main/assets/auto_sql_en.gif)
|
||||
|
||||
|
||||
### Plugins
|
||||
[db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)
|
||||
[db plugins demonstration](https://github.com/csunny/DB-GPT/blob/main/assets/auto_plugin.gif)
|
@ -68,7 +68,7 @@ It's very important for DB-GPT, DB-GPT also provide standard, extendable interfa
|
||||
|
||||
- `Plugins <./modules/plugins.html>`_: Plugins management, scheduler.
|
||||
|
||||
- `Knownledge <./modules/knownledge.html>`_: Knownledge management, embedding, and search.
|
||||
- `Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding, and search.
|
||||
|
||||
- `Connections <./modules/connections.html>`_: Supported multi databases connection. management connections and interact with this.
|
||||
|
||||
@ -81,8 +81,8 @@ It's very important for DB-GPT, DB-GPT also provide standard, extendable interfa
|
||||
./modules/llms.md
|
||||
./modules/prompts.md
|
||||
./modules/plugins.md
|
||||
./modules/connections.md
|
||||
./modules/knownledge.md
|
||||
./modules/connections.rst
|
||||
./modules/knowledge.rst
|
||||
|
||||
Use Cases
|
||||
---------
|
||||
|
@ -8,7 +8,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-13 18:04+0800\n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
@ -19,72 +19,72 @@ msgstr ""
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../getting_started/tutorials.md:1 7011a2ab0e7f45ddb1fa85b6479cc442
|
||||
#: ../../getting_started/tutorials.md:1 f80ad98cf2d444a3a159655b37ae4d4f
|
||||
msgid "Tutorials"
|
||||
msgstr "教程"
|
||||
|
||||
#: ../../getting_started/tutorials.md:4 960f88b9c1b64940bfa0576bab5b0314
|
||||
#: ../../getting_started/tutorials.md:4 ff8771c31b024e538dbbaaeaccec8aa7
|
||||
msgid "This is a collection of DB-GPT tutorials on Medium."
|
||||
msgstr "这是知乎上DB-GPT教程的集合。"
|
||||
|
||||
#: ../../getting_started/tutorials.md:6 1c8db33581ea4928905e029a98b9a155
|
||||
#: ../../getting_started/tutorials.md:6 0c1c544a5d184375aabd7fe6918042e2
|
||||
msgid ""
|
||||
"DB-GPT is divided into several functions, including chat with knowledge "
|
||||
"base, execute SQL, chat with database, and execute plugins."
|
||||
msgstr ""
|
||||
|
||||
#: ../../getting_started/tutorials.md:8 3915395cc45742519bf0c607eeafc489
|
||||
#: ../../getting_started/tutorials.md:8 b5e106a97bbd4467b071bb05d837d7d9
|
||||
msgid "Introduce"
|
||||
msgstr ""
|
||||
|
||||
#: ../../getting_started/tutorials.md:9 9df43436183d4b41a75a09fec15743d7
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"###Introduce [What is DB-"
|
||||
"GPT](https://www.youtube.com/watch?v=QszhVJerc0I) by csunny "
|
||||
"(https://github.com/csunny/DB-GPT):"
|
||||
msgid "[What is DB-GPT](https://www.youtube.com/watch?v=QszhVJerc0I)"
|
||||
msgstr ""
|
||||
"###Introduce [什么是DB-"
|
||||
"GPT](https://www.bilibili.com/video/BV1SM4y1a7Nj/?buvid=551b023900b290f9497610b2155a2668&is_story_h5=false&mid=%2BVyE%2Fwau5woPcUKieCWS0A%3D%3D&p=1&plat_id=116&share_from=ugc&share_medium=iphone&share_plat=ios&share_session_id=5D08B533-82A4-4D40-9615-7826065B4574&share_source=GENERIC&share_tag=s_i&timestamp=1686307943&unique_k=bhO3lgQ&up_id=31375446)"
|
||||
" by csunny (https://github.com/csunny/DB-GPT)"
|
||||
|
||||
#: ../../getting_started/tutorials.md:11 e213736923574b2cb039a457d789c27c
|
||||
#: ../../getting_started/tutorials.md:11 3096811f862649bf84ff3cd29cdf14db
|
||||
msgid "Knowledge"
|
||||
msgstr "知识库"
|
||||
|
||||
#: ../../getting_started/tutorials.md:13 90b5472735a644168d51c054ed882748
|
||||
#: ../../getting_started/tutorials.md:13 ea00f3de8c754bf2950e735a2f14043a
|
||||
msgid ""
|
||||
"[How to Create your own knowledge repository](https://db-"
|
||||
"gpt.readthedocs.io/en/latest/modules/knownledge.html)"
|
||||
"gpt.readthedocs.io/en/latest/modules/knowledge.html)"
|
||||
msgstr ""
|
||||
"[怎么创建自己的知识库](https://db-"
|
||||
"gpt.readthedocs.io/en/latest/modules/knownledge.html)"
|
||||
"gpt.readthedocs.io/en/latest/modules/knowledge.html)"
|
||||
|
||||
#: ../../getting_started/tutorials.md:15 6a851e1e88ea4bcbaf7ee742a12224ef
|
||||
#: ../../getting_started/tutorials.md:15 07195f11314945989eeeb9400c8a9b43
|
||||
msgid "[Add new Knowledge demonstration](../../assets/new_knownledge_en.gif)"
|
||||
msgstr "[新增知识库演示](../../assets/new_knownledge_en.gif)"
|
||||
|
||||
#: ../../getting_started/tutorials.md:17 59887be89d8046e28956f909fcbbc9dc
|
||||
#: ../../getting_started/tutorials.md:17 5245cd247a184f63a10f735f414f303f
|
||||
msgid "SQL Generation"
|
||||
msgstr ""
|
||||
|
||||
#: ../../getting_started/tutorials.md:18 ee5decd8441d40ae8a240a19c1a5a74a
|
||||
#: ../../getting_started/tutorials.md:18 38077ab510264112b6156c27b8880967
|
||||
#, fuzzy
|
||||
msgid "[sql generation demonstration](../../assets/demo_en.gif)"
|
||||
msgstr "[sql生成演示](../../assets/demo_en.gif)"
|
||||
|
||||
#: ../../getting_started/tutorials.md:20 5d25c5d307c24c9198f2b52e70f2421c
|
||||
#: ../../getting_started/tutorials.md:20 c0a6f9fefbb9404695fe3bffb6ecc577
|
||||
msgid "SQL Execute"
|
||||
msgstr "SQL执行"
|
||||
|
||||
#: ../../getting_started/tutorials.md:21 ee5decd8441d40ae8a240a19c1a5a74a
|
||||
#: ../../getting_started/tutorials.md:21 39fe94853f9c4165b40812c57171a6f4
|
||||
#, fuzzy
|
||||
msgid "[sql execute demonstration](../../assets/auto_sql_en.gif)"
|
||||
msgstr "[sql execute 演示](../../assets/auto_sql_en.gif)"
|
||||
|
||||
|
||||
#: ../../getting_started/tutorials.md:26 4487ef393e004e7c936f5104727212a4
|
||||
#: ../../getting_started/tutorials.md:24 0fd9770dbf3c49b0b644599dc70187a7
|
||||
#, fuzzy
|
||||
msgid "Plugins"
|
||||
msgstr "DB Plugins"
|
||||
|
||||
#: ../../getting_started/tutorials.md:27 ee5decd8441d40ae8a240a19c1a5a74a
|
||||
#: ../../getting_started/tutorials.md:25 fc9830406c39473ab32df00a33340385
|
||||
#, fuzzy
|
||||
msgid "[db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)"
|
||||
msgstr "[db plugins 演示](../../assets/dbgpt_bytebase_plugin.gif)"
|
||||
|
@ -8,7 +8,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-11 14:10+0800\n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
@ -17,33 +17,33 @@ msgstr ""
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.11.0\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../index.rst:34 ../../index.rst:45 e3275f133efd471582d952301a6e243e
|
||||
#: ../../index.rst:34 ../../index.rst:45 558ad9c9fc2240589fabce35463ca24c
|
||||
msgid "Getting Started"
|
||||
msgstr "开始"
|
||||
|
||||
#: ../../index.rst:56 ../../index.rst:75 86e2ce002e604304a4032aa1555b36cb
|
||||
#: ../../index.rst:56 ../../index.rst:75 abd248b06ac142a68a89e0fb7e477536
|
||||
msgid "Modules"
|
||||
msgstr "模块"
|
||||
|
||||
#: ../../index.rst:88 ../../index.rst:104 b15c23cfcc084df9a8f8f9990e6903ac
|
||||
#: ../../index.rst:88 ../../index.rst:104 e046db82aa5845598b373ca1b9a73ec2
|
||||
msgid "Use Cases"
|
||||
msgstr "示例"
|
||||
|
||||
#: ../../index.rst:118 ../../index.rst:121 70605b76fe5348299dd5d48d8ab6a77c
|
||||
#: ../../index.rst:118 ../../index.rst:121 e6fbf08bf59b48afb4ac3f479d88256b
|
||||
msgid "Reference"
|
||||
msgstr "参考"
|
||||
|
||||
#: ../../index.rst:145 ../../index.rst:151 f62cf565fab64977b0efbd50e83540cc
|
||||
#: ../../index.rst:145 ../../index.rst:151 03496f75357d4b4d9fd721516faa5e54
|
||||
msgid "Resources"
|
||||
msgstr "资源"
|
||||
|
||||
#: ../../index.rst:7 c8b3a0ca759f432095161f7baccde1c4
|
||||
#: ../../index.rst:7 e0764d3d816244b4bf047032c1a28760
|
||||
msgid "Welcome to DB-GPT!"
|
||||
msgstr "欢迎来到DB-GPT中文文档"
|
||||
|
||||
#: ../../index.rst:8 0167fea2c4df4181bc10d6e71527d005
|
||||
#: ../../index.rst:8 a3500e3f721348ce859c7c774b59b41a
|
||||
msgid ""
|
||||
"As large models are released and iterated upon, they are becoming "
|
||||
"increasingly intelligent. However, in the process of using large models, "
|
||||
@ -56,217 +56,207 @@ msgid ""
|
||||
"independent private environments but also to be independently deployed "
|
||||
"and isolated according to business modules, ensuring that the ability of "
|
||||
"large models is absolutely private, secure, and controllable."
|
||||
msgstr "随着大型模型的发布和迭代,它们变得越来越智能。然而,在使用大型模型的过程中,"
|
||||
"我们在数据安全和隐私方面面临着重大挑战。我们需要确保我们的敏感数据和环境得到完全控制,"
|
||||
"避免任何数据隐私泄露或安全风险。基于此,我们启动了DB-GPT项目,为所有基于数据库的"
|
||||
"场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署,既可应用于“独立私"
|
||||
"有环境”,又可根据业务模块进行“独立部署”和“隔离”,确保“大模型”的能力绝对"
|
||||
"私有、安全、可控。"
|
||||
msgstr ""
|
||||
"随着大型模型的发布和迭代,它们变得越来越智能。然而,在使用大型模型的过程中,我们在数据安全和隐私方面面临着重大挑战。我们需要确保我们的敏感数据和环境得到完全控制,避免任何数据隐私泄露或安全风险。基于此"
|
||||
",我们启动了DB-"
|
||||
"GPT项目,为所有基于数据库的场景构建一个完整的私有大模型解决方案。该方案“”支持本地部署,既可应用于“独立私有环境”,又可根据业务模块进行“独立部署”和“隔离”,确保“大模型”的能力绝对私有、安全、可控。"
|
||||
|
||||
#: ../../index.rst:10 36b847a04d624286a4942cd77821da8c
|
||||
#: ../../index.rst:10 a8d66309ae5244d88b3c599a5ff97137
|
||||
msgid ""
|
||||
"**DB-GPT** is an experimental open-source project that uses localized GPT"
|
||||
" large models to interact with your data and environment. With this "
|
||||
"solution, you can be assured that there is no risk of data leakage, and "
|
||||
"your data is 100% private and secure."
|
||||
msgstr "DB-GPT 是一个开源的以数据库为基础的GPT实验项目,使用本地化的"
|
||||
"GPT大模型与您的数据和环境进行交互,无数据泄露风险"
|
||||
"100% 私密,100% 安全。"
|
||||
msgstr ""
|
||||
"DB-GPT 是一个开源的以数据库为基础的GPT实验项目,使用本地化的GPT大模型与您的数据和环境进行交互,无数据泄露风险,100% 私密,100%"
|
||||
" 安全。"
|
||||
|
||||
#: ../../index.rst:12 d20166d203934385b811740f4d5eda33
|
||||
#: ../../index.rst:12 806ae939ad9349ccb4375a236cbaf418
|
||||
msgid "**Features**"
|
||||
msgstr "特性"
|
||||
|
||||
#: ../../index.rst:13 03f9de47513b4bc9a26f31e1d2d8ad60
|
||||
#: ../../index.rst:13 abd6265d37a64b00939e9c0a78f11a5e
|
||||
msgid ""
|
||||
"Currently, we have released multiple key features, which are listed below"
|
||||
" to demonstrate our current capabilities:"
|
||||
msgstr "目前我们已经发布了多种关键的特性,这里一一列举展示一下当前发布的能力。"
|
||||
|
||||
#: ../../index.rst:15 abc51c99bc6e49d5b0105c7d95e391da
|
||||
#: ../../index.rst:15 6484ed0d92654283a7cbee6cb6b54821
|
||||
msgid "SQL language capabilities - SQL generation - SQL diagnosis"
|
||||
msgstr "SQL语言能力 - SQL生成 - SQL诊断"
|
||||
|
||||
#: ../../index.rst:19 e9ba27f21fd84ecf973640fa021b06b6
|
||||
#: ../../index.rst:19 0d7ef0cf6ec649e5a9c7e076bc30ca1a
|
||||
msgid ""
|
||||
"Private domain Q&A and data processing - Database knowledge Q&A - Data "
|
||||
"processing"
|
||||
msgstr "私有领域问答与数据处理 - 数据库知识问答 - 数据处理"
|
||||
|
||||
#: ../../index.rst:23 a4584012b6634553abef5a4ee6ddf509
|
||||
#: ../../index.rst:23 83d1fc0f88cc42bca5206c4b9915ce65
|
||||
msgid ""
|
||||
"Plugins - Support custom plugin execution tasks and natively support the "
|
||||
"Auto-GPT plugin, such as:"
|
||||
msgstr "插件模型 - 支持自定义插件执行任务,并原生支持Auto-GPT插件,例如:"
|
||||
"* SQL自动执行,获取查询结果 * 自动爬取学习知识"
|
||||
msgstr "插件模型 - 支持自定义插件执行任务,并原生支持Auto-GPT插件,例如:* SQL自动执行,获取查询结果 * 自动爬取学习知识"
|
||||
|
||||
#: ../../index.rst:26 b08674d7a7da4405b9388e296bc2cd57
|
||||
#: ../../index.rst:26 7790cb466b1d455298d5eaa7582fc5ed
|
||||
msgid ""
|
||||
"Unified vector storage/indexing of knowledge base - Support for "
|
||||
"unstructured data such as PDF, Markdown, CSV, and WebURL"
|
||||
msgstr "知识库统一向量存储/索引 - 非结构化数据支持包括PDF、MarkDown、CSV、WebURL"
|
||||
|
||||
#: ../../index.rst:29 cf4bc81d46b4418b81a78242cbc7f984
|
||||
#: ../../index.rst:29 7354784dc4ba494e82f5d3acaac7730b
|
||||
msgid ""
|
||||
"Milti LLMs Support - Supports multiple large language models, currently "
|
||||
"supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8) - TODO: codegen2, "
|
||||
"codet5p"
|
||||
msgstr "多模型支持 - 支持多种大语言模型, 当前已支持Vicuna(7b,13b), ChatGLM-6b(int4, int8)"
|
||||
"Guanaco, Gorilla, Falcon等系列模型"
|
||||
|
||||
#: ../../index.rst:35 681ae172eea64b718e0f6fc734d041b1
|
||||
#: ../../index.rst:35 8598e71986834a24aa390603b84288d1
|
||||
msgid ""
|
||||
"How to get started using DB-GPT to interact with your data and "
|
||||
"environment."
|
||||
msgstr "开始使用DB-GPT与您的数据环境进行交互。"
|
||||
|
||||
#: ../../index.rst:36 87f507e0c27a4a38ba2a5c19e804549f
|
||||
#: ../../index.rst:36 8685d294df2040d294523c068969a966
|
||||
msgid "`Quickstart Guid <./getting_started/getting_started.html>`_"
|
||||
msgstr "`使用指南 <./getting_started/getting_started.html>`_"
|
||||
|
||||
#: ../../index.rst:38 ab35a5cd96c548ecb0c285fd822f652a
|
||||
#: ../../index.rst:38 bd33c376f4d54a24957cdbd86ac969f1
|
||||
msgid "Concepts and terminology"
|
||||
msgstr "相关概念"
|
||||
|
||||
#: ../../index.rst:40 3fbd5c96df084ef889442a0b89ad6c05
|
||||
#: ../../index.rst:40 2e9ac1f015bf4d37b0b76de94a831f5b
|
||||
msgid "`Concepts and terminology <./getting_started/concepts.html>`_"
|
||||
msgstr "`相关概念 <./getting_started/concepts.html>`_"
|
||||
|
||||
#: ../../index.rst:42 6d9a0d727ce14edfbdcf678c6fbba76b
|
||||
#: ../../index.rst:42 d641ad48ed334db983d8906948b4b430
|
||||
msgid "Coming soon..."
|
||||
msgstr "未完待续。。。"
|
||||
msgstr ""
|
||||
|
||||
#: ../../index.rst:44 58cdc41dce264a3e83de565501298010
|
||||
#: ../../index.rst:44 7b94cd856f154fbe9580d8595e9afe6a
|
||||
msgid "`Tutorials <.getting_started/tutorials.html>`_"
|
||||
msgstr "`教程 <./getting_started/tutorials.html>`_"
|
||||
|
||||
#: ../../index.rst:58 20d67b324c23468e8f2cac6d9100b9f5
|
||||
#: ../../index.rst:58 eaf73bfa0d484e79b90d4fa1d82d4cf6
|
||||
msgid ""
|
||||
"These modules are the core abstractions with which we can interact with "
|
||||
"data and environment smoothly."
|
||||
msgstr "这些模块是我们可以与数据和环境顺利地进行交互的核心组成。"
|
||||
|
||||
|
||||
#: ../../index.rst:59 45a14052370f4860a72d8e831269d184
|
||||
#: ../../index.rst:59 4ccf7a955ed241b4ad7b80bef0a3ad59
|
||||
msgid ""
|
||||
"It's very important for DB-GPT, DB-GPT also provide standard, extendable "
|
||||
"interfaces."
|
||||
msgstr "DB-GPT还提供了标准的、可扩展的接口。"
|
||||
|
||||
#: ../../index.rst:61 7c78c2ddc4104a8b9688472072c3225c
|
||||
#: ../../index.rst:61 93a217c7147f47e6a3917784313c2eb3
|
||||
msgid ""
|
||||
"The docs for each module contain quickstart examples, how to guides, "
|
||||
"reference docs, and conceptual guides."
|
||||
msgstr "每个模块的文档都包含快速入门的例子、操作指南、参考文档和相关概念等内容。"
|
||||
|
||||
#: ../../index.rst:63 4bcc203282434ca9b77d20c4115a646a
|
||||
#: ../../index.rst:63 1876f94cc11e44d29b4391011c175fd6
|
||||
msgid "The modules are as follows"
|
||||
msgstr "组成模块如下:"
|
||||
|
||||
#: ../../index.rst:65 c87f13e106b5443a824df5ca85331df4
|
||||
#: ../../index.rst:65 c5c0bc53b69448389bf94c5b3b0230a1
|
||||
msgid ""
|
||||
"`LLMs <./modules/llms.html>`_: Supported multi models management and "
|
||||
"integrations."
|
||||
msgstr "`LLMs <./modules/llms.html>`_:基于FastChat提供大模型的运行环境。支持多模型管理和集成。 "
|
||||
|
||||
#: ../../index.rst:67 3447e10b61804b48a786ee12beaaedfd
|
||||
#: ../../index.rst:67 b382b320fdd746e1940b67c0b6ff3d7d
|
||||
msgid ""
|
||||
"`Prompts <./modules/prompts.html>`_: Prompt management, optimization, and"
|
||||
" serialization for multi database."
|
||||
msgstr "`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt"
|
||||
" ,并进行优化,提高系统的响应效率"
|
||||
msgstr ""
|
||||
"`Prompt自动生成与优化 <./modules/prompts.html>`_: 自动化生成高质量的Prompt "
|
||||
",并进行优化,提高系统的响应效率"
|
||||
|
||||
#: ../../index.rst:69 a3182673127141888fdc13560e7dcfb3
|
||||
#: ../../index.rst:69 a2d90f213158420cad5a8d65e8484bbd
|
||||
msgid "`Plugins <./modules/plugins.html>`_: Plugins management, scheduler."
|
||||
msgstr "`Agent与插件: <./modules/plugins.html>`_:提供Agent和插件机制,使得用户可以自定义并增强系统的行为。"
|
||||
|
||||
#: ../../index.rst:71 66abfffcb9c0466f9a3988ecfb19fc9e
|
||||
#: ../../index.rst:71 a39a7a70dda94414b0625a897119795e
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"`Knownledge <./modules/knownledge.html>`_: Knownledge management, "
|
||||
"embedding, and search."
|
||||
msgstr "`知识库能力: <./modules/knownledge.html>`_: 支持私域知识库问答能力, "
|
||||
"`Knowledge <./modules/knowledge.html>`_: Knowledge management, embedding,"
|
||||
" and search."
|
||||
msgstr "`知识库能力: <./modules/knowledge.html>`_: 支持私域知识库问答能力, "
|
||||
|
||||
#: ../../index.rst:73 1027a33646614790a4d88f29285ab0fd
|
||||
#: ../../index.rst:73 8c93459d2deb4085addb33628e5a6fde
|
||||
msgid ""
|
||||
"`Connections <./modules/connections.html>`_: Supported multi databases "
|
||||
"connection. management connections and interact with this."
|
||||
msgstr "`连接模块 <./modules/connections.html>`_: 用于连接不同的模块和数据源,实现数据的流转和交互 "
|
||||
|
||||
|
||||
#: ../../index.rst:90 53b58e6e531841878fbc8616841d5e9e
|
||||
#: ../../index.rst:90 707311d87e294078a093c092dd9e61c8
|
||||
msgid "Best Practices and built-in implementations for common DB-GPT use cases:"
|
||||
msgstr "DB-GPT用例的最佳实践和内置方法:"
|
||||
|
||||
#: ../../index.rst:92 a5c664233fe04417ba9bb0415fd686d7
|
||||
#: ../../index.rst:92 71c2be99432b413d928e5b18cbf3abd1
|
||||
msgid ""
|
||||
"`Sql generation and diagnosis "
|
||||
"<./use_cases/sql_generation_and_diagnosis.html>`_: SQL generation and "
|
||||
"diagnosis."
|
||||
msgstr "`Sql生成和诊断 "
|
||||
"<./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。"
|
||||
msgstr "`Sql生成和诊断 <./use_cases/sql_generation_and_diagnosis.html>`_: Sql生成和诊断。"
|
||||
|
||||
|
||||
#: ../../index.rst:94 04c63b56e77b45e5b4e7bd1db45ea10f
|
||||
#: ../../index.rst:94 4101e1e6f9354c53addca1e3d07b0234
|
||||
msgid ""
|
||||
"`knownledge Based QA <./use_cases/knownledge_based_qa.html>`_: A "
|
||||
"important scene for user to chat with database documents, codes, bugs and"
|
||||
" schemas."
|
||||
msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: "
|
||||
"用户与数据库文档、代码和bug聊天的重要场景"。
|
||||
msgstr "`知识库问答 <./use_cases/knownledge_based_qa.html>`_: 用户与数据库文档、代码和bug聊天的重要场景。"
|
||||
|
||||
#: ../../index.rst:96 415e2b9f640341a084f893781e2b3ec0
|
||||
#: ../../index.rst:96 87ae536fb701462991897494606d4b15
|
||||
msgid ""
|
||||
"`Chatbots <./use_cases/chatbots.html>`_: Language model love to chat, use"
|
||||
" multi models to chat."
|
||||
msgstr "`聊天机器人 <./use_cases/chatbots.html>`_: 使用多模型进行对话"
|
||||
|
||||
#: ../../index.rst:98 59a7ec39d2034fb794a9272d55607122
|
||||
#: ../../index.rst:98 bf65b6a3036f4984b2641e3c621d764c
|
||||
msgid ""
|
||||
"`Querying Database Data <./use_cases/query_database_data.html>`_: Query "
|
||||
"and Analysis data from databases and give charts."
|
||||
msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:"
|
||||
"从数据库中查询和分析数据并给出图表。"
|
||||
msgstr "`查询数据库数据 <./use_cases/query_database_data.html>`_:从数据库中查询和分析数据并给出图表。"
|
||||
|
||||
#: ../../index.rst:100 3bd098eda9044bd39e4bba28a82f4195
|
||||
#: ../../index.rst:100 c854edf150624005bd5f62f2e3e0789b
|
||||
msgid ""
|
||||
"`Interacting with apis <./use_cases/interacting_with_api.html>`_: "
|
||||
"Interact with apis, such as create a table, deploy a database cluster, "
|
||||
"create a database and so on."
|
||||
msgstr "`API交互 <./use_cases/interacting_with_api.html>`_: "
|
||||
msgstr ""
|
||||
"`API交互 <./use_cases/interacting_with_api.html>`_: "
|
||||
"与API交互,例如创建表、部署数据库集群、创建数据库等。"
|
||||
|
||||
|
||||
#: ../../index.rst:102 66daab899d7b4e528eda70779ab79676
|
||||
#: ../../index.rst:102 55ed3ed4ec7a45458f6686c437d65e41
|
||||
msgid ""
|
||||
"`Tool use with plugins <./use_cases/tool_use_with_plugin>`_: According to"
|
||||
" Plugin use tools to manage databases autonomoly."
|
||||
msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_:"
|
||||
" 根据插件使用工具自主管理数据库。"
|
||||
msgstr "`插件工具 <./use_cases/tool_use_with_plugin>`_: 根据插件使用工具自主管理数据库。"
|
||||
|
||||
#: ../../index.rst:119 e5a84e2dc87d4a06aa77ef4d77fb7bcb
|
||||
#: ../../index.rst:119 e3b12d97d32c4c198a85e052b2a365c7
|
||||
msgid ""
|
||||
"Full documentation on all methods, classes, installation methods, and "
|
||||
"integration setups for DB-GPT."
|
||||
msgstr "关于DB-GPT的所有方法、类、安装方法和集成设置的完整文档。"
|
||||
|
||||
#: ../../index.rst:130 7c51e39ad3824c5f8575390adbcba738
|
||||
#: ../../index.rst:130 1b82c35990b84b6bba05fd565ba6c32f
|
||||
msgid "Ecosystem"
|
||||
msgstr "生态系统"
|
||||
|
||||
#: ../../index.rst:132 b59e9ddba86945c1bebe395b2863174c
|
||||
#: ../../index.rst:132 ef97b07c83e7469bba5a8bc36e1ed83f
|
||||
msgid "Guides for how other companies/products can be used with DB-GPT"
|
||||
msgstr "其他公司/产品如何与DB-GPT一起使用的方法指南"
|
||||
|
||||
#: ../../index.rst:147 992bf68cc48a425696c02429d39f86e3
|
||||
#: ../../index.rst:147 e597de3a7df7460d985c1a30589ce3bb
|
||||
msgid ""
|
||||
"Additional resources we think may be useful as you develop your "
|
||||
"application!"
|
||||
msgstr "我们认为在您开发应用程序时可能有用的其他资源!"
|
||||
|
||||
#: ../../index.rst:149 d99277006b05438c8d2e8088242f239c
|
||||
#: ../../index.rst:149 1438b93c57e6459799c5656893c15aa7
|
||||
msgid ""
|
||||
"`Discord <https://discord.com/invite/twmZk3vv>`_: if your have some "
|
||||
"problem or ideas, you can talk from discord."
|
||||
msgstr "`Discord <https://discord.com/invite/twmZk3vv>`_:"
|
||||
"如果您有任何问题,可以到discord中进行交流。"
|
||||
msgstr "`Discord <https://discord.com/invite/twmZk3vv>`_:如果您有任何问题,可以到discord中进行交流。"
|
||||
|
||||
|
@ -8,7 +8,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-11 14:10+0800\n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
@ -17,18 +17,33 @@ msgstr ""
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.11.0\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/connections.md:1 21de23e95a6c4405a242fb9a0f4e5f2b
|
||||
#: ../../modules/connections.rst:2 ../../modules/connections.rst:10
|
||||
#: c9783fd80b0b476abfbf25d4aa23b6ae
|
||||
msgid "Connections"
|
||||
msgstr "连接模块"
|
||||
|
||||
#: ../../modules/connections.md:3 0f09b3be20cd409f92c2ba819dbf45eb
|
||||
#: ../../modules/connections.rst:3 95fecfbcae0e4170affd6a5e41a4fe56
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"In order to interact more conveniently with users' private environments, "
|
||||
"the project has designed a connection module, which can support "
|
||||
"connection to databases, Excel, knowledge bases, and other environments "
|
||||
"to achieve information and data exchange."
|
||||
msgstr "为了更方便地与用户的私有环境进行交互,项目设计了一个连接模块,可以支持"
|
||||
"与数据库、Excel、知识库等环境的连接,实现信息和数据的交换。"
|
||||
"**In order to interact more conveniently with users' private "
|
||||
"environments, the project has designed a connection module, which can "
|
||||
"support connection to databases, Excel, knowledge bases, and other "
|
||||
"environments to achieve information and data exchange.**"
|
||||
msgstr "为了更方便地与用户的私有环境进行交互,项目设计了一个连接模块,可以支持与数据库、Excel、知识库等环境的连接,实现信息和数据的交换。"
|
||||
|
||||
#: ../../modules/connections.rst:5 3a5a6124903c4d1fba8a8eb8426616b7
|
||||
msgid ""
|
||||
"DB-GPT provides base class BaseConnect, you can inheriting and implement "
|
||||
"get_session(), get_table_names(), get_index_info(), get_database_list() "
|
||||
"and run()."
|
||||
msgstr "DB-GPT提供了基类BaseConnect,你可以继承这个类,然后实现get_session(), get_table_names(), get_index_info(), get_database_list() "
|
||||
"和run()这些方法即可。如果你的数据库是关系型数据库,可以直接继承RDBMSDatabase。"
|
||||
|
||||
#: ../../modules/connections.rst:7 78ef1226d576458fbd1a9f1043936fc8
|
||||
msgid ""
|
||||
"`mysql_connection <./connections/mysql_connection.html>`_: supported "
|
||||
"mysql_connection."
|
||||
msgstr "mysql connection使用方法"
|
||||
|
||||
|
@ -0,0 +1,36 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/connections/mysql/mysql_connection.md:1
|
||||
#: 69e13b9e53ff466c9ec85715741dd938
|
||||
msgid "MYSQL Connection"
|
||||
msgstr "MYSQL连接"
|
||||
|
||||
#: ../../modules/connections/mysql/mysql_connection.md:3
|
||||
#: 4b8446fe31204688a87a5cff1180bec1
|
||||
msgid "MYSQL can connect mysql server."
|
||||
msgstr "MYSQL可以连接mysql server"
|
||||
|
||||
#: ../../modules/connections/mysql/mysql_connection.md:5
|
||||
#: 5a2141fba4bd41269d4ad73885067e0d
|
||||
msgid "inheriting the RDBMSDatabase"
|
||||
msgstr "继承RDBMSDatabase"
|
||||
|
98
docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po
Normal file
98
docs/locales/zh_CN/LC_MESSAGES/modules/knowledge.po
Normal file
@ -0,0 +1,98 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-14 15:12+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/knowledge.rst:2 ../../modules/knowledge.rst:30
|
||||
#: e98ef6095fc54f8f8dc045cfa1733dc2
|
||||
msgid "Knowledge"
|
||||
msgstr "知识"
|
||||
|
||||
#: ../../modules/knowledge.rst:4 51340dd2758e42ee8e96c3935de53438
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"As the knowledge base is currently the most significant user demand "
|
||||
"scenario, we natively support the construction and processing of "
|
||||
"knowledge bases. At the same time, we also provide multiple knowledge "
|
||||
"base management strategies in this project, such as pdf knowledge,md "
|
||||
"knowledge, txt knowledge, word knowledge, ppt knowledge:"
|
||||
msgstr "由于知识库是当前用户需求最显著的场景,我们原生支持知识库的构建和处理。同时,我们还在本项目中提供了多种知识库管理策略,如:pdf,md "
|
||||
", txt, word, ppt"
|
||||
|
||||
#: ../../modules/knowledge.rst:7 25eeb187843a4d9baa4d0c0a404eec65
|
||||
msgid "**Create your own knowledge repository**"
|
||||
msgstr "创建你自己的知识库"
|
||||
|
||||
#: ../../modules/knowledge.rst:9 bed8a8f08c194ff59a31dc53f67561c1
|
||||
msgid ""
|
||||
"1.Place personal knowledge files or folders in the pilot/datasets "
|
||||
"directory."
|
||||
msgstr "1.将个人知识文件或文件夹放在pilot/datasets目录中。"
|
||||
|
||||
#: ../../modules/knowledge.rst:11 6e03e1a2799a432f8319c3aaf33e2867
|
||||
msgid ""
|
||||
"We currently support many document formats: txt, pdf, md, html, doc, ppt,"
|
||||
" and url."
|
||||
msgstr "当前支持txt, pdf, md, html, doc, ppt, url文档格式"
|
||||
|
||||
#: ../../modules/knowledge.rst:13 883ebf16fe7f4e1fbc73ef7430104e79
|
||||
msgid "before execution: python -m spacy download zh_core_web_sm"
|
||||
msgstr "在执行之前请先执行python -m spacy download zh_core_web_sm"
|
||||
|
||||
#: ../../modules/knowledge.rst:15 59f4bfa8c1064391919ce2af69f2d4c9
|
||||
msgid ""
|
||||
"2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma "
|
||||
"(now only support Chroma and Milvus, if you set Milvus, please set "
|
||||
"MILVUS_URL and MILVUS_PORT)"
|
||||
msgstr "2.更新你的.env,设置你的向量存储类型,VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus,如果你设置了Milvus,请设置MILVUS_URL和MILVUS_PORT)"
|
||||
|
||||
#: ../../modules/knowledge.rst:18 be600a4d93094045b78a43307dfc8f5f
|
||||
#, fuzzy
|
||||
msgid "2.Run the knowledge repository script in the tools directory."
|
||||
msgstr "3.在tools目录执行知识入库脚本"
|
||||
|
||||
#: ../../modules/knowledge.rst:20 b27eddbbf6c74993a6653575f57fff18
|
||||
msgid ""
|
||||
"python tools/knowledge_init.py note : --vector_name : your vector store "
|
||||
"name default_value:default"
|
||||
msgstr ""
|
||||
|
||||
#: ../../modules/knowledge.rst:23 f32dc12aedc94ffc8fee77a4b6e0ec88
|
||||
msgid ""
|
||||
"3.Add the knowledge repository in the interface by entering the name of "
|
||||
"your knowledge repository (if not specified, enter \"default\") so you "
|
||||
"can use it for Q&A based on your knowledge base."
|
||||
msgstr "3.在界面上新增知识库,输入你的知识库名(如果没有指定,输入\"default\"),这样就可以基于你的知识库进行问答。"
|
||||
|
||||
#: ../../modules/knowledge.rst:25 5b1412c8beb24784bd2a93fe5c487b7b
|
||||
msgid ""
|
||||
"Note that the default vector model used is text2vec-large-chinese (which "
|
||||
"is a large model, so if your personal computer configuration is not "
|
||||
"enough, it is recommended to use text2vec-base-chinese). Therefore, "
|
||||
"ensure that you download the model and place it in the models directory."
|
||||
msgstr ""
|
||||
"注意,这里默认向量模型是text2vec-large-chinese(模型比较大,如果个人电脑配置不够建议采用text2vec-base-"
|
||||
"chinese),因此确保需要将模型download下来放到models目录中。"
|
||||
|
||||
#: ../../modules/knowledge.rst:27 67773e32b01c48628c80b6fab8c90146
|
||||
msgid ""
|
||||
"`pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf "
|
||||
"embedding."
|
||||
msgstr "`pdf_embedding <./knowledge/pdf_embedding.html>`_: supported pdf embedding."
|
||||
|
@ -0,0 +1,55 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/knowledge/markdown/markdown_embedding.md:1
|
||||
#: b5fd3aea05a64590955b958b753bf22a
|
||||
msgid "MarkdownEmbedding"
|
||||
msgstr "MarkdownEmbedding"
|
||||
|
||||
#: ../../modules/knowledge/markdown/markdown_embedding.md:3
|
||||
#: 0f98ce5b34d44c6f9c828e4b497984de
|
||||
msgid ""
|
||||
"markdown embedding can import md text into a vector knowledge base. The "
|
||||
"entire embedding process includes the read (loading data), data_process "
|
||||
"(data processing), and index_to_store (embedding to the vector database) "
|
||||
"methods."
|
||||
msgstr ""
|
||||
"markdown embedding 可以导入md格式的文档到向量数据库, 整个导入过程分为数据读取read(), "
|
||||
"数据预处理data_process()和数据进向量数据库index_to_store()"
|
||||
|
||||
#: ../../modules/knowledge/markdown/markdown_embedding.md:5
|
||||
#: 7f5ebfa8c7c146d7a340baca85634e16
|
||||
msgid "inheriting the SourceEmbedding"
|
||||
msgstr "继承SourceEmbedding"
|
||||
|
||||
#: ../../modules/knowledge/markdown/markdown_embedding.md:17
|
||||
#: 732e946bc9d149a5af802b239304b943
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"implement read() and data_process() read() method allows you to read data"
|
||||
" and split data into chunk"
|
||||
msgstr "实现read方法可以加载数据"
|
||||
|
||||
#: ../../modules/knowledge/markdown/markdown_embedding.md:33
|
||||
#: f7e53658aee7403688b333b24ff08ce2
|
||||
msgid "data_process() method allows you to pre processing your ways"
|
||||
msgstr "实现data_process方法可以进行数据预处理"
|
||||
|
@ -0,0 +1,56 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/knowledge/pdf/pdf_embedding.md:1
|
||||
#: fe600a1f3f9f492da81652ebd3d6d52d
|
||||
msgid "PDFEmbedding"
|
||||
msgstr ""
|
||||
|
||||
#: ../../modules/knowledge/pdf/pdf_embedding.md:3
|
||||
#: a26a7d6ff041476b975bab5c0bf9f506
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"pdfembedding can import PDF text into a vector knowledge base. The entire"
|
||||
" embedding process includes the read (loading data), data_process (data "
|
||||
"processing), and index_to_store (embedding to the vector database) "
|
||||
"methods."
|
||||
msgstr ""
|
||||
"pdf embedding 可以导入pdf格式的文档到向量数据库, 整个导入过程分为数据读取read(), "
|
||||
"数据预处理data_process()和数据进向量数据库index_to_store()"
|
||||
|
||||
#: ../../modules/knowledge/pdf/pdf_embedding.md:5
|
||||
#: 1895f2a6272c43f0b328caba092102a9
|
||||
msgid "inheriting the SourceEmbedding"
|
||||
msgstr "继承SourceEmbedding"
|
||||
|
||||
#: ../../modules/knowledge/pdf/pdf_embedding.md:17
|
||||
#: 2a4a349398354f9cb3e8d9630a4b8696
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"implement read() and data_process() read() method allows you to read data"
|
||||
" and split data into chunk"
|
||||
msgstr "实现read方法可以加载数据"
|
||||
|
||||
#: ../../modules/knowledge/pdf/pdf_embedding.md:34
|
||||
#: 9b5c6d3e9e96443a908a09a8a762ea7a
|
||||
msgid "data_process() method allows you to pre processing your ways"
|
||||
msgstr "实现data_process方法可以进行数据预处理"
|
||||
|
@ -0,0 +1,55 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/knowledge/ppt/ppt_embedding.md:1
|
||||
#: 2cdb249b2b284064a0c9117d051e35d4
|
||||
msgid "PPTEmbedding"
|
||||
msgstr ""
|
||||
|
||||
#: ../../modules/knowledge/ppt/ppt_embedding.md:3
|
||||
#: 71676e9b35434a849a206788da8f1394
|
||||
msgid ""
|
||||
"ppt embedding can import ppt text into a vector knowledge base. The "
|
||||
"entire embedding process includes the read (loading data), data_process "
|
||||
"(data processing), and index_to_store (embedding to the vector database) "
|
||||
"methods."
|
||||
msgstr ""
|
||||
"ppt embedding 可以导入ppt格式的文档到向量数据库, 整个导入过程分为数据读取read(), "
|
||||
"数据预处理data_process()和数据进向量数据库index_to_store()"
|
||||
|
||||
#: ../../modules/knowledge/ppt/ppt_embedding.md:5
|
||||
#: 016aeae4786e4d5bad815670bd109481
|
||||
msgid "inheriting the SourceEmbedding"
|
||||
msgstr "继承SourceEmbedding"
|
||||
|
||||
#: ../../modules/knowledge/ppt/ppt_embedding.md:17
|
||||
#: 2fb5b9dc912342df8c275cfd0e993fe0
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"implement read() and data_process() read() method allows you to read data"
|
||||
" and split data into chunk"
|
||||
msgstr "实现read方法可以加载数据"
|
||||
|
||||
#: ../../modules/knowledge/ppt/ppt_embedding.md:31
|
||||
#: 9a00f72c7ec84bde9971579c720d2628
|
||||
msgid "data_process() method allows you to pre processing your ways"
|
||||
msgstr "实现data_process方法可以进行数据预处理"
|
||||
|
@ -0,0 +1,55 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/knowledge/url/url_embedding.md:1
|
||||
#: e6d335e613ec4c3a80b89de67ba93098
|
||||
msgid "URL Embedding"
|
||||
msgstr ""
|
||||
|
||||
#: ../../modules/knowledge/url/url_embedding.md:3
|
||||
#: 25e7643335264bdaaa9386ded243d51d
|
||||
msgid ""
|
||||
"url embedding can import PDF text into a vector knowledge base. The "
|
||||
"entire embedding process includes the read (loading data), data_process "
|
||||
"(data processing), and index_to_store (embedding to the vector database) "
|
||||
"methods."
|
||||
msgstr ""
|
||||
"url embedding 可以导入url格式的文档到向量数据库, 整个导入过程分为数据读取read(), "
|
||||
"数据预处理data_process()和数据进向量数据库index_to_store()"
|
||||
|
||||
#: ../../modules/knowledge/url/url_embedding.md:5
|
||||
#: 4b8ca6d93ed0412ab1e640bd42b400ac
|
||||
msgid "inheriting the SourceEmbedding"
|
||||
msgstr "继承SourceEmbedding"
|
||||
|
||||
#: ../../modules/knowledge/url/url_embedding.md:17
|
||||
#: 5d69d27adc70406db97c398a339f6453
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"implement read() and data_process() read() method allows you to read data"
|
||||
" and split data into chunk"
|
||||
msgstr "实现read方法可以加载数据"
|
||||
|
||||
#: ../../modules/knowledge/url/url_embedding.md:34
|
||||
#: 7d055e181d9b4d47965ab249b18bd704
|
||||
msgid "data_process() method allows you to pre processing your ways"
|
||||
msgstr "实现data_process方法可以进行数据预处理"
|
||||
|
@ -0,0 +1,55 @@
|
||||
# SOME DESCRIPTIVE TITLE.
|
||||
# Copyright (C) 2023, csunny
|
||||
# This file is distributed under the same license as the DB-GPT package.
|
||||
# FIRST AUTHOR <EMAIL@ADDRESS>, 2023.
|
||||
#
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
"Language-Team: zh_CN <LL@li.org>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../modules/knowledge/word/word_embedding.md:1
|
||||
#: 1b3272def692480bb101060a33d076c6
|
||||
msgid "WordEmbedding"
|
||||
msgstr ""
|
||||
|
||||
#: ../../modules/knowledge/word/word_embedding.md:3
|
||||
#: a7ea0e94e5c74dab9aa7fb80ed42ed39
|
||||
msgid ""
|
||||
"word embedding can import word doc/docx text into a vector knowledge "
|
||||
"base. The entire embedding process includes the read (loading data), "
|
||||
"data_process (data processing), and index_to_store (embedding to the "
|
||||
"vector database) methods."
|
||||
msgstr ""
|
||||
"word embedding 可以导入word格式的doc文档到向量数据库, 整个导入过程分为数据读取read(), "
|
||||
"数据预处理data_process()和数据进向量数据库index_to_store()"
|
||||
|
||||
#: ../../modules/knowledge/word/word_embedding.md:5
|
||||
#: 12ba9527ef0745538dffb6b1dcf96933
|
||||
msgid "inheriting the SourceEmbedding"
|
||||
msgstr "继承SourceEmbedding"
|
||||
|
||||
#: ../../modules/knowledge/word/word_embedding.md:17
|
||||
#: a4e5e7553f4a43b0b79ba0de83268ef0
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
"implement read() and data_process() read() method allows you to read data"
|
||||
" and split data into chunk"
|
||||
msgstr "实现read方法可以加载数据"
|
||||
|
||||
#: ../../modules/knowledge/word/word_embedding.md:29
|
||||
#: 188a434dee7543f89cf5f1584f29ca62
|
||||
msgid "data_process() method allows you to pre processing your ways"
|
||||
msgstr "实现data_process方法可以进行数据预处理"
|
||||
|
@ -8,7 +8,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: DB-GPT 0.1.0\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2023-06-11 14:10+0800\n"
|
||||
"POT-Creation-Date: 2023-06-14 14:51+0800\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language: zh_CN\n"
|
||||
@ -17,10 +17,26 @@ msgstr ""
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Generated-By: Babel 2.11.0\n"
|
||||
"Generated-By: Babel 2.12.1\n"
|
||||
|
||||
#: ../../use_cases/sql_generation_and_diagnosis.md:1
|
||||
#: 8900f8d9f3034b20a96df1d5c611eaa1
|
||||
#: f19384aa3b1d4fc2bed3aa94abe80a3c
|
||||
msgid "SQL generation and diagnosis"
|
||||
msgstr "SQL生成和诊断"
|
||||
|
||||
#: ../../use_cases/sql_generation_and_diagnosis.md:3
|
||||
#: 0aa11c79b20544b9a1376d184f58a516
|
||||
msgid ""
|
||||
"DB-GPT provides SQL generation and diagnostic capabilities. With advanced"
|
||||
" natural language processing algorithms and a deep understanding of SQL "
|
||||
"syntax and semantics, the model can generate complex SQL queries from "
|
||||
"natural language input, as well as diagnose and fix issues with existing "
|
||||
"queries. This allows users to save time and streamline their workflow, "
|
||||
"while also improving the accuracy and efficiency of their SQL operations."
|
||||
msgstr ""
|
||||
|
||||
#: ../../use_cases/sql_generation_and_diagnosis.md:5
|
||||
#: ffcbe5278cdc41a7b656f03bf82a8b26
|
||||
msgid "[SQL Generation](../../assets/demo_en.gif)"
|
||||
msgstr ""
|
||||
|
||||
|
@ -1,4 +0,0 @@
|
||||
# Connections
|
||||
|
||||
In order to interact more conveniently with users' private environments, the project has designed a connection module, which can support connection to databases, Excel, knowledge bases, and other environments to achieve information and data exchange.
|
||||
|
16
docs/modules/connections.rst
Normal file
16
docs/modules/connections.rst
Normal file
@ -0,0 +1,16 @@
|
||||
Connections
|
||||
-----------
|
||||
**In order to interact more conveniently with users' private environments, the project has designed a connection module, which can support connection to databases, Excel, knowledge bases, and other environments to achieve information and data exchange.**
|
||||
|
||||
DB-GPT provides the base class BaseConnect; you can inherit from it and implement get_session(), get_table_names(), get_index_info(), get_database_list() and run().
|
||||
|
||||
- `mysql_connection <./connections/mysql/mysql_connection.html>`_: supported mysql_connection.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Connections
|
||||
:name: mysql_connection
|
||||
:hidden:
|
||||
|
||||
./connections/mysql/mysql_connection.md
|
18
docs/modules/connections/mysql/mysql_connection.md
Normal file
18
docs/modules/connections/mysql/mysql_connection.md
Normal file
@ -0,0 +1,18 @@
|
||||
MYSQL Connection
|
||||
==================================
|
||||
MySQLConnect can connect to a MySQL server.
|
||||
|
||||
inheriting the RDBMSDatabase
|
||||
```
|
||||
class MySQLConnect(RDBMSDatabase):
|
||||
"""Connect MySQL Database fetch MetaData
|
||||
Args:
|
||||
Usage:
|
||||
"""
|
||||
|
||||
type: str = "MySQL"
|
||||
dialect: str = "mysql"
|
||||
driver: str = "pymysql"
|
||||
|
||||
default_db = ["information_schema", "performance_schema", "sys", "mysql"]
|
||||
```
|
40
docs/modules/knowledge.rst
Normal file
40
docs/modules/knowledge.rst
Normal file
@ -0,0 +1,40 @@
|
||||
Knowledge
|
||||
---------
|
||||
|
||||
| As the knowledge base is currently the most significant user demand scenario, we natively support the construction and processing of knowledge bases. At the same time, we also provide multiple knowledge base management strategies in this project, such as pdf knowledge, md knowledge, txt knowledge, word knowledge, and ppt knowledge.
|
||||
|
||||
|
||||
**Create your own knowledge repository**
|
||||
|
||||
1.Place personal knowledge files or folders in the pilot/datasets directory.
|
||||
|
||||
We currently support many document formats: txt, pdf, md, html, doc, ppt, and url.
|
||||
|
||||
before execution: python -m spacy download zh_core_web_sm
|
||||
|
||||
2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma
|
||||
(now only support Chroma and Milvus, if you set Milvus, please set MILVUS_URL and MILVUS_PORT)
|
||||
|
||||
3.Run the knowledge repository script in the tools directory.
|
||||
|
||||
python tools/knowledge_init.py
|
||||
note : --vector_name : your vector store name default_value:default
|
||||
|
||||
4.Add the knowledge repository in the interface by entering the name of your knowledge repository (if not specified, enter "default") so you can use it for Q&A based on your knowledge base.
|
||||
|
||||
Note that the default vector model used is text2vec-large-chinese (which is a large model, so if your personal computer configuration is not enough, it is recommended to use text2vec-base-chinese). Therefore, ensure that you download the model and place it in the models directory.
|
||||
|
||||
- `pdf_embedding <./knowledge/pdf/pdf_embedding.html>`_: supported pdf embedding.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Knowledge
|
||||
:name: pdf_embedding
|
||||
:hidden:
|
||||
|
||||
./knowledge/pdf/pdf_embedding.md
|
||||
./knowledge/markdown/markdown_embedding.md
|
||||
./knowledge/word/word_embedding.md
|
||||
./knowledge/url/url_embedding.md
|
||||
./knowledge/ppt/ppt_embedding.md
|
42
docs/modules/knowledge/markdown/markdown_embedding.md
Normal file
42
docs/modules/knowledge/markdown/markdown_embedding.md
Normal file
@ -0,0 +1,42 @@
|
||||
MarkdownEmbedding
|
||||
==================================
|
||||
markdown embedding can import md text into a vector knowledge base. The entire embedding process includes the read (loading data), data_process (data processing), and index_to_store (embedding to the vector database) methods.
|
||||
|
||||
inheriting the SourceEmbedding
|
||||
|
||||
```
|
||||
class MarkdownEmbedding(SourceEmbedding):
|
||||
"""pdf embedding for read pdf document."""
|
||||
|
||||
def __init__(self, file_path, vector_store_config):
|
||||
"""Initialize with pdf path."""
|
||||
super().__init__(file_path, vector_store_config)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
```
|
||||
implement read() and data_process()
|
||||
read() method allows you to read data and split data into chunk
|
||||
|
||||
```
|
||||
@register
|
||||
def read(self):
|
||||
"""Load from markdown path."""
|
||||
loader = EncodeTextLoader(self.file_path)
|
||||
textsplitter = SpacyTextSplitter(
|
||||
pipeline="zh_core_web_sm",
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||
chunk_overlap=100,
|
||||
)
|
||||
return loader.load_and_split(textsplitter)
|
||||
```
|
||||
|
||||
data_process() method allows you to preprocess the data in your own way
|
||||
```
|
||||
@register
|
||||
def data_process(self, documents: List[Document]):
|
||||
i = 0
|
||||
for d in documents:
|
||||
documents[i].page_content = d.page_content.replace("\n", "")
|
||||
i += 1
|
||||
return documents
|
||||
```
|
43
docs/modules/knowledge/pdf/pdf_embedding.md
Normal file
43
docs/modules/knowledge/pdf/pdf_embedding.md
Normal file
@ -0,0 +1,43 @@
|
||||
PDFEmbedding
|
||||
==================================
|
||||
pdfembedding can import PDF text into a vector knowledge base. The entire embedding process includes the read (loading data), data_process (data processing), and index_to_store (embedding to the vector database) methods.
|
||||
|
||||
inheriting the SourceEmbedding
|
||||
```
|
||||
class PDFEmbedding(SourceEmbedding):
|
||||
"""pdf embedding for read pdf document."""
|
||||
|
||||
def __init__(self, file_path, vector_store_config):
|
||||
"""Initialize with pdf path."""
|
||||
super().__init__(file_path, vector_store_config)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
```
|
||||
|
||||
implement read() and data_process()
|
||||
read() method allows you to read data and split data into chunk
|
||||
```
|
||||
@register
|
||||
def read(self):
|
||||
"""Load from pdf path."""
|
||||
loader = PyPDFLoader(self.file_path)
|
||||
# textsplitter = CHNDocumentSplitter(
|
||||
# pdf=True, sentence_size=CFG.KNOWLEDGE_CHUNK_SIZE
|
||||
# )
|
||||
textsplitter = SpacyTextSplitter(
|
||||
pipeline="zh_core_web_sm",
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||
chunk_overlap=100,
|
||||
)
|
||||
return loader.load_and_split(textsplitter)
|
||||
```
|
||||
data_process() method allows you to preprocess the data in your own way
|
||||
```
|
||||
@register
|
||||
def data_process(self, documents: List[Document]):
|
||||
i = 0
|
||||
for d in documents:
|
||||
documents[i].page_content = d.page_content.replace("\n", "")
|
||||
i += 1
|
||||
return documents
|
||||
```
|
40
docs/modules/knowledge/ppt/ppt_embedding.md
Normal file
40
docs/modules/knowledge/ppt/ppt_embedding.md
Normal file
@ -0,0 +1,40 @@
|
||||
PPTEmbedding
|
||||
==================================
|
||||
ppt embedding can import ppt text into a vector knowledge base. The entire embedding process includes the read (loading data), data_process (data processing), and index_to_store (embedding to the vector database) methods.
|
||||
|
||||
inheriting the SourceEmbedding
|
||||
```
|
||||
class PPTEmbedding(SourceEmbedding):
|
||||
"""ppt embedding for read ppt document."""
|
||||
|
||||
def __init__(self, file_path, vector_store_config):
|
||||
"""Initialize with pdf path."""
|
||||
super().__init__(file_path, vector_store_config)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
```
|
||||
|
||||
implement read() and data_process()
|
||||
read() method allows you to read data and split data into chunk
|
||||
```
|
||||
@register
|
||||
def read(self):
|
||||
"""Load from ppt path."""
|
||||
loader = UnstructuredPowerPointLoader(self.file_path)
|
||||
textsplitter = SpacyTextSplitter(
|
||||
pipeline="zh_core_web_sm",
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||
chunk_overlap=200,
|
||||
)
|
||||
return loader.load_and_split(textsplitter)
|
||||
```
|
||||
data_process() method allows you to preprocess the data in your own way
|
||||
```
|
||||
@register
|
||||
def data_process(self, documents: List[Document]):
|
||||
i = 0
|
||||
for d in documents:
|
||||
documents[i].page_content = d.page_content.replace("\n", "")
|
||||
i += 1
|
||||
return documents
|
||||
```
|
47
docs/modules/knowledge/url/url_embedding.md
Normal file
47
docs/modules/knowledge/url/url_embedding.md
Normal file
@ -0,0 +1,47 @@
|
||||
URL Embedding
|
||||
==================================
|
||||
url embedding can import web page text from a URL into a vector knowledge base. The entire embedding process includes the read (loading data), data_process (data processing), and index_to_store (embedding to the vector database) methods.
|
||||
|
||||
inheriting the SourceEmbedding
|
||||
```
|
||||
class URLEmbedding(SourceEmbedding):
|
||||
"""url embedding for read url document."""
|
||||
|
||||
def __init__(self, file_path, vector_store_config):
|
||||
"""Initialize with url path."""
|
||||
super().__init__(file_path, vector_store_config)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
```
|
||||
|
||||
implement read() and data_process()
|
||||
read() method allows you to read data and split data into chunk
|
||||
```
|
||||
@register
|
||||
def read(self):
|
||||
"""Load from url path."""
|
||||
loader = WebBaseLoader(web_path=self.file_path)
|
||||
if CFG.LANGUAGE == "en":
|
||||
text_splitter = CharacterTextSplitter(
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||
chunk_overlap=20,
|
||||
length_function=len,
|
||||
)
|
||||
else:
|
||||
text_splitter = CHNDocumentSplitter(pdf=True, sentence_size=1000)
|
||||
return loader.load_and_split(text_splitter)
|
||||
```
|
||||
data_process() method allows you to preprocess the data in your own way
|
||||
```
|
||||
@register
|
||||
def data_process(self, documents: List[Document]):
|
||||
i = 0
|
||||
for d in documents:
|
||||
content = d.page_content.replace("\n", "")
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
for tag in soup(["!doctype", "meta"]):
|
||||
tag.extract()
|
||||
documents[i].page_content = soup.get_text()
|
||||
i += 1
|
||||
return documents
|
||||
```
|
38
docs/modules/knowledge/word/word_embedding.md
Normal file
38
docs/modules/knowledge/word/word_embedding.md
Normal file
@ -0,0 +1,38 @@
|
||||
WordEmbedding
|
||||
==================================
|
||||
word embedding can import word doc/docx text into a vector knowledge base. The entire embedding process includes the read (loading data), data_process (data processing), and index_to_store (embedding to the vector database) methods.
|
||||
|
||||
inheriting the SourceEmbedding
|
||||
```
|
||||
class WordEmbedding(SourceEmbedding):
|
||||
"""word embedding for read word document."""
|
||||
|
||||
def __init__(self, file_path, vector_store_config):
|
||||
"""Initialize with word path."""
|
||||
super().__init__(file_path, vector_store_config)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
```
|
||||
|
||||
implement read() and data_process()
|
||||
read() method allows you to read data and split data into chunk
|
||||
```
|
||||
@register
|
||||
def read(self):
|
||||
"""Load from word path."""
|
||||
loader = UnstructuredWordDocumentLoader(self.file_path)
|
||||
textsplitter = CHNDocumentSplitter(
|
||||
pdf=True, sentence_size=CFG.KNOWLEDGE_CHUNK_SIZE
|
||||
)
|
||||
return loader.load_and_split(textsplitter)
|
||||
```
|
||||
data_process() method allows you to preprocess the data in your own way
|
||||
```
|
||||
@register
|
||||
def data_process(self, documents: List[Document]):
|
||||
i = 0
|
||||
for d in documents:
|
||||
documents[i].page_content = d.page_content.replace("\n", "")
|
||||
i += 1
|
||||
return documents
|
||||
```
|
@ -1 +0,0 @@
|
||||
# Chatbot
|
@ -1 +0,0 @@
|
||||
# Interacting with api
|
@ -1 +1,5 @@
|
||||
# SQL generation and diagnosis
|
||||
# SQL generation and diagnosis
|
||||
|
||||
DB-GPT provides SQL generation and diagnostic capabilities. With advanced natural language processing algorithms and a deep understanding of SQL syntax and semantics, the model can generate complex SQL queries from natural language input, as well as diagnose and fix issues with existing queries. This allows users to save time and streamline their workflow, while also improving the accuracy and efficiency of their SQL operations.
|
||||
|
||||
[SQL Generation](../../assets/demo_en.gif)
|
@ -77,19 +77,23 @@ def load_native_plugins(cfg: Config):
|
||||
print("load_native_plugins")
|
||||
### TODO 默认拉主分支,后续拉发布版本
|
||||
branch_name = cfg.plugins_git_branch
|
||||
native_plugin_repo ="DB-GPT-Plugins"
|
||||
native_plugin_repo = "DB-GPT-Plugins"
|
||||
url = "https://github.com/csunny/{repo}/archive/{branch}.zip"
|
||||
response = requests.get(url.format(repo=native_plugin_repo, branch=branch_name),
|
||||
headers={'Authorization': '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'})
|
||||
response = requests.get(
|
||||
url.format(repo=native_plugin_repo, branch=branch_name),
|
||||
headers={"Authorization": "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"},
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
plugins_path_path = Path(PLUGINS_DIR)
|
||||
files = glob.glob(os.path.join(plugins_path_path, f'{native_plugin_repo}*'))
|
||||
files = glob.glob(os.path.join(plugins_path_path, f"{native_plugin_repo}*"))
|
||||
for file in files:
|
||||
os.remove(file)
|
||||
now = datetime.datetime.now()
|
||||
time_str = now.strftime('%Y%m%d%H%M%S')
|
||||
file_name = f"{plugins_path_path}/{native_plugin_repo}-{branch_name}-{time_str}.zip"
|
||||
time_str = now.strftime("%Y%m%d%H%M%S")
|
||||
file_name = (
|
||||
f"{plugins_path_path}/{native_plugin_repo}-{branch_name}-{time_str}.zip"
|
||||
)
|
||||
print(file_name)
|
||||
with open(file_name, "wb") as f:
|
||||
f.write(response.content)
|
||||
|
@ -66,7 +66,6 @@ class Database:
|
||||
self._sample_rows_in_table_info = set()
|
||||
self._indexes_in_table_info = indexes_in_table_info
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_uri(
|
||||
cls, database_uri: str, engine_args: Optional[dict] = None, **kwargs: Any
|
||||
@ -399,7 +398,6 @@ class Database:
|
||||
ans = cursor.fetchall()
|
||||
return ans[0][1]
|
||||
|
||||
|
||||
def get_fields(self, table_name):
|
||||
"""Get column fields about specified table."""
|
||||
session = self._db_sessions()
|
||||
|
@ -154,8 +154,8 @@ class Config(metaclass=Singleton):
|
||||
|
||||
### EMBEDDING Configuration
|
||||
self.EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text2vec")
|
||||
self.KNOWLEDGE_CHUNK_SIZE = int(os.getenv("KNOWLEDGE_CHUNK_SIZE", 500))
|
||||
self.KNOWLEDGE_SEARCH_TOP_SIZE = int(os.getenv("KNOWLEDGE_SEARCH_TOP_SIZE", 10))
|
||||
self.KNOWLEDGE_CHUNK_SIZE = int(os.getenv("KNOWLEDGE_CHUNK_SIZE", 100))
|
||||
self.KNOWLEDGE_SEARCH_TOP_SIZE = int(os.getenv("KNOWLEDGE_SEARCH_TOP_SIZE", 5))
|
||||
### SUMMARY_CONFIG Configuration
|
||||
self.SUMMARY_CONFIG = os.getenv("SUMMARY_CONFIG", "VECTOR")
|
||||
|
||||
|
@ -14,8 +14,8 @@ LOGDIR = os.path.join(ROOT_PATH, "logs")
|
||||
DATASETS_DIR = os.path.join(PILOT_PATH, "datasets")
|
||||
DATA_DIR = os.path.join(PILOT_PATH, "data")
|
||||
nltk.data.path = [os.path.join(PILOT_PATH, "nltk_data")] + nltk.data.path
|
||||
PLUGINS_DIR = os.path.join(ROOT_PATH, "plugins")
|
||||
FONT_DIR = os.path.join(PILOT_PATH, "fonts")
|
||||
PLUGINS_DIR = os.path.join(ROOT_PATH, "plugins")
|
||||
FONT_DIR = os.path.join(PILOT_PATH, "fonts")
|
||||
|
||||
current_directory = os.getcwd()
|
||||
|
||||
@ -43,6 +43,7 @@ LLM_MODEL_CONFIG = {
|
||||
"guanaco-33b-merged": os.path.join(MODEL_PATH, "guanaco-33b-merged"),
|
||||
"falcon-40b": os.path.join(MODEL_PATH, "falcon-40b"),
|
||||
"gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
|
||||
"gptj-6b": os.path.join(MODEL_PATH, "ggml-gpt4all-j-v1.3-groovy.bin"),
|
||||
"proxyllm": "proxyllm",
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@ from pilot.configs.config import Config
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
||||
class ClickHouseConnector(RDBMSDatabase):
|
||||
"""ClickHouseConnector"""
|
||||
|
||||
@ -17,19 +18,21 @@ class ClickHouseConnector(RDBMSDatabase):
|
||||
|
||||
default_db = ["information_schema", "performance_schema", "sys", "mysql"]
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_config(cls) -> RDBMSDatabase:
|
||||
"""
|
||||
Todo password encryption
|
||||
Returns:
|
||||
"""
|
||||
return cls.from_uri_db(cls,
|
||||
CFG.LOCAL_DB_PATH,
|
||||
engine_args={"pool_size": 10, "pool_recycle": 3600, "echo": True})
|
||||
return cls.from_uri_db(
|
||||
cls,
|
||||
CFG.LOCAL_DB_PATH,
|
||||
engine_args={"pool_size": 10, "pool_recycle": 3600, "echo": True},
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_uri_db(cls, db_path: str,
|
||||
engine_args: Optional[dict] = None, **kwargs: Any) -> RDBMSDatabase:
|
||||
def from_uri_db(
|
||||
cls, db_path: str, engine_args: Optional[dict] = None, **kwargs: Any
|
||||
) -> RDBMSDatabase:
|
||||
db_url: str = cls.connect_driver + "://" + db_path
|
||||
return cls.from_uri(db_url, engine_args, **kwargs)
|
||||
|
@ -6,6 +6,7 @@ from pilot.configs.config import Config
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
||||
class DuckDbConnect(RDBMSDatabase):
|
||||
"""Connect Duckdb Database fetch MetaData
|
||||
Args:
|
||||
@ -20,19 +21,21 @@ class DuckDbConnect(RDBMSDatabase):
|
||||
|
||||
default_db = ["information_schema", "performance_schema", "sys", "mysql"]
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_config(cls) -> RDBMSDatabase:
|
||||
"""
|
||||
Todo password encryption
|
||||
Returns:
|
||||
"""
|
||||
return cls.from_uri_db(cls,
|
||||
CFG.LOCAL_DB_PATH,
|
||||
engine_args={"pool_size": 10, "pool_recycle": 3600, "echo": True})
|
||||
return cls.from_uri_db(
|
||||
cls,
|
||||
CFG.LOCAL_DB_PATH,
|
||||
engine_args={"pool_size": 10, "pool_recycle": 3600, "echo": True},
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_uri_db(cls, db_path: str,
|
||||
engine_args: Optional[dict] = None, **kwargs: Any) -> RDBMSDatabase:
|
||||
def from_uri_db(
|
||||
cls, db_path: str, engine_args: Optional[dict] = None, **kwargs: Any
|
||||
) -> RDBMSDatabase:
|
||||
db_url: str = cls.connect_driver + "://" + db_path
|
||||
return cls.from_uri(db_url, engine_args, **kwargs)
|
||||
|
@ -5,9 +5,6 @@ from typing import Optional, Any
|
||||
from pilot.connections.rdbms.rdbms_connect import RDBMSDatabase
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class MSSQLConnect(RDBMSDatabase):
|
||||
"""Connect MSSQL Database fetch MetaData
|
||||
Args:
|
||||
@ -18,6 +15,4 @@ class MSSQLConnect(RDBMSDatabase):
|
||||
dialect: str = "mssql"
|
||||
driver: str = "pyodbc"
|
||||
|
||||
default_db = ["master", "model", "msdb", "tempdb","modeldb", "resource"]
|
||||
|
||||
|
||||
default_db = ["master", "model", "msdb", "tempdb", "modeldb", "resource"]
|
||||
|
@ -5,9 +5,6 @@ from typing import Optional, Any
|
||||
from pilot.connections.rdbms.rdbms_connect import RDBMSDatabase
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class MySQLConnect(RDBMSDatabase):
|
||||
"""Connect MySQL Database fetch MetaData
|
||||
Args:
|
||||
@ -19,5 +16,3 @@ class MySQLConnect(RDBMSDatabase):
|
||||
driver: str = "pymysql"
|
||||
|
||||
default_db = ["information_schema", "performance_schema", "sys", "mysql"]
|
||||
|
||||
|
||||
|
@ -2,8 +2,10 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from pilot.connections.rdbms.rdbms_connect import RDBMSDatabase
|
||||
|
||||
|
||||
class OracleConnector(RDBMSDatabase):
|
||||
"""OracleConnector"""
|
||||
|
||||
type: str = "ORACLE"
|
||||
|
||||
driver: str = "oracle"
|
||||
|
@ -2,6 +2,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from pilot.connections.rdbms.rdbms_connect import RDBMSDatabase
|
||||
|
||||
|
||||
class PostgresConnector(RDBMSDatabase):
|
||||
"""PostgresConnector is a class which Connector"""
|
||||
|
||||
|
@ -57,18 +57,19 @@ CFG = Config()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
def __extract_json(s):
|
||||
i = s.index('{')
|
||||
count = 1 # 当前所在嵌套深度,即还没闭合的'{'个数
|
||||
for j, c in enumerate(s[i + 1:], start=i + 1):
|
||||
if c == '}':
|
||||
count -= 1
|
||||
elif c == '{':
|
||||
count += 1
|
||||
if count == 0:
|
||||
break
|
||||
assert (count == 0) # 检查是否找到最后一个'}'
|
||||
return s[i:j + 1]
|
||||
|
||||
ss = """here's a sql statement that can be used to generate a histogram to analyze the distribution of user orders in different cities:select u.city, count(*) as order_countfrom tran_order oleft join user u on o.user_id = u.idgroup by u.city;this will return the number of orders for each city that has at least one order. we can use this data to generate a histogram that shows the distribution of orders across different cities.here's the response in the required format:{ "thoughts": "here's a sql statement that can be used to generate a histogram to analyze the distribution of user orders in different cities:\n\nselect u.city, count(*) as order_count\nfrom tran_order o\nleft join user u on o.user_id = u.id\ngroup by u.city;", "speak": "here's a sql statement that can be used to generate a histogram to analyze the distribution of user orders in different cities.", "command": { "name": "histogram-executor", "args": { "title": "distribution of user orders in different cities", "sql": "select u.city, count(*) as order_count\nfrom tran_order o\nleft join user u on o.user_id = u.id\ngroup by u.city;" } }}"""
|
||||
print(__extract_json(ss))
|
||||
def __extract_json(s):
|
||||
i = s.index("{")
|
||||
count = 1 # 当前所在嵌套深度,即还没闭合的'{'个数
|
||||
for j, c in enumerate(s[i + 1 :], start=i + 1):
|
||||
if c == "}":
|
||||
count -= 1
|
||||
elif c == "{":
|
||||
count += 1
|
||||
if count == 0:
|
||||
break
|
||||
assert count == 0 # 检查是否找到最后一个'}'
|
||||
return s[i : j + 1]
|
||||
|
||||
ss = """here's a sql statement that can be used to generate a histogram to analyze the distribution of user orders in different cities:select u.city, count(*) as order_countfrom tran_order oleft join user u on o.user_id = u.idgroup by u.city;this will return the number of orders for each city that has at least one order. we can use this data to generate a histogram that shows the distribution of orders across different cities.here's the response in the required format:{ "thoughts": "here's a sql statement that can be used to generate a histogram to analyze the distribution of user orders in different cities:\n\nselect u.city, count(*) as order_count\nfrom tran_order o\nleft join user u on o.user_id = u.id\ngroup by u.city;", "speak": "here's a sql statement that can be used to generate a histogram to analyze the distribution of user orders in different cities.", "command": { "name": "histogram-executor", "args": { "title": "distribution of user orders in different cities", "sql": "select u.city, count(*) as order_count\nfrom tran_order o\nleft join user u on o.user_id = u.id\ngroup by u.city;" } }}"""
|
||||
print(__extract_json(ss))
|
||||
|
@ -35,13 +35,12 @@ class RDBMSDatabase(BaseConnect):
|
||||
"""SQLAlchemy wrapper around a database."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
engine,
|
||||
schema: Optional[str] = None,
|
||||
metadata: Optional[MetaData] = None,
|
||||
ignore_tables: Optional[List[str]] = None,
|
||||
include_tables: Optional[List[str]] = None,
|
||||
|
||||
self,
|
||||
engine,
|
||||
schema: Optional[str] = None,
|
||||
metadata: Optional[MetaData] = None,
|
||||
ignore_tables: Optional[List[str]] = None,
|
||||
include_tables: Optional[List[str]] = None,
|
||||
):
|
||||
"""Create engine from database URI."""
|
||||
self._engine = engine
|
||||
@ -61,18 +60,37 @@ class RDBMSDatabase(BaseConnect):
|
||||
Todo password encryption
|
||||
Returns:
|
||||
"""
|
||||
return cls.from_uri_db(cls,
|
||||
CFG.LOCAL_DB_HOST,
|
||||
CFG.LOCAL_DB_PORT,
|
||||
CFG.LOCAL_DB_USER,
|
||||
CFG.LOCAL_DB_PASSWORD,
|
||||
engine_args={"pool_size": 10, "pool_recycle": 3600, "echo": True})
|
||||
return cls.from_uri_db(
|
||||
cls,
|
||||
CFG.LOCAL_DB_HOST,
|
||||
CFG.LOCAL_DB_PORT,
|
||||
CFG.LOCAL_DB_USER,
|
||||
CFG.LOCAL_DB_PASSWORD,
|
||||
engine_args={"pool_size": 10, "pool_recycle": 3600, "echo": True},
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_uri_db(cls, host: str, port: int, user: str, pwd: str, db_name: str = None,
|
||||
engine_args: Optional[dict] = None, **kwargs: Any) -> RDBMSDatabase:
|
||||
db_url: str = cls.connect_driver + "://" + CFG.LOCAL_DB_USER + ":" + CFG.LOCAL_DB_PASSWORD + "@" + CFG.LOCAL_DB_HOST + ":" + str(
|
||||
CFG.LOCAL_DB_PORT)
|
||||
def from_uri_db(
|
||||
cls,
|
||||
host: str,
|
||||
port: int,
|
||||
user: str,
|
||||
pwd: str,
|
||||
db_name: str = None,
|
||||
engine_args: Optional[dict] = None,
|
||||
**kwargs: Any,
|
||||
) -> RDBMSDatabase:
|
||||
db_url: str = (
|
||||
cls.connect_driver
|
||||
+ "://"
|
||||
+ CFG.LOCAL_DB_USER
|
||||
+ ":"
|
||||
+ CFG.LOCAL_DB_PASSWORD
|
||||
+ "@"
|
||||
+ CFG.LOCAL_DB_HOST
|
||||
+ ":"
|
||||
+ str(CFG.LOCAL_DB_PORT)
|
||||
)
|
||||
if cls.dialect:
|
||||
db_url = cls.dialect + "+" + db_url
|
||||
if db_name:
|
||||
@ -81,7 +99,7 @@ class RDBMSDatabase(BaseConnect):
|
||||
|
||||
@classmethod
|
||||
def from_uri(
|
||||
cls, database_uri: str, engine_args: Optional[dict] = None, **kwargs: Any
|
||||
cls, database_uri: str, engine_args: Optional[dict] = None, **kwargs: Any
|
||||
) -> RDBMSDatabase:
|
||||
"""Construct a SQLAlchemy engine from URI."""
|
||||
_engine_args = engine_args or {}
|
||||
@ -167,7 +185,7 @@ class RDBMSDatabase(BaseConnect):
|
||||
tbl
|
||||
for tbl in self._metadata.sorted_tables
|
||||
if tbl.name in set(all_table_names)
|
||||
and not (self.dialect == "sqlite" and tbl.name.startswith("sqlite_"))
|
||||
and not (self.dialect == "sqlite" and tbl.name.startswith("sqlite_"))
|
||||
]
|
||||
|
||||
tables = []
|
||||
@ -180,7 +198,7 @@ class RDBMSDatabase(BaseConnect):
|
||||
create_table = str(CreateTable(table).compile(self._engine))
|
||||
table_info = f"{create_table.rstrip()}"
|
||||
has_extra_info = (
|
||||
self._indexes_in_table_info or self._sample_rows_in_table_info
|
||||
self._indexes_in_table_info or self._sample_rows_in_table_info
|
||||
)
|
||||
if has_extra_info:
|
||||
table_info += "\n\n/*"
|
||||
|
@ -7,7 +7,7 @@ lang_dicts = {
|
||||
"learn_more_markdown": "该服务是仅供非商业用途的研究预览。受 Vicuna-13B 模型 [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) 的约束",
|
||||
"model_control_param": "模型参数",
|
||||
"sql_generate_mode_direct": "直接执行结果",
|
||||
"sql_generate_mode_none": "db问答",
|
||||
"sql_generate_mode_none": "DB问答",
|
||||
"max_input_token_size": "最大输出Token数",
|
||||
"please_choose_database": "请选择数据",
|
||||
"sql_generate_diagnostics": "SQL生成与诊断",
|
||||
@ -44,7 +44,7 @@ lang_dicts = {
|
||||
"learn_more_markdown": "The service is a research preview intended for non-commercial use only. subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of Vicuna-13B",
|
||||
"model_control_param": "Model Parameters",
|
||||
"sql_generate_mode_direct": "Execute directly",
|
||||
"sql_generate_mode_none": "db chat",
|
||||
"sql_generate_mode_none": "DB chat",
|
||||
"max_input_token_size": "Maximum output token size",
|
||||
"please_choose_database": "Please choose database",
|
||||
"sql_generate_diagnostics": "SQL Generation & Diagnostics",
|
||||
|
@ -2,6 +2,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import torch
|
||||
import os
|
||||
from typing import List
|
||||
from functools import cache
|
||||
from transformers import (
|
||||
@ -107,7 +108,7 @@ class GuanacoAdapter(BaseLLMAdaper):
|
||||
def loader(self, model_path: str, from_pretrained_kwargs: dict):
|
||||
tokenizer = LlamaTokenizer.from_pretrained(model_path)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_path, load_in_4bit=True, device_map={"": 0}, **from_pretrained_kwargs
|
||||
model_path, load_in_4bit=True, **from_pretrained_kwargs
|
||||
)
|
||||
return model, tokenizer
|
||||
|
||||
@ -126,7 +127,6 @@ class FalconAdapater(BaseLLMAdaper):
|
||||
model_path,
|
||||
load_in_4bit=True, # quantize
|
||||
quantization_config=bnb_config,
|
||||
device_map={"": 0},
|
||||
trust_remote_code=True,
|
||||
**from_pretrained_kwagrs,
|
||||
)
|
||||
@ -134,7 +134,6 @@ class FalconAdapater(BaseLLMAdaper):
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_path,
|
||||
trust_remote_code=True,
|
||||
device_map={"": 0},
|
||||
**from_pretrained_kwagrs,
|
||||
)
|
||||
return model, tokenizer
|
||||
@ -185,18 +184,26 @@ class RWKV4LLMAdapter(BaseLLMAdaper):
|
||||
|
||||
|
||||
class GPT4AllAdapter(BaseLLMAdaper):
|
||||
"""A light version for someone who want practise LLM use laptop."""
|
||||
"""
|
||||
A light version for someone who want practise LLM use laptop.
|
||||
All model names see: https://gpt4all.io/models/models.json
|
||||
"""
|
||||
|
||||
def match(self, model_path: str):
|
||||
return "gpt4all" in model_path
|
||||
|
||||
def loader(self, model_path: str, from_pretrained_kwargs: dict):
|
||||
# TODO
|
||||
pass
|
||||
import gpt4all
|
||||
|
||||
if model_path is None and from_pretrained_kwargs.get("model_name") is None:
|
||||
model = gpt4all.GPT4All("ggml-gpt4all-j-v1.3-groovy")
|
||||
else:
|
||||
path, file = os.path.split(model_path)
|
||||
model = gpt4all.GPT4All(model_path=path, model_name=file)
|
||||
return model, None
|
||||
|
||||
|
||||
class ProxyllmAdapter(BaseLLMAdaper):
|
||||
|
||||
"""The model adapter for local proxy"""
|
||||
|
||||
def match(self, model_path: str):
|
||||
@ -211,6 +218,7 @@ register_llm_model_adapters(ChatGLMAdapater)
|
||||
register_llm_model_adapters(GuanacoAdapter)
|
||||
register_llm_model_adapters(FalconAdapater)
|
||||
register_llm_model_adapters(GorillaAdapter)
|
||||
register_llm_model_adapters(GPT4AllAdapter)
|
||||
# TODO Default support vicuna, other model need to tests and Evaluate
|
||||
|
||||
# just for test_py, remove this later
|
||||
|
23
pilot/model/llm_out/gpt4all_llm.py
Normal file
23
pilot/model/llm_out/gpt4all_llm.py
Normal file
@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding:utf-8 -*-
|
||||
import threading
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
def gpt4all_generate_stream(model, tokenizer, params, device, max_position_embeddings):
|
||||
stop = params.get("stop", "###")
|
||||
prompt = params["prompt"]
|
||||
role, query = prompt.split(stop)[1].split(":")
|
||||
print(f"gpt4all, role: {role}, query: {query}")
|
||||
|
||||
def worker():
|
||||
model.generate(prompt=query, streaming=True)
|
||||
|
||||
t = threading.Thread(target=worker)
|
||||
t.start()
|
||||
|
||||
while t.is_alive():
|
||||
yield sys.stdout.output
|
||||
time.sleep(0.01)
|
||||
t.join()
|
@ -51,7 +51,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
|
||||
}
|
||||
)
|
||||
|
||||
# Move the last user's information to the end
|
||||
# Move the last user's information to the end
|
||||
temp_his = history[::-1]
|
||||
last_user_input = None
|
||||
for m in temp_his:
|
||||
@ -76,7 +76,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
|
||||
text = ""
|
||||
for line in res.iter_lines():
|
||||
if line:
|
||||
json_data = line.split(b': ', 1)[1]
|
||||
json_data = line.split(b": ", 1)[1]
|
||||
decoded_line = json_data.decode("utf-8")
|
||||
if decoded_line.lower() != "[DONE]".lower():
|
||||
obj = json.loads(json_data)
|
||||
|
@ -73,12 +73,12 @@ class ModelLoader(metaclass=Singleton):
|
||||
|
||||
elif self.device == "cuda":
|
||||
kwargs = {"torch_dtype": torch.float16}
|
||||
num_gpus = int(num_gpus)
|
||||
num_gpus = torch.cuda.device_count()
|
||||
|
||||
if num_gpus != 1:
|
||||
kwargs["device_map"] = "auto"
|
||||
if max_gpu_memory is None:
|
||||
kwargs["device_map"] = "sequential"
|
||||
# if max_gpu_memory is None:
|
||||
# kwargs["device_map"] = "sequential"
|
||||
|
||||
available_gpu_memory = get_gpu_memory(num_gpus)
|
||||
kwargs["max_memory"] = {
|
||||
|
@ -51,7 +51,7 @@ class BaseOutputParser(ABC):
|
||||
|
||||
""" TODO Multi mode output handler, rewrite this for multi model, use adapter mode.
|
||||
"""
|
||||
if data["error_code"] == 0:
|
||||
if data.get("error_code", 0) == 0:
|
||||
if "vicuna" in CFG.LLM_MODEL:
|
||||
# output = data["text"][skip_echo_len + 11:].strip()
|
||||
output = data["text"][skip_echo_len:].strip()
|
||||
@ -121,17 +121,17 @@ class BaseOutputParser(ABC):
|
||||
raise ValueError("Model server error!code=" + respObj_ex["error_code"])
|
||||
|
||||
def __extract_json(slef, s):
|
||||
i = s.index('{')
|
||||
i = s.index("{")
|
||||
count = 1 # 当前所在嵌套深度,即还没闭合的'{'个数
|
||||
for j, c in enumerate(s[i + 1:], start=i + 1):
|
||||
if c == '}':
|
||||
for j, c in enumerate(s[i + 1 :], start=i + 1):
|
||||
if c == "}":
|
||||
count -= 1
|
||||
elif c == '{':
|
||||
elif c == "{":
|
||||
count += 1
|
||||
if count == 0:
|
||||
break
|
||||
assert (count == 0) # 检查是否找到最后一个'}'
|
||||
return s[i:j + 1]
|
||||
assert count == 0 # 检查是否找到最后一个'}'
|
||||
return s[i : j + 1]
|
||||
|
||||
def parse_prompt_response(self, model_out_text) -> T:
|
||||
"""
|
||||
|
@ -134,6 +134,7 @@ class BaseChat(ABC):
|
||||
return payload
|
||||
|
||||
def stream_call(self):
|
||||
# TODO Retry when server connection error
|
||||
payload = self.__call_base()
|
||||
|
||||
self.skip_echo_len = len(payload.get("prompt").replace("</s>", " ")) + 11
|
||||
@ -187,19 +188,19 @@ class BaseChat(ABC):
|
||||
)
|
||||
)
|
||||
|
||||
# ### MOCK
|
||||
# ai_response_text = """{
|
||||
# "thoughts": "可以从users表和tran_order表联合查询,按城市和订单数量进行分组统计,并使用柱状图展示。",
|
||||
# "reasoning": "为了分析用户在不同城市的分布情况,需要查询users表和tran_order表,使用LEFT JOIN将两个表联合起来。按照城市进行分组,统计每个城市的订单数量。使用柱状图展示可以直观地看出每个城市的订单数量,方便比较。",
|
||||
# "speak": "根据您的分析目标,我查询了用户表和订单表,统计了每个城市的订单数量,并生成了柱状图展示。",
|
||||
# "command": {
|
||||
# "name": "histogram-executor",
|
||||
# "args": {
|
||||
# "title": "订单城市分布柱状图",
|
||||
# "sql": "SELECT users.city, COUNT(tran_order.order_id) AS order_count FROM users LEFT JOIN tran_order ON users.user_name = tran_order.user_name GROUP BY users.city"
|
||||
# }
|
||||
# }
|
||||
# }"""
|
||||
# ### MOCK
|
||||
# ai_response_text = """{
|
||||
# "thoughts": "可以从users表和tran_order表联合查询,按城市和订单数量进行分组统计,并使用柱状图展示。",
|
||||
# "reasoning": "为了分析用户在不同城市的分布情况,需要查询users表和tran_order表,使用LEFT JOIN将两个表联合起来。按照城市进行分组,统计每个城市的订单数量。使用柱状图展示可以直观地看出每个城市的订单数量,方便比较。",
|
||||
# "speak": "根据您的分析目标,我查询了用户表和订单表,统计了每个城市的订单数量,并生成了柱状图展示。",
|
||||
# "command": {
|
||||
# "name": "histogram-executor",
|
||||
# "args": {
|
||||
# "title": "订单城市分布柱状图",
|
||||
# "sql": "SELECT users.city, COUNT(tran_order.order_id) AS order_count FROM users LEFT JOIN tran_order ON users.user_name = tran_order.user_name GROUP BY users.city"
|
||||
# }
|
||||
# }
|
||||
# }"""
|
||||
|
||||
self.current_message.add_ai_message(ai_response_text)
|
||||
prompt_define_response = (
|
||||
|
@ -80,7 +80,6 @@ class ChatWithPlugin(BaseChat):
|
||||
def __list_to_prompt_str(self, list: List) -> str:
|
||||
return "\n".join(f"{i + 1 + 1}. {item}" for i, item in enumerate(list))
|
||||
|
||||
|
||||
def generate(self, p) -> str:
|
||||
return super().generate(p)
|
||||
|
||||
|
@ -31,7 +31,7 @@ class PluginChatOutputParser(BaseOutputParser):
|
||||
command, thoughts, speak = (
|
||||
response["command"],
|
||||
response["thoughts"],
|
||||
response["speak"]
|
||||
response["speak"],
|
||||
)
|
||||
return PluginAction(command, speak, thoughts)
|
||||
|
||||
|
@ -56,7 +56,9 @@ class ChatDefaultKnowledge(BaseChat):
|
||||
context = context[:2000]
|
||||
input_values = {"context": context, "question": self.current_user_input}
|
||||
except NoIndexException:
|
||||
raise ValueError("you have no default knowledge store, please execute python knowledge_init.py")
|
||||
raise ValueError(
|
||||
"you have no default knowledge store, please execute python knowledge_init.py"
|
||||
)
|
||||
return input_values
|
||||
|
||||
def do_with_prompt_response(self, prompt_response):
|
||||
|
@ -5,7 +5,6 @@ import sys
|
||||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
|
||||
if "pytest" in sys.argv or "pytest" in sys.modules or os.getenv("CI"):
|
||||
print("Setting random seed to 42")
|
||||
random.seed(42)
|
||||
|
@ -37,7 +37,6 @@ def get_llm_chat_adapter(model_path: str) -> BaseChatAdpter:
|
||||
|
||||
|
||||
class VicunaChatAdapter(BaseChatAdpter):
|
||||
|
||||
"""Model chat Adapter for vicuna"""
|
||||
|
||||
def match(self, model_path: str):
|
||||
@ -60,7 +59,6 @@ class ChatGLMChatAdapter(BaseChatAdpter):
|
||||
|
||||
|
||||
class CodeT5ChatAdapter(BaseChatAdpter):
|
||||
|
||||
"""Model chat adapter for CodeT5"""
|
||||
|
||||
def match(self, model_path: str):
|
||||
@ -72,7 +70,6 @@ class CodeT5ChatAdapter(BaseChatAdpter):
|
||||
|
||||
|
||||
class CodeGenChatAdapter(BaseChatAdpter):
|
||||
|
||||
"""Model chat adapter for CodeGen"""
|
||||
|
||||
def match(self, model_path: str):
|
||||
@ -127,11 +124,22 @@ class GorillaChatAdapter(BaseChatAdpter):
|
||||
return generate_stream
|
||||
|
||||
|
||||
class GPT4AllChatAdapter(BaseChatAdpter):
|
||||
def match(self, model_path: str):
|
||||
return "gpt4all" in model_path
|
||||
|
||||
def get_generate_stream_func(self):
|
||||
from pilot.model.llm_out.gpt4all_llm import gpt4all_generate_stream
|
||||
|
||||
return gpt4all_generate_stream
|
||||
|
||||
|
||||
register_llm_model_chat_adapter(VicunaChatAdapter)
|
||||
register_llm_model_chat_adapter(ChatGLMChatAdapter)
|
||||
register_llm_model_chat_adapter(GuanacoChatAdapter)
|
||||
register_llm_model_chat_adapter(FalconChatAdapter)
|
||||
register_llm_model_chat_adapter(GorillaChatAdapter)
|
||||
register_llm_model_chat_adapter(GPT4AllChatAdapter)
|
||||
|
||||
# Proxy model for test and develop, it's cheap for us now.
|
||||
register_llm_model_chat_adapter(ProxyllmChatAdapter)
|
||||
|
@ -39,9 +39,13 @@ class ModelWorker:
|
||||
)
|
||||
|
||||
if not isinstance(self.model, str):
|
||||
if hasattr(self.model.config, "max_sequence_length"):
|
||||
if hasattr(self.model, "config") and hasattr(
|
||||
self.model.config, "max_sequence_length"
|
||||
):
|
||||
self.context_len = self.model.config.max_sequence_length
|
||||
elif hasattr(self.model.config, "max_position_embeddings"):
|
||||
elif hasattr(self.model, "config") and hasattr(
|
||||
self.model.config, "max_position_embeddings"
|
||||
):
|
||||
self.context_len = self.model.config.max_position_embeddings
|
||||
|
||||
else:
|
||||
@ -69,7 +73,10 @@ class ModelWorker:
|
||||
for output in self.generate_stream_func(
|
||||
self.model, self.tokenizer, params, DEVICE, CFG.MAX_POSITION_EMBEDDINGS
|
||||
):
|
||||
print("output: ", output)
|
||||
# Please do not open the output in production!
|
||||
# The gpt4all thread shares stdout with the parent process,
|
||||
# and opening it may affect the frontend output.
|
||||
# print("output: ", output)
|
||||
ret = {
|
||||
"text": output,
|
||||
"error_code": 0,
|
||||
@ -79,6 +86,12 @@ class ModelWorker:
|
||||
except torch.cuda.CudaError:
|
||||
ret = {"text": "**GPU OutOfMemory, Please Refresh.**", "error_code": 0}
|
||||
yield json.dumps(ret).encode() + b"\0"
|
||||
except Exception as e:
|
||||
ret = {
|
||||
"text": f"**LLMServer Generate Error, Please CheckErrorInfo.**: {e}",
|
||||
"error_code": 0,
|
||||
}
|
||||
yield json.dumps(ret).encode() + b"\0"
|
||||
|
||||
def get_embeddings(self, prompt):
|
||||
return get_embeddings(self.model, self.tokenizer, prompt)
|
||||
|
@ -667,8 +667,8 @@ if __name__ == "__main__":
|
||||
|
||||
args = parser.parse_args()
|
||||
logger.info(f"args: {args}")
|
||||
|
||||
# init config
|
||||
|
||||
# init config
|
||||
cfg = Config()
|
||||
|
||||
load_native_plugins(cfg)
|
||||
@ -682,7 +682,7 @@ if __name__ == "__main__":
|
||||
"pilot.commands.built_in.audio_text",
|
||||
"pilot.commands.built_in.image_gen",
|
||||
]
|
||||
# exclude commands
|
||||
# exclude commands
|
||||
command_categories = [
|
||||
x for x in command_categories if x not in cfg.disabled_command_categories
|
||||
]
|
||||
|
@ -30,7 +30,11 @@ class MarkdownEmbedding(SourceEmbedding):
|
||||
def read(self):
|
||||
"""Load from markdown path."""
|
||||
loader = EncodeTextLoader(self.file_path)
|
||||
textsplitter = SpacyTextSplitter(pipeline='zh_core_web_sm', chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=200)
|
||||
textsplitter = SpacyTextSplitter(
|
||||
pipeline="zh_core_web_sm",
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||
chunk_overlap=100,
|
||||
)
|
||||
return loader.load_and_split(textsplitter)
|
||||
|
||||
@register
|
||||
|
@ -29,7 +29,9 @@ class PDFEmbedding(SourceEmbedding):
|
||||
# pdf=True, sentence_size=CFG.KNOWLEDGE_CHUNK_SIZE
|
||||
# )
|
||||
textsplitter = SpacyTextSplitter(
|
||||
pipeline="zh_core_web_sm", chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=200
|
||||
pipeline="zh_core_web_sm",
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||
chunk_overlap=100,
|
||||
)
|
||||
return loader.load_and_split(textsplitter)
|
||||
|
||||
|
@ -25,7 +25,11 @@ class PPTEmbedding(SourceEmbedding):
|
||||
def read(self):
|
||||
"""Load from ppt path."""
|
||||
loader = UnstructuredPowerPointLoader(self.file_path)
|
||||
textsplitter = SpacyTextSplitter(pipeline='zh_core_web_sm', chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=200)
|
||||
textsplitter = SpacyTextSplitter(
|
||||
pipeline="zh_core_web_sm",
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE,
|
||||
chunk_overlap=200,
|
||||
)
|
||||
return loader.load_and_split(textsplitter)
|
||||
|
||||
@register
|
||||
|
@ -78,7 +78,7 @@ class DBSummaryClient:
|
||||
model_name=LLM_MODEL_CONFIG[CFG.EMBEDDING_MODEL],
|
||||
vector_store_config=vector_store_config,
|
||||
)
|
||||
table_docs =knowledge_embedding_client.similar_search(query, topk)
|
||||
table_docs = knowledge_embedding_client.similar_search(query, topk)
|
||||
ans = [d.page_content for d in table_docs]
|
||||
return ans
|
||||
|
||||
@ -147,8 +147,6 @@ class DBSummaryClient:
|
||||
logger.info("init db profile success...")
|
||||
|
||||
|
||||
|
||||
|
||||
def _get_llm_response(query, db_input, dbsummary):
|
||||
chat_param = {
|
||||
"temperature": 0.7,
|
||||
|
@ -43,15 +43,14 @@ CFG = Config()
|
||||
# "tps": 50
|
||||
# }
|
||||
|
||||
|
||||
class MysqlSummary(DBSummary):
|
||||
"""Get mysql summary template."""
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.type = "MYSQL"
|
||||
self.summery = (
|
||||
"""{{"database_name": "{name}", "type": "{type}", "tables": "{tables}", "qps": "{qps}", "tps": {tps}}}"""
|
||||
)
|
||||
self.summery = """{{"database_name": "{name}", "type": "{type}", "tables": "{tables}", "qps": "{qps}", "tps": {tps}}}"""
|
||||
self.tables = {}
|
||||
self.tables_info = []
|
||||
self.vector_tables_info = []
|
||||
@ -92,9 +91,12 @@ class MysqlSummary(DBSummary):
|
||||
self.tables[table_name] = table_summary.get_columns()
|
||||
self.table_columns_info.append(table_summary.get_columns())
|
||||
# self.table_columns_json.append(table_summary.get_summary_json())
|
||||
table_profile = "table name:{table_name},table description:{table_comment}".format(
|
||||
table_name=table_name, table_comment=self.db.get_show_create_table(table_name)
|
||||
table_profile = (
|
||||
"table name:{table_name},table description:{table_comment}".format(
|
||||
table_name=table_name,
|
||||
table_comment=self.db.get_show_create_table(table_name),
|
||||
)
|
||||
)
|
||||
self.table_columns_json.append(table_profile)
|
||||
# self.tables_info.append(table_summary.get_summery())
|
||||
|
||||
@ -108,7 +110,11 @@ class MysqlSummary(DBSummary):
|
||||
|
||||
def get_db_summery(self):
|
||||
return self.summery.format(
|
||||
name=self.name, type=self.type, tables=";".join(self.vector_tables_info), qps=1000, tps=1000
|
||||
name=self.name,
|
||||
type=self.type,
|
||||
tables=";".join(self.vector_tables_info),
|
||||
qps=1000,
|
||||
tps=1000,
|
||||
)
|
||||
|
||||
def get_table_summary(self):
|
||||
@ -153,7 +159,12 @@ class MysqlTableSummary(TableSummary):
|
||||
self.indexes_info.append(index_summary.get_summery())
|
||||
|
||||
self.json_summery = self.json_summery_template.format(
|
||||
name=name, comment=comment_map[name], fields=self.fields_info, indexes=self.indexes_info, size_in_bytes=1000, rows=1000
|
||||
name=name,
|
||||
comment=comment_map[name],
|
||||
fields=self.fields_info,
|
||||
indexes=self.indexes_info,
|
||||
size_in_bytes=1000,
|
||||
rows=1000,
|
||||
)
|
||||
|
||||
def get_summery(self):
|
||||
@ -203,7 +214,9 @@ class MysqlIndexSummary(IndexSummary):
|
||||
self.bind_fields = index[1]
|
||||
|
||||
def get_summery(self):
|
||||
return self.summery_template.format(name=self.name, bind_fields=self.bind_fields)
|
||||
return self.summery_template.format(
|
||||
name=self.name, bind_fields=self.bind_fields
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -49,6 +49,7 @@ llama-index==0.5.27
|
||||
pymysql
|
||||
unstructured==0.6.3
|
||||
grpcio==1.47.5
|
||||
gpt4all==0.3.0
|
||||
|
||||
auto-gpt-plugin-template
|
||||
pymdown-extensions
|
||||
|
Loading…
Reference in New Issue
Block a user