fix: conflicts

csunny 2023-06-13 21:41:18 +08:00
commit 77d2abba5d
13 changed files with 119 additions and 77 deletions

.gitignore (vendored)

@@ -145,3 +145,4 @@ pilot/nltk_data
logswebserver.log.*
.history/*
+.plugin_env

.plugin_env.template (new file)

@@ -0,0 +1,14 @@
+####################################################################################
+## [DB-GPT-Bytebase-Plugin] ###
+#####################################################################################
+HOST_NAME={your-host-ip, to execute command operate}
+HOST_USER=root
+HOST_PASSWORD={your-host-password}
+SSH_PORT=22
+BYTE_BASE_COOKIE={your-bytebase-cookie}
+BYTE_BASE_DOMAIN={your-bytebase-server-address}
+BYTE_BASE_DEFAULT_DEV_INSTANCE=mysql_dev
+BYTE_BASE_DEFAULT_TEST_INSTANCE=mysql_test
+BYTE_BASE_DEFAULT_PROD_INSTANCE=mysql_prod
+DEFAULT_PROJECT_NAME={your-default-project}
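
For orientation (not part of the commit), the sketch below shows how a plugin could pick these settings up at runtime. It assumes the python-dotenv package and reuses the `load_dotenv(".plugin_env")` call this commit adds to the package initializer further down; the variable names come from the template above, and the final print is only illustrative.

```python
# Minimal sketch, assuming python-dotenv is installed and .plugin_env sits in
# the working directory; the names read here mirror the template above.
import os

from dotenv import load_dotenv

load_dotenv(".plugin_env")  # populate os.environ from the plugin env file

host = os.getenv("HOST_NAME")
user = os.getenv("HOST_USER", "root")
password = os.getenv("HOST_PASSWORD")
ssh_port = int(os.getenv("SSH_PORT", "22"))
bytebase_domain = os.getenv("BYTE_BASE_DOMAIN")
bytebase_cookie = os.getenv("BYTE_BASE_COOKIE")

print(f"Bytebase plugin would talk to {bytebase_domain} via {host}:{ssh_port}")
```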


@@ -43,7 +43,7 @@ Currently, we have released multiple key features, which are listed below to dem
- Automatic execution of SQL and retrieval of query results
- Automatic crawling and learning of knowledge
- Unified vector storage/indexing of knowledge base
-- Support for unstructured data such as PDF, Markdown, CSV, and WebURL
+- Support for unstructured data such as PDF, TXT, Markdown, CSV, DOC, PPT, and WebURL
- Milti LLMs Support
- Supports multiple large language models, currently supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8), guanaco(7b,13b,33b), Gorilla(7b,13b)


@@ -1,2 +0,0 @@
-# Concepts


@@ -3,6 +3,8 @@
This is a collection of DB-GPT tutorials on Medium.
+DB-GPT is divided into several functions, including chat with knowledge base, execute SQL, chat with database, and execute plugins.
###Introduce
[What is DB-GPT](https://www.youtube.com/watch?v=QszhVJerc0I) by csunny (https://github.com/csunny/DB-GPT):
@@ -12,5 +14,12 @@ This is a collection of DB-GPT tutorials on Medium.
[Add new Knowledge demonstration](../../assets/new_knownledge_en.gif)
-### DB Plugins
-[db plugins demonstration](../../assets/auto_sql_en.gif)
+### SQL Generation
+[sql generation demonstration](../../assets/demo_en.gif)
+### SQL Execute
+[sql execute demonstration](../../assets/auto_sql_en.gif)
+### Plugins
+[db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)


@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.1.0\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2023-06-13 11:38+0800\n"
+"POT-Creation-Date: 2023-06-13 18:04+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -27,33 +27,65 @@ msgstr "教程"
msgid "This is a collection of DB-GPT tutorials on Medium."
msgstr "这是知乎上DB-GPT教程的集合。"
-#: ../../getting_started/tutorials.md:6 3915395cc45742519bf0c607eeafc489
+#: ../../getting_started/tutorials.md:6 1c8db33581ea4928905e029a98b9a155
+msgid ""
+"DB-GPT is divided into several functions, including chat with knowledge "
+"base, execute SQL, chat with database, and execute plugins."
+msgstr ""
+#: ../../getting_started/tutorials.md:8 3915395cc45742519bf0c607eeafc489
+#, fuzzy
msgid ""
"###Introduce [What is DB-"
"GPT](https://www.youtube.com/watch?v=QszhVJerc0I) by csunny "
-"(https://github.com/csunny/DB-GPT)"
+"(https://github.com/csunny/DB-GPT):"
-msgstr "###Introduce [什么是DB-GPT](https://www.bilibili.com/video/BV1SM4y1a7Nj/?buvid=551b023900b290f9497610b2155a2668&is_story_h5=false&mid=%2BVyE%2Fwau5woPcUKieCWS0A%3D%3D&p=1&plat_id=116&share_from=ugc&share_medium=iphone&share_plat=ios&share_session_id=5D08B533-82A4-4D40-9615-7826065B4574&share_source=GENERIC&share_tag=s_i&timestamp=1686307943&unique_k=bhO3lgQ&up_id=31375446) by csunny (https://github.com/csunny/DB-GPT)"
+msgstr ""
+"###Introduce [什么是DB-"
+"GPT](https://www.bilibili.com/video/BV1SM4y1a7Nj/?buvid=551b023900b290f9497610b2155a2668&is_story_h5=false&mid=%2BVyE%2Fwau5woPcUKieCWS0A%3D%3D&p=1&plat_id=116&share_from=ugc&share_medium=iphone&share_plat=ios&share_session_id=5D08B533-82A4-4D40-9615-7826065B4574&share_source=GENERIC&share_tag=s_i&timestamp=1686307943&unique_k=bhO3lgQ&up_id=31375446)"
+" by csunny (https://github.com/csunny/DB-GPT)"
-#: ../../getting_started/tutorials.md:9 e213736923574b2cb039a457d789c27c
+#: ../../getting_started/tutorials.md:11 e213736923574b2cb039a457d789c27c
msgid "Knowledge"
msgstr "知识库"
-#: ../../getting_started/tutorials.md:11 90b5472735a644168d51c054ed882748
+#: ../../getting_started/tutorials.md:13 90b5472735a644168d51c054ed882748
msgid ""
"[How to Create your own knowledge repository](https://db-"
"gpt.readthedocs.io/en/latest/modules/knownledge.html)"
-msgstr "[怎么创建自己的知识库](https://db-"
+msgstr ""
+"[怎么创建自己的知识库](https://db-"
"gpt.readthedocs.io/en/latest/modules/knownledge.html)"
-#: ../../getting_started/tutorials.md:13 6a851e1e88ea4bcbaf7ee742a12224ef
+#: ../../getting_started/tutorials.md:15 6a851e1e88ea4bcbaf7ee742a12224ef
msgid "[Add new Knowledge demonstration](../../assets/new_knownledge_en.gif)"
msgstr "[新增知识库演示](../../assets/new_knownledge_en.gif)"
-#: ../../getting_started/tutorials.md:15 4487ef393e004e7c936f5104727212a4
+#: ../../getting_started/tutorials.md:17 59887be89d8046e28956f909fcbbc9dc
-msgid "DB Plugins"
+msgid "SQL Generation"
+msgstr ""
+#: ../../getting_started/tutorials.md:18 ee5decd8441d40ae8a240a19c1a5a74a
+#, fuzzy
+msgid "[sql generation demonstration](../../assets/demo_en.gif)"
+msgstr "[sql生成演示](../../assets/demo_en.gif)"
+#: ../../getting_started/tutorials.md:20 5d25c5d307c24c9198f2b52e70f2421c
+msgid "SQL Execute"
+msgstr "SQL执行"
+#: ../../getting_started/tutorials.md:21 ee5decd8441d40ae8a240a19c1a5a74a
+#, fuzzy
+msgid "[sql execute demonstration](../../assets/auto_sql_en.gif)"
+msgstr "[sql execute 演示](../../assets/auto_sql_en.gif)"
+#: ../../getting_started/tutorials.md:26 4487ef393e004e7c936f5104727212a4
+#, fuzzy
+msgid "Plugins"
msgstr "DB Plugins"
-#: ../../getting_started/tutorials.md:16 ee5decd8441d40ae8a240a19c1a5a74a
-msgid "[db plugins demonstration](../../assets/auto_sql_en.gif)"
-msgstr "[db plugins 演示](../../assets/auto_sql_en.gif)"
+#: ../../getting_started/tutorials.md:27 ee5decd8441d40ae8a240a19c1a5a74a
+#, fuzzy
+msgid "[db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)"
+msgstr "[db plugins 演示](../../assets/dbgpt_bytebase_plugin.gif)"


@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.1.0\n"
"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2023-06-11 14:10+0800\n"
+"POT-Creation-Date: 2023-06-13 18:04+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -17,73 +17,81 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
-"Generated-By: Babel 2.11.0\n"
+"Generated-By: Babel 2.12.1\n"
-#: ../../modules/knownledge.md:1 ac3aa55568c0414a821a42aeed509ab2
+#: ../../modules/knownledge.md:1 8c5aad32a2cc4c97bc988a1f4143097b
msgid "Knownledge"
msgstr "知识"
-#: ../../modules/knownledge.md:3 1d57e3d2d790437ea54730477c67fdfb
+#: ../../modules/knownledge.md:3 d739696a9e6240c78db3906d55329636
msgid ""
"As the knowledge base is currently the most significant user demand "
"scenario, we natively support the construction and processing of "
"knowledge bases. At the same time, we also provide multiple knowledge "
"base management strategies in this project, such as:"
-msgstr "由于知识库是当前用户需求最显著的场景,我们原生支持知识库的构建和处理。"
-"同时,我们还在本项目中提供了多种知识库管理策略,如:"
+msgstr "由于知识库是当前用户需求最显著的场景,我们原生支持知识库的构建和处理。同时,我们还在本项目中提供了多种知识库管理策略,如:"
-#: ../../modules/knownledge.md:4 784708fc19334742b73549d92a21ed32
+#: ../../modules/knownledge.md:4 16e03ee1cd454786a736b6960c668c3a
msgid "Default built-in knowledge base"
msgstr "默认内置知识库"
-#: ../../modules/knownledge.md:5 c65ccfabe79348c09e6fc13a10774ffd
+#: ../../modules/knownledge.md:5 b5c57a8c773b40d18e344862adf7790e
msgid "Custom addition of knowledge bases"
msgstr "自定义新增知识库"
-#: ../../modules/knownledge.md:6 fc8fded3e3634edfbe6001d9ea1add90
+#: ../../modules/knownledge.md:6 7c05adacafe34780a73fa2bc6748f92f
msgid ""
"Various usage scenarios such as constructing knowledge bases through "
"plugin capabilities and web crawling. Users only need to organize the "
"knowledge documents, and they can use our existing capabilities to build "
"the knowledge base required for the large model."
-msgstr "各种使用场景,例如通过插件功能和爬虫构建知识库。用户只需要组织知识文档,"
-"并且他们可以使用我们现有的功能来构建大型模型所需的知识库。"
+msgstr "各种使用场景,例如通过插件功能和爬虫构建知识库。用户只需要组织知识文档,并且他们可以使用我们现有的功能来构建大型模型所需的知识库。"
-#: ../../modules/knownledge.md:9 2fa8ae0edeef4380ab60c43754d93c93
+#: ../../modules/knownledge.md:9 8b196a2a9efb435baf648a99d89e1220
msgid "Create your own knowledge repository"
msgstr "创建你自己的知识库"
-#: ../../modules/knownledge.md:11 13dc4cea806e42c4887c45bbd84fb063
+#: ../../modules/knownledge.md:11 370071fde98c4c59bb18735364602adf
msgid ""
"1.Place personal knowledge files or folders in the pilot/datasets "
"directory."
msgstr "1.将个人知识文件或文件夹放在pilot/datasets目录中。"
-#: ../../modules/knownledge.md:13 8dbf51249c9d47749e3fedbf9886479b
+#: ../../modules/knownledge.md:13 5ac32a1253c4433e87d64dccb2c8b600
+msgid ""
+"We currently support many document formats: txt, pdf, md, html, doc, ppt,"
+" and url."
+msgstr "当前支持txt, pdf, md, doc, ppt, html文档格式"
+#: ../../modules/knownledge.md:15 1782a135e84f4e9f8cb090f8af935428
+msgid "before execution:"
+msgstr "在执行之前"
+#: ../../modules/knownledge.md:22 43791873b7e043239e160790bbfc10e1
msgid ""
"2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma "
"(now only support Chroma and Milvus, if you set Milvus, please set "
"MILVUS_URL and MILVUS_PORT)"
-msgstr "2.更新你的.env设置你的向量存储类型VECTOR_STORE_TYPE=Chroma(现在只支持"
-"Chroma和Milvus如果你设置了Milvus请设置MILVUS_URL和MILVUS_PORT)"
+msgstr "2.更新你的.env设置你的向量存储类型VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus如果你设置了Milvus请设置MILVUS_URL和MILVUS_PORT)"
-#: ../../modules/knownledge.md:16 e03cce8ad3b14100b8bb22dd98ea49ae
+#: ../../modules/knownledge.md:25 197e043db45e444e9c930f29be808f31
msgid "2.Run the knowledge repository script in the tools directory."
msgstr "2.在tools目录执行知识入库脚本"
-#: ../../modules/knownledge.md:26 a2919580cc324820b1217e31c8b22203
+#: ../../modules/knownledge.md:34 abeb77ed400c4838b2ca8e14dcd89b29
msgid ""
"3.Add the knowledge repository in the interface by entering the name of "
"your knowledge repository (if not specified, enter \"default\") so you "
"can use it for Q&A based on your knowledge base."
msgstr "如果选择新增知识库,在界面上新增知识库输入你的知识库名"
-#: ../../modules/knownledge.md:28 236317becbb042f2acbf66c499a3b984
+#: ../../modules/knownledge.md:36 dcff9efafd9d441b91c1389af2a49780
msgid ""
"Note that the default vector model used is text2vec-large-chinese (which "
"is a large model, so if your personal computer configuration is not "
"enough, it is recommended to use text2vec-base-chinese). Therefore, "
"ensure that you download the model and place it in the models directory."
-msgstr "注意这里默认向量模型是text2vec-large-chinese(模型比较大,如果个人电脑"
-"配置不够建议采用text2vec-base-chinese),因此确保需要将模型download下来放到models目录中。"
+msgstr ""
+"注意这里默认向量模型是text2vec-large-chinese(模型比较大如果个人电脑配置不够建议采用text2vec-base-"
+"chinese),因此确保需要将模型download下来放到models目录中。"


@@ -10,5 +10,6 @@ if "pytest" in sys.argv or "pytest" in sys.modules or os.getenv("CI"):
# Load the users .env file into environment variables
load_dotenv(verbose=True, override=True)
+load_dotenv(".plugin_env")
del load_dotenv


@@ -17,14 +17,10 @@ nltk.data.path = [os.path.join(PILOT_PATH, "nltk_data")] + nltk.data.path
PLUGINS_DIR = os.path.join(ROOT_PATH, "plugins")
FONT_DIR = os.path.join(PILOT_PATH, "fonts")
-# 获取当前工作目录
current_directory = os.getcwd()
-print("当前工作目录:", current_directory)
-# 设置当前工作目录
new_directory = PILOT_PATH
os.chdir(new_directory)
-print("新的工作目录:", os.getcwd())
DEVICE = (
"cuda"


@@ -44,7 +44,7 @@ lang_dicts = {
"learn_more_markdown": "The service is a research preview intended for non-commercial use only. subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of Vicuna-13B",
"model_control_param": "Model Parameters",
"sql_generate_mode_direct": "Execute directly",
-"sql_generate_mode_none": "chat to db",
+"sql_generate_mode_none": "db chat",
"max_input_token_size": "Maximum output token size",
"please_choose_database": "Please choose database",
"sql_generate_diagnostics": "SQL Generation & Diagnostics",


@@ -51,7 +51,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
}
)
-# 把最后一个用户的信息移动到末尾
+# Move the last user's information to the end
temp_his = history[::-1]
last_user_input = None
for m in temp_his:
@@ -66,7 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
"messages": history,
"temperature": params.get("temperature"),
"max_tokens": params.get("max_new_tokens"),
-"stream": True
+"stream": True,
}
res = requests.post(
@@ -78,30 +78,9 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
if line:
json_data = line.split(b': ', 1)[1]
decoded_line = json_data.decode("utf-8")
-if decoded_line.lower() != '[DONE]'.lower():
+if decoded_line.lower() != "[DONE]".lower():
obj = json.loads(json_data)
-if obj['choices'][0]['delta'].get('content') is not None:
+if obj["choices"][0]["delta"].get("content") is not None:
-content = obj['choices'][0]['delta']['content']
+content = obj["choices"][0]["delta"]["content"]
text += content
yield text
-# native result.
-# payloads = {
-# "model": "gpt-3.5-turbo", # just for test, remove this later
-# "messages": history,
-# "temperature": params.get("temperature"),
-# "max_tokens": params.get("max_new_tokens"),
-# }
-#
-# res = requests.post(
-# CFG.proxy_server_url, headers=headers, json=payloads, stream=True
-# )
-#
-# text = ""
-# line = res.content
-# if line:
-# decoded_line = line.decode("utf-8")
-# json_line = json.loads(decoded_line)
-# print(json_line)
-# text += json_line["choices"][0]["message"]["content"]
-# yield text
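
For readers who want the streaming logic from the hunk above in one runnable piece, here is a hedged, self-contained sketch of parsing an OpenAI-compatible SSE response. PROXY_SERVER_URL and API_KEY are placeholders rather than names taken from this repository, and the request body mirrors the payload shown in the diff.

```python
# Sketch only: stream a chat completion from an OpenAI-compatible endpoint and
# accumulate the delta content, the same pattern the hunk above implements.
import json

import requests

PROXY_SERVER_URL = "https://example.com/v1/chat/completions"  # placeholder
API_KEY = "sk-..."  # placeholder

payloads = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "hello"}],
    "temperature": 0.7,
    "max_tokens": 256,
    "stream": True,
}
headers = {"Authorization": f"Bearer {API_KEY}"}

res = requests.post(PROXY_SERVER_URL, headers=headers, json=payloads, stream=True)

text = ""
for line in res.iter_lines():
    if not line:
        continue
    # Server-sent events arrive as b"data: {...}"; strip the field name.
    json_data = line.split(b": ", 1)[1]
    decoded_line = json_data.decode("utf-8")
    if decoded_line.lower() == "[done]":
        break
    obj = json.loads(json_data)
    delta = obj["choices"][0]["delta"]
    if delta.get("content") is not None:
        text += delta["content"]
        print(text)
```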


@@ -1,3 +1,5 @@
+from chromadb.errors import NoIndexException
from pilot.scene.base_chat import BaseChat, logger, headers
from pilot.scene.base import ChatScene
from pilot.common.sql_database import Database
@@ -46,12 +48,15 @@ class ChatDefaultKnowledge(BaseChat):
)
def generate_input_values(self):
+try:
docs = self.knowledge_embedding_client.similar_search(
self.current_user_input, CFG.KNOWLEDGE_SEARCH_TOP_SIZE
)
context = [d.page_content for d in docs]
context = context[:2000]
input_values = {"context": context, "question": self.current_user_input}
+except NoIndexException:
+raise ValueError("you have no default knowledge store, please execute python knowledge_init.py")
return input_values
def do_with_prompt_response(self, prompt_response):
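
Read in isolation, the guard added above converts a missing vector index into an actionable error instead of letting the store's exception escape. The sketch below illustrates the same pattern; the NoIndexException class and search_fn callable here are stand-ins, not the project's real objects.

```python
# Illustration of the try/except pattern from the hunk above, with stand-in types.
from typing import Callable, List


class NoIndexException(Exception):
    """Stand-in for chromadb.errors.NoIndexException."""


def safe_similar_search(
    search_fn: Callable[[str, int], List[str]], query: str, top_k: int
) -> List[str]:
    """Run a similarity search, translating a missing index into a clear error."""
    try:
        return search_fn(query, top_k)
    except NoIndexException:
        raise ValueError(
            "you have no default knowledge store, please execute python knowledge_init.py"
        )


def empty_store(query: str, top_k: int) -> List[str]:
    # Example search function whose index has never been built.
    raise NoIndexException("no index built")


try:
    safe_similar_search(empty_store, "what is DB-GPT?", 5)
except ValueError as err:
    print(err)
```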


@@ -43,7 +43,6 @@ if __name__ == "__main__":
parser.add_argument("--vector_name", type=str, default="default")
args = parser.parse_args()
vector_name = args.vector_name
-append_mode = args.append
store_type = CFG.VECTOR_STORE_TYPE
vector_store_config = {"vector_store_name": vector_name}
print(vector_store_config)
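
To make the effect of dropping append_mode concrete, a hedged reconstruction of the script's argument handling might look like the following; the argparse description text and everything outside the hunk are assumptions.

```python
# Hypothetical reconstruction: after this commit only --vector_name remains;
# the --append flag that fed append_mode is gone, so every run simply targets
# the named vector store.
import argparse

parser = argparse.ArgumentParser(description="knowledge ingestion script")
parser.add_argument("--vector_name", type=str, default="default")
args = parser.parse_args()

vector_store_config = {"vector_store_name": args.vector_name}
print(vector_store_config)
```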