fix: conflicts
commit 77d2abba5d
.gitignore (vendored): 1 line changed

@@ -145,3 +145,4 @@ pilot/nltk_data
logs
webserver.log.*
.history/*
.plugin_env
.plugin_env.template (new file): 14 lines added

@@ -0,0 +1,14 @@
####################################################################################
## [DB-GPT-Bytebase-Plugin] ###
#####################################################################################
HOST_NAME={your-host-ip, to execute command operate}
HOST_USER=root
HOST_PASSWORD={your-host-password}
SSH_PORT=22

BYTE_BASE_COOKIE={your-bytebase-cookie}
BYTE_BASE_DOMAIN={your-bytebase-server-address}
BYTE_BASE_DEFAULT_DEV_INSTANCE=mysql_dev
BYTE_BASE_DEFAULT_TEST_INSTANCE=mysql_test
BYTE_BASE_DEFAULT_PROD_INSTANCE=mysql_prod
DEFAULT_PROJECT_NAME={your-default-project}
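This template pairs with the load_dotenv(".plugin_env") call added to the configs package later in this commit. A minimal sketch of how a plugin might read these values, assuming python-dotenv is installed; the variable names come from the template, while the surrounding code is purely illustrative:

import os
from dotenv import load_dotenv

# Load the Bytebase plugin settings; mirrors the load_dotenv(".plugin_env")
# call added to the configs package further down in this commit.
load_dotenv(".plugin_env")

ssh_host = os.getenv("HOST_NAME")              # host that will execute commands
ssh_user = os.getenv("HOST_USER", "root")
ssh_password = os.getenv("HOST_PASSWORD")
ssh_port = int(os.getenv("SSH_PORT", "22"))

bytebase_cookie = os.getenv("BYTE_BASE_COOKIE")
bytebase_domain = os.getenv("BYTE_BASE_DOMAIN")
dev_instance = os.getenv("BYTE_BASE_DEFAULT_DEV_INSTANCE", "mysql_dev")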
@@ -43,7 +43,7 @@ Currently, we have released multiple key features, which are listed below to dem
- Automatic execution of SQL and retrieval of query results
- Automatic crawling and learning of knowledge
- Unified vector storage/indexing of knowledge base
- Support for unstructured data such as PDF, Markdown, CSV, and WebURL
- Support for unstructured data such as PDF, TXT, Markdown, CSV, DOC, PPT, and WebURL

- Milti LLMs Support
- Supports multiple large language models, currently supporting Vicuna (7b, 13b), ChatGLM-6b (int4, int8), guanaco(7b,13b,33b), Gorilla(7b,13b)
@@ -1,2 +0,0 @@
# Concepts
@@ -3,6 +3,8 @@

This is a collection of DB-GPT tutorials on Medium.

DB-GPT is divided into several functions, including chat with knowledge base, execute SQL, chat with database, and execute plugins.

###Introduce
[What is DB-GPT](https://www.youtube.com/watch?v=QszhVJerc0I) by csunny (https://github.com/csunny/DB-GPT):

@@ -12,5 +14,12 @@ This is a collection of DB-GPT tutorials on Medium.

[Add new Knowledge demonstration](../../assets/new_knownledge_en.gif)

### DB Plugins
[db plugins demonstration](../../assets/auto_sql_en.gif)
### SQL Generation
[sql generation demonstration](../../assets/demo_en.gif)

### SQL Execute
[sql execute demonstration](../../assets/auto_sql_en.gif)


### Plugins
[db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.1.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-06-13 11:38+0800\n"
"POT-Creation-Date: 2023-06-13 18:04+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -27,33 +27,65 @@ msgstr "教程"
msgid "This is a collection of DB-GPT tutorials on Medium."
msgstr "这是知乎上DB-GPT教程的集合。."

#: ../../getting_started/tutorials.md:6 3915395cc45742519bf0c607eeafc489
#: ../../getting_started/tutorials.md:6 1c8db33581ea4928905e029a98b9a155
msgid ""
"DB-GPT is divided into several functions, including chat with knowledge "
"base, execute SQL, chat with database, and execute plugins."
msgstr ""

#: ../../getting_started/tutorials.md:8 3915395cc45742519bf0c607eeafc489
#, fuzzy
msgid ""
"###Introduce [What is DB-"
"GPT](https://www.youtube.com/watch?v=QszhVJerc0I) by csunny "
"(https://github.com/csunny/DB-GPT)"
msgstr "###Introduce [什么是DB-GPT](https://www.bilibili.com/video/BV1SM4y1a7Nj/?buvid=551b023900b290f9497610b2155a2668&is_story_h5=false&mid=%2BVyE%2Fwau5woPcUKieCWS0A%3D%3D&p=1&plat_id=116&share_from=ugc&share_medium=iphone&share_plat=ios&share_session_id=5D08B533-82A4-4D40-9615-7826065B4574&share_source=GENERIC&share_tag=s_i&timestamp=1686307943&unique_k=bhO3lgQ&up_id=31375446) by csunny (https://github.com/csunny/DB-GPT)"
"(https://github.com/csunny/DB-GPT):"
msgstr ""
"###Introduce [什么是DB-"
"GPT](https://www.bilibili.com/video/BV1SM4y1a7Nj/?buvid=551b023900b290f9497610b2155a2668&is_story_h5=false&mid=%2BVyE%2Fwau5woPcUKieCWS0A%3D%3D&p=1&plat_id=116&share_from=ugc&share_medium=iphone&share_plat=ios&share_session_id=5D08B533-82A4-4D40-9615-7826065B4574&share_source=GENERIC&share_tag=s_i&timestamp=1686307943&unique_k=bhO3lgQ&up_id=31375446)"
" by csunny (https://github.com/csunny/DB-GPT)"

#: ../../getting_started/tutorials.md:9 e213736923574b2cb039a457d789c27c
#: ../../getting_started/tutorials.md:11 e213736923574b2cb039a457d789c27c
msgid "Knowledge"
msgstr "知识库"

#: ../../getting_started/tutorials.md:11 90b5472735a644168d51c054ed882748
#: ../../getting_started/tutorials.md:13 90b5472735a644168d51c054ed882748
msgid ""
"[How to Create your own knowledge repository](https://db-"
"gpt.readthedocs.io/en/latest/modules/knownledge.html)"
msgstr "[怎么创建自己的知识库](https://db-"
msgstr ""
"[怎么创建自己的知识库](https://db-"
"gpt.readthedocs.io/en/latest/modules/knownledge.html)"

#: ../../getting_started/tutorials.md:13 6a851e1e88ea4bcbaf7ee742a12224ef
#: ../../getting_started/tutorials.md:15 6a851e1e88ea4bcbaf7ee742a12224ef
msgid "[Add new Knowledge demonstration](../../assets/new_knownledge_en.gif)"
msgstr "[新增知识库演示](../../assets/new_knownledge_en.gif)"

#: ../../getting_started/tutorials.md:15 4487ef393e004e7c936f5104727212a4
msgid "DB Plugins"
#: ../../getting_started/tutorials.md:17 59887be89d8046e28956f909fcbbc9dc
msgid "SQL Generation"
msgstr ""

#: ../../getting_started/tutorials.md:18 ee5decd8441d40ae8a240a19c1a5a74a
#, fuzzy
msgid "[sql generation demonstration](../../assets/demo_en.gif)"
msgstr "[sql生成演示](../../assets/demo_en.gif)"

#: ../../getting_started/tutorials.md:20 5d25c5d307c24c9198f2b52e70f2421c
msgid "SQL Execute"
msgstr "SQL执行"

#: ../../getting_started/tutorials.md:21 ee5decd8441d40ae8a240a19c1a5a74a
#, fuzzy
msgid "[sql execute demonstration](../../assets/auto_sql_en.gif)"
msgstr "[sql execute 演示](../../assets/auto_sql_en.gif)"


#: ../../getting_started/tutorials.md:26 4487ef393e004e7c936f5104727212a4
#, fuzzy
msgid "Plugins"
msgstr "DB Plugins"

#: ../../getting_started/tutorials.md:16 ee5decd8441d40ae8a240a19c1a5a74a
msgid "[db plugins demonstration](../../assets/auto_sql_en.gif)"
msgstr "[db plugins 演示](../../assets/auto_sql_en.gif)"
#: ../../getting_started/tutorials.md:27 ee5decd8441d40ae8a240a19c1a5a74a
#, fuzzy
msgid "[db plugins demonstration](../../assets/dbgpt_bytebase_plugin.gif)"
msgstr "[db plugins 演示](../../assets/dbgpt_bytebase_plugin.gif)"
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 0.1.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-06-11 14:10+0800\n"
"POT-Creation-Date: 2023-06-13 18:04+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -17,73 +17,81 @@ msgstr ""
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.11.0\n"
"Generated-By: Babel 2.12.1\n"

#: ../../modules/knownledge.md:1 ac3aa55568c0414a821a42aeed509ab2
#: ../../modules/knownledge.md:1 8c5aad32a2cc4c97bc988a1f4143097b
msgid "Knownledge"
msgstr "知识"

#: ../../modules/knownledge.md:3 1d57e3d2d790437ea54730477c67fdfb
#: ../../modules/knownledge.md:3 d739696a9e6240c78db3906d55329636
msgid ""
"As the knowledge base is currently the most significant user demand "
"scenario, we natively support the construction and processing of "
"knowledge bases. At the same time, we also provide multiple knowledge "
"base management strategies in this project, such as:"
msgstr "由于知识库是当前用户需求最显著的场景,我们原生支持知识库的构建和处理。"
"同时,我们还在本项目中提供了多种知识库管理策略,如:"
msgstr "由于知识库是当前用户需求最显著的场景,我们原生支持知识库的构建和处理。同时,我们还在本项目中提供了多种知识库管理策略,如:"

#: ../../modules/knownledge.md:4 784708fc19334742b73549d92a21ed32
#: ../../modules/knownledge.md:4 16e03ee1cd454786a736b6960c668c3a
msgid "Default built-in knowledge base"
msgstr "默认内置知识库"

#: ../../modules/knownledge.md:5 c65ccfabe79348c09e6fc13a10774ffd
#: ../../modules/knownledge.md:5 b5c57a8c773b40d18e344862adf7790e
msgid "Custom addition of knowledge bases"
msgstr "自定义新增知识库"

#: ../../modules/knownledge.md:6 fc8fded3e3634edfbe6001d9ea1add90
#: ../../modules/knownledge.md:6 7c05adacafe34780a73fa2bc6748f92f
msgid ""
"Various usage scenarios such as constructing knowledge bases through "
"plugin capabilities and web crawling. Users only need to organize the "
"knowledge documents, and they can use our existing capabilities to build "
"the knowledge base required for the large model."
msgstr "各种使用场景,例如通过插件功能和爬虫构建知识库。用户只需要组织知识文档,"
"并且他们可以使用我们现有的功能来构建大型模型所需的知识库。"
msgstr "各种使用场景,例如通过插件功能和爬虫构建知识库。用户只需要组织知识文档,并且他们可以使用我们现有的功能来构建大型模型所需的知识库。"

#: ../../modules/knownledge.md:9 2fa8ae0edeef4380ab60c43754d93c93
#: ../../modules/knownledge.md:9 8b196a2a9efb435baf648a99d89e1220
msgid "Create your own knowledge repository"
msgstr "创建你自己的知识库"

#: ../../modules/knownledge.md:11 13dc4cea806e42c4887c45bbd84fb063
#: ../../modules/knownledge.md:11 370071fde98c4c59bb18735364602adf
msgid ""
"1.Place personal knowledge files or folders in the pilot/datasets "
"directory."
msgstr "1.将个人知识文件或文件夹放在pilot/datasets目录中。"

#: ../../modules/knownledge.md:13 8dbf51249c9d47749e3fedbf9886479b
#: ../../modules/knownledge.md:13 5ac32a1253c4433e87d64dccb2c8b600
msgid ""
"We currently support many document formats: txt, pdf, md, html, doc, ppt,"
" and url."
msgstr "当前支持txt, pdf, md, doc, ppt, html文档格式"

#: ../../modules/knownledge.md:15 1782a135e84f4e9f8cb090f8af935428
msgid "before execution:"
msgstr "在执行之前"

#: ../../modules/knownledge.md:22 43791873b7e043239e160790bbfc10e1
msgid ""
"2.Update your .env, set your vector store type, VECTOR_STORE_TYPE=Chroma "
"(now only support Chroma and Milvus, if you set Milvus, please set "
"MILVUS_URL and MILVUS_PORT)"
msgstr "2.更新你的.env,设置你的向量存储类型,VECTOR_STORE_TYPE=Chroma(现在只支持"
"Chroma和Milvus,如果你设置了Milvus,请设置MILVUS_URL和MILVUS_PORT)"
msgstr "2.更新你的.env,设置你的向量存储类型,VECTOR_STORE_TYPE=Chroma(现在只支持Chroma和Milvus,如果你设置了Milvus,请设置MILVUS_URL和MILVUS_PORT)"

#: ../../modules/knownledge.md:16 e03cce8ad3b14100b8bb22dd98ea49ae
#: ../../modules/knownledge.md:25 197e043db45e444e9c930f29be808f31
msgid "2.Run the knowledge repository script in the tools directory."
msgstr "2.在tools目录执行知识入库脚本"

#: ../../modules/knownledge.md:26 a2919580cc324820b1217e31c8b22203
#: ../../modules/knownledge.md:34 abeb77ed400c4838b2ca8e14dcd89b29
msgid ""
"3.Add the knowledge repository in the interface by entering the name of "
"your knowledge repository (if not specified, enter \"default\") so you "
"can use it for Q&A based on your knowledge base."
msgstr "如果选择新增知识库,在界面上新增知识库输入你的知识库名"

#: ../../modules/knownledge.md:28 236317becbb042f2acbf66c499a3b984
#: ../../modules/knownledge.md:36 dcff9efafd9d441b91c1389af2a49780
msgid ""
"Note that the default vector model used is text2vec-large-chinese (which "
"is a large model, so if your personal computer configuration is not "
"enough, it is recommended to use text2vec-base-chinese). Therefore, "
"ensure that you download the model and place it in the models directory."
msgstr "注意,这里默认向量模型是text2vec-large-chinese(模型比较大,如果个人电脑"
"配置不够建议采用text2vec-base-chinese),因此确保需要将模型download下来放到models目录中。"
msgstr ""
"注意,这里默认向量模型是text2vec-large-chinese(模型比较大,如果个人电脑配置不够建议采用text2vec-base-"
"chinese),因此确保需要将模型download下来放到models目录中。"
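The knowledge-base entries above describe a three-step setup: place documents under pilot/datasets, point VECTOR_STORE_TYPE in .env at Chroma or Milvus, then run the script in the tools directory. A rough sketch of how those environment settings could feed the vector_store_config dict seen in tools/knowledge_init.py further down; VECTOR_STORE_TYPE, MILVUS_URL, and MILVUS_PORT are the variables named in the docs, while the Milvus config keys are illustrative assumptions:

import os

vector_name = "default"                                        # matches --vector_name in tools/knowledge_init.py
vector_store_type = os.getenv("VECTOR_STORE_TYPE", "Chroma")   # "Chroma" or "Milvus"
vector_store_config = {"vector_store_name": vector_name}

if vector_store_type == "Milvus":
    # Milvus needs an explicit endpoint, per the note above; these key names are assumed.
    vector_store_config["milvus_url"] = os.getenv("MILVUS_URL")
    vector_store_config["milvus_port"] = os.getenv("MILVUS_PORT")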
@@ -10,5 +10,6 @@ if "pytest" in sys.argv or "pytest" in sys.modules or os.getenv("CI"):

# Load the users .env file into environment variables
load_dotenv(verbose=True, override=True)
load_dotenv(".plugin_env")

del load_dotenv
@@ -17,14 +17,10 @@ nltk.data.path = [os.path.join(PILOT_PATH, "nltk_data")] + nltk.data.path
PLUGINS_DIR = os.path.join(ROOT_PATH, "plugins")
FONT_DIR = os.path.join(PILOT_PATH, "fonts")

# 获取当前工作目录
current_directory = os.getcwd()
print("当前工作目录:", current_directory)

# 设置当前工作目录
new_directory = PILOT_PATH
os.chdir(new_directory)
print("新的工作目录:", os.getcwd())

DEVICE = (
"cuda"
@@ -44,7 +44,7 @@ lang_dicts = {
"learn_more_markdown": "The service is a research preview intended for non-commercial use only. subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of Vicuna-13B",
"model_control_param": "Model Parameters",
"sql_generate_mode_direct": "Execute directly",
"sql_generate_mode_none": "chat to db",
"sql_generate_mode_none": "db chat",
"max_input_token_size": "Maximum output token size",
"please_choose_database": "Please choose database",
"sql_generate_diagnostics": "SQL Generation & Diagnostics",
@@ -51,7 +51,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
}
)

# 把最后一个用户的信息移动到末尾
# Move the last user's information to the end
temp_his = history[::-1]
last_user_input = None
for m in temp_his:
@@ -66,7 +66,7 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
"messages": history,
"temperature": params.get("temperature"),
"max_tokens": params.get("max_new_tokens"),
"stream": True
"stream": True,
}

res = requests.post(
@@ -78,30 +78,9 @@ def proxyllm_generate_stream(model, tokenizer, params, device, context_len=2048)
if line:
json_data = line.split(b': ', 1)[1]
decoded_line = json_data.decode("utf-8")
if decoded_line.lower() != '[DONE]'.lower():
if decoded_line.lower() != "[DONE]".lower():
obj = json.loads(json_data)
if obj['choices'][0]['delta'].get('content') is not None:
content = obj['choices'][0]['delta']['content']
if obj["choices"][0]["delta"].get("content") is not None:
content = obj["choices"][0]["delta"]["content"]
text += content
yield text

# native result.
# payloads = {
# "model": "gpt-3.5-turbo", # just for test, remove this later
# "messages": history,
# "temperature": params.get("temperature"),
# "max_tokens": params.get("max_new_tokens"),
# }
#
# res = requests.post(
# CFG.proxy_server_url, headers=headers, json=payloads, stream=True
# )
#
# text = ""
# line = res.content
# if line:
# decoded_line = line.decode("utf-8")
# json_line = json.loads(decoded_line)
# print(json_line)
# text += json_line["choices"][0]["message"]["content"]
# yield text
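For reference, the streaming branch above reduces to the following; a condensed sketch of the same OpenAI-style SSE parsing, where the url, headers, and payload arguments stand in for CFG.proxy_server_url and the request body built by the real function:

import json
import requests

def stream_chat_text(url, headers, payload):
    # Yield the accumulated assistant text from an OpenAI-style streaming response.
    text = ""
    res = requests.post(url, headers=headers, json=payload, stream=True)
    for line in res.iter_lines():
        if not line:
            continue
        json_data = line.split(b": ", 1)[1]            # strip the "data: " prefix
        if json_data.decode("utf-8").lower() == "[done]":
            break
        obj = json.loads(json_data)
        delta = obj["choices"][0]["delta"]
        if delta.get("content") is not None:
            text += delta["content"]
            yield text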
@@ -1,3 +1,5 @@
from chromadb.errors import NoIndexException

from pilot.scene.base_chat import BaseChat, logger, headers
from pilot.scene.base import ChatScene
from pilot.common.sql_database import Database
@@ -46,12 +48,15 @@ class ChatDefaultKnowledge(BaseChat):
)

def generate_input_values(self):
docs = self.knowledge_embedding_client.similar_search(
self.current_user_input, CFG.KNOWLEDGE_SEARCH_TOP_SIZE
)
context = [d.page_content for d in docs]
context = context[:2000]
input_values = {"context": context, "question": self.current_user_input}
try:
docs = self.knowledge_embedding_client.similar_search(
self.current_user_input, CFG.KNOWLEDGE_SEARCH_TOP_SIZE
)
context = [d.page_content for d in docs]
context = context[:2000]
input_values = {"context": context, "question": self.current_user_input}
except NoIndexException:
raise ValueError("you have no default knowledge store, please execute python knowledge_init.py")
return input_values

def do_with_prompt_response(self, prompt_response):
@@ -43,7 +43,6 @@ if __name__ == "__main__":
parser.add_argument("--vector_name", type=str, default="default")
args = parser.parse_args()
vector_name = args.vector_name
append_mode = args.append
store_type = CFG.VECTOR_STORE_TYPE
vector_store_config = {"vector_store_name": vector_name}
print(vector_store_config)