From c911dad4b3db42247023ac8ea2972b6b91c3cad4 Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Thu, 1 Jun 2023 20:20:17 +0800 Subject: [PATCH 1/6] feature:db_summary --- pilot/scene/chat_knowledge/custom/chat.py | 5 +++-- pilot/scene/chat_knowledge/default/chat.py | 4 +++- pilot/scene/chat_knowledge/url/chat.py | 4 +++- pilot/scene/chat_knowledge/url/prompt.py | 7 +++---- pilot/server/webserver.py | 2 +- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pilot/scene/chat_knowledge/custom/chat.py b/pilot/scene/chat_knowledge/custom/chat.py index a094b9d6f..7600bab79 100644 --- a/pilot/scene/chat_knowledge/custom/chat.py +++ b/pilot/scene/chat_knowledge/custom/chat.py @@ -56,8 +56,9 @@ class ChatNewKnowledge(BaseChat): docs = self.knowledge_embedding_client.similar_search( self.current_user_input, VECTOR_SEARCH_TOP_K ) - docs = docs[:2000] - input_values = {"context": docs, "question": self.current_user_input} + context = [d.page_content for d in docs] + context = context[:2000] + input_values = {"context": context, "question": self.current_user_input} return input_values def do_with_prompt_response(self, prompt_response): diff --git a/pilot/scene/chat_knowledge/default/chat.py b/pilot/scene/chat_knowledge/default/chat.py index 5d9c3ccf4..75cca471f 100644 --- a/pilot/scene/chat_knowledge/default/chat.py +++ b/pilot/scene/chat_knowledge/default/chat.py @@ -53,7 +53,9 @@ class ChatDefaultKnowledge(BaseChat): self.current_user_input, VECTOR_SEARCH_TOP_K ) docs = docs[:2000] - input_values = {"context": docs, "question": self.current_user_input} + context = [d.page_content for d in docs] + context = context[:2000] + input_values = {"context": context, "question": self.current_user_input} return input_values def do_with_prompt_response(self, prompt_response): diff --git a/pilot/scene/chat_knowledge/url/chat.py b/pilot/scene/chat_knowledge/url/chat.py index 096df92cb..12b0f22fd 100644 --- a/pilot/scene/chat_knowledge/url/chat.py +++ b/pilot/scene/chat_knowledge/url/chat.py @@ -59,7 +59,9 @@ class ChatUrlKnowledge(BaseChat): self.current_user_input, VECTOR_SEARCH_TOP_K ) docs = docs[:2000] - input_values = {"context": docs, "question": self.current_user_input} + context = [d.page_content for d in docs] + context = context[:2000] + input_values = {"context": context, "question": self.current_user_input} return input_values def do_with_prompt_response(self, prompt_response): diff --git a/pilot/scene/chat_knowledge/url/prompt.py b/pilot/scene/chat_knowledge/url/prompt.py index 8eaafd61e..20a69d8b2 100644 --- a/pilot/scene/chat_knowledge/url/prompt.py +++ b/pilot/scene/chat_knowledge/url/prompt.py @@ -11,11 +11,10 @@ from pilot.scene.chat_normal.out_parser import NormalChatOutputParser CFG = Config() -_DEFAULT_TEMPLATE = """ 基于以下已知的信息, 专业、简要的回答用户的问题, - 如果无法从提供的内容中获取答案, 请说: "知识库中提供的内容不足以回答此问题" 禁止胡乱编造。 - 已知内容: +_DEFAULT_TEMPLATE = """ Based on the known information, provide professional and concise answers to the user's questions. If the answer cannot be obtained from the provided content, please say: 'The information provided in the knowledge base is not sufficient to answer this question.' Fabrication is prohibited.。 + known information: {context} - 问题: + question: {question} """ diff --git a/pilot/server/webserver.py b/pilot/server/webserver.py index 9b892595a..f7655fd7d 100644 --- a/pilot/server/webserver.py +++ b/pilot/server/webserver.py @@ -643,7 +643,7 @@ def knowledge_embedding_store(vs_id, files): knowledge_embedding_client.knowledge_embedding() logger.info("knowledge embedding success") - return os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, vs_id, vs_id + ".vectordb") + return vs_id if __name__ == "__main__": From de034e422129047edf94387fb68bbc128e07c0c7 Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Thu, 1 Jun 2023 20:28:13 +0800 Subject: [PATCH 2/6] feature:db_summary --- pilot/scene/chat_knowledge/default/chat.py | 1 - pilot/scene/chat_knowledge/url/chat.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pilot/scene/chat_knowledge/default/chat.py b/pilot/scene/chat_knowledge/default/chat.py index 75cca471f..9652ac0ee 100644 --- a/pilot/scene/chat_knowledge/default/chat.py +++ b/pilot/scene/chat_knowledge/default/chat.py @@ -52,7 +52,6 @@ class ChatDefaultKnowledge(BaseChat): docs = self.knowledge_embedding_client.similar_search( self.current_user_input, VECTOR_SEARCH_TOP_K ) - docs = docs[:2000] context = [d.page_content for d in docs] context = context[:2000] input_values = {"context": context, "question": self.current_user_input} diff --git a/pilot/scene/chat_knowledge/url/chat.py b/pilot/scene/chat_knowledge/url/chat.py index 12b0f22fd..0c7ac1cd2 100644 --- a/pilot/scene/chat_knowledge/url/chat.py +++ b/pilot/scene/chat_knowledge/url/chat.py @@ -58,7 +58,6 @@ class ChatUrlKnowledge(BaseChat): docs = self.knowledge_embedding_client.similar_search( self.current_user_input, VECTOR_SEARCH_TOP_K ) - docs = docs[:2000] context = [d.page_content for d in docs] context = context[:2000] input_values = {"context": context, "question": self.current_user_input} From d8b4e6bdc854082b087514d7305b445924b5d2ea Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Thu, 1 Jun 2023 20:34:00 +0800 Subject: [PATCH 3/6] fix:url embedding --- pilot/scene/chat_knowledge/url/chat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pilot/scene/chat_knowledge/url/chat.py b/pilot/scene/chat_knowledge/url/chat.py index 0c7ac1cd2..cc8d89d4a 100644 --- a/pilot/scene/chat_knowledge/url/chat.py +++ b/pilot/scene/chat_knowledge/url/chat.py @@ -52,7 +52,9 @@ class ChatUrlKnowledge(BaseChat): ) # url soruce in vector - self.knowledge_embedding_client.knowledge_embedding() + if not self.knowledge_embedding_client.vector_exist(): + self.knowledge_embedding_client.knowledge_embedding() + logger.info("url embedding success") def generate_input_values(self): docs = self.knowledge_embedding_client.similar_search( From 6c955f43f6ee9a28f56127a76679d71ff54d8f87 Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Thu, 1 Jun 2023 20:43:37 +0800 Subject: [PATCH 4/6] fix:url embedding --- pilot/source_embedding/knowledge_embedding.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pilot/source_embedding/knowledge_embedding.py b/pilot/source_embedding/knowledge_embedding.py index bb5331434..f58742ee9 100644 --- a/pilot/source_embedding/knowledge_embedding.py +++ b/pilot/source_embedding/knowledge_embedding.py @@ -82,6 +82,9 @@ class KnowledgeEmbedding: def similar_search(self, text, topk): return self.knowledge_embedding_client.similar_search(text, topk) + def vector_exist(self): + return self.knowledge_embedding_client.vector_name_exist() + def knowledge_persist_initialization(self, append_mode): documents = self._load_knownlege(self.file_path) self.vector_client = VectorStoreConnector( From 2e18c2e294081758abb264f0f75e6507eb45ac95 Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Thu, 1 Jun 2023 20:47:16 +0800 Subject: [PATCH 5/6] fix:default chat --- pilot/scene/chat_knowledge/default/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pilot/scene/chat_knowledge/default/chat.py b/pilot/scene/chat_knowledge/default/chat.py index 9652ac0ee..1a482b154 100644 --- a/pilot/scene/chat_knowledge/default/chat.py +++ b/pilot/scene/chat_knowledge/default/chat.py @@ -17,7 +17,7 @@ from pilot.configs.model_config import ( VECTOR_SEARCH_TOP_K, ) -from pilot.scene.chat_normal.prompt import prompt +from pilot.scene.chat_knowledge.default.prompt import prompt from pilot.source_embedding.knowledge_embedding import KnowledgeEmbedding CFG = Config() From 45a3a8a696b610c1dab0540c1c0870e625c6dc27 Mon Sep 17 00:00:00 2001 From: aries-ckt <916701291@qq.com> Date: Thu, 1 Jun 2023 20:52:44 +0800 Subject: [PATCH 6/6] fix:SUMMARY_CONFIG --- .env.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.template b/.env.template index b06ef215e..8c5f55ca9 100644 --- a/.env.template +++ b/.env.template @@ -108,4 +108,4 @@ PROXY_SERVER_URL=http://127.0.0.1:3000/proxy_address #*******************************************************************# # ** SUMMARY_CONFIG #*******************************************************************# -SUMMARY_CONFIG=VECTOR \ No newline at end of file +SUMMARY_CONFIG=FAST \ No newline at end of file