mirror of https://github.com/csunny/DB-GPT.git (synced 2025-09-01 09:06:55 +00:00)
feat: document summary
@@ -13,7 +13,7 @@ class RemoteModelWorker(ModelWorker):
     def __init__(self) -> None:
         self.headers = {}
         # TODO Configured by ModelParameters
-        self.timeout = 180
+        self.timeout = 360
         self.host = None
         self.port = None
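
The only change in this hunk raises the remote worker's request timeout from 180 s to 360 s, since document-summary generations can easily run past three minutes. The TODO says the value should eventually come from ModelParameters; a minimal sketch of that idea (the env-var name and helper below are hypothetical, not DB-GPT API):

```python
# Hypothetical sketch only: pull the remote-worker timeout from the
# environment instead of hard-coding it, as the TODO above suggests.
import os

DEFAULT_REMOTE_WORKER_TIMEOUT = 360  # seconds

def resolve_worker_timeout() -> int:
    """Return the remote-worker request timeout, overridable via env var."""
    raw = os.getenv("REMOTE_WORKER_TIMEOUT", "")
    try:
        return int(raw) if raw else DEFAULT_REMOTE_WORKER_TIMEOUT
    except ValueError:
        return DEFAULT_REMOTE_WORKER_TIMEOUT
```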
@@ -8,19 +8,21 @@ from pilot.scene.chat_knowledge.refine_summary.out_parser import ExtractRefineSu

 CFG = Config()

-PROMPT_SCENE_DEFINE = """Your job is to produce a final summary."""
+PROMPT_SCENE_DEFINE = """"""

-_DEFAULT_TEMPLATE = """
-We have provided an existing summary up to a certain point: {existing_answer}\nWe have the opportunity to refine the existing summary (only if needed) with some more context below.\n------------\n{context}\n------------\nGiven the new context, refine the original summary.\nIf the context isn't useful, return the original summary.
+_DEFAULT_TEMPLATE_ZH = """根据提供的上下文信息,我们已经提供了一个到某一点的现有总结:{existing_answer}\n 我们有机会在下面提供的更多上下文信息的基础上进一步完善现有的总结(仅在需要的情况下)。请根据新的上下文信息,完善原来的总结。\n------------\n{context}\n------------\n如果上下文信息没有用处,请返回原来的总结。"""
+
+_DEFAULT_TEMPLATE_EN = """
+We have provided an existing summary up to a certain point: {existing_answer}\nWe have the opportunity to refine the existing summary (only if needed) with some more context below.\n------------\n{context}\n------------\nGiven the new context, refine the original summary. \nIf the context isn't useful, return the original summary.
+please use original language.
 """

+_DEFAULT_TEMPLATE = (
+    _DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == "en" else _DEFAULT_TEMPLATE_ZH
+)

 PROMPT_RESPONSE = """"""

 RESPONSE_FORMAT = """"""

 PROMPT_SEP = SeparatorStyle.SINGLE.value

 PROMPT_NEED_NEED_STREAM_OUT = False
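
This hunk empties the scene definition and splits the refine prompt into Chinese and English variants, selected at import time via CFG.LANGUAGE. For readers unfamiliar with the pattern, here is a toy illustration of how a refine-style template like this is applied chunk by chunk; `call_llm` is a hypothetical stand-in for the actual model call:

```python
# Toy illustration of refine-style summarization with the template above.
# `call_llm` is a hypothetical stand-in for an actual LLM call.
REFINE_TEMPLATE = (
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    "We have the opportunity to refine the existing summary (only if needed) "
    "with some more context below.\n------------\n{context}\n------------\n"
    "Given the new context, refine the original summary.\n"
    "If the context isn't useful, return the original summary."
)

def refine_summarize(chunks, call_llm):
    """Fold a list of text chunks into one summary, one refine step per chunk."""
    summary = call_llm(f"Write a concise summary of the following context:\n{chunks[0]}")
    for chunk in chunks[1:]:
        summary = call_llm(
            REFINE_TEMPLATE.format(existing_answer=summary, context=chunk)
        )
    return summary
```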
@@ -9,19 +9,22 @@ CFG = Config()

 # PROMPT_SCENE_DEFINE = """You are an expert Q&A system that is trusted around the world.\nAlways answer the query using the provided context information, and not prior knowledge.\nSome rules to follow:\n1. Never directly reference the given context in your answer.\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines."""

-PROMPT_SCENE_DEFINE = """Your job is to produce a final summary."""
+PROMPT_SCENE_DEFINE = """"""

 # _DEFAULT_TEMPLATE = """
 # Context information from multiple sources is below.\n---------------------\n
 # {context}
 # Given the information from multiple sources and not prior knowledge, answer the query.\nQuery: Describe what the provided text is about. Also describe some of the questions that this text can answer. \nAnswer: "
 # """
-_DEFAULT_TEMPLATE = """
+_DEFAULT_TEMPLATE_ZH = """请根据提供的上下文信息的进行简洁地总结:
+{context}
+"""
+
+_DEFAULT_TEMPLATE_EN = """
 Write a concise summary of the following context:
 {context}
+please use original language.
 """

+_DEFAULT_TEMPLATE = (
+    _DEFAULT_TEMPLATE_EN if CFG.LANGUAGE == "en" else _DEFAULT_TEMPLATE_ZH
+)

 PROMPT_RESPONSE = """"""
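
The same language-switch idiom appears in both prompt modules: the template is chosen once, at import time, from CFG.LANGUAGE. A self-contained sketch of the pattern (this `Config` is a stand-in for pilot.configs.config.Config, not the real class):

```python
# Self-contained sketch of the import-time language switch used above.
# This `Config` is a stand-in for pilot.configs.config.Config.
import os

class Config:
    def __init__(self):
        self.LANGUAGE = os.getenv("LANGUAGE", "en")

CFG = Config()

_TEMPLATE_EN = "Write a concise summary of the following context:\n{context}\n"
_TEMPLATE_ZH = "请根据提供的上下文信息进行简洁的总结:\n{context}\n"

# Selected once at import time; changing LANGUAGE later has no effect
# without re-importing the module.
_DEFAULT_TEMPLATE = _TEMPLATE_EN if CFG.LANGUAGE == "en" else _TEMPLATE_ZH

if __name__ == "__main__":
    print(_DEFAULT_TEMPLATE.format(context="DB-GPT is a database-centric LLM framework."))
```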
@@ -429,19 +429,22 @@ class KnowledgeService:
         from llama_index import PromptHelper
         from llama_index.prompts.default_prompt_selectors import DEFAULT_TREE_SUMMARIZE_PROMPT_SEL

         texts = [doc.page_content for doc in chunk_docs]
-        prompt_helper = PromptHelper()
+        prompt_helper = PromptHelper(context_window=2500)

         texts = prompt_helper.repack(prompt=DEFAULT_TREE_SUMMARIZE_PROMPT_SEL, text_chunks=texts)
         logger.info(
             f"async_document_summary, doc:{doc.doc_name}, chunk_size:{len(texts)}, begin generate summary"
         )
-        summary = self._llm_extract_summary(texts[0])
-        # summaries = self._mapreduce_extract_summary(texts)
-        outputs, summary = self._refine_extract_summary(texts[1:], summary)
-        print(
-            f"refine summary outputs:{outputs}"
-        )
-        summaries = prompt_helper.repack(prompt=DEFAULT_TREE_SUMMARIZE_PROMPT_SEL, text_chunks=outputs)
-        summary = self._llm_extract_summary("|".join(summaries))
+        # summary = self._llm_extract_summary(texts[0])
+        summary = self._mapreduce_extract_summary(texts)
+        # summaries = prompt_helper.repack(prompt=DEFAULT_TREE_SUMMARIZE_PROMPT_SEL, text_chunks=summaries)
+        # if (len(summaries)) > 1:
+        #     outputs, summary = self._refine_extract_summary(summaries[1:], summaries[0])
+        # else:
+        #     summary = self._llm_extract_summary("\n".join(summaries))
+        # print(
+        #     f"refine summary outputs:{summaries}"
+        # )
         print(
             f"final summary:{summary}"
         )
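
Two things change here: PromptHelper is pinned to context_window=2500 so that repack packs the document chunks to fit the model window, and the refine chain is replaced by the map-reduce path. As a rough mental model of what repack does (the real llama_index implementation budgets in tokens and accounts for the prompt's own length; this sketch is character-based only):

```python
# Rough, character-based model of PromptHelper.repack: greedily merge small
# chunks so each packed chunk stays under a size budget. The real llama_index
# helper is token-aware and subtracts the prompt's own length from the budget.
from typing import List

def repack(text_chunks: List[str], max_chars: int = 2500) -> List[str]:
    packed: List[str] = []
    current = ""
    for chunk in text_chunks:
        if current and len(current) + len(chunk) + 1 > max_chars:
            packed.append(current)
            current = chunk
        else:
            current = f"{current}\n{chunk}" if current else chunk
    if current:
        packed.append(current)
    return packed
```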
@@ -565,33 +568,36 @@ class KnowledgeService:
         return outputs, summary

     def _mapreduce_extract_summary(self, docs):
-        """Extract mapreduce summary by llm"""
+        """Extract mapreduce summary by llm
+        map -> multi thread generate summary
+        reduce -> merge the summaries by map process
+        Args:
+            docs:List[str]
+        """
         from pilot.scene.base import ChatScene
         from pilot.common.chat_util import llm_chat_response_nostream
         import uuid

         outputs = []
         tasks = []
-        for doc in docs:
-            chat_param = {
-                "chat_session_id": uuid.uuid1(),
-                "current_user_input": doc,
-                "select_param": "summary",
-                "model_name": CFG.LLM_MODEL,
-            }
-            tasks.append(llm_chat_response_nostream(
-                ChatScene.ExtractSummary.value(), **{"chat_param": chat_param}
-            ))
-        from pilot.common.chat_util import run_async_tasks
-        summary_iters = run_async_tasks(tasks)
-        summary = self._llm_extract_summary(" ".join(summary_iters))
-        # from pilot.utils import utils
-        # loop = utils.get_or_create_event_loop()
-        # summary = loop.run_until_complete(
-        #     llm_chat_response_nostream(
-        #         ChatScene.ExtractRefineSummary.value(), **{"chat_param": chat_param}
-        #     )
-        # )
-        # outputs.append(summary)
-        return summary
+        if len(docs) == 1:
+            summary = self._llm_extract_summary(doc=docs[0])
+            return summary
+        else:
+            for doc in docs:
+                chat_param = {
+                    "chat_session_id": uuid.uuid1(),
+                    "current_user_input": doc,
+                    "select_param": "summary",
+                    "model_name": CFG.LLM_MODEL,
+                }
+                tasks.append(llm_chat_response_nostream(
+                    ChatScene.ExtractSummary.value(), **{"chat_param": chat_param}
+                ))
+            from pilot.common.chat_util import run_async_tasks
+            summary_iters = run_async_tasks(tasks)
+            from pilot.common.prompt_util import PromptHelper
+            from llama_index.prompts.default_prompt_selectors import DEFAULT_TREE_SUMMARIZE_PROMPT_SEL
+            prompt_helper = PromptHelper(context_window=2500)
+            summary_iters = prompt_helper.repack(prompt=DEFAULT_TREE_SUMMARIZE_PROMPT_SEL, text_chunks=summary_iters)
+            return self._mapreduce_extract_summary(summary_iters)
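
The rewritten method now recurses: the map step summarizes every chunk concurrently, the reduce step repacks the partial summaries into larger chunks, and the method calls itself until a single document remains (the len(docs) == 1 base case). A toy model of that control flow, with a dummy `summarize` standing in for llm_chat_response_nostream plus run_async_tasks, and simple grouping standing in for PromptHelper.repack:

```python
# Toy model of the map/reduce recursion above. `summarize` is a dummy
# stand-in for the real LLM call; the grouping step stands in for
# PromptHelper.repack.
import asyncio
from typing import List

async def summarize(doc: str) -> str:
    return doc[:80]  # pretend this is an LLM-generated summary

async def mapreduce_summary(docs: List[str], fan_in: int = 4) -> str:
    if len(docs) == 1:  # base case: nothing left to merge
        return await summarize(docs[0])
    # map: summarize all chunks concurrently
    partials = await asyncio.gather(*(summarize(d) for d in docs))
    # reduce: pack partial summaries into bigger chunks, then recurse
    grouped = [
        "\n".join(partials[i : i + fan_in]) for i in range(0, len(partials), fan_in)
    ]
    return await mapreduce_summary(grouped)

print(asyncio.run(mapreduce_summary([f"chunk {i} text ..." for i in range(10)])))
```

One design note: the recursion only terminates if each repack pass actually reduces the chunk count, which holds in practice as long as the context window is comfortably larger than a single partial summary.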