mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-10 04:37:59 +00:00
feat:summarize
This commit is contained in:
parent
a54eefefa2
commit
74850cbe09
@ -13,12 +13,12 @@ from dbgpt._private.pydantic import BaseModel, Field, model_to_dict
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class DeepSearchModel(BaseModel):
|
class SplitQueryModel(BaseModel):
|
||||||
"""Chart item model."""
|
"""Model for splitting queries in deep search actions."""
|
||||||
status: str = Field(
|
# status: str = Field(
|
||||||
...,
|
# ...,
|
||||||
description="The status of the current action, can be split_query, summary, or reflection.",
|
# description="The status of the current action, can be split_query, summary, or reflection.",
|
||||||
)
|
# )
|
||||||
tools: List[dict] = Field(
|
tools: List[dict] = Field(
|
||||||
default_factory=list,
|
default_factory=list,
|
||||||
description="List of tools to be used in the action.",
|
description="List of tools to be used in the action.",
|
||||||
@ -37,11 +37,40 @@ class DeepSearchModel(BaseModel):
|
|||||||
return model_to_dict(self)
|
return model_to_dict(self)
|
||||||
|
|
||||||
|
|
||||||
|
class ReflectionModel(BaseModel):
|
||||||
|
"""Model for Reflection."""
|
||||||
|
status: str = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="List of tools to be used in the action.",
|
||||||
|
)
|
||||||
|
knowledge_gap: str = Field(
|
||||||
|
...,
|
||||||
|
description="The intention of the current action, describing what you want to achieve.",
|
||||||
|
)
|
||||||
|
sub_queries: List[str] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="List of sub-queries generated from the current action.",
|
||||||
|
)
|
||||||
|
tools: List[dict] = Field(
|
||||||
|
default_factory=list,
|
||||||
|
description="List of tools to be used in the action.",
|
||||||
|
)
|
||||||
|
thought: str = Field(
|
||||||
|
...,
|
||||||
|
description="The thought of the current action, describing what you want to achieve.",
|
||||||
|
)
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
"""Convert to dict."""
|
||||||
|
return model_to_dict(self)
|
||||||
|
|
||||||
|
|
||||||
class DeepSearchAction(ToolAction):
|
class DeepSearchAction(ToolAction):
|
||||||
"""React action class."""
|
"""React action class."""
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
"""Tool action init."""
|
"""Tool action init."""
|
||||||
|
self.state = "split_query"
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -73,8 +102,9 @@ class DeepSearchAction(ToolAction):
|
|||||||
) -> ActionOutput:
|
) -> ActionOutput:
|
||||||
"""Perform the action."""
|
"""Perform the action."""
|
||||||
try:
|
try:
|
||||||
action_param: DeepSearchModel = self._input_convert(
|
# state = "split_query"
|
||||||
ai_message, DeepSearchModel
|
action_param: ReflectionModel = self._input_convert(
|
||||||
|
ai_message, ReflectionModel
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception(str(e))
|
logger.exception(str(e))
|
||||||
@ -83,29 +113,34 @@ class DeepSearchAction(ToolAction):
|
|||||||
content="The requested correctly structured answer could not be found.",
|
content="The requested correctly structured answer could not be found.",
|
||||||
)
|
)
|
||||||
|
|
||||||
if action_param.status == "split_query":
|
sub_queries = action_param.sub_queries
|
||||||
sub_queries = action_param.sub_queries
|
if action_param.status == "summarize":
|
||||||
# execute knowledge search
|
return ActionOutput(
|
||||||
if not action_param.tools:
|
is_exe_success=True,
|
||||||
return ActionOutput(
|
content=action_param.thought,
|
||||||
is_exe_success=False,
|
terminate=True,
|
||||||
content="No tools available for knowledge search.",
|
)
|
||||||
)
|
if not action_param.tools:
|
||||||
if action_param.tools:
|
return ActionOutput(
|
||||||
for tool in action_param.tools:
|
is_exe_success=False,
|
||||||
if tool.get("tool_type") == "KnowledgeRetrieve":
|
content="No tools available for knowledge search.",
|
||||||
knowledge_args = action_param.get("args", {})
|
)
|
||||||
if not knowledge_args:
|
if action_param.tools:
|
||||||
return ActionOutput(
|
for tool in action_param.tools:
|
||||||
is_exe_success=False,
|
# state = "knowledge_search"
|
||||||
content="No arguments provided for knowledge search.",
|
if tool.get("tool_type") == "KnowledgeRetrieve":
|
||||||
)
|
knowledge_args = tool.get("args", {})
|
||||||
act_out = await self.knowledge_retrieve(
|
if not knowledge_args:
|
||||||
sub_queries,
|
return ActionOutput(
|
||||||
knowledge_args,
|
is_exe_success=False,
|
||||||
self.resource,
|
content="No arguments provided for knowledge search.",
|
||||||
)
|
)
|
||||||
|
act_out = await self.knowledge_retrieve(
|
||||||
|
sub_queries,
|
||||||
|
knowledge_args,
|
||||||
|
self.resource,
|
||||||
|
)
|
||||||
|
act_out.terminate = False
|
||||||
|
|
||||||
# if "parser" in kwargs and isinstance(kwargs["parser"], ReActOutputParser):
|
# if "parser" in kwargs and isinstance(kwargs["parser"], ReActOutputParser):
|
||||||
# parser = kwargs["parser"]
|
# parser = kwargs["parser"]
|
||||||
@ -138,21 +173,17 @@ class DeepSearchAction(ToolAction):
|
|||||||
lang=self.language, question=query
|
lang=self.language, question=query
|
||||||
)
|
)
|
||||||
query_context_map[query] = resource_prompt
|
query_context_map[query] = resource_prompt
|
||||||
|
content = "\n".join([
|
||||||
|
f"{query}:{context}" for query, context in query_context_map.items()]
|
||||||
|
)
|
||||||
action_output = ActionOutput(
|
action_output = ActionOutput(
|
||||||
is_exe_success=True,
|
is_exe_success=True,
|
||||||
content="\n".join([
|
content=content,
|
||||||
f"{query}:{context}" for query, context in query_context_map.items()]
|
view=content,
|
||||||
),
|
observations=content,
|
||||||
view="\n".join([
|
|
||||||
f"{query}:{context}" for query, context in query_context_map.items()]
|
|
||||||
),
|
|
||||||
observations=query_context_map,
|
|
||||||
)
|
)
|
||||||
return action_output
|
return action_output
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def _do_run(
|
async def _do_run(
|
||||||
self,
|
self,
|
||||||
ai_message: str,
|
ai_message: str,
|
||||||
|
@ -74,29 +74,36 @@ selecting the right search tools.
|
|||||||
# The current time is: {{ now_time }}.
|
# The current time is: {{ now_time }}.
|
||||||
# """
|
# """
|
||||||
_DEEPSEARCH_SYSTEM_TEMPLATE = """
|
_DEEPSEARCH_SYSTEM_TEMPLATE = """
|
||||||
你是一个深度搜索助手。你的任务是你将用户原始问题一个或者多个子问题,并且给出可用知识库工具和搜索工具来回答问题或解决问题。
|
你是一个深度搜索助手。
|
||||||
|
<目标>
|
||||||
|
你的任务是根据用户的问题或任务,选择合适的知识检索工具和搜索工具来回答问题或解决问题。
|
||||||
|
你需要根据已经搜到的知识和搜索到的信息:
|
||||||
|
{{most_recent_memories}}判断是否需要更多的知识或信息来回答问题。
|
||||||
|
如果需要更多的知识或信息,你需要提出后续的子问题来扩展你的理解。
|
||||||
|
</目标>
|
||||||
|
|
||||||
<可用工具>
|
<可用工具>
|
||||||
1. KnowledgeRetrieve: 查询内部知识库以获取信息。\n可用知识库: {{knowledge_tools}}
|
1. KnowledgeRetrieve: 查询内部知识库以获取信息。\n可用知识库: {{knowledge_tools}}
|
||||||
2. WebSearch: 进行互联网搜索以获取最新或额外信息。\n 可用搜索工具: {{search_tools}}
|
2. WebSearch: 进行互联网搜索以获取最新或额外信息。\n 可用搜索工具: {{search_tools}}
|
||||||
3. 总结: 对多个来源的信息进行总结和综合。
|
|
||||||
</可用工具>
|
</可用工具>
|
||||||
|
|
||||||
<流程>
|
<流程>
|
||||||
1. 分析任务并创建搜索计划。
|
1. 分析任务并创建搜索计划。
|
||||||
2. 选择使用一个或多个工具收集信息。
|
2. 选择使用一个或多个工具收集信息。
|
||||||
|
3. 对收集到的信息进行反思,判断是否足够回答问题。
|
||||||
</流程>
|
</流程>
|
||||||
|
|
||||||
<回复格式>
|
<回复格式>
|
||||||
严格按以下JSON格式输出,确保可直接解析:
|
严格按以下JSON格式输出,确保可直接解析:
|
||||||
{
|
{
|
||||||
"status": "split_query (拆解搜索计划) | summary (仅当可用知识可以回答用户问题) | reflection (反思) "
|
|
||||||
"tools": [{
|
"tools": [{
|
||||||
"tool_type": "工具类型"
|
"tool_type": "工具类型"
|
||||||
"args": "args1",
|
"args": "args1",
|
||||||
}],
|
}],
|
||||||
"intention": "当前你的意图,
|
"intention": "当前你的意图,
|
||||||
"sub_queries": [],
|
"sub_queries": ["子问题1", "子问题2"],
|
||||||
|
"knowledge_gap": "总结缺乏关于性能指标和基准的信息",
|
||||||
|
"status": "reflection(反思) | summarize(最后总结)",
|
||||||
}
|
}
|
||||||
</回复格式>
|
</回复格式>
|
||||||
|
|
||||||
@ -116,13 +123,14 @@ _DEEPSEARCH_SYSTEM_TEMPLATE = """
|
|||||||
: 2022年诺贝尔文学奖得主
|
: 2022年诺贝尔文学奖得主
|
||||||
返回
|
返回
|
||||||
{
|
{
|
||||||
"status": "split_query"
|
"status": "reflection"
|
||||||
"tools"?: [{
|
"tools"?: [{
|
||||||
"tool_type": "KnowledgeRetrieve"
|
"tool_type": "KnowledgeRetrieve"
|
||||||
"args": "knowledge_name",
|
"args": "knowledge_name",
|
||||||
}],
|
}],
|
||||||
"intention": "你的拆解意图,
|
"intention": "你的拆解意图,
|
||||||
"sub_queries": [],
|
"knowledge_gap": "总结缺乏关于2022年诺贝尔文学奖得主的信息",
|
||||||
|
"sub_queries": ["子问题1","子问题2"],
|
||||||
}
|
}
|
||||||
</示例>
|
</示例>
|
||||||
|
|
||||||
@ -135,6 +143,36 @@ _DEEPSEARCH_SYSTEM_TEMPLATE = """
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_DEEPSEARCH_USER_TEMPLATE = """"""
|
_DEEPSEARCH_USER_TEMPLATE = """"""
|
||||||
|
_DEEPSEARCH_FINIAL_SUMMARY_TEMPLATE = """
|
||||||
|
<GOAL>
|
||||||
|
Generate a high-quality summary of the provided context.
|
||||||
|
</GOAL>
|
||||||
|
|
||||||
|
<REQUIREMENTS>
|
||||||
|
When creating a NEW summary:
|
||||||
|
1. Highlight the most relevant information related to the user topic from the search results
|
||||||
|
2. Ensure a coherent flow of information
|
||||||
|
|
||||||
|
When EXTENDING an existing summary:
|
||||||
|
{{most_recent_memories}}
|
||||||
|
1. Read the existing summary and new search results carefully.
|
||||||
|
2. Compare the new information with the existing summary.
|
||||||
|
3. For each piece of new information:
|
||||||
|
a. If it's related to existing points, integrate it into the relevant paragraph.
|
||||||
|
b. If it's entirely new but relevant, add a new paragraph with a smooth transition.
|
||||||
|
c. If it's not relevant to the user topic, skip it.
|
||||||
|
4. Ensure all additions are relevant to the user's topic.
|
||||||
|
5. Verify that your final output differs from the input summary.
|
||||||
|
< /REQUIREMENTS >
|
||||||
|
|
||||||
|
< FORMATTING >
|
||||||
|
- Start directly with the updated summary, without preamble or titles. Do not use XML tags in the output.
|
||||||
|
< /FORMATTING >
|
||||||
|
|
||||||
|
<Task>
|
||||||
|
Think carefully about the provided Context first. Then generate a summary of the context to address the User Input.
|
||||||
|
</Task>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
_REACT_WRITE_MEMORY_TEMPLATE = """\
|
_REACT_WRITE_MEMORY_TEMPLATE = """\
|
||||||
@ -176,42 +214,7 @@ class DeepSearchAgent(ConversableAgent):
|
|||||||
"""Init indicator AssistantAgent."""
|
"""Init indicator AssistantAgent."""
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
self._init_actions([DeepSearchAction, Terminate])
|
self._init_actions([DeepSearchAction])
|
||||||
|
|
||||||
# async def _a_init_reply_message(
|
|
||||||
# self,
|
|
||||||
# received_message: AgentMessage,
|
|
||||||
# rely_messages: Optional[List[AgentMessage]] = None,
|
|
||||||
# ) -> AgentMessage:
|
|
||||||
# reply_message = super()._init_reply_message(received_message, rely_messages)
|
|
||||||
#
|
|
||||||
# tool_packs = ToolPack.from_resource(self.resource)
|
|
||||||
# action_space = []
|
|
||||||
# action_space_names = []
|
|
||||||
# action_space_simple_desc = []
|
|
||||||
# if tool_packs:
|
|
||||||
# tool_pack = tool_packs[0]
|
|
||||||
# for tool in tool_pack.sub_resources:
|
|
||||||
# tool_desc, _ = await tool.get_prompt(lang=self.language)
|
|
||||||
# action_space_names.append(tool.name)
|
|
||||||
# action_space.append(tool_desc)
|
|
||||||
# if isinstance(tool, BaseTool):
|
|
||||||
# tool_simple_desc = tool.description
|
|
||||||
# else:
|
|
||||||
# tool_simple_desc = tool.get_prompt()
|
|
||||||
# action_space_simple_desc.append(f"{tool.name}: {tool_simple_desc}")
|
|
||||||
# else:
|
|
||||||
# for action in self.actions:
|
|
||||||
# action_space_names.append(action.name)
|
|
||||||
# action_space.append(action.get_action_description())
|
|
||||||
# # self.actions
|
|
||||||
# reply_message.context = {
|
|
||||||
# "max_steps": self.max_retry_count,
|
|
||||||
# "action_space": "\n".join(action_space),
|
|
||||||
# "action_space_names": ", ".join(action_space_names),
|
|
||||||
# "action_space_simple_desc": "\n".join(action_space_simple_desc),
|
|
||||||
# }
|
|
||||||
# return reply_message
|
|
||||||
|
|
||||||
async def preload_resource(self) -> None:
|
async def preload_resource(self) -> None:
|
||||||
await super().preload_resource()
|
await super().preload_resource()
|
||||||
@ -289,14 +292,44 @@ class DeepSearchAgent(ConversableAgent):
|
|||||||
"knowledge_desc": self.resource.retriever_desc,
|
"knowledge_desc": self.resource.retriever_desc,
|
||||||
})
|
})
|
||||||
|
|
||||||
# new_resource = self.resource.apply(apply_func=_remove_tool)
|
|
||||||
# if new_resource:
|
|
||||||
# resource_prompt, resource_reference = await new_resource.get_prompt(
|
|
||||||
# lang=self.language, question=question
|
|
||||||
# )
|
|
||||||
# return resource_prompt, resource_reference
|
|
||||||
return json.dumps(abilities, ensure_ascii=False), []
|
return json.dumps(abilities, ensure_ascii=False), []
|
||||||
|
|
||||||
|
async def build_system_prompt(
|
||||||
|
self,
|
||||||
|
question: Optional[str] = None,
|
||||||
|
most_recent_memories: Optional[str] = None,
|
||||||
|
resource_vars: Optional[Dict] = None,
|
||||||
|
context: Optional[Dict[str, Any]] = None,
|
||||||
|
is_retry_chat: bool = False,
|
||||||
|
):
|
||||||
|
"""Build system prompt."""
|
||||||
|
system_prompt = None
|
||||||
|
if self.bind_prompt:
|
||||||
|
prompt_param = {}
|
||||||
|
if resource_vars:
|
||||||
|
prompt_param.update(resource_vars)
|
||||||
|
if context:
|
||||||
|
prompt_param.update(context)
|
||||||
|
if self.bind_prompt.template_format == "f-string":
|
||||||
|
system_prompt = self.bind_prompt.template.format(
|
||||||
|
**prompt_param,
|
||||||
|
)
|
||||||
|
elif self.bind_prompt.template_format == "jinja2":
|
||||||
|
system_prompt = Template(self.bind_prompt.template).render(prompt_param)
|
||||||
|
else:
|
||||||
|
logger.warning("Bind prompt template not exsit or format not support!")
|
||||||
|
if not system_prompt:
|
||||||
|
param: Dict = context if context else {}
|
||||||
|
system_prompt = await self.build_prompt(
|
||||||
|
question=question,
|
||||||
|
is_system=True,
|
||||||
|
most_recent_memories=most_recent_memories,
|
||||||
|
resource_vars=resource_vars,
|
||||||
|
is_retry_chat=is_retry_chat,
|
||||||
|
**param,
|
||||||
|
)
|
||||||
|
return system_prompt
|
||||||
|
|
||||||
def prepare_act_param(
|
def prepare_act_param(
|
||||||
self,
|
self,
|
||||||
received_message: Optional[AgentMessage],
|
received_message: Optional[AgentMessage],
|
||||||
@ -350,6 +383,8 @@ class DeepSearchAgent(ConversableAgent):
|
|||||||
rely_action_out=last_out,
|
rely_action_out=last_out,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
if not last_out.terminate:
|
||||||
|
self.profile.system_prompt_template = _DEEPSEARCH_FINIAL_SUMMARY_TEMPLATE
|
||||||
span.metadata["action_out"] = last_out.to_dict() if last_out else None
|
span.metadata["action_out"] = last_out.to_dict() if last_out else None
|
||||||
if not last_out:
|
if not last_out:
|
||||||
raise ValueError("Action should return value!")
|
raise ValueError("Action should return value!")
|
||||||
@ -366,7 +401,7 @@ class DeepSearchAgent(ConversableAgent):
|
|||||||
) -> Union[str, List["AgentMessage"]]:
|
) -> Union[str, List["AgentMessage"]]:
|
||||||
memories = await self.memory.read(observation)
|
memories = await self.memory.read(observation)
|
||||||
not_json_memories = []
|
not_json_memories = []
|
||||||
messages = []
|
# messages = []
|
||||||
structured_memories = []
|
structured_memories = []
|
||||||
for m in memories:
|
for m in memories:
|
||||||
if m.raw_observation:
|
if m.raw_observation:
|
||||||
@ -381,46 +416,48 @@ class DeepSearchAgent(ConversableAgent):
|
|||||||
except Exception:
|
except Exception:
|
||||||
not_json_memories.append(m.raw_observation)
|
not_json_memories.append(m.raw_observation)
|
||||||
|
|
||||||
for mem_dict in structured_memories:
|
# for mem_dict in structured_memories:
|
||||||
question = mem_dict.get("question")
|
# question = mem_dict.get("question")
|
||||||
thought = mem_dict.get("thought")
|
# thought = mem_dict.get("thought")
|
||||||
action = mem_dict.get("action")
|
# action = mem_dict.get("action")
|
||||||
action_input = mem_dict.get("action_input")
|
# action_input = mem_dict.get("action_input")
|
||||||
observation = mem_dict.get("observation")
|
# observation = mem_dict.get("observation")
|
||||||
if question:
|
# if question:
|
||||||
messages.append(
|
# messages.append(
|
||||||
AgentMessage(
|
# AgentMessage(
|
||||||
content=f"Question: {question}",
|
# content=f"Question: {question}",
|
||||||
role=ModelMessageRoleType.HUMAN,
|
# role=ModelMessageRoleType.HUMAN,
|
||||||
)
|
# )
|
||||||
)
|
# )
|
||||||
ai_content = []
|
# ai_content = []
|
||||||
if thought:
|
# if thought:
|
||||||
ai_content.append(f"Thought: {thought}")
|
# ai_content.append(f"Thought: {thought}")
|
||||||
if action:
|
# if action:
|
||||||
ai_content.append(f"Action: {action}")
|
# ai_content.append(f"Action: {action}")
|
||||||
if action_input:
|
# if action_input:
|
||||||
ai_content.append(f"Action Input: {action_input}")
|
# ai_content.append(f"Action Input: {action_input}")
|
||||||
messages.append(
|
# messages.append(
|
||||||
AgentMessage(
|
# AgentMessage(
|
||||||
content="\n".join(ai_content),
|
# content="\n".join(ai_content),
|
||||||
role=ModelMessageRoleType.AI,
|
# role=ModelMessageRoleType.AI,
|
||||||
)
|
# )
|
||||||
)
|
# )
|
||||||
|
#
|
||||||
if observation:
|
# if observation:
|
||||||
messages.append(
|
# messages.append(
|
||||||
AgentMessage(
|
# AgentMessage(
|
||||||
content=f"Observation: {observation}",
|
# content=f"Observation: {observation}",
|
||||||
role=ModelMessageRoleType.HUMAN,
|
# role=ModelMessageRoleType.HUMAN,
|
||||||
)
|
# )
|
||||||
)
|
# )
|
||||||
|
#
|
||||||
if not messages and not_json_memories:
|
# if not messages and not_json_memories:
|
||||||
messages.append(
|
# messages.append(
|
||||||
AgentMessage(
|
# AgentMessage(
|
||||||
content="\n".join(not_json_memories),
|
# content="\n".join(not_json_memories),
|
||||||
role=ModelMessageRoleType.HUMAN,
|
# role=ModelMessageRoleType.HUMAN,
|
||||||
)
|
# )
|
||||||
)
|
# )
|
||||||
return messages
|
return "\n".join([
|
||||||
|
mem_dict.get("observation") for mem_dict in structured_memories
|
||||||
|
])
|
||||||
|
Loading…
Reference in New Issue
Block a user