feat(ChatDB): ChatDB uses fine-tuned model

1. Compatible with community models that output pure SQL
This commit is contained in:
yhjun1026 2023-11-17 15:59:35 +08:00
parent 343d64652c
commit ea363a43ad
26 changed files with 106 additions and 78 deletions

View File

@ -242,7 +242,7 @@ class ApiCall:
return False
def __deal_error_md_tags(self, all_context, api_context, include_end: bool = True):
error_md_tags = ["```", "```python", "```xml", "```json", "```markdown"]
error_md_tags = ["```", "```python", "```xml", "```json", "```markdown", "```sql"]
if include_end == False:
md_tag_end = ""
else:
@ -261,7 +261,6 @@ class ApiCall:
return all_context
def api_view_context(self, all_context: str, display_mode: bool = False):
error_mk_tags = ["```", "```python", "```xml"]
call_context_map = extract_content_open_ending(
all_context, self.agent_prefix, self.agent_end, True
)
@ -294,8 +293,10 @@ class ApiCall:
now_time = datetime.now().timestamp() * 1000
cost = (now_time - self.start_time) / 1000
cost_str = "{:.2f}".format(cost)
for tag in error_mk_tags:
all_context = all_context.replace(tag + api_context, api_context)
all_context = self.__deal_error_md_tags(
all_context, api_context
)
all_context = all_context.replace(
api_context,
f'\n<span style="color:green">Waiting...{cost_str}S</span>\n',
@ -444,29 +445,36 @@ class ApiCall:
Returns:
ChartView protocol text
"""
if self.__is_need_wait_plugin_call(llm_text):
# wait api call generate complete
if self.check_last_plugin_call_ready(llm_text):
self.update_from_context(llm_text)
for key, value in self.plugin_status_map.items():
if value.status == Status.TODO.value:
value.status = Status.RUNNING.value
logging.info(f"sql展示执行:{value.name},{value.args}")
try:
sql = value.args["sql"]
if sql is not None and len(sql) > 0:
data_df = sql_run_func(sql)
value.df = data_df
value.api_result = json.loads(data_df.to_json(orient='records', date_format='iso', date_unit='s'))
value.status = Status.COMPLETED.value
else:
value.status = Status.FAILED.value
value.err_msg = "No executable sql"
try:
if self.__is_need_wait_plugin_call(llm_text):
# wait api call generate complete
if self.check_last_plugin_call_ready(llm_text):
self.update_from_context(llm_text)
for key, value in self.plugin_status_map.items():
if value.status == Status.TODO.value:
value.status = Status.RUNNING.value
logging.info(f"sql展示执行:{value.name},{value.args}")
try:
sql = value.args["sql"]
if sql is not None and len(sql) > 0:
data_df = sql_run_func(sql)
value.df = data_df
value.api_result = json.loads(
data_df.to_json(orient='records', date_format='iso', date_unit='s'))
value.status = Status.COMPLETED.value
else:
value.status = Status.FAILED.value
value.err_msg = "No executable sql"
except Exception as e:
value.status = Status.FAILED.value
value.err_msg = str(e)
value.end_time = datetime.now().timestamp() * 1000
except Exception as e:
logging.error("Api parsing exception", e)
value.status = Status.FAILED.value
value.err_msg = "Api parsing exception," + str(e)
except Exception as e:
value.status = Status.FAILED.value
value.err_msg = str(e)
value.end_time = datetime.now().timestamp() * 1000
return self.api_view_context(llm_text, True)

View File

@ -215,6 +215,7 @@ class BaseOutputParser(ABC):
.replace("\\n", " ")
.replace("\n", " ")
.replace("\\", " ")
.replace("\_", "_")
)
cleaned_output = self.__illegal_json_ends(cleaned_output)
return cleaned_output

View File

@ -111,6 +111,10 @@ class BaseChat(ABC):
def do_action(self, prompt_response):
    """Return ``prompt_response`` unchanged.

    Args:
        prompt_response: The value to pass through.

    Returns:
        The very same ``prompt_response`` object that was passed in.
    """
    return prompt_response
def message_adjust(self):
    """No-op hook: this implementation does nothing and returns ``None``.

    NOTE(review): it appears to be invoked right after the view message is
    added to ``current_message``; confirm against the call site.
    """
    return None
def get_llm_speak(self, prompt_define_response):
if hasattr(prompt_define_response, "thoughts"):
if isinstance(prompt_define_response.thoughts, dict):
@ -294,6 +298,8 @@ class BaseChat(ABC):
view_message = view_message.replace("\n", "\\n")
self.current_message.add_view_message(view_message)
self.message_adjust()
span.end()
except Exception as e:
print(traceback.format_exc())

View File

@ -64,7 +64,7 @@ class ChatAgent(BaseChat):
return input_values
def stream_plugin_call(self, text):
text = text.replace("\n", " ")
text = text.replace("\\n", " ").replace("\n", " ").replace("\_", "_").replace("\\", " ")
with root_tracer.start_span(
"ChatAgent.stream_plugin_call.api_call", metadata={"text": text}
):

View File

@ -42,7 +42,8 @@ _DEFAULT_TEMPLATE_ZH = """
3.根据上面约束的方式生成每个工具的调用对于工具使用的提示文本需要在工具使用前生成
4.如果用户目标无法理解和意图不明确优先使用搜索引擎工具
5.参数内容可能需要根据用户的目标推理得到不仅仅是从文本提取
6.约束条件和工具信息作为推理过程的辅助信息不要表达在给用户的输出内容中
6.约束条件和工具信息作为推理过程的辅助信息对应内容不要表达在给用户的输出内容中
7.不要把<api-call></api-call>部分内容放在markdown标签里
{expand_constraints}
工具列表:

View File

@ -100,7 +100,7 @@ class ChatExcel(BaseChat):
return result
def stream_plugin_call(self, text):
text = text.replace("\n", " ")
text = text.replace("\\n", " ").replace("\n", " ").replace("\_", "_").replace("\\", " ")
with root_tracer.start_span(
"ChatExcel.stream_plugin_call.run_display_sql", metadata={"text": text}
):

View File

@ -12,7 +12,7 @@ CFG = Config()
_PROMPT_SCENE_DEFINE_EN = "You are a data analysis expert. "
_DEFAULT_TEMPLATE_EN = """
Please use the data structure information in the above historical dialogue and combine it with data analysis to answer the user's questions while satisfying the constraints.
Please use the data structure column analysis information generated in the above historical dialogue to answer the user's questions through duckdb sql data analysis under the following constraints..
Constraint:
1.Please fully understand the user's problem and use duckdb sql for analysis. The analysis content is returned in the output format required below. Please output the sql in the corresponding sql parameter.
@ -30,14 +30,14 @@ User Questions:
_PROMPT_SCENE_DEFINE_ZH = """你是一个数据分析专家!"""
_DEFAULT_TEMPLATE_ZH = """
请使用上述历史对话中生成的数据结构信息在满足下面约束条件下通过duckdb sql数据分析回答用户的问题
请使用历史对话中的数据结构信息在满足下面约束条件下通过duckdb sql数据分析回答用户的问题
约束条件:
1.请充分理解用户的问题使用duckdb sql的方式进行分析 分析内容按下面要求的输出格式返回sql请输出在对应的sql参数中
2.请从如下给出的展示方式种选择最优的一种用以进行数据渲染将类型名称放入返回要求格式的name参数值种如果找不到最合适的则使用'Table'作为展示方式可用数据展示方式如下: {disply_type}
3.SQL中需要使用的表名是: {table_name},请检查你生成的sql不要使用没在数据结构中的列名
4.优先使用数据分析的方式回答如果用户问题不涉及数据分析内容你可以按你的理解进行回答
5.要求的输出格式中<api-call></api-call>部分需要被代码解析执行请确保这部分内容按要求输出不要参考历史信息的返回格式请按下面要求返回
请确保你的输出格式如下:
请确保你的输出内容格式如下:
对用户说的想法摘要.<api-call><name>[数据展示方式]</name><args><sql>[正确的duckdb数据分析sql]</sql></args></api-call>
用户问题{user_input}
@ -59,7 +59,7 @@ PROMPT_NEED_STREAM_OUT = True
# Temperature is a configuration hyperparameter that controls the randomness of language model output.
# A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output.
# For example, if you adjust the temperature to 0.5, the model will usually generate text that is more predictable and less creative than if you set the temperature to 1.0.
PROMPT_TEMPERATURE = 0.8
PROMPT_TEMPERATURE = 0.3
prompt = PromptTemplate(
template_scene=ChatScene.ChatExcel.value(),

View File

@ -4,6 +4,7 @@ from typing import Any, Dict
from pilot.scene.base_message import (
HumanMessage,
ViewMessage,
AIMessage
)
from pilot.scene.base_chat import BaseChat
from pilot.scene.base import ChatScene
@ -59,3 +60,14 @@ class ExcelLearning(BaseChat):
"file_name": self.excel_reader.excel_file_name
}
return input_values
def message_adjust(self):
    """Copy the rendered view message into every AI message.

    Scans ``self.current_message.messages`` for entries whose ``type``
    equals ``ViewMessage.type``; when several exist, the content of the
    last one is used. That content then replaces the ``content`` of every
    entry whose ``type`` equals ``AIMessage.type``.

    If no view message is present the AI messages are left untouched, so
    their content is never silently replaced by an empty string.
    """
    messages = self.current_message.messages

    # ``None`` is the "not found" sentinel so that a view message whose
    # content is legitimately empty is still propagated.
    view_content = None
    for message in messages:
        if message.type == ViewMessage.type:
            view_content = message.content

    if view_content is None:
        return

    for message in messages:
        if message.type == AIMessage.type:
            message.content = view_content

View File

@ -21,7 +21,6 @@ class LearningExcelOutputParser(BaseOutputParser):
super().__init__(sep=sep, is_stream_out=is_stream_out)
self.is_downgraded = False
def parse_prompt_response(self, model_out_text):
try:
clean_str = super().parse_prompt_response(model_out_text)
@ -29,7 +28,7 @@ class LearningExcelOutputParser(BaseOutputParser):
response = json.loads(clean_str)
for key in sorted(response):
if key.strip() == "DataAnalysis":
desciption = response[key]
desciption = response[key]
if key.strip() == "ColumnAnalysis":
clounms = response[key]
if key.strip() == "AnalysisProgram":
@ -37,38 +36,40 @@ class LearningExcelOutputParser(BaseOutputParser):
return ExcelResponse(desciption=desciption, clounms=clounms, plans=plans)
except Exception as e:
logger.error(f"parse_prompt_response Faild!{str(e)}")
self.is_downgraded = True
return ExcelResponse(desciption=model_out_text, clounms=self.data_schema, plans=None)
clounms = []
for name in self.data_schema:
clounms.append({name: "-"})
return ExcelResponse(desciption=model_out_text, clounms=clounms, plans=None)
def __build_colunms_html(self, clounms_data):
    """Render the column descriptions as a markdown bullet list.

    Args:
        clounms_data: Iterable of mappings, each mapping column names to
            their description. ``None`` or an empty iterable yields only
            the section header.

    Returns:
        A string starting with the ``### **Data Structure**`` header and
        followed by one ``- **<n>.[<name>]** _<description>_`` line per
        key, where ``<n>`` is the 1-based position of the mapping in
        ``clounms_data`` (keys of the same mapping share that number).
    """
    parts = ["### **Data Structure**\n"]
    # ``or []`` tolerates a missing column list instead of raising
    # TypeError, in line with how the plans section treats missing data.
    for column_index, item in enumerate(clounms_data or [], start=1):
        parts.extend(
            f"- **{column_index}.[{key}]** _{value}_\n"
            for key, value in item.items()
        )
    return "".join(parts)
def __build_plans_html(self, plans_data):
    """Render the analysis plans as a markdown section.

    Args:
        plans_data: Iterable of plan entries; each entry is formatted with
            ``str.format`` semantics via an f-string. ``None`` or an empty
            iterable yields only the section header.

    Returns:
        A string starting with the ``### **Analysis plans**`` header and
        followed by one ``<plan> `` line (trailing space kept so markdown
        renders a line break) per entry.
    """
    header = "### **Analysis plans**\n"
    if not plans_data:
        return header
    return header + "".join(f"{item} \n" for item in plans_data)
def parse_view_response(self, speak, data, prompt_response) -> str:
if data and not isinstance(data, str):
### tool out data to table view
html_title = f"### **Data Summary**\n{data.desciption} "
html_colunms = f"### **Data Structure**\n"
if self.is_downgraded:
column_index = 0
for item in data.clounms:
column_index += 1
html_colunms = (
html_colunms + f"- **{column_index}.[{item}]** _未知_\n"
)
else:
column_index = 0
for item in data.clounms:
column_index += 1
keys = item.keys()
for key in keys:
html_colunms = (
html_colunms + f"- **{column_index}.[{key}]** _{item[key]}_\n"
)
html_colunms = self.__build_colunms_html(data.clounms)
html_plans = self.__build_plans_html(data.plans)
html_plans = f"### **Recommended analysis plan**\n"
index = 0
if data.plans:
for item in data.plans:
index += 1
html_plans = html_plans + f"{item} \n"
html = f"""{html_title}\n{html_colunms}\n{html_plans}"""
return html
else:

View File

@ -28,12 +28,11 @@ _DEFAULT_TEMPLATE_ZH = """
下面是用户文件{file_name}的一部分数据请学习理解该数据的结构和内容按要求输出解析结果:
{data_example}
分析各列数据的含义和作用并对专业术语进行简单明了的解释, 如果是时间类型请给出时间格式类似:yyyy-MM-dd HH:MM:ss.
将列名作为key分析解释作为value生成json数组如[\\{{"列名1": "分析解释内容1"\\}},\\{{"列名2":"分析解释2"\\}}]并输出在返回json内容的ColumnAnalysis属性中.
请不要修改或者翻译列名确保和给出数据列名一致
将列名作为属性名分析解释作为属性值,组成json数组并输出在返回json内容的ColumnAnalysis属性中.
请不要修改或者翻译列名确保和给出数据列名一致.
针对数据从不同维度提供一些有用的分析思路给用户
提供一些分析方案思路请一步一步思考
请以确保只以JSON格式回答格式如下
请一步一步思考,确保只以JSON格式回答具体格式如下
{response}
"""
@ -67,7 +66,7 @@ PROMPT_NEED_STREAM_OUT = False
# Temperature is a configuration hyperparameter that controls the randomness of language model output.
# A high temperature produces more unpredictable and creative results, while a low temperature produces more common and conservative output.
# For example, if you adjust the temperature to 0.5, the model will usually generate text that is more predictable and less creative than if you set the temperature to 1.0.
PROMPT_TEMPERATURE = 0.5
PROMPT_TEMPERATURE = 0.8
prompt = PromptTemplate(
template_scene=ChatScene.ExcelLearning.value(),

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1 +1 @@
self.__BUILD_MANIFEST=function(s,c,a,e,t,d,n,b,k,h,i,f){return{__rewrites:{beforeFiles:[],afterFiles:[],fallback:[]},"/":["static/chunks/29107295-90b90cb30c825230.js",s,c,a,d,n,b,"static/chunks/539-dcd22f1f6b99ebee.js","static/chunks/pages/index-e5fd29b9e2d6bb59.js"],"/_error":["static/chunks/pages/_error-dee72aff9b2e2c12.js"],"/agent":[s,c,e,k,t,d,"static/chunks/pages/agent-762425b419303d9d.js"],"/chat":["static/chunks/pages/chat-12498cfa4a376095.js"],"/chat/[scene]/[id]":["static/chunks/pages/chat/[scene]/[id]-39a3901132926636.js"],"/database":[s,c,a,e,t,n,h,"static/chunks/643-d8f53f40dd3c5b40.js","static/chunks/pages/database-b4f32916b9d484a7.js"],"/knowledge":[i,s,c,e,k,t,d,n,"static/chunks/109-0dace28dd2667396.js","static/chunks/pages/knowledge-fbe0df9d6a60a0b5.js"],"/knowledge/chunk":[e,t,"static/chunks/pages/knowledge/chunk-765a4b202d79ac28.js"],"/models":[i,s,c,a,f,h,"static/chunks/pages/models-cb9ab490969e70dd.js"],"/prompt":[s,c,a,f,"static/chunks/837-e6d4d1eb9e057050.js",b,"static/chunks/607-b224c640f6907e4b.js","static/chunks/pages/prompt-7f839dfd56bc4c20.js"],sortedPages:["/","/_app","/_error","/agent","/chat","/chat/[scene]/[id]","/database","/knowledge","/knowledge/chunk","/models","/prompt"]}}("static/chunks/64-91b49d45b9846775.js","static/chunks/479-b20198841f9a6a1e.js","static/chunks/9-bb2c54d5c06ba4bf.js","static/chunks/442-197e6cbc1e54109a.js","static/chunks/813-cce9482e33f2430c.js","static/chunks/924-ba8e16df4d61ff5c.js","static/chunks/411-d9eba2657c72f766.js","static/chunks/270-2f094a936d056513.js","static/chunks/365-a224ec0807392b35.js","static/chunks/928-74244889bd7f2699.js","static/chunks/75fc9c18-a784766a129ec5fb.js","static/chunks/947-5980a3ff49069ddd.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
self.__BUILD_MANIFEST=function(s,c,a,e,t,d,n,k,b,h,f,i){return{__rewrites:{beforeFiles:[],afterFiles:[],fallback:[]},"/":["static/chunks/29107295-90b90cb30c825230.js",s,c,a,d,n,k,"static/chunks/539-dcd22f1f6b99ebee.js","static/chunks/pages/index-b1c8f59fe7e5d7df.js"],"/_error":["static/chunks/pages/_error-dee72aff9b2e2c12.js"],"/agent":[s,c,e,b,t,d,"static/chunks/pages/agent-762425b419303d9d.js"],"/chat":["static/chunks/pages/chat-12498cfa4a376095.js"],"/chat/[scene]/[id]":["static/chunks/pages/chat/[scene]/[id]-39a3901132926636.js"],"/database":[s,c,a,e,t,n,h,"static/chunks/643-d8f53f40dd3c5b40.js","static/chunks/pages/database-b4f32916b9d484a7.js"],"/knowledge":[f,s,c,e,b,t,d,n,"static/chunks/109-0dace28dd2667396.js","static/chunks/pages/knowledge-fbe0df9d6a60a0b5.js"],"/knowledge/chunk":[e,t,"static/chunks/pages/knowledge/chunk-765a4b202d79ac28.js"],"/models":[f,s,c,a,i,h,"static/chunks/pages/models-cb9ab490969e70dd.js"],"/prompt":[s,c,a,i,"static/chunks/837-e6d4d1eb9e057050.js",k,"static/chunks/607-b224c640f6907e4b.js","static/chunks/pages/prompt-7f839dfd56bc4c20.js"],sortedPages:["/","/_app","/_error","/agent","/chat","/chat/[scene]/[id]","/database","/knowledge","/knowledge/chunk","/models","/prompt"]}}("static/chunks/64-91b49d45b9846775.js","static/chunks/479-b20198841f9a6a1e.js","static/chunks/9-bb2c54d5c06ba4bf.js","static/chunks/442-197e6cbc1e54109a.js","static/chunks/813-cce9482e33f2430c.js","static/chunks/924-ba8e16df4d61ff5c.js","static/chunks/411-d9eba2657c72f766.js","static/chunks/270-2f094a936d056513.js","static/chunks/365-a224ec0807392b35.js","static/chunks/928-74244889bd7f2699.js","static/chunks/75fc9c18-a784766a129ec5fb.js","static/chunks/947-5980a3ff49069ddd.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long