bugfix(ChatExcel): ChatExcel Language confusion bug

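1. Fix ChatExcel language confusion bug: reword the analyze/learning prompts and the auto-generated user input, pass the file name into the learning prompt, and skip numeric conversion for datetime columns.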
2025-07-31 15:47:05 +00:00 · 2023-11-09 16:04:45 +08:00 · 2023-11-09 16:04:45 +08:00 · 5924f34219
commit 5924f34219
parent 2b948c34a5
15 changed files with 20 additions and 18 deletions
--- a/pilot/scene/chat_data/chat_excel/excel_analyze/chat.py
+++ b/pilot/scene/chat_data/chat_excel/excel_analyze/chat.py
@@ -76,7 +76,7 @@ class ChatExcel(BaseChat):
            return None
        chat_param = {
            "chat_session_id": self.chat_session_id,
-            "user_input": "[" + self.excel_reader.excel_file_name + "]" + " Analysis！",
+            "user_input": f"{self.excel_reader.excel_file_name} analyze！",
            "parent_mode": self.chat_mode,
            "select_param": self.excel_reader.excel_file_name,
            "excel_reader": self.excel_reader,
--- a/pilot/scene/chat_data/chat_excel/excel_analyze/prompt.py
+++ b/pilot/scene/chat_data/chat_excel/excel_analyze/prompt.py
@@ -22,7 +22,7 @@ Constraint:
    5.The <api-call></api-call> part of the required output format needs to be parsed by the code. Please ensure that this part of the content is output as required.
    
 Please respond in the following format:
-    thoughts.<api-call><name>[Data display method]</name><args><sql>[Correct duckdb data analysis sql]</sql></args></api-call>
+    Summary of your analytical thinking.<api-call><name>[Data display method]</name><args><sql>[Correct duckdb data analysis sql]</sql></args></api-call>
    
 User Questions:
    {user_input}
@@ -36,9 +36,9 @@ _DEFAULT_TEMPLATE_ZH = """
 	2.请从如下给出的展示方式种选择最优的一种用以进行数据渲染，将类型名称放入返回要求格式的name参数值种，如果找不到最合适的则使用'Table'作为展示方式，可用数据展示方式如下: {disply_type}
 	3.SQL中需要使用的表名是: {table_name},请检查你生成的sql，不要使用没在数据结构中的列名，。
 	4.优先使用数据分析的方式回答，如果用户问题不涉及数据分析内容，你可以按你的理解进行回答
-	5.要求的输出格式中<api-call></api-call>部分需要被代码解析只想，请确保这部分内容按要求输出
+	5.要求的输出格式中<api-call></api-call>部分需要被代码解析执行，请确保这部分内容按要求输出
 请确保你的输出格式如下:
-    你的想法.<api-call><name>[数据展示方式]</name><args><sql>[正确的duckdb数据分析sql]</sql></args></api-call>
+    分析思路总结.<api-call><name>[数据展示方式]</name><args><sql>[正确的duckdb数据分析sql]</sql></args></api-call>

 用户问题：{user_input}
 """
--- a/pilot/scene/chat_data/chat_excel/excel_learning/chat.py
+++ b/pilot/scene/chat_data/chat_excel/excel_learning/chat.py
@@ -55,5 +55,6 @@ class ExcelLearning(BaseChat):

        input_values = {
            "data_example": json.dumps(datas, cls=DateTimeEncoder),
+            "file_name": self.excel_reader.excel_file_name
        }
        return input_values
--- a/pilot/scene/chat_data/chat_excel/excel_learning/prompt.py
+++ b/pilot/scene/chat_data/chat_excel/excel_learning/prompt.py
@@ -12,7 +12,7 @@ CFG = Config()
 _PROMPT_SCENE_DEFINE_EN = "You are a data analysis expert. "

 _DEFAULT_TEMPLATE_EN = """
-This is an example data，please learn to understand the structure and content of this data:
+The following is part of the data of the user file {file_name}. Please learn to understand the structure and content of the data and output the parsing results as required:
    {data_example}
 Explain the meaning and function of each column, and give a simple and clear explanation of the technical terms， If it is a Date column, please summarize the Date format like: yyyy-MM-dd HH:MM:ss.
 Please do not modify or translate the column names, make sure they are consistent with the given data column names.
@@ -25,7 +25,7 @@ Please return your answer in JSON format, the return format is as follows:
 _PROMPT_SCENE_DEFINE_ZH = "你是一个数据分析专家. "

 _DEFAULT_TEMPLATE_ZH = """
-下面是一份示例数据，请学习理解该数据的结构和内容:
+下面是用户文件{file_name}的一部分数据，请学习理解该数据的结构和内容，按要求输出解析结果:
    {data_example}
 分析各列数据的含义和作用，并对专业术语进行简单明了的解释, 如果是时间类型请给出时间格式类似:yyyy-MM-dd HH:MM:ss.
 请不要修改或者翻译列名，确保和给出数据列名一致.
--- a/pilot/scene/chat_data/chat_excel/excel_reader.py
+++ b/pilot/scene/chat_data/chat_excel/excel_reader.py
@@ -251,7 +251,7 @@ if __name__ == "__main__":
 class ExcelReader:
    def __init__(self, file_path):
        file_name = os.path.basename(file_path)
-        file_name_without_extension = os.path.splitext(file_name)[0]
+        self.file_name_without_extension = os.path.splitext(file_name)[0]
        encoding, confidence = detect_encoding(file_path)
        logging.error(f"Detected Encoding: {encoding} (Confidence: {confidence})")
        self.excel_file_name = file_name
@@ -280,7 +280,8 @@ class ExcelReader:
        for column_name in df_tmp.columns:
            self.columns_map.update({column_name: excel_colunm_format(column_name)})
            try:
-                self.df[column_name] = pd.to_numeric(self.df[column_name])
+                if not pd.api.types.is_datetime64_ns_dtype(self.df[column_name]) :
+                    self.df[column_name] = pd.to_numeric(self.df[column_name])
                self.df[column_name] = self.df[column_name].fillna(0)
            except Exception as e:
                print("can't transfor numeric column" + column_name)
--- a/pilot/server/static/404.html
+++ b/pilot/server/static/404.html
--- a/pilot/server/static/404/index.html
+++ b/pilot/server/static/404/index.html
--- a/pilot/server/static/agent/index.html
+++ b/pilot/server/static/agent/index.html
--- a/pilot/server/static/chat/[scene]/[id]/index.html
+++ b/pilot/server/static/chat/[scene]/[id]/index.html
--- a/pilot/server/static/chat/index.html
+++ b/pilot/server/static/chat/index.html
--- a/pilot/server/static/database/index.html
+++ b/pilot/server/static/database/index.html
--- a/pilot/server/static/index.html
+++ b/pilot/server/static/index.html
--- a/pilot/server/static/knowledge/chunk/index.html
+++ b/pilot/server/static/knowledge/chunk/index.html
--- a/pilot/server/static/knowledge/index.html
+++ b/pilot/server/static/knowledge/index.html
--- a/pilot/server/static/prompt/index.html
+++ b/pilot/server/static/prompt/index.html