feat: (0.6)New UI (#1855)

Co-authored-by: 夏姜 <wenfengjiang.jwf@digital-engine.com> Co-authored-by: aries_ckt <916701291@qq.com> Co-authored-by: wb-lh513319 <wb-lh513319@alibaba-inc.com> Co-authored-by: csunny <cfqsunny@163.com>
2025-09-08 20:39:44 +00:00 · 2024-08-21 17:37:45 +08:00
parent 3fc82693ba
commit b124ecc10b
824 changed files with 93371 additions and 2515 deletions
--- a/dbgpt/app/scene/chat_data/chat_excel/excel_analyze/chat.py
+++ b/dbgpt/app/scene/chat_data/chat_excel/excel_analyze/chat.py
@@ -32,19 +32,15 @@ class ChatExcel(BaseChat):
            - model_name:(str) llm model name
            - select_param:(str) file path
        """
-        chat_mode = ChatScene.ChatExcel

        self.select_param = chat_param["select_param"]
        self.model_name = chat_param["model_name"]
        chat_param["chat_mode"] = ChatScene.ChatExcel
-        if has_path(self.select_param):
-            self.excel_reader = ExcelReader(self.select_param)
-        else:
-            self.excel_reader = ExcelReader(
-                os.path.join(
-                    KNOWLEDGE_UPLOAD_ROOT_PATH, chat_mode.value(), self.select_param
-                )
-            )
+        self.chat_param = chat_param
+        self.excel_reader = ExcelReader(
+            chat_param["chat_session_id"], self.select_param
+        )
+
        self.api_call = ApiCall()
        super().__init__(chat_param=chat_param)

@@ -65,9 +61,10 @@ class ChatExcel(BaseChat):
            "chat_session_id": self.chat_session_id,
            "user_input": "[" + self.excel_reader.excel_file_name + "]" + " Analyze！",
            "parent_mode": self.chat_mode,
-            "select_param": self.excel_reader.excel_file_name,
+            "select_param": self.select_param,
            "excel_reader": self.excel_reader,
            "model_name": self.model_name,
+            "user_name": self.chat_param.get("user_name", None),
        }
        learn_chat = ExcelLearning(**chat_param)
        result = await learn_chat.nostream_call()
--- a/dbgpt/app/scene/chat_data/chat_excel/excel_learning/chat.py
+++ b/dbgpt/app/scene/chat_data/chat_excel/excel_learning/chat.py
@@ -19,6 +19,7 @@ class ExcelLearning(BaseChat):
        select_param: str = None,
        excel_reader: Any = None,
        model_name: str = None,
+        user_name: str = None,
    ):
        chat_mode = ChatScene.ExcelLearning
        """ """
@@ -30,6 +31,7 @@ class ExcelLearning(BaseChat):
            "current_user_input": user_input,
            "select_param": select_param,
            "model_name": model_name,
+            "user_name": user_name,
        }
        super().__init__(chat_param=chat_param)
        if parent_mode:
--- a/dbgpt/app/scene/chat_data/chat_excel/excel_reader.py
+++ b/dbgpt/app/scene/chat_data/chat_excel/excel_reader.py
@@ -1,3 +1,5 @@
+import io
+import json
 import logging
 import os

@@ -17,6 +19,7 @@ from pyparsing import (
    delimitedList,
 )

+from dbgpt.util.file_client import FileClient
 from dbgpt.util.pd_utils import csv_colunm_foramt
 from dbgpt.util.string_utils import is_chinese_include_number

@@ -227,28 +230,67 @@ def is_chinese(text):


 class ExcelReader:
-    def __init__(self, file_path):
-        file_name = os.path.basename(file_path)
-        self.file_name_without_extension = os.path.splitext(file_name)[0]
-        encoding, confidence = detect_encoding(file_path)
-        logger.info(f"Detected Encoding: {encoding} (Confidence: {confidence})")
-        self.excel_file_name = file_name
-        self.extension = os.path.splitext(file_name)[1]
+    def __init__(self, conv_uid, file_param):
+        self.conv_uid = conv_uid
+        self.file_param = file_param
+        if isinstance(file_param, str) and os.path.isabs(file_param):
+            file_name = os.path.basename(file_param)
+            self.file_name_without_extension = os.path.splitext(file_name)[0]
+            encoding, confidence = detect_encoding(file_param)
+
+            self.excel_file_name = file_name
+            self.extension = os.path.splitext(file_name)[1]
+
+            file_info = file_param
+        else:
+            if isinstance(file_param, dict):
+                file_path = file_param.get("file_path", None)
+                if not file_path:
+                    raise ValueError("Not find file path!")
+                else:
+                    file_name = os.path.basename(file_path.replace(f"{conv_uid}_", ""))
+
+            else:
+                temp_obj = json.loads(file_param)
+                file_path = temp_obj.get("file_path", None)
+                file_name = os.path.basename(file_path.replace(f"{conv_uid}_", ""))
+
+            self.file_name_without_extension = os.path.splitext(file_name)[0]
+
+            self.excel_file_name = file_name
+            self.extension = os.path.splitext(file_name)[1]
+
+            file_client = FileClient()
+            file_info = file_client.read_file(
+                conv_uid=self.conv_uid, file_key=file_path
+            )
+
+            result = chardet.detect(file_info)
+            encoding = result["encoding"]
+            confidence = result["confidence"]
+
+        logger.info(
+            f"File Info:{len(file_info)},Detected Encoding: {encoding} (Confidence: {confidence})"
+        )
+
        # read excel file
-        if file_path.endswith(".xlsx") or file_path.endswith(".xls"):
-            df_tmp = pd.read_excel(file_path, index_col=False)
+        if file_name.endswith(".xlsx") or file_name.endswith(".xls"):
+            df_tmp = pd.read_excel(file_info, index_col=False)
            self.df = pd.read_excel(
-                file_path,
+                file_info,
                index_col=False,
                converters={i: csv_colunm_foramt for i in range(df_tmp.shape[1])},
            )
-        elif file_path.endswith(".csv"):
-            df_tmp = pd.read_csv(file_path, index_col=False, encoding=encoding)
-            self.df = pd.read_csv(
-                file_path,
+        elif file_name.endswith(".csv"):
+            df_tmp = pd.read_csv(
+                file_info if isinstance(file_info, str) else io.BytesIO(file_info),
+                index_col=False,
+                encoding=encoding,
+            )
+            self.df = pd.read_csv(
+                file_info if isinstance(file_info, str) else io.BytesIO(file_info),
                index_col=False,
                encoding=encoding,
-                # csv_colunm_foramt 可以修改更多，只是针对美元人民币符号，假如是“你好¥¥¥”则会报错！
                converters={i: csv_colunm_foramt for i in range(df_tmp.shape[1])},
            )
        else: