From 87f11b574ddb64acb021d0bc56087948881816a4 Mon Sep 17 00:00:00 2001
From: "alan.cl" <1165243776@qq.com>
Date: Mon, 13 Oct 2025 14:38:45 +0800
Subject: [PATCH] feat(benchmark): multi model post process

---
 .../service/benchmark/benchmark_service.py    | 28 +++++++++----------
 .../evaluate/service/benchmark/models.py      |  3 ++
 .../benchmark/user_input_execute_service.py   |  1 +
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py
index 4d322e20a..3ce2982b4 100644
--- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py
+++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py
@@ -326,10 +326,10 @@ class BenchmarkService(
         )
 
         output_sets = BenchmarkDataSets[OutputType]()
-        output_list = []
+        output_list: List[OutputType] = []
 
-        written_batches = set()  # 记录已写入批次
-        complete_map = {}  # 记录任务完成状态，使用Dict[int, OutputType]
+        written_batches: set[int] = set()
+        complete_map: Dict[int, OutputType] = {}
 
         # 线程锁，保证线程安全
         lock = threading.Lock()
@@ -356,7 +356,6 @@ class BenchmarkService(
                     f" output={json.dumps(output.to_dict(), ensure_ascii=False)}"
                 )
 
-                # 线程安全地添加结果
                 with lock:
                     output_list.append(output)
 
@@ -490,15 +489,16 @@ class BenchmarkService(
                       input_file_path: str, output_file_path: str):
         """
             Post dispatch processing standard result compare LLM execute result
-             and write compare result to file
+            and write compare result to file
         """
-        self.user_input_execute_service.post_dispatch(
-            i,
-            config,
-            input_list,
-            None,
-            output_list[0].benchmark_data_sets.data_list,
-            input_file_path,
-            output_file_path,
-        )
+        for j, output_result in enumerate(output_list):
+            self.user_input_execute_service.post_dispatch(
+                i,
+                config,
+                input_list,
+                None,
+                output_result.benchmark_data_sets.data_list,
+                input_file_path,
+                output_file_path,
+            )
 
diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/models.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/models.py
index a7b34a494..e9a52603b 100644
--- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/models.py
+++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/models.py
@@ -61,6 +61,7 @@ class AnswerExecuteModel:
     strategyConfig: Optional[DataCompareStrategyConfig] = None
     cotTokens: Optional[Any] = None
     cost_time: Optional[int] = None
+    llm_code: Optional[str] = None
 
     @staticmethod
     def from_dict(d: Dict[str, Any]) -> "AnswerExecuteModel":
@@ -83,6 +84,7 @@ class AnswerExecuteModel:
             strategyConfig=strategy_config,
             cotTokens=d.get("cotTokens"),
             cost_time=d.get("cost_time"),
+            llm_code=d.get("llm_code"),
         )
 
     def to_dict(self) -> Dict[str, Any]:
@@ -103,6 +105,7 @@ class AnswerExecuteModel:
             strategyConfig=cfg,
             cotTokens=self.cotTokens,
             cost_time=self.cost_time,
+            llm_code=self.llm_code,
         )
 
 
diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/user_input_execute_service.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/user_input_execute_service.py
index 735493d71..16f426fe6 100644
--- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/user_input_execute_service.py
+++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/user_input_execute_service.py
@@ -268,6 +268,7 @@ class UserInputExecuteService:
             executeResult=execute_result,
             cotTokens=response.cot_tokens,
             errorMsg=error_msg,
+            llm_code=input.llm_code,
         )
 
     def _extract_sql_content(self, content: str) -> str: