diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/file_parse_service.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/file_parse_service.py
index 33aaba9f2..4d0635af8 100644
--- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/file_parse_service.py
+++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/file_parse_service.py
@@ -63,48 +63,143 @@ class FileParseService(ABC):
         is_execute: bool,
         llm_count: int,
     ):
-        mode = "EXECUTE" if is_execute else "BUILD"
-        records = []
-        for cm in confirm_models:
-            row = dict(
-                serialNo=cm.serialNo,
-                analysisModelId=cm.analysisModelId,
-                question=cm.question,
-                selfDefineTags=cm.selfDefineTags,
-                prompt=cm.prompt,
-                standardAnswerSql=cm.standardAnswerSql,
-                llmOutput=cm.llmOutput,
-                executeResult=cm.executeResult,
-                errorMsg=cm.errorMsg,
-                compareResult=cm.compareResult.value if cm.compareResult else None,
+        """Write compare results to an Excel file instead of the DB.
+
+        The output file is named '<base>_round{round_id}.xlsx' and the sheet
+        name is 'benchmark_compare_result'. If the file already exists, rows
+        are appended; otherwise a new file is created with a header row.
+        """
+        try:
+            # Ensure the output directory exists
+            output_dir = Path(path).parent
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Determine the final Excel file path: <base>_round{round_id}.xlsx
+            base_name = Path(path).stem
+            extension = Path(path).suffix
+            if extension.lower() not in [".xlsx", ".xls"]:
+                extension = ".xlsx"
+            output_file = output_dir / f"{base_name}_round{round_id}{extension}"
+
+            headers = [
+                "serialNo",
+                "analysisModelId",
+                "question",
+                "selfDefineTags",
+                "prompt",
+                "standardAnswerSql",
+                "llmOutput",
+                "executeResult",
+                "errorMsg",
+                "compareResult",
+            ]
+
+            # Load an existing workbook or create a new one with headers
+            if output_file.exists():
+                workbook = load_workbook(str(output_file))
+                if "benchmark_compare_result" in workbook.sheetnames:
+                    worksheet = workbook["benchmark_compare_result"]
+                else:
+                    worksheet = workbook.create_sheet("benchmark_compare_result")
+                    # Write headers for the newly created sheet
+                    for col_idx, header in enumerate(headers, 1):
+                        worksheet.cell(row=1, column=col_idx, value=header)
+            else:
+                workbook = Workbook()
+                worksheet = workbook.active
+                worksheet.title = "benchmark_compare_result"
+                # Write headers
+                for col_idx, header in enumerate(headers, 1):
+                    worksheet.cell(row=1, column=col_idx, value=header)
+
+            # Determine the first row to append to
+            start_row = worksheet.max_row + 1 if worksheet.max_row else 2
+
+            # Append one row per confirm model
+            for idx, cm in enumerate(confirm_models):
+                row_data = [
+                    cm.serialNo,
+                    cm.analysisModelId,
+                    cm.question,
+                    cm.selfDefineTags,
+                    cm.prompt,
+                    cm.standardAnswerSql,
+                    cm.llmOutput,
+                    json.dumps(cm.executeResult, ensure_ascii=False)
+                    if cm.executeResult is not None
+                    else "",
+                    cm.errorMsg,
+                    cm.compareResult.value if cm.compareResult else None,
+                ]
+                for col_idx, value in enumerate(row_data, 1):
+                    worksheet.cell(row=start_row + idx, column=col_idx, value=value)
+
+            # Auto-size columns (simple width heuristic)
+            for column in worksheet.columns:
+                max_length = 0
+                column_letter = column[0].column_letter
+                for cell in column:
+                    try:
+                        if cell.value and len(str(cell.value)) > max_length:
+                            max_length = len(str(cell.value))
+                    except Exception:
+                        pass
+                adjusted_width = min(max(max_length + 2, 10), 80)
+                worksheet.column_dimensions[column_letter].width = adjusted_width
+
+            workbook.save(str(output_file))
+            workbook.close()
+            logger.info(
+                f"[write_data_compare_result] compare written to "
+                f"Excel: {output_file}"
+            )
+        except Exception as e:
+            logger.error(
+                f"[write_data_compare_result] write excel error for path={path}: {e}",
+                exc_info=True,
             )
-            records.append(row)
-        self._benchmark_dao.write_compare_results(
-            round_id=round_id,
-            mode=mode,
-            output_path=path,
-            records=records,
-            is_execute=is_execute,
-            llm_count=llm_count,
-        )
-        print(f"[write_data_compare_result] compare written to DB for: {path}")
 
     def summary_and_write_multi_round_benchmark_result(
        self, output_path: str, round_id: int
     ) -> str:
-        summary_id = self._benchmark_dao.compute_and_save_summary(round_id, output_path)
-        summary = self._benchmark_dao.get_summary(round_id, output_path)
-        result = dict(
-            right=summary.right if summary else 0,
-            wrong=summary.wrong if summary else 0,
-            failed=summary.failed if summary else 0,
-            exception=summary.exception if summary else 0,
-        )
-        logger.info(
-            f"[summary] summary saved to DB for round={round_id},"
-            f" output_path={output_path} -> {result}"
-        )
-        return json.dumps(result, ensure_ascii=False)
+        """Compute the summary from the Excel file and return it as a JSON string.
+
+        Reads the '<base>_round{round_id}.xlsx' file, sheet
+        'benchmark_compare_result', and counts the compareResult column
+        (RIGHT/WRONG/FAILED/EXCEPTION) to build the summary.
+        """
+        try:
+            base_name = Path(output_path).stem
+            extension = Path(output_path).suffix
+            if extension.lower() not in [".xlsx", ".xls"]:
+                extension = ".xlsx"
+            excel_file = (
+                Path(output_path).parent / f"{base_name}_round{round_id}{extension}"
+            )
+            if not excel_file.exists():
+                logger.warning(f"summary excel not found: {excel_file}")
+                result = dict(right=0, wrong=0, failed=0, exception=0)
+                return json.dumps(result, ensure_ascii=False)
+
+            df = pd.read_excel(str(excel_file), sheet_name="benchmark_compare_result")
+            if "compareResult" in df.columns:
+                right = int((df["compareResult"] == "RIGHT").sum())
+                wrong = int((df["compareResult"] == "WRONG").sum())
+                failed = int((df["compareResult"] == "FAILED").sum())
+                exception = int((df["compareResult"] == "EXCEPTION").sum())
+            else:
+                right = wrong = failed = exception = 0
+
+            result = dict(right=right, wrong=wrong, failed=failed, exception=exception)
+            logger.info(
+                f"[summary] summary computed from Excel for round={round_id},"
+                f" output_path={output_path} -> {result}"
+            )
+            return json.dumps(result, ensure_ascii=False)
+        except Exception as e:
+            logger.error(f"summary compute error from excel: {e}", exc_info=True)
+            result = dict(right=0, wrong=0, failed=0, exception=0)
+            return json.dumps(result, ensure_ascii=False)
 
     def get_input_stream(self, location: str):
         """Get input stream from location
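For quick local verification of the new behavior, here is a minimal sketch (not part of the diff) that reads the file the write path produces and recomputes the same counts that summary_and_write_multi_round_benchmark_result returns. The reports/benchmark.xlsx path and round_id=1 are made-up values for illustration only.

```python
# Verification sketch only; assumes the PR's write path has already produced
# the Excel file for this output path and round (path and round are made up).
from pathlib import Path

import pandas as pd

output_path = Path("reports/benchmark.xlsx")  # hypothetical benchmark output path
round_id = 1  # hypothetical round

# Mirrors the naming scheme in the diff: <base>_round{round_id}.xlsx
excel_file = output_path.parent / f"{output_path.stem}_round{round_id}.xlsx"

df = pd.read_excel(excel_file, sheet_name="benchmark_compare_result")
counts = df["compareResult"].value_counts()
summary = {
    "right": int(counts.get("RIGHT", 0)),
    "wrong": int(counts.get("WRONG", 0)),
    "failed": int(counts.get("FAILED", 0)),
    "exception": int(counts.get("EXCEPTION", 0)),
}
# Should match the JSON returned by summary_and_write_multi_round_benchmark_result
print(summary)
```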