mirror of
https://github.com/csunny/DB-GPT.git
synced 2026-01-13 19:55:44 +00:00
fix(benchmark): remove useless code
This commit is contained in:
@@ -12,10 +12,7 @@ from dbgpt_serve.agent.app.recommend_question.recommend_question import (
|
||||
from dbgpt_serve.agent.hub.db.my_plugin_db import MyPluginEntity
|
||||
from dbgpt_serve.agent.hub.db.plugin_hub_db import PluginHubEntity
|
||||
from dbgpt_serve.datasource.manages.connect_config_db import ConnectConfigEntity
|
||||
from dbgpt_serve.evaluate.db.benchmark_db import (
|
||||
BenchmarkCompareEntity,
|
||||
BenchmarkSummaryEntity,
|
||||
)
|
||||
from dbgpt_serve.evaluate.db.benchmark_db import BenchmarkSummaryEntity
|
||||
from dbgpt_serve.file.models.models import ServeEntity as FileServeEntity
|
||||
from dbgpt_serve.flow.models.models import ServeEntity as FlowServeEntity
|
||||
from dbgpt_serve.flow.models.models import VariablesEntity as FlowVariableEntity
|
||||
@@ -40,6 +37,5 @@ _MODELS = [
|
||||
FlowServeEntity,
|
||||
RecommendQuestionEntity,
|
||||
FlowVariableEntity,
|
||||
BenchmarkCompareEntity,
|
||||
BenchmarkSummaryEntity,
|
||||
]
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
from .benchmark_db import (
|
||||
BenchmarkCompareEntity,
|
||||
BenchmarkResultDao,
|
||||
BenchmarkSummaryEntity,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"BenchmarkCompareEntity",
|
||||
"BenchmarkSummaryEntity",
|
||||
"BenchmarkResultDao",
|
||||
]
|
||||
|
||||
@@ -3,13 +3,11 @@ from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
Column,
|
||||
DateTime,
|
||||
Index,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
UniqueConstraint,
|
||||
desc,
|
||||
func,
|
||||
@@ -20,64 +18,6 @@ from dbgpt.storage.metadata import BaseDao, Model
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BenchmarkCompareEntity(Model):
    """Single compare record for one input serialNo in one round.

    Fields match the JSON lines produced by FileParseService.write_data_compare_result.

    A row is unique per ``(round_id, serial_no, output_path)``. The
    single-column indexes on ``round_id``, ``mode`` and ``serial_no`` are
    declared in ``__table_args__`` so that they are attached to the table.
    """

    __tablename__ = "benchmark_compare"
    # An Index built only from column-name strings has no effect until it is
    # associated with a table, so the indexes are listed here next to the
    # unique constraint instead of being standalone expression statements.
    __table_args__ = (
        UniqueConstraint(
            "round_id", "serial_no", "output_path", name="uk_round_serial_output"
        ),
        Index("idx_bm_comp_round", "round_id"),
        Index("idx_bm_comp_mode", "mode"),
        Index("idx_bm_comp_serial", "serial_no"),
    )

    id = Column(
        Integer, primary_key=True, autoincrement=True, comment="autoincrement id"
    )
    # Round and mode
    round_id = Column(Integer, nullable=False, comment="Benchmark round id")
    mode = Column(String(16), nullable=False, comment="BUILD or EXECUTE")

    # Input & outputs
    serial_no = Column(Integer, nullable=False, comment="Input serial number")
    analysis_model_id = Column(String(255), nullable=False, comment="Analysis model id")
    question = Column(Text, nullable=False, comment="User question")
    self_define_tags = Column(String(255), nullable=True, comment="Self define tags")
    prompt = Column(Text, nullable=True, comment="Prompt text")

    standard_answer_sql = Column(Text, nullable=True, comment="Standard answer SQL")
    llm_output = Column(Text, nullable=True, comment="LLM output text or JSON")
    execute_result = Column(
        Text, nullable=True, comment="Execution result JSON (serialized)"
    )
    error_msg = Column(Text, nullable=True, comment="Error message")

    compare_result = Column(
        String(16), nullable=True, comment="RIGHT/WRONG/FAILED/EXCEPTION"
    )
    is_execute = Column(Boolean, default=False, comment="Whether this is EXECUTE mode")
    llm_count = Column(Integer, default=0, comment="Number of LLM outputs compared")

    # Source path for traceability (original output jsonl file path)
    output_path = Column(
        String(512), nullable=False, comment="Original output file path"
    )

    # Timestamps are filled in by the callables below when a row is inserted;
    # gmt_modified is additionally refreshed on every update.
    gmt_created = Column(DateTime, default=datetime.now, comment="Record creation time")
    gmt_modified = Column(
        DateTime,
        default=datetime.now,
        onupdate=datetime.now,
        comment="Record update time",
    )
|
||||
|
||||
|
||||
class BenchmarkSummaryEntity(Model):
|
||||
"""Summary result for one round and one output path.
|
||||
|
||||
@@ -171,18 +111,6 @@ class BenchmarkResultDao(BaseDao):
|
||||
session.add(summary)
|
||||
session.commit()
|
||||
|
||||
# Basic query helpers
|
||||
def list_compare_by_round(self, round_id: int, limit: int = 100, offset: int = 0):
    """Return one page of compare records for a round, highest id first.

    Args:
        round_id: Value matched against ``BenchmarkCompareEntity.round_id``.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the first returned row.

    Returns:
        The list of matching ``BenchmarkCompareEntity`` rows.
    """
    with self.session(commit=False) as db_session:
        # Build the query step by step: restrict to the round, sort by id
        # descending, then apply the paging window.
        round_query = db_session.query(BenchmarkCompareEntity)
        round_query = round_query.filter(BenchmarkCompareEntity.round_id == round_id)
        ordered_query = round_query.order_by(desc(BenchmarkCompareEntity.id))
        paged_query = ordered_query.limit(limit).offset(offset)
        return paged_query.all()
|
||||
|
||||
def get_summary(
|
||||
self, round_id: int, output_path: str
|
||||
) -> Optional[BenchmarkSummaryEntity]:
|
||||
|
||||
@@ -57,7 +57,7 @@ BENCHMARK_SERVICE_COMPONENT_NAME = "dbgpt_serve_evaluate_benchmark_service"
|
||||
|
||||
STANDARD_BENCHMARK_FILE_PATH = os.path.join(
|
||||
BENCHMARK_DATA_ROOT_PATH,
|
||||
"2025_07_27_public_500_standard_benchmark_question_list_v2.xlsx",
|
||||
"2025_07_27_public_500_standard_benchmark_question_list.xlsx",
|
||||
)
|
||||
|
||||
BENCHMARK_OUTPUT_RESULT_PATH = os.path.join(BENCHMARK_DATA_ROOT_PATH, "result")
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
{"serialNo":1,"analysisModelId":"D2025050900161503000025249569","question":"各性别的平均年龄是多少,并按年龄顺序显示结果?","selfDefineTags":"KAGGLE_DS_1,CTE1","prompt":"...","knowledge":""}
|
||||
{"serialNo":2,"analysisModelId":"D2025050900161503000025249569","question":"不同投资目标下政府债券的总量是多少,并按目标名称排序?","selfDefineTags":"KAGGLE_DS_1,CTE1","prompt":"...","knowledge":""}
|
||||
{"serialNo":3,"analysisModelId":"D2025050900161503000025249569","question":"用于触发双模型结果数不相等的case","selfDefineTags":"TEST","prompt":"...","knowledge":""}
|
||||
{"serialNo":4,"analysisModelId":"D2025050900161503000025249569","question":"用于JSON对比策略的case","selfDefineTags":"TEST_JSON","prompt":"...","knowledge":""}
|
||||
@@ -1,5 +0,0 @@
|
||||
{"serialNo":1,"analysisModelId":"D2025050900161503000025249569","question":"各性别的平均年龄是多少,并按年龄顺序显示结果?","llmOutput":"with converted_data as (...)\nselect gender as `性别`, avg(age) as `平均年龄` from converted_data group by gender order by `平均年龄`;","executeResult":{"性别":["Female","Male"],"平均年龄":["27.73","27.84"]},"errorMsg":null}
|
||||
{"serialNo":2,"analysisModelId":"D2025050900161503000025249569","question":"不同投资目标下政府债券的总量是多少,并按目标名称排序?","llmOutput":"with gov_bonds_data as (...)\nselect objective as `objective`, sum(gov_bond_value) as `政府债券总量` from gov_bonds_data group by `objective` order by `objective`;","executeResult":{"objective":["Capital Appreciation","Growth","Income"],"政府债券总量":["117","54","15"]},"errorMsg":null}
|
||||
{"serialNo":3,"analysisModelId":"D2025050900161503000025249569","question":"用于触发双模型结果数不相等的case","llmOutput":"select 1","executeResult":{"colA":["x","y"]},"errorMsg":null}
|
||||
{"serialNo":4,"analysisModelId":"D2025050900161503000025249569","question":"用于JSON对比策略的case","llmOutput":"{\"check\":\"ok\"}","executeResult":null,"errorMsg":null}
|
||||
{"serialNo":5,"analysisModelId":"D2025050900161503000025249569","question":"缺少匹配标准的case","llmOutput":"select * from t","executeResult":null,"errorMsg":"execution error"}
|
||||
@@ -1,4 +0,0 @@
|
||||
{"serialNo":1,"analysisModelId":"D2025050900161503000025249569","question":"各性别的平均年龄是多少,并按年龄顺序显示结果?","llmOutput":"with converted_data as (...)\nselect gender as `性别`, avg(age) as `平均年龄` from converted_data group by gender order by `平均年龄`;","executeResult":{"性别":["Female","Male"],"平均年龄":["27.73","27.84"]},"errorMsg":null}
|
||||
{"serialNo":2,"analysisModelId":"D2025050900161503000025249569","question":"不同投资目标下政府债券的总量是多少,并按目标名称排序?","llmOutput":"with gov_bonds_data as (...)\nselect objective as `objective`, sum(gov_bond_value) as `政府债券总量` from gov_bonds_data group by `objective` order by `objective`;","executeResult":{"objective":["Capital Appreciation","Growth","Income"],"政府债券总量":["117","54","15"]},"errorMsg":null}
|
||||
{"serialNo":3,"analysisModelId":"D2025050900161503000025249569","question":"用于触发双模型结果数不相等的case","llmOutput":"select 1","executeResult":{"colA":["x","y"]},"errorMsg":null}
|
||||
{"serialNo":4,"analysisModelId":"D2025050900161503000025249569","question":"用于JSON对比策略的case","llmOutput":"{\"check\":\"ok\"}","executeResult":null,"errorMsg":null}
|
||||
@@ -1,4 +0,0 @@
|
||||
{"serialNo":1,"analysisModelId":"D2025050900161503000025249569","question":"各性别的平均年龄是多少,并按年龄顺序显示结果?","llmOutput":"with converted_data as (...)\nselect gender as `性别`, avg(age) as `平均年龄` from converted_data group by gender order by `平均年龄`;","executeResult":{"性别":["Female","Male"],"平均年龄":["27.73","27.84"]},"errorMsg":null}
|
||||
{"serialNo":2,"analysisModelId":"D2025050900161503000025249569","question":"不同投资目标下政府债券的总量是多少,并按目标名称排序?","llmOutput":"with gov_bonds_data as (...)\nselect objective as `objective`, sum(gov_bond_value) as `政府债券总量` from gov_bonds_data group by `objective` order by `objective`;","executeResult":{"objective":["Capital Appreciation","Growth","Income"],"政府债券总量":["117","54","15"]},"errorMsg":null}
|
||||
{"serialNo":3,"analysisModelId":"D2025050900161503000025249569","question":"用于触发双模型结果数不相等的case","llmOutput":"select 1","executeResult":{"colB":["x","z","w"]},"errorMsg":null}
|
||||
{"serialNo":4,"analysisModelId":"D2025050900161503000025249569","question":"用于JSON对比策略的case","llmOutput":"{\"check\":\"ok\"}","executeResult":null,"errorMsg":null}
|
||||
Binary file not shown.
@@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Union
|
||||
from dbgpt.util.benchmarks import StorageUtil
|
||||
from dbgpt_serve.evaluate.db.benchmark_db import BenchmarkResultDao
|
||||
from dbgpt_serve.evaluate.service.fetchdata.benchmark_data_manager import (
|
||||
get_benchmark_manager,
|
||||
get_benchmark_manager, BENCHMARK_DEFAULT_DB_SCHEMA,
|
||||
)
|
||||
|
||||
from .data_compare_service import DataCompareService
|
||||
@@ -26,8 +26,6 @@ from .models import (
|
||||
RoundAnswerConfirmModel,
|
||||
)
|
||||
|
||||
BENCHMARK_DEFAULT_DB_SCHEMA = "ant_icube_dev."
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from dbgpt_ext.datasource.rdbms.conn_sqlite import SQLiteConnector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
BENCHMARK_DEFAULT_DB_SCHEMA = "ant_icube_dev."
|
||||
|
||||
class BenchmarkDataConfig(BaseModel):
|
||||
"""Configuration for Benchmark Data Manager"""
|
||||
@@ -31,7 +32,7 @@ class BenchmarkDataConfig(BaseModel):
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
cache_dir: str = "cache"
|
||||
db_path: str = os.path.join(BENCHMARK_DATA_ROOT_PATH, "ant_icube_dev.db")
|
||||
db_path: str = os.path.join(BENCHMARK_DATA_ROOT_PATH, f"{BENCHMARK_DEFAULT_DB_SCHEMA}db")
|
||||
table_mapping_file: str = os.path.join(
|
||||
BENCHMARK_DATA_ROOT_PATH, "table_mapping.json"
|
||||
)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user