feat: update benchmark github repo data loading time (#2936)

This commit is contained in:
yaoyifan-yyf
2025-11-30 23:27:28 +08:00
committed by GitHub
parent 754575da28
commit 3ba2e9f104
2 changed files with 36 additions and 21 deletions

View File

@@ -32,6 +32,7 @@ from ...api.schemas import (
)
from ...config import ServeConfig
from ...models.models import ServeDao, ServeEntity
from ..fetchdata.benchmark_data_manager import get_benchmark_manager
from .benchmark_llm_task import BenchmarkLLMTask
from .data_compare_service import DataCompareService
from .file_parse_service import ExcelFileParseService
@@ -239,6 +240,17 @@ class BenchmarkService(
if not scene_key:
scene_key = EvaluationScene.DATASET.value
try:
manager = get_benchmark_manager(self._system_app)
await manager.load_data()
logger.info(
f"Benchmark dataset loaded from {manager._config.repo_url} "
f"dir={manager._config.data_dir}"
)
except Exception as e:
logger.error(f"Failed to load benchmark dataset before run: {e}")
raise
output_file_path = self._generate_output_file_full_path(
output_file_path, evaluate_code
)

View File

@@ -74,27 +74,6 @@ class BenchmarkDataManager(BaseComponent):
async def async_after_start(self):
    """Try to auto-load the configured benchmark repository after start.

    The load is best-effort: it is skipped when no ``repo_url`` is
    configured or when ``_startup_loaded`` is already set, and any
    exception raised along the way is logged rather than propagated.
    """
    logger.info("BenchmarkDataManager: async_after_start.")
    try:
        if not self._config.repo_url:
            logger.info("BenchmarkDataManager: repo_url not set, skip auto load.")
        elif self._startup_loaded:
            logger.info("BenchmarkDataManager: already loaded on startup, skip.")
        else:
            logger.info(
                f"BenchmarkDataManager: auto loading repo {self._config.repo_url} "
                f"dir={self._config.data_dir}"
            )
            manager = get_benchmark_manager(self.system_app)
            await manager.load_from_github(
                repo_url=self._config.repo_url,
                data_dir=self._config.data_dir,
            )
            # Only mark as loaded once the download has actually succeeded.
            self._startup_loaded = True
            logger.info("BenchmarkDataManager: auto load finished.")
    except Exception as exc:
        # Failures are logged and not re-raised.
        logger.error(f"BenchmarkDataManager: auto load failed: {exc}")
async def async_before_stop(self):
try:
logger.info("BenchmarkDataManager: closing resources before stop...")
@@ -142,6 +121,30 @@ class BenchmarkDataManager(BaseComponent):
"""Run blocking function in thread to avoid blocking event loop"""
return await asyncio.to_thread(func, *args, **kwargs)
async def load_data(self) -> None:
    """Load the configured benchmark repository on demand, at most once.

    Returns immediately when ``self._config.repo_url`` is empty or when
    ``self._startup_loaded`` is already set. Otherwise awaits
    ``load_from_github`` on the manager returned by
    ``get_benchmark_manager(self.system_app)`` and then sets
    ``self._startup_loaded`` so later calls are no-ops.

    Raises:
        Exception: Any error raised while loading is logged and then
            re-raised, so a caller awaiting this before a benchmark run
            can abort instead of continuing without a dataset.
    """
    logger.info("BenchmarkDataManager: start load_data.")
    try:
        if not self._config.repo_url:
            logger.info("BenchmarkDataManager: repo_url not set, skip auto load.")
            return
        if self._startup_loaded:
            logger.info("BenchmarkDataManager: already loaded on startup, skip.")
            return
        logger.info(
            f"BenchmarkDataManager: auto loading repo {self._config.repo_url} "
            f"dir={self._config.data_dir}"
        )
        await get_benchmark_manager(self.system_app).load_from_github(
            repo_url=self._config.repo_url, data_dir=self._config.data_dir
        )
        # Set only after a successful load so a failed attempt is retried
        # on the next call.
        self._startup_loaded = True
        logger.info("BenchmarkDataManager: auto load finished.")
    except Exception as e:
        logger.error(f"BenchmarkDataManager: auto load failed: {e}")
        # Propagate the failure: swallowing it here would let the caller
        # report a successful load and proceed without data.
        raise
def _sanitize_column_name(self, name: str) -> str:
if name is None:
return ""