diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py
index 01e192658..b990377d8 100644
--- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py
+++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py
@@ -32,6 +32,7 @@ from ...api.schemas import (
 )
 from ...config import ServeConfig
 from ...models.models import ServeDao, ServeEntity
+from ..fetchdata.benchmark_data_manager import get_benchmark_manager
 from .benchmark_llm_task import BenchmarkLLMTask
 from .data_compare_service import DataCompareService
 from .file_parse_service import ExcelFileParseService
@@ -239,6 +240,17 @@ class BenchmarkService(
         if not scene_key:
             scene_key = EvaluationScene.DATASET.value
 
+        try:
+            manager = get_benchmark_manager(self._system_app)
+            await manager.load_data()
+            logger.info(
+                f"Benchmark dataset loaded from {manager._config.repo_url} "
+                f"dir={manager._config.data_dir}"
+            )
+        except Exception as e:
+            logger.error(f"Failed to load benchmark dataset before run: {e}")
+            raise
+
         output_file_path = self._generate_output_file_full_path(
             output_file_path, evaluate_code
         )
diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py
index 2791aa0d5..5c32d1a8a 100644
--- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py
+++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py
@@ -74,27 +74,6 @@ class BenchmarkDataManager(BaseComponent):
     async def async_after_start(self):
         logger.info("BenchmarkDataManager: async_after_start.")
 
-        try:
-            if not self._config.repo_url:
-                logger.info("BenchmarkDataManager: repo_url not set, skip auto load.")
-                return
-
-            if self._startup_loaded:
-                logger.info("BenchmarkDataManager: already loaded on startup, skip.")
-                return
-
-            logger.info(
-                f"BenchmarkDataManager: auto loading repo {self._config.repo_url} "
-                f"dir={self._config.data_dir}"
-            )
-            await get_benchmark_manager(self.system_app).load_from_github(
-                repo_url=self._config.repo_url, data_dir=self._config.data_dir
-            )
-            self._startup_loaded = True
-            logger.info("BenchmarkDataManager: auto load finished.")
-        except Exception as e:
-            logger.error(f"BenchmarkDataManager: auto load failed: {e}")
-
     async def async_before_stop(self):
         try:
             logger.info("BenchmarkDataManager: closing resources before stop...")
@@ -142,6 +121,41 @@ class BenchmarkDataManager(BaseComponent):
         """Run blocking function in thread to avoid blocking event loop"""
         return await asyncio.to_thread(func, *args, **kwargs)
 
+    async def load_data(self):
+        """Load the configured benchmark dataset on demand.
+
+        Returns without loading when ``repo_url`` is not configured or when
+        ``_startup_loaded`` is already set.
+
+        Raises:
+            Exception: Any error raised while loading is logged and then
+                re-raised so the caller can abort instead of continuing.
+        """
+        logger.info("BenchmarkDataManager: start load_data.")
+
+        try:
+            if not self._config.repo_url:
+                logger.info("BenchmarkDataManager: repo_url not set, skip auto load.")
+                return
+
+            if self._startup_loaded:
+                logger.info("BenchmarkDataManager: already loaded on startup, skip.")
+                return
+
+            logger.info(
+                f"BenchmarkDataManager: auto loading repo {self._config.repo_url} "
+                f"dir={self._config.data_dir}"
+            )
+            await get_benchmark_manager(self.system_app).load_from_github(
+                repo_url=self._config.repo_url, data_dir=self._config.data_dir
+            )
+            self._startup_loaded = True
+            logger.info("BenchmarkDataManager: auto load finished.")
+        except Exception as e:
+            logger.error(f"BenchmarkDataManager: auto load failed: {e}")
+            # Propagate: the benchmark run path logs and re-raises load failures.
+            raise
+
     def _sanitize_column_name(self, name: str) -> str:
         if name is None:
             return ""