Mirror of https://github.com/csunny/DB-GPT.git (synced 2025-09-27 12:33:16 +00:00)
feat: async loading benchmark data
@@ -1,3 +1,4 @@
+import asyncio
 import logging
 import os
 import sys
@@ -35,6 +36,7 @@ from dbgpt_app.base import (
 from dbgpt_app.component_configs import initialize_components
 from dbgpt_app.config import ApplicationConfig, ServiceWebParameters, SystemParameters
 from dbgpt_serve.core import add_exception_handler
+from dbgpt_serve.evaluate.service.fetchdata.benchmark_data_manager import BenchmarkDataManager, get_benchmark_manager
 
 logger = logging.getLogger(__name__)
 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -144,6 +146,15 @@ def initialize_app(param: ApplicationConfig, args: List[str] = None):
     # After init, when the database is ready
     system_app.after_init()
 
+    # Async fetch benchmark dataset from Falcon
+    loop = asyncio.get_event_loop()
+    if loop.is_running():
+        loop.create_task(load_benchmark_data())
+    else:
+        loop.run_until_complete(load_benchmark_data())
+
+
+
     binding_port = web_config.port
     binding_host = web_config.host
     if not web_config.light:
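Note on the scheduling block above: it branches on whether an event loop is already running when `initialize_app` executes, queuing the benchmark load as a background task in that case and otherwise blocking until it completes. Since `asyncio.get_event_loop()` is deprecated on recent Python versions when no loop is running, the same branch can be written against the non-deprecated API. A minimal sketch of that alternative; the `schedule_or_run` helper name is illustrative and not part of this commit:

```python
import asyncio
from typing import Any, Coroutine


def schedule_or_run(coro: Coroutine[Any, Any, Any]):
    """Run `coro` as a background task if a loop is running, else block until done."""
    try:
        # get_running_loop() raises RuntimeError outside a running event loop.
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: create one and run the coroutine to completion.
        return asyncio.run(coro)
    # Already inside a running loop: schedule without blocking startup.
    return loop.create_task(coro)
```

With such a helper, the added block would reduce to `schedule_or_run(load_benchmark_data())`.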
@@ -319,6 +330,43 @@ def parse_args():
     return parser.parse_args()
 
 
+async def load_benchmark_data():
+    """Load benchmark data from GitHub repository into SQLite database"""
+    logging.basicConfig(level=logging.INFO)
+    logger.info("Starting benchmark data loading process...")
+
+    try:
+        manager = get_benchmark_manager(system_app)
+
+        async with manager:
+            logger.info("Fetching data from GitHub repository...")
+            result = await manager.load_from_github(
+                repo_url="https://github.com/inclusionAI/Falcon",
+                data_dir="data/source"
+            )
+
+            # Log detailed results
+            logger.info("\nBenchmark Data Loading Summary:")
+            logger.info(f"Total CSV files processed: {result['total_files']}")
+            logger.info(f"Successfully imported: {result['successful']}")
+            logger.info(f"Failed imports: {result['failed']}")
+
+            if result['failed'] > 0:
+                logger.warning(f"Encountered {result['failed']} failures during import")
+
+            # Verify the loaded data
+            table_info = await manager.get_table_info()
+            logger.info(f"Loaded {len(table_info)} tables into database")
+
+            return {
+                'import_result': result,
+                'table_info': table_info
+            }
+
+    except Exception as e:
+        logger.error("Failed to load benchmark data", exc_info=True)
+        raise RuntimeError(f"Benchmark data loading failed: {str(e)}") from e
+
 if __name__ == "__main__":
     # Parse command line arguments
     _args = parse_args()
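For reference, `load_benchmark_data()` can also be driven outside of `initialize_app`, e.g. from a small script or test, as long as the surrounding startup code has already initialized `system_app` (which `get_benchmark_manager` needs). A rough standalone sketch under that assumption; the `_run_benchmark_import` wrapper is illustrative and assumes `load_benchmark_data` from the diff above is in scope:

```python
import asyncio


async def _run_benchmark_import():
    # The returned dict mirrors what load_benchmark_data() builds:
    # 'import_result' with total_files/successful/failed, plus 'table_info'.
    summary = await load_benchmark_data()
    imported = summary["import_result"]["successful"]
    failed = summary["import_result"]["failed"]
    tables = len(summary["table_info"])
    print(f"Imported {imported} CSV files ({failed} failures); {tables} tables loaded.")


if __name__ == "__main__":
    # No event loop is running at script entry, so asyncio.run() drives the coroutine.
    asyncio.run(_run_benchmark_import())
```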