From bfa1b11cecf4d6e9efe410960ed409566a2844c0 Mon Sep 17 00:00:00 2001 From: yaoyifan-yyf Date: Tue, 16 Dec 2025 19:59:19 +0800 Subject: [PATCH] feat: adjust benchmark data construct approach (#2948) --- .../service/benchmark/benchmark_service.py | 2 +- .../fetchdata/benchmark_data_manager.py | 541 +++++------------- pilot/benchmark_meta_data/table_mapping.json | 95 --- 3 files changed, 132 insertions(+), 506 deletions(-) delete mode 100644 pilot/benchmark_meta_data/table_mapping.json diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py index 410b351bb..40b9732c6 100644 --- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py +++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/benchmark/benchmark_service.py @@ -40,7 +40,7 @@ from ...config import ServeConfig from ...models.models import ServeDao, ServeEntity from ..fetchdata.benchmark_data_manager import get_benchmark_manager from .data_compare_service import DataCompareService -from .ext.excel_file_parse import ExcelFileParseService +from .file_parse_service import ExcelFileParseService from .models import ( BaseInputModel, BenchmarkDataSets, diff --git a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py index 205383b85..c1a18b9e1 100644 --- a/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py +++ b/packages/dbgpt-serve/src/dbgpt_serve/evaluate/service/fetchdata/benchmark_data_manager.py @@ -1,17 +1,16 @@ import asyncio -import csv import hashlib -import json import logging import os +import re import shutil import tempfile import threading import time +import uuid import zipfile from concurrent.futures import ThreadPoolExecutor from concurrent.futures import TimeoutError as FutureTimeoutError -from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, cast import aiohttp @@ -36,12 +35,10 @@ class BenchmarkDataConfig(BaseModel): db_path: str = os.path.join( BENCHMARK_DATA_ROOT_PATH, f"{BENCHMARK_DEFAULT_DB_SCHEMA}db" ) - table_mapping_file: str = os.path.join( - BENCHMARK_DATA_ROOT_PATH, "table_mapping.json" - ) + table_mapping_file: Optional[str] = None cache_expiry_days: int = 1 - repo_url: str = "https://github.com/eosphoros-ai/Falcon" - data_dir: str = "data/source" + repo_url: str = "https://github.com/eosphoros-ai/Falcon/tree/yifan_1216" + data_dir: str = "dev_data/dev_databases" class BenchmarkDataManager(BaseComponent): @@ -56,7 +53,6 @@ class BenchmarkDataManager(BaseComponent): self._config = config or BenchmarkDataConfig() self._http_session: Optional[aiohttp.ClientSession] = None self._connector: Optional[SQLiteConnector] = None - self._table_mappings = self._load_mappings() self._lock = asyncio.Lock() self.temp_dir: Optional[str] = None @@ -142,59 +138,6 @@ class BenchmarkDataManager(BaseComponent): except Exception as e: logger.error(f"BenchmarkDataManager: auto load failed: {e}") - def _sanitize_column_name(self, name: str) -> str: - if name is None: - return "" - name = str(name).strip().strip('"').strip("'") - invalid_chars = [ - "-", - " ", - ".", - ",", - ";", - ":", - "!", - "?", - "'", - '"', - "(", - ")", - "[", - "]", - "{", - "}", - "\t", - "\r", - "\n", - "\x00", - ] - while name and name[-1] in invalid_chars: - name = name[:-1] - for ch in invalid_chars: - 
if ch in name: - name = name.replace(ch, "_") - while "__" in name: - name = name.replace("__", "_") - if name and not (name[0].isalpha() or name[0] == "_"): - name = "_" + name - return name.lower() - - def _sanitize_and_dedup_headers(self, headers: List[str]) -> List[str]: - sanitized: List[str] = [] - used: set = set() - for idx, h in enumerate(headers): - name = self._sanitize_column_name(h) - if not name: - name = f"col_{idx}" - base = name - k = 2 - while name in used or not name: - name = f"{base}_{k}" - k += 1 - used.add(name) - sanitized.append(name) - return sanitized - # ========================================================== # 通用查询(阻塞实现,在线程池中调用,支持超时与可中断) @@ -292,7 +235,7 @@ class BenchmarkDataManager(BaseComponent): return result.rowcount if timeout is not None: - # 使用ThreadPoolExecutor实现超时控制,类似于基类中DuckDB的实现 + # 使用ThreadPoolExecutor实现超时控制 with ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(_execute_write) try: @@ -307,13 +250,7 @@ class BenchmarkDataManager(BaseComponent): async def query( self, query: str, params: tuple = (), timeout: Optional[float] = None ) -> List[Dict]: - """Execute query and return results as dict list - - Args: - query: SQL query string - params: Query parameters - timeout: Query timeout in seconds (optional) - """ + """Execute query and return results as dict list""" await self.init_connector() cols, rows = await self._run_in_thread( self._query_blocking, query, params, timeout @@ -321,7 +258,7 @@ class BenchmarkDataManager(BaseComponent): return [dict(zip(cols, row)) for row in rows] async def load_from_github( - self, repo_url: str, data_dir: str = "data/source" + self, repo_url: str, data_dir: str = "dev_data/dev_databases" ) -> Dict: """Main method to load data from GitHub repository""" try: @@ -330,14 +267,14 @@ class BenchmarkDataManager(BaseComponent): # 1. Download or use cached repository repo_dir = await self._download_repo_contents(repo_url) - # 2. Find all CSV files recursively - csv_files = self._discover_csv_files(repo_dir, data_dir) - if not csv_files: - raise ValueError("No CSV files found") - logger.info(f"Found {len(csv_files)} CSV files") + # 2. Find all SQLite files recursively in the specified data_dir + sqlite_files = self._discover_sqlite_files(repo_dir, data_dir) + if not sqlite_files: + raise ValueError(f"No SQLite files found in {data_dir}") + logger.info(f"Found {len(sqlite_files)} SQLite files") - # 3. Import to SQLite - result = await self._import_to_database(csv_files) + # 3. Merge all SQLite files into the main database + result = await self._merge_sqlite_databases(sqlite_files) return result except Exception as e: @@ -389,63 +326,8 @@ class BenchmarkDataManager(BaseComponent): except Exception as e: logger.error(f"Failed to clear cache: {str(e)}") - def _load_mappings(self) -> Dict[str, str]: - """Load table name mappings from config file""" - if not self._config.table_mapping_file or not os.path.exists( - self._config.table_mapping_file - ): - logger.warning( - f"Table mapping file not found: {self._config.table_mapping_file}" - ) - return {} - - try: - with open(self._config.table_mapping_file, "r", encoding="utf-8") as f: - mapping = json.load(f) - return { - key: value.split(".")[-1] if "." 
in value else value - for key, value in mapping.items() - } - except Exception as e: - logger.error(f"Failed to load table mapping: {str(e)}") - return {} - - def _sanitize_table_name(self, name: str) -> str: - """Normalize table names using mappings""" - mapped_name = self._table_mappings.get(name.lower(), name) - if mapped_name is None: - mapped_name = name or "" - - invalid_chars = [ - "-", - " ", - ".", - ",", - ";", - ":", - "!", - "?", - "'", - '"', - "(", - ")", - "[", - "]", - "{", - "}", - ] - while mapped_name and mapped_name[-1] in invalid_chars: - mapped_name = mapped_name[:-1] - for char in invalid_chars: - if char in mapped_name: - mapped_name = mapped_name.replace(char, "_") - while "__" in mapped_name: - mapped_name = mapped_name.replace("__", "_") - - return (mapped_name or "").lower() - async def _download_repo_contents(self, repo_url: str) -> str: - """Download repository with caching""" + """Download repository with caching, supporting branch URLs""" cache_path = self._get_cache_path(repo_url) # Use cache if valid @@ -455,21 +337,45 @@ class BenchmarkDataManager(BaseComponent): # Download fresh copy self.temp_dir = tempfile.mkdtemp() - zip_url = ( - repo_url.replace("github.com", "api.github.com/repos") + "/zipball/main" - ) + + # Simple parsing for github.com URLs + github_pattern = r"github\.com/([^/]+)/([^/]+)(?:/tree/(.+))?" + match = re.search(github_pattern, repo_url) + + if match: + owner, repo, branch = match.groups() + branch = branch or "main" # Default to main if no tree/branch specified + zip_url = f"https://api.github.com/repos/{owner}/{repo}/zipball/{branch}" + else: + # Fallback for generic structure or direct zip links + if repo_url.endswith(".zip"): + zip_url = repo_url + else: + # Default fallback behavior from original code + zip_url = ( + repo_url.replace("github.com", "api.github.com/repos") + + "/zipball/main" + ) + logger.info(f"Downloading from GitHub repo: {zip_url}") try: if self._http_session is None: self._http_session = aiohttp.ClientSession() - async with self._http_session.get(zip_url) as response: - response.raise_for_status() + + headers = {"Accept": "application/vnd.github.v3+json"} + async with self._http_session.get(zip_url, headers=headers) as response: + if response.status != 200: + text_resp = await response.text() + raise RuntimeError( + f"GitHub API Error {response.status}: {text_resp}" + ) + zip_path = os.path.join(self.temp_dir, "repo.zip") with open(zip_path, "wb") as f: while True: - chunk = await response.content.read(1024) + chunk = await response.content.read(1024 * 1024) # 1MB chunks if not chunk: break f.write(chunk) @@ -515,297 +421,112 @@ class BenchmarkDataManager(BaseComponent): raise ValueError("No valid directory found after extraction") return os.path.join(self.temp_dir, extracted_dirs[0]) - def _discover_csv_files(self, base_dir: str, search_dir: str) -> List[Dict]: - """Find all CSV files recursively""" + def _discover_sqlite_files(self, base_dir: str, search_dir: str) -> List[str]: + """Find all SQLite files recursively in the search directory""" full_search_dir = os.path.join(base_dir, search_dir) if search_dir else base_dir if not os.path.exists(full_search_dir): raise ValueError(f"Directory not found: {full_search_dir}") - csv_files = [] + sqlite_files = [] for root, _, files in os.walk(full_search_dir): for file in files: - if file.lower().endswith(".csv"): - rel_path = os.path.relpath(root, start=base_dir) - csv_files.append( - { - "full_path": os.path.join(root, file), - "rel_path": rel_path, - 
"file_name": file, - } - ) - return csv_files + if file.lower().endswith(".sqlite"): + full_path = os.path.join(root, file) + sqlite_files.append(full_path) + return sqlite_files - async def _import_to_database(self, csv_files: List[Dict]) -> Dict: - """Import CSV data to SQLite""" + async def _merge_sqlite_databases(self, sqlite_files: List[str]) -> Dict: + """Merge multiple SQLite files into the main database""" await self.init_connector() assert self._connector is not None - results = { - "total_files": len(csv_files), - "successful": 0, - "failed": 0, - "tables_created": [], - } - def _process_one_file(file_info: Dict) -> Tuple[bool, Optional[str]]: - table_name = "" - try: - path_parts = [p for p in file_info["rel_path"].split(os.sep) if p] - table_name = "_".join(path_parts + [Path(file_info["file_name"]).stem]) - table_name = self._sanitize_table_name(table_name) - - with self._connector.session_scope() as session: - session.execute(text(f'DROP TABLE IF EXISTS "{table_name}"')) - session.commit() - encodings = ["utf-8-sig", "utf-8", "latin-1", "iso-8859-1", "cp1252"] - - for encoding in encodings: - try: - with open(file_info["full_path"], "r", encoding=encoding) as f: - content = f.read() - - if not content.strip(): - raise ValueError("File is empty") - - content = content.replace("\r\n", "\n").replace("\r", "\n") - lines = [line for line in content.split("\n") if line.strip()] - if not lines: - raise ValueError("No data after normalization") - - header_line = lines[0] - data_line = lines[1] if len(lines) > 1 else "" - - try: - sample_for_sniff = "\n".join(lines[:10]) - sniffer = csv.Sniffer() - try: - dialect = sniffer.sniff(sample_for_sniff) - except Exception: - # Fallback: choose delimiter by counting common - # separators in header/data line - delims = [",", "\t", ";", "|"] - counts = { - d: (header_line.count(d) if header_line else 0) - + (data_line.count(d) if data_line else 0) - for d in delims - } - best = ( - max(counts, key=counts.get) - if any(counts.values()) - else "," - ) - - class _DefaultDialect(csv.Dialect): - delimiter = best - quotechar = '"' - doublequote = True - skipinitialspace = False - lineterminator = "\n" - quoting = csv.QUOTE_MINIMAL - - dialect = _DefaultDialect() - - try: - has_header = sniffer.has_header("\n".join(lines[:50])) - except Exception: - has_header = True - - header_row = ( - list(csv.reader([header_line], dialect))[0] - if header_line - else [] - ) - first_data_row = ( - list(csv.reader([data_line], dialect))[0] - if data_line - else [] - ) - - # Heuristic: if has_header is False but header_row looks - # like names (mostly alphabetic), treat as header - if not has_header: - - def _looks_like_header(tokens: List[str]) -> bool: - if not tokens: - return False - # 非空、重复少、字母比例高 - cleaned = [ - str(t).strip() for t in tokens if str(t).strip() - ] - if not cleaned: - return False - # 允许少量数字,但大多以字母开头 - alpha_starts = sum( - 1 - for t in cleaned - if t and (t[0].isalpha() or t[0] == "_") - ) - return alpha_starts >= max( - 1, int(0.6 * len(cleaned)) - ) - - if _looks_like_header(header_row): - has_header = True - - if not has_header: - num_cols_guess = len(header_row) - headers = [f"col_{i}" for i in range(num_cols_guess)] - first_data_row = header_row - else: - headers = header_row - - num_cols = ( - len(first_data_row) if first_data_row else len(headers) - ) - - # no header - if not headers or all( - (not str(h).strip()) for h in headers - ): - headers = [f"col_{i}" for i in range(num_cols or 1)] - - headers = 
self._sanitize_and_dedup_headers(headers) - - if num_cols <= 0: - num_cols = len(headers) - headers = headers[:num_cols] - if not headers or any( - h is None or h == "" for h in headers - ): - raise csv.Error("Invalid headers after sanitization") - - create_sql = f''' - CREATE TABLE IF NOT EXISTS "{table_name}" ( - {", ".join([f'"{h}" TEXT' for h in headers])} - ) - ''' - insert_sql = f''' - INSERT INTO "{table_name}" ({ - ", ".join([f'"{h}"' for h in headers]) - }) - VALUES ({ - ", ".join([":" + f"p{i}" for i in range(len(headers))]) - }) - ''' - - with self._connector.session_scope() as session: - logger.debug( - f"Table: {table_name}, headers(final): {headers}" - ) - session.execute(text(create_sql)) - - reader = csv.reader(lines, dialect) - if has_header: - next(reader, None) - - batch_params: List[Dict[str, Any]] = [] - for row in reader: - if not row: - continue - if len(row) != len(headers): - if len(row) < len(headers): - row += [None] * (len(headers) - len(row)) - else: - row = row[: len(headers)] - params = { - f"p{i}": (row[i] if i < len(row) else None) - for i in range(len(headers)) - } - batch_params.append(params) - if len(batch_params) >= 1000: - session.execute(text(insert_sql), batch_params) - batch_params = [] - if batch_params: - session.execute(text(insert_sql), batch_params) - session.commit() - - return True, table_name - - except csv.Error: - self._import_with_simple_split_blocking(table_name, content) - return True, table_name - - except UnicodeDecodeError: - continue - except Exception as e: - logger.warning(f"Error with encoding {encoding}: {str(e)}") - continue + def _worker(): + results = { + "total_files": len(sqlite_files), + "successful": 0, + "failed": 0, + "tables_merged": [], + } + with self._connector.session_scope() as session: + # 获取底层的 sqlite3 连接对象 + connection_proxy = session.connection() + # 兼容不同版本的 SQLAlchemy 获取底层连接的方式 try: - with open(file_info["full_path"], "rb") as f: - content = f.read().decode("ascii", errors="ignore") - if content.strip(): - self._import_with_simple_split_blocking(table_name, content) - return True, table_name - else: - raise ValueError("File is empty or unreadable") - except Exception as e: - return ( - False, - f"Failed to process {file_info['file_name']}: {str(e)}", - ) + # SQLAlchemy 1.4+ / 2.0 + raw_conn = connection_proxy.connection.dbapi_connection + except AttributeError: + try: + # 旧版本或某些驱动 + raw_conn = connection_proxy.connection + except AttributeError: + # 最后的尝试 + raw_conn = session.get_bind().raw_connection() - except Exception as e: - return ( - False, - f"Failed to process {file_info.get('full_path', '')}: {str(e)}", - ) + # 确保 raw_conn 是 sqlite3 的连接对象 + if not raw_conn: + raise RuntimeError("Failed to get raw sqlite3 connection") - for file_info in csv_files: - ok, info = await self._run_in_thread(_process_one_file, file_info) - if ok: - results["successful"] += 1 - if info: - results["tables_created"].append(info) - else: - results["failed"] += 1 - logger.error(info) + cursor = raw_conn.cursor() - return results + for db_path in sqlite_files: + src_alias = f"src_db_{uuid.uuid4().hex[:8]}" + try: + try: + cursor.execute("PRAGMA database_list") + attached_dbs = cursor.fetchall() + for _, name, _ in attached_dbs: + if name not in ("main", "temp"): + cursor.execute(f"DETACH DATABASE {name}") + except Exception as cleanup_err: + logger.warning(f"Cleanup warning: {cleanup_err}") - def _import_with_simple_split_blocking(self, table_name: str, content: str): - """Fallback method for malformed CSV files (blocking, 使用 
SQLAlchemy 执行)""" - assert self._connector is not None - content = content.replace("\r\n", "\n").replace("\r", "\n") - lines = [line for line in content.split("\n") if line.strip()] - if not lines: - raise ValueError("No data found after cleaning") + cursor.execute(f"ATTACH DATABASE ? AS {src_alias}", (db_path,)) - first_line = lines[0] - delimiter = "," if "," in first_line else "\t" if "\t" in first_line else ";" + cursor.execute( + f"SELECT name, sql FROM {src_alias}.sqlite_master " + f"WHERE type='table' AND name NOT LIKE 'sqlite_%'" + ) + tables = cursor.fetchall() - raw_headers = first_line.split(delimiter) - headers = self._sanitize_and_dedup_headers(raw_headers) - actual_columns = len(headers) + for table_name, create_sql in tables: + cursor.execute( + "SELECT name FROM sqlite_master " + "WHERE type='table' " + "AND name=?", + (table_name,), + ) + if not cursor.fetchone(): + cursor.execute(create_sql) + cursor.execute( + f'INSERT INTO main."{table_name}" ' + f'SELECT * FROM {src_alias}."{table_name}"' + ) + results["tables_merged"].append(table_name) + else: + logger.warning( + f"Table '{table_name}' exists. Skipping." + ) - create_sql = f""" - CREATE TABLE IF NOT EXISTS "{table_name}" ( - {", ".join([f'"{h}" TEXT' for h in headers])} - ) - """ + raw_conn.commit() + results["successful"] += 1 - insert_sql = f""" - INSERT INTO "{table_name}" ({", ".join([f'"{h}"' for h in headers])}) - VALUES ({", ".join([":" + f"p{i}" for i in range(actual_columns)])}) - """ + except Exception as e: + logger.error(f"Failed to merge {db_path}: {e}") + results["failed"] += 1 + try: + raw_conn.rollback() + except Exception: + pass + finally: + try: + cursor.execute(f"DETACH DATABASE {src_alias}") + except Exception: + pass - with self._connector.session_scope() as session: - session.execute(text(create_sql)) - batch: List[Dict[str, Any]] = [] - for line in lines[1:]: - row = line.split(delimiter) - if len(row) != actual_columns: - if len(row) < actual_columns: - row += [None] * (actual_columns - len(row)) - else: - row = row[:actual_columns] - params = {f"p{i}": row[i] for i in range(actual_columns)} - batch.append(params) - if len(batch) >= 1000: - session.execute(text(insert_sql), batch) - batch = [] - if batch: - session.execute(text(insert_sql), batch) - session.commit() + return results + + return await self._run_in_thread(_worker) async def get_table_info_simple(self) -> List[str]: """Return simplified table info: table(column1,column2,...)""" diff --git a/pilot/benchmark_meta_data/table_mapping.json b/pilot/benchmark_meta_data/table_mapping.json deleted file mode 100644 index 25ed34b99..000000000 --- a/pilot/benchmark_meta_data/table_mapping.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "data_source_10_indexdata": "ant_icube_dev.stock_exchange_index_data", - "data_source_10_indexinfo": "ant_icube_dev.stock_exchange_index_info", - "data_source_11_price": "ant_icube_dev.bakery_sales_price", - "data_source_11_sales": "ant_icube_dev.bakery_sales_sale", - "data_source_12_events1": "ant_icube_dev.google_merchandise_events", - "data_source_12_items": "ant_icube_dev.google_merchandise_items", - "data_source_12_users": "ant_icube_dev.google_merchandise_users", - "data_source_13_features": "ant_icube_dev.walmart_features", - "data_source_13_sales": "ant_icube_dev.walmart_sales", - "data_source_13_stores": "ant_icube_dev.walmart_stores", - "data_source_14_inventory": "ant_icube_dev.mexico_toy_inventory", - "data_source_14_products": "ant_icube_dev.mexico_toy_products", - "data_source_14_sales": 
"ant_icube_dev.mexico_toy_sales", - "data_source_14_stores": "ant_icube_dev.mexico_toy_stores", - "data_source_15_cardbase": "ant_icube_dev.credit_card_card_base", - "data_source_15_customerbase": "ant_icube_dev.credit_card_customer_base", - "data_source_15_fraudbase": "ant_icube_dev.credit_card_fraud_base", - "data_source_15_transactionbase": "ant_icube_dev.credit_card_transaction_base", - "data_source_16_marks": "ant_icube_dev.school_marks", - "data_source_16_students": "ant_icube_dev.school_students", - "data_source_16_subjects": "ant_icube_dev.school_subject", - "data_source_16_teachers": "ant_icube_dev.school_teachers", - "data_source_17_df_customers": "ant_icube_dev.ecommerce_order_customers", - "data_source_17_df_orderitems": "ant_icube_dev.ecommerce_order_order_items", - "data_source_17_df_orders": "ant_icube_dev.ecommerce_order_orders", - "data_source_17_df_payments": "ant_icube_dev.ecommerce_order_payments", - "data_source_17_df_products": "ant_icube_dev.ecommerce_order_products", - "data_source_18_corruption": "ant_icube_dev.world_economic_corruption", - "data_source_18_cost_of_living": "ant_icube_dev.world_economic_cost_of_living", - "data_source_18_richest_countries": "ant_icube_dev.world_economic_richest_countries", - "data_source_18_tourism": "ant_icube_dev.world_economic_tourism", - "data_source_18_unemployment": "ant_icube_dev.world_economic_unemployment", - "data_source_19_drinks": "ant_icube_dev.alcohol_and_life_expectancy_drinks", - "data_source_19_lifeexpectancy-verbose": "ant_icube_dev.alcohol_and_life_expectancy_verbose", - "data_source_1_finance_data": "ant_icube_dev.di_finance_data", - "data_source_20_drivers_data": "ant_icube_dev.city_ride_data_drivers", - "data_source_20_rides_data": "ant_icube_dev.city_ride_data_rides", - "data_source_21_e_customers": "ant_icube_dev.di_data_cleaning_for_customer_database_e_customers", - "data_source_21_e_orders": "ant_icube_dev.di_data_cleaning_for_customer_database_e_orders", - "data_source_21_e_products": "ant_icube_dev.di_data_cleaning_for_customer_database_e_products", - "data_source_22_ufc_country_data": "ant_icube_dev.ufc_country_data", - "data_source_22_ufc_events_stats": "ant_icube_dev.ufc_events_stats", - "data_source_22_ufc_fighters_stats": "ant_icube_dev.ufc_fighters_stats", - "data_source_23_ben10_aliens": "ant_icube_dev.di_ben10_alien_universe_realistic_battle_dataset_aliens", - "data_source_23_ben10_battles": "ant_icube_dev.di_ben10_alien_universe_realistic_battle_dataset_battles", - "data_source_23_ben10_enemies": "ant_icube_dev.di_ben10_alien_universe_realistic_battle_dataset_enemies", - "data_source_24_blinkit_customer_feedback": "ant_icube_dev.blinkit_customers", - "data_source_24_blinkit_customers": "ant_icube_dev.blinkit_customers", - "data_source_24_blinkit_delivery_performance": "ant_icube_dev.blinkit_delivery_performance", - "data_source_24_blinkit_inventory": "ant_icube_dev.blinkit_inventory", - "data_source_24_blinkit_inventorynew": "ant_icube_dev.blinkit_inventory", - "data_source_24_blinkit_marketing_performance": "ant_icube_dev.blinkit_delivery_performance", - "data_source_24_blinkit_order_items": "ant_icube_dev.blinkit_order_items", - "data_source_24_blinkit_orders": "ant_icube_dev.blinkit_orders", - "data_source_24_blinkit_products": "ant_icube_dev.blinkit_products", - "data_source_25_bakutech_bakutech_product_categories": "ant_icube_dev.tech_sales_product_categories", - "data_source_25_bakutech_bakutech_product_subcategories": "ant_icube_dev.tech_sales_product_subcategories", - 
"data_source_25_bakutech_bakutech_sales_data": "ant_icube_dev.tech_sales_sales_data", - "data_source_25_bakutech_bakutech_assets": "ant_icube_dev.tech_sales_assets", - "data_source_25_bakutech_bakutech_customer_lookup": "ant_icube_dev.tech_sales_customer_lookup", - "data_source_25_bakutech_bakutech_dates": "ant_icube_dev.tech_sales_dates", - "data_source_25_bakutech_bakutech_product_returns": "ant_icube_dev.tech_sales_product_returns", - "data_source_25_bakutech_bakutech_products_lookup": "ant_icube_dev.tech_sales_product_lookup", - "data_source_26_appearances": "ant_icube_dev.football_appereances", - "data_source_26_games": "ant_icube_dev.football_games", - "data_source_26_leagues": "ant_icube_dev.football_leagues", - "data_source_26_players": "ant_icube_dev.football_players", - "data_source_26_shots": "ant_icube_dev.football_shots", - "data_source_26_teams": "ant_icube_dev.football_teams", - "data_source_26_teamstats": "ant_icube_dev.football_teamstats", - "data_source_27_categories": "ant_icube_dev.grocery_sales_categories", - "data_source_27_cities": "ant_icube_dev.grocery_sales_cities", - "data_source_27_countries": "ant_icube_dev.grocery_sales_countries", - "data_source_27_customers": "ant_極cube_dev.grocery_sales_customers", - "data_source_27_employees": "ant_icube_dev.grocery_sales_employees", - "data_source_27_products": "ant_icube_dev.grocery_sales_products", - "data_source_27_sales": "ant_icube_dev.grocery_sales_sales", - "data_source_28_customers": "ant_icube_dev.online_shop_customers", - "data_source_28_order_items": "ant_icube_dev.online_shop_order_items", - "data_source_28_orders": "ant_icube_dev.online_shop_orders", - "data_source_28_payment": "ant_icube_dev.online_shop_payment", - "data_source_28_products": "ant_icube_dev.online_shop_products", - "data_source_28_reviews": "ant_icube_dev.online_shop_reviews", - "data_source_28_shipments": "ant_icube_dev.online_shop_shipments", - "data_source_28_suppliers": "ant_icube_dev.online_shop_suppliers", - "data_source_2_finance_loan_approval_prediction_data": "ant_icube_dev.di_finance_loan_approval_prediction_data", - "data_source_3_stock_details_5_years 3": "ant_icube_dev.di_massive_yahoo_finance_dataset_0805", - "data_source_4_wa_fn-usec_-accounts-receivable 2": "ant_icube_dev.di_finance_factoring_ibm_late_payment_histories", - "data_source_5_unicorns till sep 2022": "ant_icube_dev.di_unicorn_startups", - "data_source_6_sales dataset": "ant_icube_dev.di_sales_dataset", - "data_source_7_vgsales": "ant_icube_dev.di_video_game_sales", - "data_source_8_googleplaystore": "ant_icube_dev.di_google_play_store_apps", - "data_source_9_final": "ant_icube_dev.di_global_lnternet_users" -} \ No newline at end of file