fix: table parsing errors (delimiter fallback and header detection)

This commit is contained in:
yaoyifan-yyf
2025-10-20 13:48:40 +08:00
parent 2a823ee25c
commit e69a4e587f

View File

@@ -475,8 +475,13 @@ class BenchmarkDataManager(BaseComponent):
dialect = sniffer.sniff(sample_for_sniff)
except Exception:
# Fallback: choose delimiter by counting common separators in header/data line
delims = [",", "\t", ";", "|"]
counts = {d: (header_line.count(d) if header_line else 0) + (data_line.count(d) if data_line else 0) for d in delims}
best = max(counts, key=counts.get) if any(counts.values()) else ","
class _DefaultDialect(csv.Dialect):
delimiter = ","
delimiter = best
quotechar = '"'
doublequote = True
skipinitialspace = False
@@ -501,6 +506,21 @@ class BenchmarkDataManager(BaseComponent):
else []
)
# Heuristic: if has_header is False but header_row looks like names (mostly alphabetic), treat as header
if not has_header:
def _looks_like_header(tokens: List[str]) -> bool:
    """Return True when most non-blank tokens look like column names.

    A token counts as name-like when, after stripping whitespace, its first
    character is a letter or an underscore. The row is treated as a header
    when at least 60% of its non-blank tokens are name-like.

    Args:
        tokens: Cells of the candidate header row; each one is coerced with
            ``str()`` before inspection.

    Returns:
        False for an empty row or a row containing only blank cells;
        otherwise whether the name-like share reaches the 60% threshold.
    """
    if not tokens:
        return False
    # Strip each cell once and drop blank ones so they do not dilute the ratio.
    cleaned = [s for s in (str(t).strip() for t in tokens) if s]
    if not cleaned:
        return False
    # A few numeric cells are tolerated, but most must start like an identifier.
    alpha_starts = sum(1 for s in cleaned if s[0].isalpha() or s[0] == "_")
    # Exact integer form of alpha_starts / len(cleaned) >= 0.6. Truncating
    # 0.6 * len(cleaned) with int() would let e.g. 1 of 3 tokens pass.
    return 5 * alpha_starts >= 3 * len(cleaned)
if _looks_like_header(header_row):
has_header = True
if not has_header:
num_cols_guess = len(header_row)
headers = [f"col_{i}" for i in range(num_cols_guess)]