From 19bbc6fa8dc6956eea1e918db874153144bfecf4 Mon Sep 17 00:00:00 2001 From: iterminatorheart <123625928+iterminatorheart@users.noreply.github.com> Date: Sun, 19 Oct 2025 12:40:48 +0800 Subject: [PATCH] feat: evaluation dataset info pages (#2911) Co-authored-by: VLADIMIR KOBZEV Co-authored-by: Aries-ckt <916701291@qq.com> Co-authored-by: xiandu.wl --- web/client/api/models_evaluation/datasets.ts | 16 ++ web/client/api/models_evaluation/index.ts | 7 +- web/client/api/models_evaluation/model.ts | 6 - web/client/api/models_evaluation/result.ts | 4 +- web/components/layout/side-bar.tsx | 4 +- .../models_evaluation/EvaluationHeader.tsx | 44 +-- .../models_evaluation/EvaluationList.tsx | 70 ++--- .../models_evaluation/NewEvaluationModal.tsx | 52 ++-- .../components/bar-chart.tsx | 22 +- .../models_evaluation/components/nav-to.tsx | 39 +++ .../context/EvaluationContext.tsx | 19 +- .../hooks/useEvaluationList.ts | 20 +- .../models_evaluation/styles.module.css | 3 + web/pages/models_evaluation/[code].tsx | 251 ++++++++--------- .../models_evaluation/datasets/index.tsx | 262 ++++++++++++++++++ web/pages/models_evaluation/detail.tsx | 17 -- web/pages/models_evaluation/index.tsx | 24 +- web/pages/models_evaluation/styles.module.css | 9 + web/types/models_evaluation.ts | 5 +- 19 files changed, 581 insertions(+), 293 deletions(-) create mode 100644 web/client/api/models_evaluation/datasets.ts delete mode 100644 web/client/api/models_evaluation/model.ts rename web/{pages => components}/models_evaluation/EvaluationHeader.tsx (66%) rename web/{pages => components}/models_evaluation/EvaluationList.tsx (67%) rename web/{pages => components}/models_evaluation/NewEvaluationModal.tsx (79%) rename web/{pages => components}/models_evaluation/components/bar-chart.tsx (83%) create mode 100644 web/components/models_evaluation/components/nav-to.tsx rename web/{pages => components}/models_evaluation/context/EvaluationContext.tsx (80%) rename web/{pages => components}/models_evaluation/hooks/useEvaluationList.ts (65%) create mode 100644 web/components/models_evaluation/styles.module.css create mode 100644 web/pages/models_evaluation/datasets/index.tsx delete mode 100644 web/pages/models_evaluation/detail.tsx diff --git a/web/client/api/models_evaluation/datasets.ts b/web/client/api/models_evaluation/datasets.ts new file mode 100644 index 000000000..2c02764f0 --- /dev/null +++ b/web/client/api/models_evaluation/datasets.ts @@ -0,0 +1,16 @@ +import { GET } from '../index'; + +// 获取数据集列表 +export const getBenchmarkDatasets = () => { + return GET(`/api/v2/serve/evaluate/benchmark/list_datasets`); +}; + +// 获取数据集下的物理表列表 +export const getBenchmarkDatasetTables = (datasetId: string) => { + return GET(`/api/v2/serve/evaluate/benchmark/dataset/${datasetId}`); +}; + +// 获取表数据 +export const getBenchmarkTableRows = (datasetId: string, table: string) => { + return GET(`/api/v2/serve/evaluate/benchmark/dataset/${datasetId}/${table}/rows`); +}; diff --git a/web/client/api/models_evaluation/index.ts b/web/client/api/models_evaluation/index.ts index d38fe0095..4c839582a 100644 --- a/web/client/api/models_evaluation/index.ts +++ b/web/client/api/models_evaluation/index.ts @@ -1,7 +1,4 @@ -import type { - getBenchmarkTaskListRequest, - createBenchmarkTaskRequest, -} from '@/types/models_evaluation'; +import type { createBenchmarkTaskRequest, getBenchmarkTaskListRequest } from '@/types/models_evaluation'; import { getUserId } from '@/utils'; import { GET, POST } from '../index'; @@ -23,4 +20,4 @@ export const createBenchmarkTask = (data: createBenchmarkTaskRequest) => { 'user-id': userId, }, }); -}; \ No newline at end of file +}; diff --git a/web/client/api/models_evaluation/model.ts b/web/client/api/models_evaluation/model.ts deleted file mode 100644 index 8cb7439ea..000000000 --- a/web/client/api/models_evaluation/model.ts +++ /dev/null @@ -1,6 +0,0 @@ -import { GET } from '../index'; - -// 获取可用模型列表 -export const getUsableModels = () => { - return GET>('/api/v1/model/types'); -}; \ No newline at end of file diff --git a/web/client/api/models_evaluation/result.ts b/web/client/api/models_evaluation/result.ts index 69f7268cf..29d4a986f 100644 --- a/web/client/api/models_evaluation/result.ts +++ b/web/client/api/models_evaluation/result.ts @@ -2,5 +2,5 @@ import { GET } from '../index'; // 获取评测结果详情 export const getBenchmarkResultDetail = (evaluateCode: string) => { - return GET(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`); -}; \ No newline at end of file + return GET(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`); +}; diff --git a/web/components/layout/side-bar.tsx b/web/components/layout/side-bar.tsx index 5a767c9f6..8425a664e 100644 --- a/web/components/layout/side-bar.tsx +++ b/web/components/layout/side-bar.tsx @@ -307,7 +307,7 @@ function SideBar() { /> ), path: '/models_evaluation', - } + }, ]; if (hasAdmin) { items.push({ @@ -421,7 +421,7 @@ function SideBar() { const language = i18n.language; if (language === 'zh') moment.locale('zh-cn'); if (language === 'en') moment.locale('en'); - }, []); + }, [i18n.language]); useEffect(() => { setLogo(mode === 'dark' ? '/logo_s_latest.png' : '/logo_zh_latest.png'); diff --git a/web/pages/models_evaluation/EvaluationHeader.tsx b/web/components/models_evaluation/EvaluationHeader.tsx similarity index 66% rename from web/pages/models_evaluation/EvaluationHeader.tsx rename to web/components/models_evaluation/EvaluationHeader.tsx index e3a74912f..522250efc 100644 --- a/web/pages/models_evaluation/EvaluationHeader.tsx +++ b/web/components/models_evaluation/EvaluationHeader.tsx @@ -1,17 +1,18 @@ -import { TabKey } from "@/types/models_evaluation"; -import Icon, { ReloadOutlined, SearchOutlined } from "@ant-design/icons"; -import { Button, Input, Segmented, Tooltip } from "antd"; -import { t } from "i18next"; -import { useState } from "react"; -import { NewEvaluationModal } from "./NewEvaluationModal"; -import { useEvaluation } from "./context/EvaluationContext"; +import { TabKey } from '@/types/models_evaluation'; +import { ReloadOutlined, SearchOutlined } from '@ant-design/icons'; +import { Button, Input, Segmented, Tooltip } from 'antd'; +import { t } from 'i18next'; +import { useState } from 'react'; +import { NewEvaluationModal } from './NewEvaluationModal'; +import { NavTo } from './components/nav-to'; +import { useEvaluation } from './context/EvaluationContext'; type Props = { - activeKey?: TabKey, + activeKey?: TabKey; onTabChange?: (v: TabKey) => void; filterValue?: string; onSearch?: (v: string) => void; -} +}; export const EvaluationHeader = (props: Props) => { const { onTabChange, activeKey = 'all', filterValue = '', onSearch } = props; @@ -21,14 +22,14 @@ export const EvaluationHeader = (props: Props) => { const onFilterChange = (e: any) => { onSearch?.(e.target?.value); - } + }; const createEvaluations = () => { setEvaluationVisible(true); - } + }; return ( -
+
{ - - setEvaluationVisible(false)} - onOk={refresh} - /> + setEvaluationVisible(false)} onOk={refresh} />
); -} \ No newline at end of file +}; diff --git a/web/pages/models_evaluation/EvaluationList.tsx b/web/components/models_evaluation/EvaluationList.tsx similarity index 67% rename from web/pages/models_evaluation/EvaluationList.tsx rename to web/components/models_evaluation/EvaluationList.tsx index a165cd716..4c3b796de 100644 --- a/web/pages/models_evaluation/EvaluationList.tsx +++ b/web/components/models_evaluation/EvaluationList.tsx @@ -1,16 +1,16 @@ -import { Button, Table, Tag, Tooltip } from "antd"; -import React, { useCallback, useEffect } from "react"; -import { EvaluationItem } from "@/types/models_evaluation"; -import { useEvaluation } from "./context/EvaluationContext"; -import { useRouter } from "next/router"; - +import { EvaluationItem } from '@/types/models_evaluation'; +import { Button, Table, Tag, Tooltip } from 'antd'; +import { useRouter } from 'next/router'; +import React, { useCallback, useEffect } from 'react'; +import { useEvaluation } from './context/EvaluationContext'; +import styles from './styles.module.css'; interface EvaluationListProps { filterValue?: string; type?: string; } -export const EvaluationList: React.FC = (props) => { - const { filterValue = '', type = 'all' } = props; +export const EvaluationList: React.FC = () => { + // const { filterValue = '', type = 'all' } = props; const { data, loading, getModelsEvaluation } = useEvaluation(); const router = useRouter(); @@ -25,10 +25,10 @@ export const EvaluationList: React.FC = (props) => { const columns = [ { - title: 'ID', - dataIndex: 'evaluate_code', - key: 'evaluate_code', - width: '20%', + title: '评测场景', + dataIndex: 'scene_key', + key: 'scene_key', + width: '10%', }, { title: '任务名称', @@ -36,6 +36,17 @@ export const EvaluationList: React.FC = (props) => { key: 'scene_value', width: '10%', }, + { + title: '评测集名称', + dataIndex: 'datasets_name', + key: 'datasets_name', + width: '20%', + render: (datasets_name: string) => ( + +

{datasets_name}

+
+ ), + }, { title: '创建时间', dataIndex: 'gmt_create', @@ -53,18 +64,17 @@ export const EvaluationList: React.FC = (props) => { dataIndex: 'model_list', key: 'model_list', width: '10%', - render: (model_list: string[]) => ( - {model_list.join(',')} - ), + render: (model_list: string[]) => {model_list.join(',')}, }, { title: '状态', dataIndex: 'state', key: 'state', + width: '5%', render: (state: string, record: EvaluationItem) => { let color = 'default'; let text = state; - + if (state === 'running') { color = 'blue'; text = '运行中'; @@ -84,34 +94,25 @@ export const EvaluationList: React.FC = (props) => { {text} - ) + ); } return {text}; }, }, { - title: '可执行率', - key: 'executable_rate', + title: '评测轮次', + dataIndex: 'round_time', + key: 'round_time', width: '10%', - render: () => --, // 暂时显示默认值 - }, - { - title: '正确率', - key: 'correct_rate', - width: '10%', - render: () => --, // 暂时显示默认值 }, { title: '操作', + width: '5%', key: 'action', render: (_: any, record: EvaluationItem) => { return ( - ); @@ -121,7 +122,8 @@ export const EvaluationList: React.FC = (props) => { return ( = (props) => { loading={loading} columns={columns} dataSource={data?.items || []} - rowKey="evaluate_code" + rowKey='evaluate_code' /> ); -}; \ No newline at end of file +}; diff --git a/web/pages/models_evaluation/NewEvaluationModal.tsx b/web/components/models_evaluation/NewEvaluationModal.tsx similarity index 79% rename from web/pages/models_evaluation/NewEvaluationModal.tsx rename to web/components/models_evaluation/NewEvaluationModal.tsx index 4fee33dd9..4667ca1d4 100644 --- a/web/pages/models_evaluation/NewEvaluationModal.tsx +++ b/web/components/models_evaluation/NewEvaluationModal.tsx @@ -1,11 +1,10 @@ -import { apiInterceptors } from "@/client/api"; -import { getUsableModels } from "@/client/api/models_evaluation/model"; -import { createBenchmarkTask } from "@/client/api/models_evaluation"; -import { useRequest } from "ahooks"; -import { Form, Input, InputNumber, Modal, Select, Slider, message } from "antd"; -import { useState } from "react"; -import { useTranslation } from "react-i18next"; -import { createBenchmarkTaskRequest } from "@/types/models_evaluation"; +import { apiInterceptors, getUsableModels } from '@/client/api'; +import { createBenchmarkTask } from '@/client/api/models_evaluation'; +import { createBenchmarkTaskRequest } from '@/types/models_evaluation'; +import { useRequest } from 'ahooks'; +import { Form, Input, InputNumber, Modal, Select, Slider, message } from 'antd'; +import { useState } from 'react'; +import { useTranslation } from 'react-i18next'; interface Props { open: boolean; @@ -26,17 +25,17 @@ export const NewEvaluationModal = (props: Props) => { return data || []; }, { - onSuccess: (data) => { + onSuccess: data => { const options = data.map((item: string) => ({ label: item, value: item, })); setModelOptions(options); }, - onError: (error) => { + onError: error => { message.error(t('get_model_list_failed') + ': ' + error.message); }, - } + }, ); // 创建评测任务 @@ -61,10 +60,10 @@ export const NewEvaluationModal = (props: Props) => { onOk?.(); // 触发外部的onOk回调,用于刷新列表 onCancel(); }, - onError: (error) => { + onError: error => { message.error(t('create_evaluation_failed') + ': ' + error.message); }, - } + }, ); const handleOk = async () => { @@ -92,7 +91,7 @@ export const NewEvaluationModal = (props: Props) => { > { > - +
- ) -} + ); +}; -export default EvaluationDetail; \ No newline at end of file +export default EvaluationDetail; diff --git a/web/pages/models_evaluation/datasets/index.tsx b/web/pages/models_evaluation/datasets/index.tsx new file mode 100644 index 000000000..d1a29eadc --- /dev/null +++ b/web/pages/models_evaluation/datasets/index.tsx @@ -0,0 +1,262 @@ +import { apiInterceptors } from '@/client/api'; +import { + getBenchmarkDatasetTables, + getBenchmarkDatasets, + getBenchmarkTableRows, +} from '@/client/api/models_evaluation/datasets'; +import { NavTo } from '@/components/models_evaluation/components/nav-to'; +import { Card, Spin, Table, Tree, TreeDataNode, Typography } from 'antd'; +import React, { Key, useEffect, useState } from 'react'; +import styles from '../styles.module.css'; + +const { Title, Text } = Typography; + +// 定义数据类型 +interface Dataset { + dataset_id: string; + name: string; + tableCount: number; +} + +interface TableColumn { + name: string; + type: string; +} + +interface TableInfo { + name: string; + rowCount: number; + columns: TableColumn[]; +} + +interface TableRow { + [key: string]: any; +} + +interface TableData { + table: string; + limit: number; + rows: TableRow[]; +} + +type CustomTreeDataNode = TreeDataNode & { + parent?: string; // 指向父节点 +}; + +const DatasetsForEvaluation = () => { + const [tableData, setTableData] = useState(null); + const [loading, setLoading] = useState({ + datasets: false, + tables: false, + tableData: false, + }); + const [selectedDataset, setSelectedDataset] = useState(null); + const [selectedTable, setSelectedTable] = useState(null); + // 构造树结构数据 + const [treeData, setTreeData] = useState([]); + + // 获取数据集列表 + useEffect(() => { + async function init() { + const result: Dataset[] = await fetchDatasets(); + setTreeData( + result.map((item: Dataset) => ({ + title: `${item.name}(${item.tableCount}张表)`, + key: item.dataset_id, + selectable: false, + })), + ); + + setSelectedDataset(prevState => { + if (prevState && result.map(item => item.dataset_id).includes(prevState)) return prevState; + return result[0]?.dataset_id; + }); + } + init(); + }, []); + + const fetchDatasets = async () => { + try { + setLoading(prev => ({ ...prev, datasets: true })); + const [err, data] = await apiInterceptors(getBenchmarkDatasets()); + + if (err) { + console.error('获取数据集列表失败:', err); + return; + } + + return data || []; + } catch (err) { + console.error('获取数据集列表失败:', err); + } finally { + setLoading(prev => ({ ...prev, datasets: false })); + } + }; + + // 获取数据集下的表列表 + const fetchTables = async (datasetId: string): Promise => { + try { + setLoading(prev => ({ ...prev, tables: true })); + setSelectedTable(null); + + const [err, data] = await apiInterceptors(getBenchmarkDatasetTables(datasetId)); + + if (err) { + console.error('获取表列表失败:', err); + return []; + } + + return data || []; + } catch (err) { + console.error('获取表列表失败:', err); + return []; + } finally { + setLoading(prev => ({ ...prev, tables: false })); + } + }; + + const updateTreeData = ( + list: CustomTreeDataNode[], + key: React.Key, + children: CustomTreeDataNode[], + ): CustomTreeDataNode[] => + list.map(node => { + if (node.key === key) { + return { + ...node, + children, + }; + } + if (node.children) { + return { + ...node, + children: updateTreeData(node.children, key, children), + }; + } + return node; + }); + + const loadTreeData = async ({ key, children }: any) => { + if (children) { + return; + } + const tables = await fetchTables(key); + setTreeData((prev: CustomTreeDataNode[]) => + updateTreeData( + prev, + key, + tables.map(item => ({ + title: item.name, + key: item.name, + parent: key, // 保留父节点的指针 + isLeaf: true, + })), + ), + ); + return; + }; + + const onTableSelected = async (selectedKeys: Key[], { selectedNodes }: { selectedNodes: CustomTreeDataNode[] }) => { + setSelectedDataset(selectedNodes[0].parent as string); + setSelectedTable(selectedKeys[0] as string); + }; + + // 获取表数据 + const fetchTableData = async (datasetId: string, tableName: string) => { + try { + setLoading(prev => ({ ...prev, tableData: true })); + + const [err, data] = await apiInterceptors(getBenchmarkTableRows(datasetId, tableName)); + + if (err) { + console.error('获取表数据失败:', err); + return; + } + + setTableData(data || null); + } catch (err) { + console.error('获取表数据失败:', err); + } finally { + setLoading(prev => ({ ...prev, tableData: false })); + } + }; + + useEffect(() => { + if (selectedDataset && selectedTable) { + fetchTableData(selectedDataset, selectedTable); + } else { + setTableData(null); + } + }, [selectedDataset, selectedTable]); + + // 生成表格列定义 + const generateColumns = () => { + if (!tableData || tableData.rows.length === 0) return []; + + const firstRow = tableData.rows[0]; + return Object.keys(firstRow).map((key, index) => ({ + title: key, + dataIndex: key, + key: key, + width: index === 0 ? 100 : undefined, + })); + }; + + return ( +
+ + 评测数据集 + 返回评测任务列表 + + } + className={`w-full h-full flex-1 flex flex-col ${styles['page-card']}`} + > +
+ {/* 左侧数据集列表 */} +
+ + 数据集列表 + +
+ +
+
+ + {/* 右侧表数据 */} +
+
+ + 表数据<span className='font-normal text-sm'>(仅展示前10条数据)</span> + + {selectedTable && {selectedTable}} +
+
+ {loading.tableData ? ( +
+ +
+ ) : tableData && tableData.rows.length > 0 ? ( +
+ ) : selectedTable ? ( + 暂无数据 + ) : ( + 请先选择一个表 + )} + + + + + + ); +}; + +export default DatasetsForEvaluation; diff --git a/web/pages/models_evaluation/detail.tsx b/web/pages/models_evaluation/detail.tsx deleted file mode 100644 index 3143809b4..000000000 --- a/web/pages/models_evaluation/detail.tsx +++ /dev/null @@ -1,17 +0,0 @@ -import { Card, Typography } from "antd"; -import React from "react"; - -const { Title, Text } = Typography; - -const EvaluationDetail = () => { - return ( -
- - 详情页面占位 - 这里是模型评估的详细信息页面 - -
- ); -}; - -export default EvaluationDetail; \ No newline at end of file diff --git a/web/pages/models_evaluation/index.tsx b/web/pages/models_evaluation/index.tsx index f45f92f31..17c68dbae 100644 --- a/web/pages/models_evaluation/index.tsx +++ b/web/pages/models_evaluation/index.tsx @@ -1,13 +1,12 @@ -import { ConfigProvider } from "antd"; -import React, { useState } from "react"; +import { ConfigProvider } from 'antd'; +import { useState } from 'react'; -import { EvaluationHeader } from "./EvaluationHeader"; -import { TabKey } from "@/types/models_evaluation"; -import { EvaluationList } from "./EvaluationList"; -import { EvaluationProvider } from "./context/EvaluationContext"; +import { EvaluationHeader } from '@/components/models_evaluation/EvaluationHeader'; +import { EvaluationList } from '@/components/models_evaluation/EvaluationList'; +import { EvaluationProvider } from '@/components/models_evaluation/context/EvaluationContext'; +import { TabKey } from '@/types/models_evaluation'; const ModelsEvaluation = () => { - const [activeKey, setActiveKey] = useState('all'); const [filterValue, setFilterValue] = useState(''); @@ -31,15 +30,12 @@ const ModelsEvaluation = () => { onSearch={setFilterValue} />
- +
- ) -} + ); +}; -export default ModelsEvaluation; \ No newline at end of file +export default ModelsEvaluation; diff --git a/web/pages/models_evaluation/styles.module.css b/web/pages/models_evaluation/styles.module.css index 29e6d0036..4a267ce44 100644 --- a/web/pages/models_evaluation/styles.module.css +++ b/web/pages/models_evaluation/styles.module.css @@ -1,3 +1,12 @@ .models-evaluation-detail :global .ant-card-body { overflow-y: auto; +} + +.table :global table { + display: table; +} + +.page-card :global .ant-card-body { + height: 100%; + overflow: hidden; } \ No newline at end of file diff --git a/web/types/models_evaluation.ts b/web/types/models_evaluation.ts index 66846c807..d89a0786d 100644 --- a/web/types/models_evaluation.ts +++ b/web/types/models_evaluation.ts @@ -42,6 +42,7 @@ export interface EvaluationItem { log_info: null; gmt_create: string; gmt_modified: string; + round_time: number; } export interface EvaluationData { @@ -64,7 +65,7 @@ export interface getBenchmarkTaskListRequest { page_size: number; filter_param?: string; sys_code?: string; -}; +} // 新的创建评测任务请求类型 export type createBenchmarkTaskRequest = { @@ -72,4 +73,4 @@ export type createBenchmarkTaskRequest = { model_list: string[]; temperature: number; max_tokens: number; -}; \ No newline at end of file +};