feat: evaluation dataset info pages (#2911)

Co-authored-by: VLADIMIR KOBZEV <vladimir.kobzev@improvado.io>
Co-authored-by: Aries-ckt <916701291@qq.com>
Co-authored-by: xiandu.wl <xiandu.wl@antgroup.com>
This commit is contained in:
iterminatorheart
2025-10-19 12:40:48 +08:00
committed by GitHub
parent d2e92e9382
commit 19bbc6fa8d
19 changed files with 581 additions and 293 deletions

View File

@@ -0,0 +1,16 @@
import { GET } from '../index';
/**
 * Fetch the list of benchmark datasets.
 *
 * Issues a GET request to the benchmark `list_datasets` endpoint and returns
 * whatever the shared `GET` helper returns for it.
 */
export const getBenchmarkDatasets = () => GET<null, any>('/api/v2/serve/evaluate/benchmark/list_datasets');
/**
 * Fetch the physical tables that belong to one benchmark dataset.
 *
 * @param datasetId - Identifier of the dataset. It is percent-encoded before
 *   being placed in the URL path so that reserved characters (`/`, `?`, `#`,
 *   spaces, ...) cannot change which route is requested.
 * @returns The result of the shared `GET` helper for the dataset endpoint.
 */
export const getBenchmarkDatasetTables = (datasetId: string) => {
  const datasetSegment = encodeURIComponent(datasetId);
  return GET<null, any>(`/api/v2/serve/evaluate/benchmark/dataset/${datasetSegment}`);
};
/**
 * Fetch the rows of one table inside a benchmark dataset.
 *
 * Both path parameters are percent-encoded so that reserved characters in a
 * dataset id or table name (`/`, `?`, `#`, spaces, ...) cannot change which
 * route is requested.
 *
 * @param datasetId - Identifier of the dataset that owns the table.
 * @param table - Name of the table whose rows are requested.
 * @returns The result of the shared `GET` helper for the rows endpoint.
 */
export const getBenchmarkTableRows = (datasetId: string, table: string) => {
  const datasetSegment = encodeURIComponent(datasetId);
  const tableSegment = encodeURIComponent(table);
  return GET<null, any>(`/api/v2/serve/evaluate/benchmark/dataset/${datasetSegment}/${tableSegment}/rows`);
};

View File

@@ -1,7 +1,4 @@
import type {
getBenchmarkTaskListRequest,
createBenchmarkTaskRequest,
} from '@/types/models_evaluation';
import type { createBenchmarkTaskRequest, getBenchmarkTaskListRequest } from '@/types/models_evaluation';
import { getUserId } from '@/utils';
import { GET, POST } from '../index';
@@ -23,4 +20,4 @@ export const createBenchmarkTask = (data: createBenchmarkTaskRequest) => {
'user-id': userId,
},
});
};
};

View File

@@ -1,6 +0,0 @@
import { GET } from '../index';
/**
 * Fetch the list of usable model names.
 *
 * Issues a GET request to `/api/v1/model/types`; the payload is typed as an
 * array of strings.
 */
export const getUsableModels = () => GET<null, string[]>('/api/v1/model/types');

View File

@@ -2,5 +2,5 @@ import { GET } from '../index';
// 获取评测结果详情
export const getBenchmarkResultDetail = (evaluateCode: string) => {
return GET<null, any>(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`);
};
return GET<string, any>(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`);
};

View File

@@ -307,7 +307,7 @@ function SideBar() {
/>
),
path: '/models_evaluation',
}
},
];
if (hasAdmin) {
items.push({
@@ -421,7 +421,7 @@ function SideBar() {
const language = i18n.language;
if (language === 'zh') moment.locale('zh-cn');
if (language === 'en') moment.locale('en');
}, []);
}, [i18n.language]);
useEffect(() => {
setLogo(mode === 'dark' ? '/logo_s_latest.png' : '/logo_zh_latest.png');

View File

@@ -1,17 +1,18 @@
import { TabKey } from "@/types/models_evaluation";
import Icon, { ReloadOutlined, SearchOutlined } from "@ant-design/icons";
import { Button, Input, Segmented, Tooltip } from "antd";
import { t } from "i18next";
import { useState } from "react";
import { NewEvaluationModal } from "./NewEvaluationModal";
import { useEvaluation } from "./context/EvaluationContext";
import { TabKey } from '@/types/models_evaluation';
import { ReloadOutlined, SearchOutlined } from '@ant-design/icons';
import { Button, Input, Segmented, Tooltip } from 'antd';
import { t } from 'i18next';
import { useState } from 'react';
import { NewEvaluationModal } from './NewEvaluationModal';
import { NavTo } from './components/nav-to';
import { useEvaluation } from './context/EvaluationContext';
type Props = {
activeKey?: TabKey,
activeKey?: TabKey;
onTabChange?: (v: TabKey) => void;
filterValue?: string;
onSearch?: (v: string) => void;
}
};
export const EvaluationHeader = (props: Props) => {
const { onTabChange, activeKey = 'all', filterValue = '', onSearch } = props;
@@ -21,14 +22,14 @@ export const EvaluationHeader = (props: Props) => {
const onFilterChange = (e: any) => {
onSearch?.(e.target?.value);
}
};
const createEvaluations = () => {
setEvaluationVisible(true);
}
};
return (
<div className="flex items-center justify-between">
<div className='flex items-center justify-between'>
<div className='flex items-center gap-4'>
<Segmented
className='backdrop-filter h-10 backdrop-blur-lg bg-white bg-opacity-30 border border-white rounded-lg shadow p-1 dark:border-[#6f7f95] dark:bg-[#6f7f95] dark:bg-opacity-60'
@@ -56,18 +57,19 @@ export const EvaluationHeader = (props: Props) => {
<Tooltip title={'刷新'}>
<ReloadOutlined onClick={refresh} className='p-2 cursor-pointer' />
</Tooltip>
<Button
className='border-none text-white bg-button-gradient h-full'
onClick={createEvaluations}
<NavTo
href='/models_evaluation/datasets'
className='border-none text-white bg-button-gradient h-full m-2'
type='primary'
openNewTab={true}
>
</NavTo>
<Button className='border-none text-white bg-button-gradient h-full' onClick={createEvaluations}>
{t('create_evaluation')}
</Button>
<NewEvaluationModal
open={evaluationVisible}
onCancel={() => setEvaluationVisible(false)}
onOk={refresh}
/>
<NewEvaluationModal open={evaluationVisible} onCancel={() => setEvaluationVisible(false)} onOk={refresh} />
</div>
</div>
);
}
};

View File

@@ -1,16 +1,16 @@
import { Button, Table, Tag, Tooltip } from "antd";
import React, { useCallback, useEffect } from "react";
import { EvaluationItem } from "@/types/models_evaluation";
import { useEvaluation } from "./context/EvaluationContext";
import { useRouter } from "next/router";
import { EvaluationItem } from '@/types/models_evaluation';
import { Button, Table, Tag, Tooltip } from 'antd';
import { useRouter } from 'next/router';
import React, { useCallback, useEffect } from 'react';
import { useEvaluation } from './context/EvaluationContext';
import styles from './styles.module.css';
interface EvaluationListProps {
filterValue?: string;
type?: string;
}
export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
const { filterValue = '', type = 'all' } = props;
export const EvaluationList: React.FC<EvaluationListProps> = () => {
// const { filterValue = '', type = 'all' } = props;
const { data, loading, getModelsEvaluation } = useEvaluation();
const router = useRouter();
@@ -25,10 +25,10 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
const columns = [
{
title: 'ID',
dataIndex: 'evaluate_code',
key: 'evaluate_code',
width: '20%',
title: '评测场景',
dataIndex: 'scene_key',
key: 'scene_key',
width: '10%',
},
{
title: '任务名称',
@@ -36,6 +36,17 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
key: 'scene_value',
width: '10%',
},
{
title: '评测集名称',
dataIndex: 'datasets_name',
key: 'datasets_name',
width: '20%',
render: (datasets_name: string) => (
<Tooltip title={datasets_name}>
<p className='truncate'>{datasets_name}</p>
</Tooltip>
),
},
{
title: '创建时间',
dataIndex: 'gmt_create',
@@ -53,18 +64,17 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
dataIndex: 'model_list',
key: 'model_list',
width: '10%',
render: (model_list: string[]) => (
<span>{model_list.join(',')}</span>
),
render: (model_list: string[]) => <span>{model_list.join(',')}</span>,
},
{
title: '状态',
dataIndex: 'state',
key: 'state',
width: '5%',
render: (state: string, record: EvaluationItem) => {
let color = 'default';
let text = state;
if (state === 'running') {
color = 'blue';
text = '运行中';
@@ -84,34 +94,25 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
<Tooltip title={record.log_info}>
<Tag color={color}>{text}</Tag>
</Tooltip>
)
);
}
return <Tag color={color}>{text}</Tag>;
},
},
{
title: '可执行率',
key: 'executable_rate',
title: '评测轮次',
dataIndex: 'round_time',
key: 'round_time',
width: '10%',
render: () => <span>--</span>, // 暂时显示默认值
},
{
title: '正确率',
key: 'correct_rate',
width: '10%',
render: () => <span>--</span>, // 暂时显示默认值
},
{
title: '操作',
width: '5%',
key: 'action',
render: (_: any, record: EvaluationItem) => {
return (
<Button
type="link"
disabled={record.state !== 'complete'}
onClick={() => goToDetail(record)}
>
<Button type='link' disabled={record.state !== 'complete'} onClick={() => goToDetail(record)}>
</Button>
);
@@ -121,7 +122,8 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
return (
<Table
className='w-full'
tableLayout='fixed'
className={`w-full ${styles.table}`}
pagination={{
total: data?.total_count || 0,
current: data?.page || 1,
@@ -133,7 +135,7 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
loading={loading}
columns={columns}
dataSource={data?.items || []}
rowKey="evaluate_code"
rowKey='evaluate_code'
/>
);
};
};

View File

@@ -1,11 +1,10 @@
import { apiInterceptors } from "@/client/api";
import { getUsableModels } from "@/client/api/models_evaluation/model";
import { createBenchmarkTask } from "@/client/api/models_evaluation";
import { useRequest } from "ahooks";
import { Form, Input, InputNumber, Modal, Select, Slider, message } from "antd";
import { useState } from "react";
import { useTranslation } from "react-i18next";
import { createBenchmarkTaskRequest } from "@/types/models_evaluation";
import { apiInterceptors, getUsableModels } from '@/client/api';
import { createBenchmarkTask } from '@/client/api/models_evaluation';
import { createBenchmarkTaskRequest } from '@/types/models_evaluation';
import { useRequest } from 'ahooks';
import { Form, Input, InputNumber, Modal, Select, Slider, message } from 'antd';
import { useState } from 'react';
import { useTranslation } from 'react-i18next';
interface Props {
open: boolean;
@@ -26,17 +25,17 @@ export const NewEvaluationModal = (props: Props) => {
return data || [];
},
{
onSuccess: (data) => {
onSuccess: data => {
const options = data.map((item: string) => ({
label: item,
value: item,
}));
setModelOptions(options);
},
onError: (error) => {
onError: error => {
message.error(t('get_model_list_failed') + ': ' + error.message);
},
}
},
);
// 创建评测任务
@@ -61,10 +60,10 @@ export const NewEvaluationModal = (props: Props) => {
onOk?.(); // 触发外部的onOk回调用于刷新列表
onCancel();
},
onError: (error) => {
onError: error => {
message.error(t('create_evaluation_failed') + ': ' + error.message);
},
}
},
);
const handleOk = async () => {
@@ -92,7 +91,7 @@ export const NewEvaluationModal = (props: Props) => {
>
<Form
form={form}
layout="vertical"
layout='vertical'
requiredMark={false}
initialValues={{
temperature: 0.2,
@@ -101,34 +100,34 @@ export const NewEvaluationModal = (props: Props) => {
>
<Form.Item
label={t('task_name')}
name="scene_value"
name='scene_value'
rules={[{ required: true, message: t('please_input_task_name') }]}
>
<Input placeholder={t('please_input_task_name')} />
</Form.Item>
<Form.Item
label={t('models_to_evaluate')}
name="model_list"
name='model_list'
rules={[
{ required: true, message: t('please_select_models_to_evaluate') },
{ type: 'array', min: 1, message: t('please_select_at_least_one_model') }
{ type: 'array', min: 1, message: t('please_select_at_least_one_model') },
]}
>
<Select
mode="multiple"
mode='multiple'
placeholder={t('please_select_models_to_evaluate')}
options={modelOptions}
loading={modelLoading}
showSearch
optionFilterProp="label"
optionFilterProp='label'
allowClear
/>
</Form.Item>
<Form.Item
label={t('temperature')}
name="temperature"
name='temperature'
rules={[{ required: true, message: t('please_input_temperature') }]}
>
<Slider
@@ -145,17 +144,12 @@ export const NewEvaluationModal = (props: Props) => {
<Form.Item
label={t('max_new_tokens')}
name="max_tokens"
name='max_tokens'
rules={[{ required: true, message: t('please_input_max_new_tokens') }]}
>
<InputNumber
min={1}
max={32768}
style={{ width: '100%' }}
placeholder={t('please_input_max_new_tokens')}
/>
<InputNumber min={1} max={32768} style={{ width: '100%' }} placeholder={t('please_input_max_new_tokens')} />
</Form.Item>
</Form>
</Modal>
);
};
};

View File

@@ -1,5 +1,5 @@
import React from 'react';
import { Column } from '@ant-design/plots';
import React from 'react';
interface ChartData {
name: string;
@@ -18,7 +18,7 @@ interface InnerDataItem {
value: number;
}
export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
export const BarChart: React.FC<BarChartProps> = ({ data }) => {
// 转换数据格式以适应Ant Design Charts
const chartData = data.map(item => ({
...item,
@@ -34,7 +34,7 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
axis: {
y: {
labelFormatter: '.00%',
}
},
},
label: {
text: (d: InnerDataItem) => (d.value * 100).toFixed(2) + '%',
@@ -42,14 +42,14 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
},
interaction: {
tooltip: {
render: (e: any, {title, items}: { title: string, items: InnerDataItem[]}) => {
render: (_e: any, { title, items }: { title: string; items: InnerDataItem[] }) => {
return (
<div key={title}>
<h4>{title}</h4>
{items.map((item) => {
{items.map(item => {
const { name, value, color } = item;
return (
<div className="flex justify-between gap-4">
<div className='flex justify-between gap-4' key={item.name}>
<div>
<span
style={{
@@ -68,11 +68,11 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
);
})}
</div>
)
}
}
}
);
},
},
},
};
return <Column {...config} />;
};
};

View File

@@ -0,0 +1,39 @@
import { Button, ButtonProps } from 'antd';
import { useRouter } from 'next/router';
import React, { useCallback } from 'react';
/**
 * Button that navigates to `href`.
 *
 * - With `openNewTab`, the button itself is rendered as a link (`href` +
 *   `target='_blank'`), so the whole button surface is clickable and no
 *   anchor is nested inside a button element.
 * - Otherwise a click performs a client-side `router.push(href)`.
 *
 * @param href - Destination URL or route.
 * @param type - antd button type; defaults to `'link'`.
 * @param className - Extra class names applied to the button.
 * @param openNewTab - Open the destination in a new browser tab instead of
 *   navigating in place; defaults to `false`.
 * @param children - Button content.
 */
export const NavTo = ({
  href,
  type = 'link',
  className = '',
  openNewTab = false,
  children,
}: {
  href: string;
  type?: ButtonProps['type'];
  className?: string;
  openNewTab?: boolean;
  children: React.ReactNode;
}) => {
  // Declare the router before the callback that closes over it and list it as a dependency.
  const router = useRouter();

  const goToList = useCallback(() => {
    router.push(href);
  }, [router, href]);

  if (openNewTab) {
    // `noopener noreferrer` keeps the new tab from reaching back into this window.
    return (
      <Button type={type} className={className} href={href} target='_blank' rel='noopener noreferrer'>
        {children}
      </Button>
    );
  }

  return (
    <Button type={type} className={className} onClick={goToList}>
      {children}
    </Button>
  );
};

View File

@@ -16,17 +16,8 @@ interface EvaluationProviderProps {
type?: string;
}
export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({
children,
filterValue = '',
type = 'all'
}) => {
const {
data,
loading,
getModelsEvaluation,
refresh,
} = useEvaluationList({
export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({ children, filterValue = '', type = 'all' }) => {
const { data, loading, getModelsEvaluation, refresh } = useEvaluationList({
filterValue,
type,
});
@@ -34,10 +25,10 @@ export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({
return (
<EvaluationContext.Provider
value={{
refresh,
refresh,
data,
loading,
getModelsEvaluation
loading,
getModelsEvaluation,
}}
>
{children}

View File

@@ -1,8 +1,8 @@
import { apiInterceptors } from "@/client/api";
import { getBenchmarkTaskList } from "@/client/api/models_evaluation";
import { useRequest } from "ahooks";
import { message } from "antd";
import { EvaluationData, getBenchmarkTaskListRequest } from "@/types/models_evaluation";
import { apiInterceptors } from '@/client/api';
import { getBenchmarkTaskList } from '@/client/api/models_evaluation';
import { EvaluationData, getBenchmarkTaskListRequest } from '@/types/models_evaluation';
import { useRequest } from 'ahooks';
import { message } from 'antd';
interface UseEvaluationListProps {
filterValue?: string;
@@ -26,18 +26,16 @@ export const useEvaluationList = (props: UseEvaluationListProps) => {
sys_code: type === 'all' ? undefined : type,
};
const [_, data] = await apiInterceptors(
getBenchmarkTaskList(params)
);
const [_, data] = await apiInterceptors(getBenchmarkTaskList(params));
return data as EvaluationData;
},
{
manual: true,
onError: (e) => {
onError: e => {
message.error(e.message || '获取评估列表失败');
},
}
},
);
return {
@@ -46,4 +44,4 @@ export const useEvaluationList = (props: UseEvaluationListProps) => {
getModelsEvaluation,
refresh,
};
};
};

View File

@@ -0,0 +1,3 @@
/*
 * Applies `display: table` to every <table> element rendered inside an
 * element carrying the locally scoped `.table` class (used via `styles.table`
 * on antd <Table> components).
 * NOTE(review): presumably this counteracts a global rule that changes the
 * display of <table> elements — confirm which rule it overrides.
 */
.table :global table {
display: table;
}

View File

@@ -1,12 +1,11 @@
import { Card, Typography, Spin, Descriptions, Row, Col, Statistic, Button, Tabs, Table } from "antd";
import React, { useCallback, useEffect, useState } from "react";
import { useRouter } from "next/router";
import { apiInterceptors } from "@/client/api";
import { getBenchmarkResultDetail } from "@/client/api/models_evaluation/result";
import { BarChart } from "./components/bar-chart";
import styles from "./styles.module.css";
const { Title } = Typography;
import { apiInterceptors } from '@/client/api';
import { getBenchmarkResultDetail } from '@/client/api/models_evaluation/result';
import { BarChart } from '@/components/models_evaluation/components/bar-chart';
import { NavTo } from '@/components/models_evaluation/components/nav-to';
import { Button, Card, Col, Descriptions, Row, Spin, Statistic, Table, Tabs } from 'antd';
import { useRouter } from 'next/router';
import { useEffect, useState } from 'react';
import styles from './styles.module.css';
// 定义数据类型
interface BenchmarkSummary {
@@ -36,31 +35,41 @@ interface ChartData {
const EvaluationDetail = () => {
const router = useRouter();
const goToList = useCallback(() => {
router.push('/models_evaluation');
}, []);
const { code } = router.query;
return (
<div className="flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12">
<div className='flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12'>
<Card
title={
<>
<span></span>
<Button type="link" onClick={() => goToList()}>
</Button>
</>
<div className='flex justify-between'>
<div>
<span></span>
<NavTo href='/models_evaluation'></NavTo>
</div>
<div>
<NavTo href='/models_evaluation/datasets' openNewTab={true}>
</NavTo>
<Button
type='link'
target='_blank'
rel='noopener noreferrer'
href={`${process.env.API_BASE_URL}/api/v1/evaluate/benchmark_result_download?evaluate_code=${code}`}
>
</Button>
</div>
</div>
}
className={`w-full h-full flex flex-col ${styles['models-evaluation-detail']}`}
>
<EvaluationDetailContent />
</Card>
</div>
)
}
);
};
const EvaluationDetailContent = () => {
const router = useRouter();
const { code } = router.query;
const [loading, setLoading] = useState(true);
@@ -77,7 +86,7 @@ const EvaluationDetailContent = () => {
try {
setLoading(true);
const [err, data] = await apiInterceptors(getBenchmarkResultDetail(evaluateCode));
if (err) {
setError(err.message || '获取评测结果失败');
return;
@@ -94,31 +103,31 @@ const EvaluationDetailContent = () => {
if (router.isFallback) {
return (
<div className="flex justify-center items-center h-full">
<Spin size="large" />
<div className='flex justify-center items-center h-full'>
<Spin size='large' />
</div>
);
}
if (loading) {
return (
<div className="flex justify-center items-center h-full">
<Spin size="large" />
<div className='flex justify-center items-center h-full'>
<Spin size='large' />
</div>
);
}
if (error) {
return (
<div className="flex justify-center items-center h-full">
<div className="text-red-500">{error}</div>
<div className='flex justify-center items-center h-full'>
<div className='text-red-500'>{error}</div>
</div>
);
}
if (!resultData) {
return (
<div className="flex justify-center items-center h-full">
<div className='flex justify-center items-center h-full'>
<div></div>
</div>
);
@@ -130,140 +139,132 @@ const EvaluationDetailContent = () => {
const totalFailed = resultData.summaries.reduce((sum, item) => sum + item.failed, 0);
const totalException = resultData.summaries.reduce((sum, item) => sum + item.exception, 0);
const totalQuestions = totalRight + totalWrong + totalFailed + totalException;
// const overallAccuracy = totalQuestions > 0 ? totalRight / totalQuestions : 0;
// const overallExecRate = totalQuestions > 0 ? (totalRight + totalWrong) / totalQuestions : 0;
// 准备图表数据
const chartData: ChartData[] = resultData.summaries.map(item => [
{ name: '可执行率', label: item.llmCode, value: item.execRate },
{ name: '正确率', label: item.llmCode, value: item.accuracy }
]).flat();
const chartData: ChartData[] = resultData.summaries
.map(item => [
{ name: '可执行率', label: item.llmCode, value: item.execRate },
{ name: '正确率', label: item.llmCode, value: item.accuracy },
])
.flat();
return (
<>
<Descriptions
bordered
items={[{
key: '1',
label: '任务ID',
children: resultData.evaluate_code
}]}
items={[
{
key: '1',
label: '任务ID',
children: resultData.evaluate_code,
},
]}
/>
<div className="mt-6">
<Row gutter={16} className="mb-4">
<div className='mt-6'>
<Row gutter={16} className='mb-4'>
<Col span={4}>
<Statistic
title="模型数"
value={resultData.summaries?.length}
className="border rounded-lg p-4"
/>
<Statistic title='模型数' value={resultData.summaries?.length} className='border rounded-lg p-4' />
</Col>
<Col span={4}>
<Statistic
title="总题数"
value={totalQuestions}
className="border rounded-lg p-4"
/>
<Statistic title='总题数' value={totalQuestions} className='border rounded-lg p-4' />
</Col>
<Col span={4}>
<Statistic
title="正确题数"
value={totalRight}
className="border rounded-lg p-4"
/>
<Statistic title='正确题数' value={totalRight} className='border rounded-lg p-4' />
</Col>
<Col span={4}>
<Statistic
title="错误题数"
value={totalWrong}
className="border rounded-lg p-4"
/>
<Statistic title='错误题数' value={totalWrong} className='border rounded-lg p-4' />
</Col>
<Col span={4}>
<Statistic
title="失败题数"
value={totalFailed}
className="border rounded-lg p-4"
/>
<Statistic title='失败题数' value={totalFailed} className='border rounded-lg p-4' />
</Col>
</Row>
</div>
<ModelsTable
data={resultData.summaries ?? []}
/>
<ModelsTable data={resultData.summaries ?? []} />
<Tabs
items={[
{
key: 'overview',
label: '概览',
children: <BarChart data={chartData} height={400} />
}
children: <BarChart data={chartData} />,
},
]}
/>
</>
)
);
};
const ModelsTable = ({ data }: {data: BenchmarkSummary[] }) => {
const columns = [{
title: '轮次',
dataIndex: 'roundId',
width: '12.5%',
key: 'roundId'
}, {
title: '模型',
dataIndex: 'llmCode',
width: '12.5%',
key: 'llmCode'
}, {
title: '题目数',
width: '12.5%',
key: 'total',
render: (record: any) => record.right + record.wrong + record.failed,
}, {
title: '正确题数',
dataIndex: 'right',
width: '12.5%',
key: 'right'
}, {
title: '错误题数',
dataIndex: 'wrong',
width: '12.5%',
key: 'wrong'
}, {
title: '失败题数',
dataIndex: 'failed',
width: '12.5%',
key: 'failed'
}, {
title: '正确率',
dataIndex: 'accuracy',
width: '12.5%',
key: 'accuracy',
render: (value: number) => {
return `${(value * 100).toFixed(2)}%`;
}
},{
title: '可执行率',
dataIndex: 'execRate',
width: '12.5%',
key: 'execRate',
render: (value: number) => {
return `${(value * 100).toFixed(2)}%`;
}
}];
const ModelsTable = ({ data }: { data: BenchmarkSummary[] }) => {
const columns = [
{
title: '轮次',
dataIndex: 'roundId',
width: '12.5%',
key: 'roundId',
},
{
title: '模型',
dataIndex: 'llmCode',
width: '12.5%',
key: 'llmCode',
},
{
title: '题目数',
width: '12.5%',
key: 'total',
render: (record: any) => record.right + record.wrong + record.failed,
},
{
title: '正确题数',
dataIndex: 'right',
width: '12.5%',
key: 'right',
},
{
title: '错误题数',
dataIndex: 'wrong',
width: '12.5%',
key: 'wrong',
},
{
title: '失败题数',
dataIndex: 'failed',
width: '12.5%',
key: 'failed',
},
{
title: '正确率',
dataIndex: 'accuracy',
width: '12.5%',
key: 'accuracy',
render: (value: number) => {
return `${(value * 100).toFixed(2)}%`;
},
},
{
title: '可执行率',
dataIndex: 'execRate',
width: '12.5%',
key: 'execRate',
render: (value: number) => {
return `${(value * 100).toFixed(2)}%`;
},
},
];
return (
<Table
tableLayout='fixed'
pagination={false}
className='w-full'
className={`w-full ${styles.table}`}
columns={columns}
dataSource={data}
/>
)
}
);
};
export default EvaluationDetail;
export default EvaluationDetail;

View File

@@ -0,0 +1,262 @@
import { apiInterceptors } from '@/client/api';
import {
getBenchmarkDatasetTables,
getBenchmarkDatasets,
getBenchmarkTableRows,
} from '@/client/api/models_evaluation/datasets';
import { NavTo } from '@/components/models_evaluation/components/nav-to';
import { Card, Spin, Table, Tree, TreeDataNode, Typography } from 'antd';
import React, { Key, useEffect, useState } from 'react';
import styles from '../styles.module.css';
const { Title, Text } = Typography;
// Data type definitions
/** One benchmark dataset as listed in the left-hand tree. */
interface Dataset {
dataset_id: string;
name: string;
// Number of tables in the dataset; shown next to the name in the tree title.
tableCount: number;
}
/** A single column of a table. */
interface TableColumn {
name: string;
type: string;
}
/** A table that belongs to a dataset; only `name` is read by this page. */
interface TableInfo {
name: string;
rowCount: number;
columns: TableColumn[];
}
/** One row of table data, keyed by column name. */
interface TableRow {
[key: string]: any;
}
/** Payload returned for a table's rows. */
interface TableData {
table: string;
// NOTE(review): presumably the maximum number of rows returned — confirm against the API.
limit: number;
rows: TableRow[];
}
/** Tree node extended with a back-reference to the dataset it was loaded under. */
type CustomTreeDataNode = TreeDataNode & {
parent?: string; // points to the parent node
};
const DatasetsForEvaluation = () => {
const [tableData, setTableData] = useState<TableData | null>(null);
const [loading, setLoading] = useState({
datasets: false,
tables: false,
tableData: false,
});
const [selectedDataset, setSelectedDataset] = useState<string | null>(null);
const [selectedTable, setSelectedTable] = useState<string | null>(null);
// 构造树结构数据
const [treeData, setTreeData] = useState<CustomTreeDataNode[]>([]);
// Load the dataset list once on mount and seed the tree with one root node per dataset.
useEffect(() => {
  async function init() {
    // Guard against a missing result (e.g. a failed request) so the mapping
    // below never runs on `undefined`.
    const result: Dataset[] = (await fetchDatasets()) ?? [];
    setTreeData(
      result.map((item: Dataset) => ({
        title: `${item.name}(${item.tableCount}张表)`,
        key: item.dataset_id,
        // Dataset nodes only expand; table leaves are the selectable nodes.
        selectable: false,
      })),
    );
    setSelectedDataset(prevState => {
      // Keep the current selection while it still exists in the fresh list.
      if (prevState && result.some(item => item.dataset_id === prevState)) return prevState;
      // Otherwise fall back to the first dataset, or to `null` (never `undefined`) when the list is empty.
      return result[0]?.dataset_id ?? null;
    });
  }
  init();
}, []);
/**
 * Request the list of benchmark datasets.
 *
 * Toggles `loading.datasets` around the request. Failures are logged and
 * reported as an empty list, so callers always receive an array.
 *
 * @returns The datasets returned by the API, or `[]` when the request fails
 *   or yields no data.
 */
const fetchDatasets = async (): Promise<Dataset[]> => {
  try {
    setLoading(prev => ({ ...prev, datasets: true }));
    const [err, data] = await apiInterceptors(getBenchmarkDatasets());
    if (err) {
      console.error('获取数据集列表失败:', err);
      return [];
    }
    return data || [];
  } catch (err) {
    console.error('获取数据集列表失败:', err);
    return [];
  } finally {
    setLoading(prev => ({ ...prev, datasets: false }));
  }
};
/**
 * Request the tables of one dataset.
 *
 * Toggles `loading.tables` around the request and clears the currently
 * selected table before the request is sent. Failures are logged and reported
 * as an empty list.
 *
 * @param datasetId - Identifier of the dataset whose tables are requested.
 * @returns The tables returned by the API, or `[]` on failure / empty payload.
 */
const fetchTables = async (datasetId: string): Promise<TableInfo[]> => {
  let tables: TableInfo[] = [];
  try {
    setLoading(prev => ({ ...prev, tables: true }));
    setSelectedTable(null);
    const [requestError, payload] = await apiInterceptors(getBenchmarkDatasetTables(datasetId));
    if (requestError) {
      console.error('获取表列表失败:', requestError);
    } else {
      tables = payload || [];
    }
  } catch (thrown) {
    console.error('获取表列表失败:', thrown);
  } finally {
    setLoading(prev => ({ ...prev, tables: false }));
  }
  return tables;
};
/**
 * Return a copy of `list` in which every node whose key equals `key` has its
 * `children` replaced by the given array.
 *
 * The search descends into existing `children`; nodes that neither match nor
 * have children are returned as-is (same object reference). The input is not
 * mutated.
 *
 * @param list - Tree level to search.
 * @param key - Key of the node that receives the new children.
 * @param children - Children to attach to the matching node.
 */
const updateTreeData = (
  list: CustomTreeDataNode[],
  key: React.Key,
  children: CustomTreeDataNode[],
): CustomTreeDataNode[] => {
  const withChildren = (node: CustomTreeDataNode): CustomTreeDataNode => {
    if (node.key === key) {
      return { ...node, children };
    }
    if (!node.children) {
      return node;
    }
    return { ...node, children: updateTreeData(node.children, key, children) };
  };
  return list.map(node => withChildren(node));
};
/**
 * Lazy loader passed to the tree's `loadData` prop: fetches the tables of the
 * expanded dataset node and attaches them as leaf nodes.
 *
 * Nodes that already carry `children` are skipped.
 *
 * @param node - The tree node being expanded; only `key` and `children` are read.
 */
const loadTreeData = async ({ key, children }: any) => {
  const alreadyLoaded = Boolean(children);
  if (alreadyLoaded) {
    return;
  }

  const tables = await fetchTables(key);
  setTreeData((prev: CustomTreeDataNode[]) => {
    const leaves = tables.map(({ name }) => ({
      title: name,
      key: name,
      parent: key, // keep a pointer back to the parent (dataset) node
      isLeaf: true,
    }));
    return updateTreeData(prev, key, leaves);
  });
};
/**
 * Tree `onSelect` handler: records which table (and its owning dataset) is
 * currently selected.
 *
 * When the selection is empty (for example the selected node was clicked
 * again and deselected), only the table selection is cleared instead of
 * dereferencing a missing node.
 *
 * @param selectedKeys - Keys of the selected nodes; the first one is the table name.
 * @param info - Selection info; `selectedNodes[0].parent` holds the dataset id.
 */
const onTableSelected = async (selectedKeys: Key[], { selectedNodes }: { selectedNodes: CustomTreeDataNode[] }) => {
  const [node] = selectedNodes;
  if (!node) {
    setSelectedTable(null);
    return;
  }
  setSelectedDataset(node.parent as string);
  setSelectedTable(selectedKeys[0] as string);
};
/**
 * Request the rows of one table and store them in `tableData`.
 *
 * @param datasetId - Dataset that owns the table.
 * @param tableName - Table whose rows are requested.
 * @param isStale - Optional probe that reports whether this request has been
 *   superseded by a newer selection. A stale request leaves both the table
 *   data and the loading flag alone, so it cannot overwrite the result of
 *   the newer request. Defaults to "never stale".
 */
const fetchTableData = async (datasetId: string, tableName: string, isStale: () => boolean = () => false) => {
  try {
    setLoading(prev => ({ ...prev, tableData: true }));
    const [err, data] = await apiInterceptors(getBenchmarkTableRows(datasetId, tableName));
    if (isStale()) {
      return;
    }
    if (err) {
      console.error('获取表数据失败:', err);
      return;
    }
    setTableData(data || null);
  } catch (err) {
    console.error('获取表数据失败:', err);
  } finally {
    // A superseded request must not hide the spinner of the request that replaced it.
    if (!isStale()) {
      setLoading(prev => ({ ...prev, tableData: false }));
    }
  }
};

// Reload the rows whenever the selected dataset/table pair changes.
useEffect(() => {
  if (!selectedDataset || !selectedTable) {
    setTableData(null);
    // A request cancelled by clearing the selection no longer resets the flag itself.
    setLoading(prev => ({ ...prev, tableData: false }));
    return;
  }

  let stale = false;
  fetchTableData(selectedDataset, selectedTable, () => stale);
  // Mark the in-flight request as superseded when the selection changes or the page unmounts.
  return () => {
    stale = true;
  };
}, [selectedDataset, selectedTable]);
/**
 * Build the column definitions for the data table from the keys of the first
 * row of `tableData`.
 *
 * Returns an empty array when there is no table data or it has no rows. The
 * first column gets a fixed width of 100; all others leave `width` undefined.
 */
const generateColumns = () => {
  const rows = tableData ? tableData.rows : [];
  if (rows.length === 0) {
    return [];
  }

  const columnNames = Object.keys(rows[0]);
  return columnNames.map((columnName, position) => {
    const width = position === 0 ? 100 : undefined;
    return {
      title: columnName,
      dataIndex: columnName,
      key: columnName,
      width,
    };
  });
};
return (
<div className='h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12'>
<Card
title={
<>
<NavTo href='/models_evaluation'></NavTo>
</>
}
className={`w-full h-full flex-1 flex flex-col ${styles['page-card']}`}
>
<div className='flex h-full'>
{/* 左侧数据集列表 */}
<div className='w-1/4 pr-4 border-r flex flex-col'>
<Title level={5} className='mb-4'>
</Title>
<div className='overflow-y-auto h-full'>
<Tree loadData={loadTreeData} treeData={treeData} onSelect={onTableSelected} />
</div>
</div>
{/* 右侧表数据 */}
<div className='w-3/4 pl-4 flex flex-col'>
<div className='flex justify-between items-center mb-4'>
<Title level={5} className='mb-0'>
<span className='font-normal text-sm'>10</span>
</Title>
{selectedTable && <Text type='secondary'>{selectedTable}</Text>}
</div>
<div className='overflow-y-auto h-full'>
{loading.tableData ? (
<div className='flex justify-center items-center h-full'>
<Spin />
</div>
) : tableData && tableData.rows.length > 0 ? (
<Table
className={`w-full flex-auto ${styles.table}`}
dataSource={tableData.rows}
columns={generateColumns()}
pagination={false}
scroll={{ x: true }}
size='small'
/>
) : selectedTable ? (
<Text type='secondary'></Text>
) : (
<Text type='secondary'></Text>
)}
</div>
</div>
</div>
</Card>
</div>
);
};
export default DatasetsForEvaluation;

View File

@@ -1,17 +0,0 @@
import { Card, Typography } from "antd";
import React from "react";
const { Title, Text } = Typography;
/**
 * Placeholder evaluation-detail page: a full-size gradient container holding
 * a single titled card with a level-4 heading and a text element.
 * NOTE(review): the heading and text elements carry no content in this view —
 * confirm whether text was lost during extraction.
 */
const EvaluationDetail = () => {
return (
<div className="flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12">
<Card title="模型评估详情" className="w-full">
<Title level={4}></Title>
<Text></Text>
</Card>
</div>
);
};
export default EvaluationDetail;

View File

@@ -1,13 +1,12 @@
import { ConfigProvider } from "antd";
import React, { useState } from "react";
import { ConfigProvider } from 'antd';
import { useState } from 'react';
import { EvaluationHeader } from "./EvaluationHeader";
import { TabKey } from "@/types/models_evaluation";
import { EvaluationList } from "./EvaluationList";
import { EvaluationProvider } from "./context/EvaluationContext";
import { EvaluationHeader } from '@/components/models_evaluation/EvaluationHeader';
import { EvaluationList } from '@/components/models_evaluation/EvaluationList';
import { EvaluationProvider } from '@/components/models_evaluation/context/EvaluationContext';
import { TabKey } from '@/types/models_evaluation';
const ModelsEvaluation = () => {
const [activeKey, setActiveKey] = useState<TabKey>('all');
const [filterValue, setFilterValue] = useState<string>('');
@@ -31,15 +30,12 @@ const ModelsEvaluation = () => {
onSearch={setFilterValue}
/>
<div className='flex flex-col h-full w-full overflow-y-auto'>
<EvaluationList
filterValue={filterValue}
type={activeKey}
/>
<EvaluationList filterValue={filterValue} type={activeKey} />
</div>
</div>
</EvaluationProvider>
</ConfigProvider>
)
}
);
};
export default ModelsEvaluation;
export default ModelsEvaluation;

View File

@@ -1,3 +1,12 @@
/*
 * Lets the body of the card carrying `.models-evaluation-detail` scroll
 * vertically when its content overflows. `.ant-card-body` is targeted through
 * `:global` so the class name is not rewritten by CSS modules.
 */
.models-evaluation-detail :global .ant-card-body {
overflow-y: auto;
}
/*
 * Applies `display: table` to every <table> element rendered inside an
 * element carrying the locally scoped `.table` class.
 * NOTE(review): presumably this counteracts a global rule that changes the
 * display of <table> elements — confirm which rule it overrides.
 */
.table :global table {
display: table;
}
/*
 * Makes the body of the card carrying `.page-card` fill the card's height and
 * clips anything that overflows it; the inner panels of the page declare
 * their own `overflow-y-auto` scrolling.
 */
.page-card :global .ant-card-body {
height: 100%;
overflow: hidden;
}

View File

@@ -42,6 +42,7 @@ export interface EvaluationItem {
log_info: null;
gmt_create: string;
gmt_modified: string;
round_time: number;
}
export interface EvaluationData {
@@ -64,7 +65,7 @@ export interface getBenchmarkTaskListRequest {
page_size: number;
filter_param?: string;
sys_code?: string;
};
}
// 新的创建评测任务请求类型
export type createBenchmarkTaskRequest = {
@@ -72,4 +73,4 @@ export type createBenchmarkTaskRequest = {
model_list: string[];
temperature: number;
max_tokens: number;
};
};