feat: evaluation dataset info pages (#2911)

Co-authored-by: VLADIMIR KOBZEV <vladimir.kobzev@improvado.io> Co-authored-by: Aries-ckt <916701291@qq.com> Co-authored-by: xiandu.wl <xiandu.wl@antgroup.com>
2026-01-14 12:16:38 +00:00 · 2025-10-19 12:40:48 +08:00
parent d2e92e9382
commit 19bbc6fa8d
19 changed files with 581 additions and 293 deletions
--- a/web/client/api/models_evaluation/datasets.ts
+++ b/web/client/api/models_evaluation/datasets.ts
@@ -0,0 +1,16 @@
+import { GET } from '../index';
+
+// Fetch the benchmark dataset list.
+// Takes no arguments and issues a GET to the list_datasets endpoint;
+// the response payload is left untyped (`any`).
+export const getBenchmarkDatasets = () => GET<null, any>(`/api/v2/serve/evaluate/benchmark/list_datasets`);
+
+// Fetch the physical tables belonging to one dataset.
+// The id is percent-encoded so reserved characters ('/', '?', '#') cannot change the request path.
+export const getBenchmarkDatasetTables = (datasetId: string) =>
+  GET<null, any>(`/api/v2/serve/evaluate/benchmark/dataset/${encodeURIComponent(datasetId)}`);
+
+// Fetch the rows of one table in a dataset; both path segments are percent-encoded so they stay inside their segment.
+export const getBenchmarkTableRows = (datasetId: string, table: string) =>
+  GET<null, any>(
+    `/api/v2/serve/evaluate/benchmark/dataset/${encodeURIComponent(datasetId)}/${encodeURIComponent(table)}/rows`);
--- a/web/client/api/models_evaluation/index.ts
+++ b/web/client/api/models_evaluation/index.ts
@@ -1,7 +1,4 @@
-import type {
-  getBenchmarkTaskListRequest,
-  createBenchmarkTaskRequest,
-} from '@/types/models_evaluation';
+import type { createBenchmarkTaskRequest, getBenchmarkTaskListRequest } from '@/types/models_evaluation';
 import { getUserId } from '@/utils';
 import { GET, POST } from '../index';

@@ -23,4 +20,4 @@ export const createBenchmarkTask = (data: createBenchmarkTaskRequest) => {
      'user-id': userId,
    },
  });
-};
+};
--- a/web/client/api/models_evaluation/model.ts
+++ b/web/client/api/models_evaluation/model.ts
@@ -1,6 +0,0 @@
-import { GET } from '../index';
-
-// 获取可用模型列表
-export const getUsableModels = () => {
-  return GET<null, Array<string>>('/api/v1/model/types');
-};
--- a/web/client/api/models_evaluation/result.ts
+++ b/web/client/api/models_evaluation/result.ts
@@ -2,5 +2,5 @@ import { GET } from '../index';

 // 获取评测结果详情
 export const getBenchmarkResultDetail = (evaluateCode: string) => {
-  return GET<null, any>(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`);
-};
+  return GET<string, any>(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`);
+};
--- a/web/components/layout/side-bar.tsx
+++ b/web/components/layout/side-bar.tsx
@@ -307,7 +307,7 @@ function SideBar() {
          />
        ),
        path: '/models_evaluation',
-      }
+      },
    ];
    if (hasAdmin) {
      items.push({
@@ -421,7 +421,7 @@ function SideBar() {
    const language = i18n.language;
    if (language === 'zh') moment.locale('zh-cn');
    if (language === 'en') moment.locale('en');
-  }, []);
+  }, [i18n.language]);

  useEffect(() => {
    setLogo(mode === 'dark' ? '/logo_s_latest.png' : '/logo_zh_latest.png');
--- a/web/components/models_evaluation/EvaluationHeader.tsx
+++ b/web/components/models_evaluation/EvaluationHeader.tsx
@@ -1,17 +1,18 @@
-import { TabKey } from "@/types/models_evaluation";
-import Icon, { ReloadOutlined, SearchOutlined } from "@ant-design/icons";
-import { Button, Input, Segmented, Tooltip } from "antd";
-import { t } from "i18next";
-import { useState } from "react";
-import { NewEvaluationModal } from "./NewEvaluationModal";
-import { useEvaluation } from "./context/EvaluationContext";
+import { TabKey } from '@/types/models_evaluation';
+import { ReloadOutlined, SearchOutlined } from '@ant-design/icons';
+import { Button, Input, Segmented, Tooltip } from 'antd';
+import { t } from 'i18next';
+import { useState } from 'react';
+import { NewEvaluationModal } from './NewEvaluationModal';
+import { NavTo } from './components/nav-to';
+import { useEvaluation } from './context/EvaluationContext';

 type Props = {
-  activeKey?: TabKey,
+  activeKey?: TabKey;
  onTabChange?: (v: TabKey) => void;
  filterValue?: string;
  onSearch?: (v: string) => void;
-}
+};

 export const EvaluationHeader = (props: Props) => {
  const { onTabChange, activeKey = 'all', filterValue = '', onSearch } = props;
@@ -21,14 +22,14 @@ export const EvaluationHeader = (props: Props) => {

  const onFilterChange = (e: any) => {
    onSearch?.(e.target?.value);
-  }
+  };

  const createEvaluations = () => {
    setEvaluationVisible(true);
-  }
+  };

  return (
-    <div className="flex items-center justify-between">
+    <div className='flex items-center justify-between'>
      <div className='flex items-center gap-4'>
        <Segmented
          className='backdrop-filter h-10 backdrop-blur-lg bg-white bg-opacity-30 border border-white rounded-lg shadow p-1 dark:border-[#6f7f95] dark:bg-[#6f7f95] dark:bg-opacity-60'
@@ -56,18 +57,19 @@ export const EvaluationHeader = (props: Props) => {
        <Tooltip title={'刷新'}>
          <ReloadOutlined onClick={refresh} className='p-2 cursor-pointer' />
        </Tooltip>
-        <Button
-          className='border-none text-white bg-button-gradient h-full'
-          onClick={createEvaluations}
+        <NavTo
+          href='/models_evaluation/datasets'
+          className='border-none text-white bg-button-gradient h-full m-2'
+          type='primary'
+          openNewTab={true}
        >
+          查看评测数据
+        </NavTo>
+        <Button className='border-none text-white bg-button-gradient h-full' onClick={createEvaluations}>
          {t('create_evaluation')}
        </Button>
-        <NewEvaluationModal 
-          open={evaluationVisible} 
-          onCancel={() => setEvaluationVisible(false)}
-          onOk={refresh}
-        />
+        <NewEvaluationModal open={evaluationVisible} onCancel={() => setEvaluationVisible(false)} onOk={refresh} />
      </div>
    </div>
  );
-}
+};
--- a/web/components/models_evaluation/EvaluationList.tsx
+++ b/web/components/models_evaluation/EvaluationList.tsx
@@ -1,16 +1,16 @@
-import { Button, Table, Tag, Tooltip } from "antd";
-import React, { useCallback, useEffect } from "react";
-import { EvaluationItem } from "@/types/models_evaluation";
-import { useEvaluation } from "./context/EvaluationContext";
-import { useRouter } from "next/router";
-
+import { EvaluationItem } from '@/types/models_evaluation';
+import { Button, Table, Tag, Tooltip } from 'antd';
+import { useRouter } from 'next/router';
+import React, { useCallback, useEffect } from 'react';
+import { useEvaluation } from './context/EvaluationContext';
+import styles from './styles.module.css';
 interface EvaluationListProps {
  filterValue?: string;
  type?: string;
 }

-export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
-  const { filterValue = '', type = 'all' } = props;
+export const EvaluationList: React.FC<EvaluationListProps> = () => {
+  // const { filterValue = '', type = 'all' } = props;
  const { data, loading, getModelsEvaluation } = useEvaluation();

  const router = useRouter();
@@ -25,10 +25,10 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {

  const columns = [
    {
-      title: 'ID',
-      dataIndex: 'evaluate_code',
-      key: 'evaluate_code',
-      width: '20%',
+      title: '评测场景',
+      dataIndex: 'scene_key',
+      key: 'scene_key',
+      width: '10%',
    },
    {
      title: '任务名称',
@@ -36,6 +36,17 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
      key: 'scene_value',
      width: '10%',
    },
+    {
+      title: '评测集名称',
+      dataIndex: 'datasets_name',
+      key: 'datasets_name',
+      width: '20%',
+      render: (datasets_name: string) => (
+        <Tooltip title={datasets_name}>
+          <p className='truncate'>{datasets_name}</p>
+        </Tooltip>
+      ),
+    },
    {
      title: '创建时间',
      dataIndex: 'gmt_create',
@@ -53,18 +64,17 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
      dataIndex: 'model_list',
      key: 'model_list',
      width: '10%',
-      render: (model_list: string[]) => (
-        <span>{model_list.join(',')}</span>
-      ),
+      render: (model_list: string[]) => <span>{model_list.join(',')}</span>,
    },
    {
      title: '状态',
      dataIndex: 'state',
      key: 'state',
+      width: '5%',
      render: (state: string, record: EvaluationItem) => {
        let color = 'default';
        let text = state;
-        
+
        if (state === 'running') {
          color = 'blue';
          text = '运行中';
@@ -84,34 +94,25 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
            <Tooltip title={record.log_info}>
              <Tag color={color}>{text}</Tag>
            </Tooltip>
-          )
+          );
        }

        return <Tag color={color}>{text}</Tag>;
      },
    },
    {
-      title: '可执行率',
-      key: 'executable_rate',
+      title: '评测轮次',
+      dataIndex: 'round_time',
+      key: 'round_time',
      width: '10%',
-      render: () => <span>--</span>, // 暂时显示默认值
-    },
-    {
-      title: '正确率',
-      key: 'correct_rate',
-      width: '10%',
-      render: () => <span>--</span>, // 暂时显示默认值
    },
    {
      title: '操作',
+      width: '5%',
      key: 'action',
      render: (_: any, record: EvaluationItem) => {
        return (
-          <Button
-            type="link"
-            disabled={record.state !== 'complete'}
-            onClick={() => goToDetail(record)}
-          >
+          <Button type='link' disabled={record.state !== 'complete'} onClick={() => goToDetail(record)}>
            查看
          </Button>
        );
@@ -121,7 +122,8 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {

  return (
    <Table
-      className='w-full'
+      tableLayout='fixed'
+      className={`w-full ${styles.table}`}
      pagination={{
        total: data?.total_count || 0,
        current: data?.page || 1,
@@ -133,7 +135,7 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
      loading={loading}
      columns={columns}
      dataSource={data?.items || []}
-      rowKey="evaluate_code"
+      rowKey='evaluate_code'
    />
  );
-};
+};
--- a/web/components/models_evaluation/NewEvaluationModal.tsx
+++ b/web/components/models_evaluation/NewEvaluationModal.tsx
@@ -1,11 +1,10 @@
-import { apiInterceptors } from "@/client/api";
-import { getUsableModels } from "@/client/api/models_evaluation/model";
-import { createBenchmarkTask } from "@/client/api/models_evaluation";
-import { useRequest } from "ahooks";
-import { Form, Input, InputNumber, Modal, Select, Slider, message } from "antd";
-import { useState } from "react";
-import { useTranslation } from "react-i18next";
-import { createBenchmarkTaskRequest } from "@/types/models_evaluation";
+import { apiInterceptors, getUsableModels } from '@/client/api';
+import { createBenchmarkTask } from '@/client/api/models_evaluation';
+import { createBenchmarkTaskRequest } from '@/types/models_evaluation';
+import { useRequest } from 'ahooks';
+import { Form, Input, InputNumber, Modal, Select, Slider, message } from 'antd';
+import { useState } from 'react';
+import { useTranslation } from 'react-i18next';

 interface Props {
  open: boolean;
@@ -26,17 +25,17 @@ export const NewEvaluationModal = (props: Props) => {
      return data || [];
    },
    {
-      onSuccess: (data) => {
+      onSuccess: data => {
        const options = data.map((item: string) => ({
          label: item,
          value: item,
        }));
        setModelOptions(options);
      },
-      onError: (error) => {
+      onError: error => {
        message.error(t('get_model_list_failed') + ': ' + error.message);
      },
-    }
+    },
  );

  // 创建评测任务
@@ -61,10 +60,10 @@ export const NewEvaluationModal = (props: Props) => {
        onOk?.(); // 触发外部的onOk回调，用于刷新列表
        onCancel();
      },
-      onError: (error) => {
+      onError: error => {
        message.error(t('create_evaluation_failed') + ': ' + error.message);
      },
-    }
+    },
  );

  const handleOk = async () => {
@@ -92,7 +91,7 @@ export const NewEvaluationModal = (props: Props) => {
    >
      <Form
        form={form}
-        layout="vertical"
+        layout='vertical'
        requiredMark={false}
        initialValues={{
          temperature: 0.2,
@@ -101,34 +100,34 @@ export const NewEvaluationModal = (props: Props) => {
      >
        <Form.Item
          label={t('task_name')}
-          name="scene_value"
+          name='scene_value'
          rules={[{ required: true, message: t('please_input_task_name') }]}
        >
          <Input placeholder={t('please_input_task_name')} />
        </Form.Item>
-        
+
        <Form.Item
          label={t('models_to_evaluate')}
-          name="model_list"
+          name='model_list'
          rules={[
            { required: true, message: t('please_select_models_to_evaluate') },
-            { type: 'array', min: 1, message: t('please_select_at_least_one_model') }
+            { type: 'array', min: 1, message: t('please_select_at_least_one_model') },
          ]}
        >
          <Select
-            mode="multiple"
+            mode='multiple'
            placeholder={t('please_select_models_to_evaluate')}
            options={modelOptions}
            loading={modelLoading}
            showSearch
-            optionFilterProp="label"
+            optionFilterProp='label'
            allowClear
          />
        </Form.Item>

        <Form.Item
          label={t('temperature')}
-          name="temperature"
+          name='temperature'
          rules={[{ required: true, message: t('please_input_temperature') }]}
        >
          <Slider
@@ -145,17 +144,12 @@ export const NewEvaluationModal = (props: Props) => {

        <Form.Item
          label={t('max_new_tokens')}
-          name="max_tokens"
+          name='max_tokens'
          rules={[{ required: true, message: t('please_input_max_new_tokens') }]}
        >
-          <InputNumber
-            min={1}
-            max={32768}
-            style={{ width: '100%' }}
-            placeholder={t('please_input_max_new_tokens')}
-          />
+          <InputNumber min={1} max={32768} style={{ width: '100%' }} placeholder={t('please_input_max_new_tokens')} />
        </Form.Item>
      </Form>
    </Modal>
  );
-};
+};
--- a/web/components/models_evaluation/components/bar-chart.tsx
+++ b/web/components/models_evaluation/components/bar-chart.tsx
@@ -1,5 +1,5 @@
-import React from 'react';
 import { Column } from '@ant-design/plots';
+import React from 'react';

 interface ChartData {
  name: string;
@@ -18,7 +18,7 @@ interface InnerDataItem {
  value: number;
 }

-export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
+export const BarChart: React.FC<BarChartProps> = ({ data }) => {
  // 转换数据格式以适应Ant Design Charts
  const chartData = data.map(item => ({
    ...item,
@@ -34,7 +34,7 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
    axis: {
      y: {
        labelFormatter: '.00%',
-      }
+      },
    },
    label: {
      text: (d: InnerDataItem) => (d.value * 100).toFixed(2) + '%',
@@ -42,14 +42,14 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
    },
    interaction: {
      tooltip: {
-        render: (e: any, {title, items}: { title: string, items: InnerDataItem[]}) => {
+        render: (_e: any, { title, items }: { title: string; items: InnerDataItem[] }) => {
          return (
            <div key={title}>
              <h4>{title}</h4>
-              {items.map((item) => {
+              {items.map(item => {
                const { name, value, color } = item;
                return (
-                  <div className="flex justify-between gap-4">
+                  <div className='flex justify-between gap-4' key={item.name}>
                    <div>
                      <span
                        style={{
@@ -68,11 +68,11 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
                );
              })}
            </div>
-          )
-        }
-      }
-    }
+          );
+        },
+      },
+    },
  };

  return <Column {...config} />;
-};
+};
--- a/web/components/models_evaluation/components/nav-to.tsx
+++ b/web/components/models_evaluation/components/nav-to.tsx
@@ -0,0 +1,39 @@
+import { Button, ButtonProps } from 'antd';
+import { useRouter } from 'next/router';
+import React, { useCallback } from 'react';
+
+/** Navigation button: client-side `router.push(href)` by default, new browser tab when `openNewTab` is set. */
+export const NavTo = ({
+  href,
+  type = 'link',
+  className = '',
+  openNewTab = false,
+  children,
+}: {
+  href: string;
+  type?: ButtonProps['type'];
+  className?: string;
+  openNewTab?: boolean;
+  children: React.ReactNode;
+}) => {
+  // Resolve the router before the callback that closes over it, and list it as a dependency.
+  const router = useRouter();
+  const goToList = useCallback(() => {
+    router.push(href);
+  }, [href, router]);
+
+  if (openNewTab) {
+    // Give href to Button itself (antd then renders an anchor) instead of nesting an <a> inside a <button>.
+    return (
+      <Button type={type} className={className} href={href} target='_blank' rel='noopener noreferrer'>
+        {children}
+      </Button>
+    );
+  }
+
+  return (
+    <Button type={type} className={className} onClick={goToList}>
+      {children}
+    </Button>
+  );
+};
--- a/web/components/models_evaluation/context/EvaluationContext.tsx
+++ b/web/components/models_evaluation/context/EvaluationContext.tsx
@@ -16,17 +16,8 @@ interface EvaluationProviderProps {
  type?: string;
 }

-export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({
-  children, 
-  filterValue = '', 
-  type = 'all' 
-}) => {
-  const {
-    data,
-    loading,
-    getModelsEvaluation,
-    refresh,
-  } = useEvaluationList({
+export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({ children, filterValue = '', type = 'all' }) => {
+  const { data, loading, getModelsEvaluation, refresh } = useEvaluationList({
    filterValue,
    type,
  });
@@ -34,10 +25,10 @@ export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({
  return (
    <EvaluationContext.Provider
      value={{
-        refresh, 
+        refresh,
        data,
-        loading, 
-        getModelsEvaluation
+        loading,
+        getModelsEvaluation,
      }}
    >
      {children}
--- a/web/components/models_evaluation/hooks/useEvaluationList.ts
+++ b/web/components/models_evaluation/hooks/useEvaluationList.ts
@@ -1,8 +1,8 @@
-import { apiInterceptors } from "@/client/api";
-import { getBenchmarkTaskList } from "@/client/api/models_evaluation";
-import { useRequest } from "ahooks";
-import { message } from "antd";
-import { EvaluationData, getBenchmarkTaskListRequest } from "@/types/models_evaluation";
+import { apiInterceptors } from '@/client/api';
+import { getBenchmarkTaskList } from '@/client/api/models_evaluation';
+import { EvaluationData, getBenchmarkTaskListRequest } from '@/types/models_evaluation';
+import { useRequest } from 'ahooks';
+import { message } from 'antd';

 interface UseEvaluationListProps {
  filterValue?: string;
@@ -26,18 +26,16 @@ export const useEvaluationList = (props: UseEvaluationListProps) => {
        sys_code: type === 'all' ? undefined : type,
      };

-      const [_, data] = await apiInterceptors(
-        getBenchmarkTaskList(params)
-      );
+      const [_, data] = await apiInterceptors(getBenchmarkTaskList(params));

      return data as EvaluationData;
    },
    {
      manual: true,
-      onError: (e) => {
+      onError: e => {
        message.error(e.message || '获取评估列表失败');
      },
-    }
+    },
  );

  return {
@@ -46,4 +44,4 @@ export const useEvaluationList = (props: UseEvaluationListProps) => {
    getModelsEvaluation,
    refresh,
  };
-};
+};
--- a/web/components/models_evaluation/styles.module.css
+++ b/web/components/models_evaluation/styles.module.css
@@ -0,0 +1,3 @@
+.table :global table {
+  display: table; /* NOTE(review): presumably restores table layout over a global `table` display rule — confirm */
+}
--- a/web/pages/models_evaluation/[code].tsx
+++ b/web/pages/models_evaluation/[code].tsx
@@ -1,12 +1,11 @@
-import { Card, Typography, Spin, Descriptions, Row, Col, Statistic, Button, Tabs, Table } from "antd";
-import React, { useCallback, useEffect, useState } from "react";
-import { useRouter } from "next/router";
-import { apiInterceptors } from "@/client/api";
-import { getBenchmarkResultDetail } from "@/client/api/models_evaluation/result";
-import { BarChart } from "./components/bar-chart";
-import styles from "./styles.module.css";
-
-const { Title } = Typography;
+import { apiInterceptors } from '@/client/api';
+import { getBenchmarkResultDetail } from '@/client/api/models_evaluation/result';
+import { BarChart } from '@/components/models_evaluation/components/bar-chart';
+import { NavTo } from '@/components/models_evaluation/components/nav-to';
+import { Button, Card, Col, Descriptions, Row, Spin, Statistic, Table, Tabs } from 'antd';
+import { useRouter } from 'next/router';
+import { useEffect, useState } from 'react';
+import styles from './styles.module.css';

 // 定义数据类型
 interface BenchmarkSummary {
@@ -36,31 +35,41 @@ interface ChartData {
 const EvaluationDetail = () => {
  const router = useRouter();

-  const goToList = useCallback(() => {
-    router.push('/models_evaluation');
-  }, []);
+  const { code } = router.query;

  return (
-    <div className="flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12">
+    <div className='flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12'>
      <Card
        title={
-          <>
-            <span>模型评估详情</span>
-            <Button type="link" onClick={() => goToList()}>
-              回到列表
-            </Button>
-          </>
+          <div className='flex justify-between'>
+            <div>
+              <span>模型评估详情</span>
+              <NavTo href='/models_evaluation'>回到列表</NavTo>
+            </div>
+            <div>
+              <NavTo href='/models_evaluation/datasets' openNewTab={true}>
+                查看评测数据
+              </NavTo>
+              <Button
+                type='link'
+                target='_blank'
+                rel='noopener noreferrer'
+                href={`${process.env.API_BASE_URL}/api/v1/evaluate/benchmark_result_download?evaluate_code=${code}`}
+              >
+                下载结果
+              </Button>
+            </div>
+          </div>
        }
        className={`w-full h-full flex flex-col ${styles['models-evaluation-detail']}`}
      >
        <EvaluationDetailContent />
      </Card>
    </div>
-  )
-}
+  );
+};

 const EvaluationDetailContent = () => {
-
  const router = useRouter();
  const { code } = router.query;
  const [loading, setLoading] = useState(true);
@@ -77,7 +86,7 @@ const EvaluationDetailContent = () => {
    try {
      setLoading(true);
      const [err, data] = await apiInterceptors(getBenchmarkResultDetail(evaluateCode));
-      
+
      if (err) {
        setError(err.message || '获取评测结果失败');
        return;
@@ -94,31 +103,31 @@ const EvaluationDetailContent = () => {

  if (router.isFallback) {
    return (
-      <div className="flex justify-center items-center h-full">
-        <Spin size="large" />
+      <div className='flex justify-center items-center h-full'>
+        <Spin size='large' />
      </div>
    );
  }

  if (loading) {
    return (
-      <div className="flex justify-center items-center h-full">
-        <Spin size="large" />
+      <div className='flex justify-center items-center h-full'>
+        <Spin size='large' />
      </div>
    );
  }

  if (error) {
    return (
-      <div className="flex justify-center items-center h-full">
-        <div className="text-red-500">{error}</div>
+      <div className='flex justify-center items-center h-full'>
+        <div className='text-red-500'>{error}</div>
      </div>
    );
  }

  if (!resultData) {
    return (
-      <div className="flex justify-center items-center h-full">
+      <div className='flex justify-center items-center h-full'>
        <div>暂无数据</div>
      </div>
    );
@@ -130,140 +139,132 @@ const EvaluationDetailContent = () => {
  const totalFailed = resultData.summaries.reduce((sum, item) => sum + item.failed, 0);
  const totalException = resultData.summaries.reduce((sum, item) => sum + item.exception, 0);
  const totalQuestions = totalRight + totalWrong + totalFailed + totalException;
-  
+
  // const overallAccuracy = totalQuestions > 0 ? totalRight / totalQuestions : 0;
  // const overallExecRate = totalQuestions > 0 ? (totalRight + totalWrong) / totalQuestions : 0;

  // 准备图表数据
-  const chartData: ChartData[] = resultData.summaries.map(item => [
-    { name: '可执行率', label: item.llmCode, value: item.execRate },
-    { name: '正确率', label: item.llmCode, value: item.accuracy }
-  ]).flat();
+  const chartData: ChartData[] = resultData.summaries
+    .map(item => [
+      { name: '可执行率', label: item.llmCode, value: item.execRate },
+      { name: '正确率', label: item.llmCode, value: item.accuracy },
+    ])
+    .flat();

  return (
    <>
      <Descriptions
        bordered
-        items={[{
-          key: '1',
-          label: '任务ID',
-          children: resultData.evaluate_code
-        }]}
+        items={[
+          {
+            key: '1',
+            label: '任务ID',
+            children: resultData.evaluate_code,
+          },
+        ]}
      />
-      <div className="mt-6">
-        <Row gutter={16} className="mb-4">
+      <div className='mt-6'>
+        <Row gutter={16} className='mb-4'>
          <Col span={4}>
-            <Statistic
-              title="模型数"
-              value={resultData.summaries?.length}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='模型数' value={resultData.summaries?.length} className='border rounded-lg p-4' />
          </Col>
          <Col span={4}>
-            <Statistic
-              title="总题数"
-              value={totalQuestions}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='总题数' value={totalQuestions} className='border rounded-lg p-4' />
          </Col>
          <Col span={4}>
-            <Statistic
-              title="正确题数"
-              value={totalRight}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='正确题数' value={totalRight} className='border rounded-lg p-4' />
          </Col>
          <Col span={4}>
-            <Statistic
-              title="错误题数"
-              value={totalWrong}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='错误题数' value={totalWrong} className='border rounded-lg p-4' />
          </Col>
          <Col span={4}>
-            <Statistic
-              title="失败题数"
-              value={totalFailed}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='失败题数' value={totalFailed} className='border rounded-lg p-4' />
          </Col>
        </Row>
      </div>

-      <ModelsTable
-        data={resultData.summaries ?? []}
-      />
+      <ModelsTable data={resultData.summaries ?? []} />

      <Tabs
        items={[
          {
            key: 'overview',
            label: '概览',
-            children: <BarChart data={chartData} height={400} />
-          }
+            children: <BarChart data={chartData} />,
+          },
        ]}
      />
    </>
-  )
+  );
 };

-const ModelsTable = ({ data }: {data: BenchmarkSummary[] }) => {
-  const columns = [{
-    title: '轮次',
-    dataIndex: 'roundId',
-    width: '12.5%',
-    key: 'roundId'
-  }, {
-    title: '模型',
-    dataIndex: 'llmCode',
-    width: '12.5%',
-    key: 'llmCode'
-  }, {
-    title: '题目数',
-    width: '12.5%',
-    key: 'total',
-    render: (record: any) => record.right + record.wrong + record.failed,
-  }, {
-    title: '正确题数',
-    dataIndex: 'right',
-    width: '12.5%',
-    key: 'right'
-  }, {
-    title: '错误题数',
-    dataIndex: 'wrong',
-    width: '12.5%',
-    key: 'wrong'
-  }, {
-    title: '失败题数',
-    dataIndex: 'failed',
-    width: '12.5%',
-    key: 'failed'
-  }, {
-    title: '正确率',
-    dataIndex: 'accuracy',
-    width: '12.5%',
-    key: 'accuracy',
-    render: (value: number) => {
-      return `${(value * 100).toFixed(2)}%`;
-    }
-  },{
-    title: '可执行率',
-    dataIndex: 'execRate',
-    width: '12.5%',
-    key: 'execRate',
-    render: (value: number) => {
-      return `${(value * 100).toFixed(2)}%`;
-    }
-  }];
+const ModelsTable = ({ data }: { data: BenchmarkSummary[] }) => {
+  const columns = [
+    {
+      title: '轮次',
+      dataIndex: 'roundId',
+      width: '12.5%',
+      key: 'roundId',
+    },
+    {
+      title: '模型',
+      dataIndex: 'llmCode',
+      width: '12.5%',
+      key: 'llmCode',
+    },
+    {
+      title: '题目数',
+      width: '12.5%',
+      key: 'total',
+      render: (record: any) => record.right + record.wrong + record.failed,
+    },
+    {
+      title: '正确题数',
+      dataIndex: 'right',
+      width: '12.5%',
+      key: 'right',
+    },
+    {
+      title: '错误题数',
+      dataIndex: 'wrong',
+      width: '12.5%',
+      key: 'wrong',
+    },
+    {
+      title: '失败题数',
+      dataIndex: 'failed',
+      width: '12.5%',
+      key: 'failed',
+    },
+    {
+      title: '正确率',
+      dataIndex: 'accuracy',
+      width: '12.5%',
+      key: 'accuracy',
+      render: (value: number) => {
+        return `${(value * 100).toFixed(2)}%`;
+      },
+    },
+    {
+      title: '可执行率',
+      dataIndex: 'execRate',
+      width: '12.5%',
+      key: 'execRate',
+      render: (value: number) => {
+        return `${(value * 100).toFixed(2)}%`;
+      },
+    },
+  ];

  return (
    <Table
+      tableLayout='fixed'
      pagination={false}
-      className='w-full'
+      className={`w-full ${styles.table}`}
      columns={columns}
      dataSource={data}
    />
-  )
-}
+  );
+};

-export default EvaluationDetail;
+export default EvaluationDetail;
--- a/web/pages/models_evaluation/datasets/index.tsx
+++ b/web/pages/models_evaluation/datasets/index.tsx
@@ -0,0 +1,262 @@
+import { apiInterceptors } from '@/client/api';
+import {
+  getBenchmarkDatasetTables,
+  getBenchmarkDatasets,
+  getBenchmarkTableRows,
+} from '@/client/api/models_evaluation/datasets';
+import { NavTo } from '@/components/models_evaluation/components/nav-to';
+import { Card, Spin, Table, Tree, TreeDataNode, Typography } from 'antd';
+import React, { Key, useEffect, useState } from 'react';
+import styles from '../styles.module.css';
+
+const { Title, Text } = Typography;
+
+// Data type definitions
+/**
+ * One dataset entry in the list this page loads on mount.
+ * `dataset_id` becomes the tree node key; `name` and `tableCount` are shown in the node title.
+ */
+interface Dataset {
+  dataset_id: string;
+  name: string;
+  tableCount: number;
+}
+
+/** A single column description inside a `TableInfo`. */
+interface TableColumn {
+  name: string;
+  type: string;
+}
+
+/**
+ * One table of a dataset, as loaded when a dataset node is expanded.
+ * Only `name` is read by this page (it becomes the child node's title and key).
+ */
+interface TableInfo {
+  name: string;
+  rowCount: number;
+  columns: TableColumn[];
+}
+
+/** A row of table data with arbitrary keys; the keys of the first row become the preview's column titles. */
+interface TableRow {
+  [key: string]: any;
+}
+
+/**
+ * Payload held in state for the table preview.
+ * NOTE(review): `limit` is presumably the row cap applied by the backend; this page does not read it — confirm.
+ */
+interface TableData {
+  table: string;
+  limit: number;
+  rows: TableRow[];
+}
+
+/** Tree node that additionally remembers which node it hangs under. */
+type CustomTreeDataNode = TreeDataNode & {
+  parent?: string; // points to the parent node (set to the dataset key on table nodes)
+};
+
+/**
+ * Dataset browser page for model evaluation.
+ *
+ * Left pane: a tree whose root nodes are benchmark datasets; a dataset's tables
+ * are loaded lazily as child nodes when the dataset node is expanded.
+ * Right pane: a preview of the rows returned for the currently selected table.
+ */
+const DatasetsForEvaluation = () => {
+  const [tableData, setTableData] = useState<TableData | null>(null);
+  const [loading, setLoading] = useState({
+    datasets: false,
+    tables: false,
+    tableData: false,
+  });
+  const [selectedDataset, setSelectedDataset] = useState<string | null>(null);
+  const [selectedTable, setSelectedTable] = useState<string | null>(null);
+  // Tree structure data: dataset nodes at the root, table nodes as their children
+  const [treeData, setTreeData] = useState<CustomTreeDataNode[]>([]);
+
+  // Table names are only unique inside one dataset, while tree keys must be unique
+  // across the whole tree, so table nodes are keyed by "<datasetId>/<tableName>".
+  const buildTableKey = (datasetId: string, tableName: string) => `${datasetId}/${tableName}`;
+
+  // Selection is controlled so the highlighted node always matches `selectedTable`
+  // (which is also cleared programmatically in `fetchTables`).
+  const selectedKeys = selectedDataset && selectedTable ? [buildTableKey(selectedDataset, selectedTable)] : [];
+
+  // Load the dataset list once on mount
+  useEffect(() => {
+    async function init() {
+      // fetchDatasets always resolves to an array, so a failed request leaves an empty tree instead of throwing
+      const result = await fetchDatasets();
+      setTreeData(
+        result.map((item: Dataset) => ({
+          title: `${item.name}(${item.tableCount}张表)`,
+          key: item.dataset_id,
+          selectable: false,
+        })),
+      );
+
+      setSelectedDataset(prevState => {
+        if (prevState && result.some(item => item.dataset_id === prevState)) return prevState;
+        // Fall back to the first dataset, or to null when the list is empty
+        return result[0]?.dataset_id ?? null;
+      });
+    }
+    init();
+  }, []);
+
+  /**
+   * Fetches the dataset list.
+   *
+   * @returns The datasets, or an empty array when the request fails or the payload is not an array.
+   */
+  const fetchDatasets = async (): Promise<Dataset[]> => {
+    try {
+      setLoading(prev => ({ ...prev, datasets: true }));
+      const [err, data] = await apiInterceptors(getBenchmarkDatasets());
+
+      if (err) {
+        console.error('获取数据集列表失败:', err);
+        return [];
+      }
+
+      return Array.isArray(data) ? data : [];
+    } catch (err) {
+      console.error('获取数据集列表失败:', err);
+      return [];
+    } finally {
+      setLoading(prev => ({ ...prev, datasets: false }));
+    }
+  };
+
+  /**
+   * Fetches the tables of one dataset. Also clears the current table selection.
+   *
+   * @param datasetId - Id of the dataset whose tables are requested.
+   * @returns The tables, or an empty array when the request fails or the payload is not an array.
+   */
+  const fetchTables = async (datasetId: string): Promise<TableInfo[]> => {
+    try {
+      setLoading(prev => ({ ...prev, tables: true }));
+      setSelectedTable(null);
+
+      const [err, data] = await apiInterceptors(getBenchmarkDatasetTables(datasetId));
+
+      if (err) {
+        console.error('获取表列表失败:', err);
+        return [];
+      }
+
+      return Array.isArray(data) ? data : [];
+    } catch (err) {
+      console.error('获取表列表失败:', err);
+      return [];
+    } finally {
+      setLoading(prev => ({ ...prev, tables: false }));
+    }
+  };
+
+  /**
+   * Returns a copy of `list` in which the node with the given key has its children replaced.
+   * Nodes that already have children are searched recursively.
+   */
+  const updateTreeData = (
+    list: CustomTreeDataNode[],
+    key: React.Key,
+    children: CustomTreeDataNode[],
+  ): CustomTreeDataNode[] =>
+    list.map(node => {
+      if (node.key === key) {
+        return {
+          ...node,
+          children,
+        };
+      }
+      if (node.children) {
+        return {
+          ...node,
+          children: updateTreeData(node.children, key, children),
+        };
+      }
+      return node;
+    });
+
+  /** Lazy loader for the tree: fetches a dataset's tables the first time its node is expanded. */
+  const loadTreeData = async ({ key, children }: any) => {
+    if (children) {
+      return;
+    }
+    const datasetId = String(key);
+    const tables = await fetchTables(datasetId);
+    setTreeData((prev: CustomTreeDataNode[]) =>
+      updateTreeData(
+        prev,
+        key,
+        tables.map(item => ({
+          title: item.name,
+          key: buildTableKey(datasetId, item.name),
+          parent: datasetId, // keep a pointer to the parent (dataset) node
+          isLeaf: true,
+        })),
+      ),
+    );
+  };
+
+  /** Tree selection handler: records which dataset/table pair should be previewed. */
+  const onTableSelected = (_selectedKeys: Key[], { selectedNodes }: { selectedNodes: CustomTreeDataNode[] }) => {
+    const node = selectedNodes[0];
+    // Clicking the already selected node reports an empty selection; keep the current table in that case
+    if (!node || node.parent === undefined) {
+      return;
+    }
+    setSelectedDataset(node.parent);
+    // Strip the "<datasetId>/" prefix added by buildTableKey to recover the table name
+    setSelectedTable(String(node.key).slice(node.parent.length + 1));
+  };
+
+  /**
+   * Fetches the preview rows of one table and stores them in state.
+   *
+   * @param datasetId - Dataset the table belongs to.
+   * @param tableName - Table whose rows are requested.
+   * @param isStale - Returns true once this request has been superseded; its result is then discarded.
+   */
+  const fetchTableData = async (datasetId: string, tableName: string, isStale: () => boolean = () => false) => {
+    try {
+      setLoading(prev => ({ ...prev, tableData: true }));
+
+      const [err, data] = await apiInterceptors(getBenchmarkTableRows(datasetId, tableName));
+
+      if (isStale()) return;
+
+      if (err) {
+        console.error('获取表数据失败:', err);
+        // Do not leave the previous table's rows on screen under the new table's name
+        setTableData(null);
+        return;
+      }
+
+      // The render path reads `rows.length`, so only accept payloads that carry a rows array
+      setTableData(data && Array.isArray(data.rows) ? data : null);
+    } catch (err) {
+      console.error('获取表数据失败:', err);
+      if (!isStale()) setTableData(null);
+    } finally {
+      // A superseded request must not switch off the spinner of the request that replaced it
+      if (!isStale()) setLoading(prev => ({ ...prev, tableData: false }));
+    }
+  };
+
+  // Reload the preview whenever the selected dataset/table pair changes
+  useEffect(() => {
+    if (!selectedDataset || !selectedTable) {
+      setTableData(null);
+      // An in-flight request that was just superseded no longer resets the flag itself
+      setLoading(prev => ({ ...prev, tableData: false }));
+      return;
+    }
+
+    let stale = false;
+    fetchTableData(selectedDataset, selectedTable, () => stale);
+    return () => {
+      stale = true;
+    };
+  }, [selectedDataset, selectedTable]);
+
+  /** Builds the preview's column definitions from the keys of the first row. */
+  const generateColumns = () => {
+    if (!tableData || tableData.rows.length === 0) return [];
+
+    const firstRow = tableData.rows[0];
+    return Object.keys(firstRow).map((key, index) => ({
+      title: key,
+      dataIndex: key,
+      key: key,
+      width: index === 0 ? 100 : undefined,
+    }));
+  };
+
+  return (
+    <div className='h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12'>
+      <Card
+        title={
+          <>
+            评测数据集
+            <NavTo href='/models_evaluation'>返回评测任务列表</NavTo>
+          </>
+        }
+        className={`w-full h-full flex-1 flex flex-col ${styles['page-card']}`}
+      >
+        <div className='flex h-full'>
+          {/* Left: dataset list */}
+          <div className='w-1/4 pr-4 border-r flex flex-col'>
+            <Title level={5} className='mb-4'>
+              数据集列表
+            </Title>
+            <div className='overflow-y-auto h-full'>
+              <Tree
+                loadData={loadTreeData}
+                treeData={treeData}
+                selectedKeys={selectedKeys}
+                onSelect={onTableSelected}
+              />
+            </div>
+          </div>
+
+          {/* Right: table data */}
+          <div className='w-3/4 pl-4 flex flex-col'>
+            <div className='flex justify-between items-center mb-4'>
+              <Title level={5} className='mb-0'>
+                表数据<span className='font-normal text-sm'>（仅展示前10条数据）</span>
+              </Title>
+              {selectedTable && <Text type='secondary'>{selectedTable}</Text>}
+            </div>
+            <div className='overflow-y-auto h-full'>
+              {loading.tableData ? (
+                <div className='flex justify-center items-center h-full'>
+                  <Spin />
+                </div>
+              ) : tableData && tableData.rows.length > 0 ? (
+                <Table
+                  className={`w-full flex-auto ${styles.table}`}
+                  dataSource={tableData.rows}
+                  columns={generateColumns()}
+                  pagination={false}
+                  scroll={{ x: true }}
+                  size='small'
+                />
+              ) : selectedTable ? (
+                <Text type='secondary'>暂无数据</Text>
+              ) : (
+                <Text type='secondary'>请先选择一个表</Text>
+              )}
+            </div>
+          </div>
+        </div>
+      </Card>
+    </div>
+  );
+};
+
+export default DatasetsForEvaluation;
--- a/web/pages/models_evaluation/detail.tsx
+++ b/web/pages/models_evaluation/detail.tsx
@@ -1,17 +0,0 @@
-import { Card, Typography } from "antd";
-import React from "react";
-
-const { Title, Text } = Typography;
-
-const EvaluationDetail = () => {
-  return (
-    <div className="flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12">
-      <Card title="模型评估详情" className="w-full">
-        <Title level={4}>详情页面占位</Title>
-        <Text>这里是模型评估的详细信息页面</Text>
-      </Card>
-    </div>
-  );
-};
-
-export default EvaluationDetail;
--- a/web/pages/models_evaluation/index.tsx
+++ b/web/pages/models_evaluation/index.tsx
@@ -1,13 +1,12 @@
-import { ConfigProvider } from "antd";
-import React, { useState } from "react";
+import { ConfigProvider } from 'antd';
+import { useState } from 'react';

-import { EvaluationHeader } from "./EvaluationHeader";
-import { TabKey } from "@/types/models_evaluation";
-import { EvaluationList } from "./EvaluationList";
-import { EvaluationProvider } from "./context/EvaluationContext";
+import { EvaluationHeader } from '@/components/models_evaluation/EvaluationHeader';
+import { EvaluationList } from '@/components/models_evaluation/EvaluationList';
+import { EvaluationProvider } from '@/components/models_evaluation/context/EvaluationContext';
+import { TabKey } from '@/types/models_evaluation';

 const ModelsEvaluation = () => {
-
  const [activeKey, setActiveKey] = useState<TabKey>('all');
  const [filterValue, setFilterValue] = useState<string>('');

@@ -31,15 +30,12 @@ const ModelsEvaluation = () => {
            onSearch={setFilterValue}
          />
          <div className='flex flex-col h-full w-full overflow-y-auto'>
-            <EvaluationList
-              filterValue={filterValue}
-              type={activeKey}
-            />
+            <EvaluationList filterValue={filterValue} type={activeKey} />
          </div>
        </div>
      </EvaluationProvider>
    </ConfigProvider>
-  )
-}
+  );
+};

-export default ModelsEvaluation;
+export default ModelsEvaluation;
--- a/web/pages/models_evaluation/styles.module.css
+++ b/web/pages/models_evaluation/styles.module.css
@@ -1,3 +1,12 @@
 .models-evaluation-detail :global .ant-card-body {
  overflow-y: auto;
+}
+
+.table :global table {
+  display: table;
+}
+
+.page-card :global .ant-card-body {
+  height: 100%;
+  overflow: hidden;
 }
--- a/web/types/models_evaluation.ts
+++ b/web/types/models_evaluation.ts
@@ -42,6 +42,7 @@ export interface EvaluationItem {
  log_info: null;
  gmt_create: string;
  gmt_modified: string;
+  round_time: number;
 }

 export interface EvaluationData {
@@ -64,7 +65,7 @@ export interface getBenchmarkTaskListRequest {
  page_size: number;
  filter_param?: string;
  sys_code?: string;
-};
+}

 // 新的创建评测任务请求类型
 export type createBenchmarkTaskRequest = {
@@ -72,4 +73,4 @@ export type createBenchmarkTaskRequest = {
  model_list: string[];
  temperature: number;
  max_tokens: number;
-};
+};