From 19bbc6fa8dc6956eea1e918db874153144bfecf4 Mon Sep 17 00:00:00 2001
From: iterminatorheart <123625928+iterminatorheart@users.noreply.github.com>
Date: Sun, 19 Oct 2025 12:40:48 +0800
Subject: [PATCH] feat:  evaluation dataset info pages (#2911)

Co-authored-by: VLADIMIR KOBZEV <vladimir.kobzev@improvado.io>
Co-authored-by: Aries-ckt <916701291@qq.com>
Co-authored-by: xiandu.wl <xiandu.wl@antgroup.com>
---
 web/client/api/models_evaluation/datasets.ts  |  16 ++
 web/client/api/models_evaluation/index.ts     |   7 +-
 web/client/api/models_evaluation/model.ts     |   6 -
 web/client/api/models_evaluation/result.ts    |   4 +-
 web/components/layout/side-bar.tsx            |   4 +-
 .../models_evaluation/EvaluationHeader.tsx    |  44 +--
 .../models_evaluation/EvaluationList.tsx      |  70 ++---
 .../models_evaluation/NewEvaluationModal.tsx  |  52 ++--
 .../components/bar-chart.tsx                  |  22 +-
 .../models_evaluation/components/nav-to.tsx   |  39 +++
 .../context/EvaluationContext.tsx             |  19 +-
 .../hooks/useEvaluationList.ts                |  20 +-
 .../models_evaluation/styles.module.css       |   3 +
 web/pages/models_evaluation/[code].tsx        | 251 ++++++++---------
 .../models_evaluation/datasets/index.tsx      | 262 ++++++++++++++++++
 web/pages/models_evaluation/detail.tsx        |  17 --
 web/pages/models_evaluation/index.tsx         |  24 +-
 web/pages/models_evaluation/styles.module.css |   9 +
 web/types/models_evaluation.ts                |   5 +-
 19 files changed, 581 insertions(+), 293 deletions(-)
 create mode 100644 web/client/api/models_evaluation/datasets.ts
 delete mode 100644 web/client/api/models_evaluation/model.ts
 rename web/{pages => components}/models_evaluation/EvaluationHeader.tsx (66%)
 rename web/{pages => components}/models_evaluation/EvaluationList.tsx (67%)
 rename web/{pages => components}/models_evaluation/NewEvaluationModal.tsx (79%)
 rename web/{pages => components}/models_evaluation/components/bar-chart.tsx (83%)
 create mode 100644 web/components/models_evaluation/components/nav-to.tsx
 rename web/{pages => components}/models_evaluation/context/EvaluationContext.tsx (80%)
 rename web/{pages => components}/models_evaluation/hooks/useEvaluationList.ts (65%)
 create mode 100644 web/components/models_evaluation/styles.module.css
 create mode 100644 web/pages/models_evaluation/datasets/index.tsx
 delete mode 100644 web/pages/models_evaluation/detail.tsx

diff --git a/web/client/api/models_evaluation/datasets.ts b/web/client/api/models_evaluation/datasets.ts
new file mode 100644
index 000000000..2c02764f0
--- /dev/null
+++ b/web/client/api/models_evaluation/datasets.ts
@@ -0,0 +1,16 @@
+import { GET } from '../index';
+
+// Fetch the list of benchmark datasets.
+export const getBenchmarkDatasets = () => {
+  return GET<null, any>(`/api/v2/serve/evaluate/benchmark/list_datasets`);
+};
+
+// Fetch the list of physical tables under the dataset identified by `datasetId`.
+export const getBenchmarkDatasetTables = (datasetId: string) => {
+  return GET<null, any>(`/api/v2/serve/evaluate/benchmark/dataset/${datasetId}`);
+};
+
+// Fetch the rows of `table` within the dataset identified by `datasetId`.
+export const getBenchmarkTableRows = (datasetId: string, table: string) => {
+  return GET<null, any>(`/api/v2/serve/evaluate/benchmark/dataset/${datasetId}/${table}/rows`);
+};
diff --git a/web/client/api/models_evaluation/index.ts b/web/client/api/models_evaluation/index.ts
index d38fe0095..4c839582a 100644
--- a/web/client/api/models_evaluation/index.ts
+++ b/web/client/api/models_evaluation/index.ts
@@ -1,7 +1,4 @@
-import type {
-  getBenchmarkTaskListRequest,
-  createBenchmarkTaskRequest,
-} from '@/types/models_evaluation';
+import type { createBenchmarkTaskRequest, getBenchmarkTaskListRequest } from '@/types/models_evaluation';
 import { getUserId } from '@/utils';
 import { GET, POST } from '../index';
 
@@ -23,4 +20,4 @@ export const createBenchmarkTask = (data: createBenchmarkTaskRequest) => {
       'user-id': userId,
     },
   });
-};
\ No newline at end of file
+};
diff --git a/web/client/api/models_evaluation/model.ts b/web/client/api/models_evaluation/model.ts
deleted file mode 100644
index 8cb7439ea..000000000
--- a/web/client/api/models_evaluation/model.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-import { GET } from '../index';
-
-// 获取可用模型列表
-export const getUsableModels = () => {
-  return GET<null, Array<string>>('/api/v1/model/types');
-};
\ No newline at end of file
diff --git a/web/client/api/models_evaluation/result.ts b/web/client/api/models_evaluation/result.ts
index 69f7268cf..29d4a986f 100644
--- a/web/client/api/models_evaluation/result.ts
+++ b/web/client/api/models_evaluation/result.ts
@@ -2,5 +2,5 @@ import { GET } from '../index';
 
 // 获取评测结果详情
 export const getBenchmarkResultDetail = (evaluateCode: string) => {
-  return GET<null, any>(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`);
-};
\ No newline at end of file
+  return GET<string, any>(`/api/v2/serve/evaluate/benchmark/result/${evaluateCode}`);
+};
diff --git a/web/components/layout/side-bar.tsx b/web/components/layout/side-bar.tsx
index 5a767c9f6..8425a664e 100644
--- a/web/components/layout/side-bar.tsx
+++ b/web/components/layout/side-bar.tsx
@@ -307,7 +307,7 @@ function SideBar() {
           />
         ),
         path: '/models_evaluation',
-      }
+      },
     ];
     if (hasAdmin) {
       items.push({
@@ -421,7 +421,7 @@ function SideBar() {
     const language = i18n.language;
     if (language === 'zh') moment.locale('zh-cn');
     if (language === 'en') moment.locale('en');
-  }, []);
+  }, [i18n.language]);
 
   useEffect(() => {
     setLogo(mode === 'dark' ? '/logo_s_latest.png' : '/logo_zh_latest.png');
diff --git a/web/pages/models_evaluation/EvaluationHeader.tsx b/web/components/models_evaluation/EvaluationHeader.tsx
similarity index 66%
rename from web/pages/models_evaluation/EvaluationHeader.tsx
rename to web/components/models_evaluation/EvaluationHeader.tsx
index e3a74912f..522250efc 100644
--- a/web/pages/models_evaluation/EvaluationHeader.tsx
+++ b/web/components/models_evaluation/EvaluationHeader.tsx
@@ -1,17 +1,18 @@
-import { TabKey } from "@/types/models_evaluation";
-import Icon, { ReloadOutlined, SearchOutlined } from "@ant-design/icons";
-import { Button, Input, Segmented, Tooltip } from "antd";
-import { t } from "i18next";
-import { useState } from "react";
-import { NewEvaluationModal } from "./NewEvaluationModal";
-import { useEvaluation } from "./context/EvaluationContext";
+import { TabKey } from '@/types/models_evaluation';
+import { ReloadOutlined, SearchOutlined } from '@ant-design/icons';
+import { Button, Input, Segmented, Tooltip } from 'antd';
+import { t } from 'i18next';
+import { useState } from 'react';
+import { NewEvaluationModal } from './NewEvaluationModal';
+import { NavTo } from './components/nav-to';
+import { useEvaluation } from './context/EvaluationContext';
 
 type Props = {
-  activeKey?: TabKey,
+  activeKey?: TabKey;
   onTabChange?: (v: TabKey) => void;
   filterValue?: string;
   onSearch?: (v: string) => void;
-}
+};
 
 export const EvaluationHeader = (props: Props) => {
   const { onTabChange, activeKey = 'all', filterValue = '', onSearch } = props;
@@ -21,14 +22,14 @@ export const EvaluationHeader = (props: Props) => {
 
   const onFilterChange = (e: any) => {
     onSearch?.(e.target?.value);
-  }
+  };
 
   const createEvaluations = () => {
     setEvaluationVisible(true);
-  }
+  };
 
   return (
-    <div className="flex items-center justify-between">
+    <div className='flex items-center justify-between'>
       <div className='flex items-center gap-4'>
         <Segmented
           className='backdrop-filter h-10 backdrop-blur-lg bg-white bg-opacity-30 border border-white rounded-lg shadow p-1 dark:border-[#6f7f95] dark:bg-[#6f7f95] dark:bg-opacity-60'
@@ -56,18 +57,19 @@ export const EvaluationHeader = (props: Props) => {
         <Tooltip title={'刷新'}>
           <ReloadOutlined onClick={refresh} className='p-2 cursor-pointer' />
         </Tooltip>
-        <Button
-          className='border-none text-white bg-button-gradient h-full'
-          onClick={createEvaluations}
+        <NavTo
+          href='/models_evaluation/datasets'
+          className='border-none text-white bg-button-gradient h-full m-2'
+          type='primary'
+          openNewTab={true}
         >
+          查看评测数据
+        </NavTo>
+        <Button className='border-none text-white bg-button-gradient h-full' onClick={createEvaluations}>
           {t('create_evaluation')}
         </Button>
-        <NewEvaluationModal 
-          open={evaluationVisible} 
-          onCancel={() => setEvaluationVisible(false)}
-          onOk={refresh}
-        />
+        <NewEvaluationModal open={evaluationVisible} onCancel={() => setEvaluationVisible(false)} onOk={refresh} />
       </div>
     </div>
   );
-}
\ No newline at end of file
+};
diff --git a/web/pages/models_evaluation/EvaluationList.tsx b/web/components/models_evaluation/EvaluationList.tsx
similarity index 67%
rename from web/pages/models_evaluation/EvaluationList.tsx
rename to web/components/models_evaluation/EvaluationList.tsx
index a165cd716..4c3b796de 100644
--- a/web/pages/models_evaluation/EvaluationList.tsx
+++ b/web/components/models_evaluation/EvaluationList.tsx
@@ -1,16 +1,16 @@
-import { Button, Table, Tag, Tooltip } from "antd";
-import React, { useCallback, useEffect } from "react";
-import { EvaluationItem } from "@/types/models_evaluation";
-import { useEvaluation } from "./context/EvaluationContext";
-import { useRouter } from "next/router";
-
+import { EvaluationItem } from '@/types/models_evaluation';
+import { Button, Table, Tag, Tooltip } from 'antd';
+import { useRouter } from 'next/router';
+import React, { useCallback, useEffect } from 'react';
+import { useEvaluation } from './context/EvaluationContext';
+import styles from './styles.module.css';
 interface EvaluationListProps {
   filterValue?: string;
   type?: string;
 }
 
-export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
-  const { filterValue = '', type = 'all' } = props;
+export const EvaluationList: React.FC<EvaluationListProps> = () => {
+  // const { filterValue = '', type = 'all' } = props;
   const { data, loading, getModelsEvaluation } = useEvaluation();
 
   const router = useRouter();
@@ -25,10 +25,10 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
 
   const columns = [
     {
-      title: 'ID',
-      dataIndex: 'evaluate_code',
-      key: 'evaluate_code',
-      width: '20%',
+      title: '评测场景',
+      dataIndex: 'scene_key',
+      key: 'scene_key',
+      width: '10%',
     },
     {
       title: '任务名称',
@@ -36,6 +36,17 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
       key: 'scene_value',
       width: '10%',
     },
+    {
+      title: '评测集名称',
+      dataIndex: 'datasets_name',
+      key: 'datasets_name',
+      width: '20%',
+      render: (datasets_name: string) => (
+        <Tooltip title={datasets_name}>
+          <p className='truncate'>{datasets_name}</p>
+        </Tooltip>
+      ),
+    },
     {
       title: '创建时间',
       dataIndex: 'gmt_create',
@@ -53,18 +64,17 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
       dataIndex: 'model_list',
       key: 'model_list',
       width: '10%',
-      render: (model_list: string[]) => (
-        <span>{model_list.join(',')}</span>
-      ),
+      render: (model_list: string[]) => <span>{model_list.join(',')}</span>,
     },
     {
       title: '状态',
       dataIndex: 'state',
       key: 'state',
+      width: '5%',
       render: (state: string, record: EvaluationItem) => {
         let color = 'default';
         let text = state;
-        
+
         if (state === 'running') {
           color = 'blue';
           text = '运行中';
@@ -84,34 +94,25 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
             <Tooltip title={record.log_info}>
               <Tag color={color}>{text}</Tag>
             </Tooltip>
-          )
+          );
         }
 
         return <Tag color={color}>{text}</Tag>;
       },
     },
     {
-      title: '可执行率',
-      key: 'executable_rate',
+      title: '评测轮次',
+      dataIndex: 'round_time',
+      key: 'round_time',
       width: '10%',
-      render: () => <span>--</span>, // 暂时显示默认值
-    },
-    {
-      title: '正确率',
-      key: 'correct_rate',
-      width: '10%',
-      render: () => <span>--</span>, // 暂时显示默认值
     },
     {
       title: '操作',
+      width: '5%',
       key: 'action',
       render: (_: any, record: EvaluationItem) => {
         return (
-          <Button
-            type="link"
-            disabled={record.state !== 'complete'}
-            onClick={() => goToDetail(record)}
-          >
+          <Button type='link' disabled={record.state !== 'complete'} onClick={() => goToDetail(record)}>
             查看
           </Button>
         );
@@ -121,7 +122,8 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
 
   return (
     <Table
-      className='w-full'
+      tableLayout='fixed'
+      className={`w-full ${styles.table}`}
       pagination={{
         total: data?.total_count || 0,
         current: data?.page || 1,
@@ -133,7 +135,7 @@ export const EvaluationList: React.FC<EvaluationListProps> = (props) => {
       loading={loading}
       columns={columns}
       dataSource={data?.items || []}
-      rowKey="evaluate_code"
+      rowKey='evaluate_code'
     />
   );
-};
\ No newline at end of file
+};
diff --git a/web/pages/models_evaluation/NewEvaluationModal.tsx b/web/components/models_evaluation/NewEvaluationModal.tsx
similarity index 79%
rename from web/pages/models_evaluation/NewEvaluationModal.tsx
rename to web/components/models_evaluation/NewEvaluationModal.tsx
index 4fee33dd9..4667ca1d4 100644
--- a/web/pages/models_evaluation/NewEvaluationModal.tsx
+++ b/web/components/models_evaluation/NewEvaluationModal.tsx
@@ -1,11 +1,10 @@
-import { apiInterceptors } from "@/client/api";
-import { getUsableModels } from "@/client/api/models_evaluation/model";
-import { createBenchmarkTask } from "@/client/api/models_evaluation";
-import { useRequest } from "ahooks";
-import { Form, Input, InputNumber, Modal, Select, Slider, message } from "antd";
-import { useState } from "react";
-import { useTranslation } from "react-i18next";
-import { createBenchmarkTaskRequest } from "@/types/models_evaluation";
+import { apiInterceptors, getUsableModels } from '@/client/api';
+import { createBenchmarkTask } from '@/client/api/models_evaluation';
+import { createBenchmarkTaskRequest } from '@/types/models_evaluation';
+import { useRequest } from 'ahooks';
+import { Form, Input, InputNumber, Modal, Select, Slider, message } from 'antd';
+import { useState } from 'react';
+import { useTranslation } from 'react-i18next';
 
 interface Props {
   open: boolean;
@@ -26,17 +25,17 @@ export const NewEvaluationModal = (props: Props) => {
       return data || [];
     },
     {
-      onSuccess: (data) => {
+      onSuccess: data => {
         const options = data.map((item: string) => ({
           label: item,
           value: item,
         }));
         setModelOptions(options);
       },
-      onError: (error) => {
+      onError: error => {
         message.error(t('get_model_list_failed') + ': ' + error.message);
       },
-    }
+    },
   );
 
   // 创建评测任务
@@ -61,10 +60,10 @@ export const NewEvaluationModal = (props: Props) => {
         onOk?.(); // 触发外部的onOk回调，用于刷新列表
         onCancel();
       },
-      onError: (error) => {
+      onError: error => {
         message.error(t('create_evaluation_failed') + ': ' + error.message);
       },
-    }
+    },
   );
 
   const handleOk = async () => {
@@ -92,7 +91,7 @@ export const NewEvaluationModal = (props: Props) => {
     >
       <Form
         form={form}
-        layout="vertical"
+        layout='vertical'
         requiredMark={false}
         initialValues={{
           temperature: 0.2,
@@ -101,34 +100,34 @@ export const NewEvaluationModal = (props: Props) => {
       >
         <Form.Item
           label={t('task_name')}
-          name="scene_value"
+          name='scene_value'
           rules={[{ required: true, message: t('please_input_task_name') }]}
         >
           <Input placeholder={t('please_input_task_name')} />
         </Form.Item>
-        
+
         <Form.Item
           label={t('models_to_evaluate')}
-          name="model_list"
+          name='model_list'
           rules={[
             { required: true, message: t('please_select_models_to_evaluate') },
-            { type: 'array', min: 1, message: t('please_select_at_least_one_model') }
+            { type: 'array', min: 1, message: t('please_select_at_least_one_model') },
           ]}
         >
           <Select
-            mode="multiple"
+            mode='multiple'
             placeholder={t('please_select_models_to_evaluate')}
             options={modelOptions}
             loading={modelLoading}
             showSearch
-            optionFilterProp="label"
+            optionFilterProp='label'
             allowClear
           />
         </Form.Item>
 
         <Form.Item
           label={t('temperature')}
-          name="temperature"
+          name='temperature'
           rules={[{ required: true, message: t('please_input_temperature') }]}
         >
           <Slider
@@ -145,17 +144,12 @@ export const NewEvaluationModal = (props: Props) => {
 
         <Form.Item
           label={t('max_new_tokens')}
-          name="max_tokens"
+          name='max_tokens'
           rules={[{ required: true, message: t('please_input_max_new_tokens') }]}
         >
-          <InputNumber
-            min={1}
-            max={32768}
-            style={{ width: '100%' }}
-            placeholder={t('please_input_max_new_tokens')}
-          />
+          <InputNumber min={1} max={32768} style={{ width: '100%' }} placeholder={t('please_input_max_new_tokens')} />
         </Form.Item>
       </Form>
     </Modal>
   );
-};
\ No newline at end of file
+};
diff --git a/web/pages/models_evaluation/components/bar-chart.tsx b/web/components/models_evaluation/components/bar-chart.tsx
similarity index 83%
rename from web/pages/models_evaluation/components/bar-chart.tsx
rename to web/components/models_evaluation/components/bar-chart.tsx
index 89158acbb..6ebf900ac 100644
--- a/web/pages/models_evaluation/components/bar-chart.tsx
+++ b/web/components/models_evaluation/components/bar-chart.tsx
@@ -1,5 +1,5 @@
-import React from 'react';
 import { Column } from '@ant-design/plots';
+import React from 'react';
 
 interface ChartData {
   name: string;
@@ -18,7 +18,7 @@ interface InnerDataItem {
   value: number;
 }
 
-export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
+export const BarChart: React.FC<BarChartProps> = ({ data }) => {
   // 转换数据格式以适应Ant Design Charts
   const chartData = data.map(item => ({
     ...item,
@@ -34,7 +34,7 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
     axis: {
       y: {
         labelFormatter: '.00%',
-      }
+      },
     },
     label: {
       text: (d: InnerDataItem) => (d.value * 100).toFixed(2) + '%',
@@ -42,14 +42,14 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
     },
     interaction: {
       tooltip: {
-        render: (e: any, {title, items}: { title: string, items: InnerDataItem[]}) => {
+        render: (_e: any, { title, items }: { title: string; items: InnerDataItem[] }) => {
           return (
             <div key={title}>
               <h4>{title}</h4>
-              {items.map((item) => {
+              {items.map(item => {
                 const { name, value, color } = item;
                 return (
-                  <div className="flex justify-between gap-4">
+                  <div className='flex justify-between gap-4' key={item.name}>
                     <div>
                       <span
                         style={{
@@ -68,11 +68,11 @@ export const BarChart: React.FC<BarChartProps> = ({ data, height = 400 }) => {
                 );
               })}
             </div>
-          )
-        }
-      }
-    }
+          );
+        },
+      },
+    },
   };
 
   return <Column {...config} />;
-};
\ No newline at end of file
+};
diff --git a/web/components/models_evaluation/components/nav-to.tsx b/web/components/models_evaluation/components/nav-to.tsx
new file mode 100644
index 000000000..d0ede6f10
--- /dev/null
+++ b/web/components/models_evaluation/components/nav-to.tsx
@@ -0,0 +1,39 @@
+import { Button, ButtonProps } from 'antd';
+import { useRouter } from 'next/router';
+import React, { useCallback } from 'react';
+
+// Button that navigates to `href`: in-app via the Next.js router, or in a new browser tab when `openNewTab` is set.
+export const NavTo = ({
+  href,
+  type = 'link',
+  className = '',
+  openNewTab = false,
+  children,
+}: {
+  href: string;
+  type?: ButtonProps['type'];
+  className?: string;
+  openNewTab?: boolean;
+  children: React.ReactNode;
+}) => {
+  // Resolve the router before the callback that closes over it, and list it as a dependency.
+  const router = useRouter();
+  const goToList = useCallback(() => {
+    router.push(href);
+  }, [router, href]);
+
+  if (openNewTab) {
+    // Pass `href` to Button so antd renders the anchor itself; an <a> nested inside a <button> is invalid HTML.
+    return (
+      <Button type={type} className={className} href={href} target='_blank' rel='noopener noreferrer'>
+        {children}
+      </Button>
+    );
+  }
+
+  return (
+    <Button type={type} className={className} onClick={goToList}>
+      {children}
+    </Button>
+  );
+};
diff --git a/web/pages/models_evaluation/context/EvaluationContext.tsx b/web/components/models_evaluation/context/EvaluationContext.tsx
similarity index 80%
rename from web/pages/models_evaluation/context/EvaluationContext.tsx
rename to web/components/models_evaluation/context/EvaluationContext.tsx
index bc3176005..db40b70ab 100644
--- a/web/pages/models_evaluation/context/EvaluationContext.tsx
+++ b/web/components/models_evaluation/context/EvaluationContext.tsx
@@ -16,17 +16,8 @@ interface EvaluationProviderProps {
   type?: string;
 }
 
-export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({
-  children, 
-  filterValue = '', 
-  type = 'all' 
-}) => {
-  const {
-    data,
-    loading,
-    getModelsEvaluation,
-    refresh,
-  } = useEvaluationList({
+export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({ children, filterValue = '', type = 'all' }) => {
+  const { data, loading, getModelsEvaluation, refresh } = useEvaluationList({
     filterValue,
     type,
   });
@@ -34,10 +25,10 @@ export const EvaluationProvider: React.FC<EvaluationProviderProps> = ({
   return (
     <EvaluationContext.Provider
       value={{
-        refresh, 
+        refresh,
         data,
-        loading, 
-        getModelsEvaluation
+        loading,
+        getModelsEvaluation,
       }}
     >
       {children}
diff --git a/web/pages/models_evaluation/hooks/useEvaluationList.ts b/web/components/models_evaluation/hooks/useEvaluationList.ts
similarity index 65%
rename from web/pages/models_evaluation/hooks/useEvaluationList.ts
rename to web/components/models_evaluation/hooks/useEvaluationList.ts
index 3b3487ed3..26fcf739d 100644
--- a/web/pages/models_evaluation/hooks/useEvaluationList.ts
+++ b/web/components/models_evaluation/hooks/useEvaluationList.ts
@@ -1,8 +1,8 @@
-import { apiInterceptors } from "@/client/api";
-import { getBenchmarkTaskList } from "@/client/api/models_evaluation";
-import { useRequest } from "ahooks";
-import { message } from "antd";
-import { EvaluationData, getBenchmarkTaskListRequest } from "@/types/models_evaluation";
+import { apiInterceptors } from '@/client/api';
+import { getBenchmarkTaskList } from '@/client/api/models_evaluation';
+import { EvaluationData, getBenchmarkTaskListRequest } from '@/types/models_evaluation';
+import { useRequest } from 'ahooks';
+import { message } from 'antd';
 
 interface UseEvaluationListProps {
   filterValue?: string;
@@ -26,18 +26,16 @@ export const useEvaluationList = (props: UseEvaluationListProps) => {
         sys_code: type === 'all' ? undefined : type,
       };
 
-      const [_, data] = await apiInterceptors(
-        getBenchmarkTaskList(params)
-      );
+      const [_, data] = await apiInterceptors(getBenchmarkTaskList(params));
 
       return data as EvaluationData;
     },
     {
       manual: true,
-      onError: (e) => {
+      onError: e => {
         message.error(e.message || '获取评估列表失败');
       },
-    }
+    },
   );
 
   return {
@@ -46,4 +44,4 @@ export const useEvaluationList = (props: UseEvaluationListProps) => {
     getModelsEvaluation,
     refresh,
   };
-};
\ No newline at end of file
+};
diff --git a/web/components/models_evaluation/styles.module.css b/web/components/models_evaluation/styles.module.css
new file mode 100644
index 000000000..1def982f4
--- /dev/null
+++ b/web/components/models_evaluation/styles.module.css
@@ -0,0 +1,3 @@
+.table :global table {
+  display: table; /* NOTE(review): presumably restores table display so the fixed tableLayout set on EvaluationList's Table applies — confirm */
+}
\ No newline at end of file
diff --git a/web/pages/models_evaluation/[code].tsx b/web/pages/models_evaluation/[code].tsx
index df429ef16..632505a44 100644
--- a/web/pages/models_evaluation/[code].tsx
+++ b/web/pages/models_evaluation/[code].tsx
@@ -1,12 +1,11 @@
-import { Card, Typography, Spin, Descriptions, Row, Col, Statistic, Button, Tabs, Table } from "antd";
-import React, { useCallback, useEffect, useState } from "react";
-import { useRouter } from "next/router";
-import { apiInterceptors } from "@/client/api";
-import { getBenchmarkResultDetail } from "@/client/api/models_evaluation/result";
-import { BarChart } from "./components/bar-chart";
-import styles from "./styles.module.css";
-
-const { Title } = Typography;
+import { apiInterceptors } from '@/client/api';
+import { getBenchmarkResultDetail } from '@/client/api/models_evaluation/result';
+import { BarChart } from '@/components/models_evaluation/components/bar-chart';
+import { NavTo } from '@/components/models_evaluation/components/nav-to';
+import { Button, Card, Col, Descriptions, Row, Spin, Statistic, Table, Tabs } from 'antd';
+import { useRouter } from 'next/router';
+import { useEffect, useState } from 'react';
+import styles from './styles.module.css';
 
 // 定义数据类型
 interface BenchmarkSummary {
@@ -36,31 +35,41 @@ interface ChartData {
 const EvaluationDetail = () => {
   const router = useRouter();
 
-  const goToList = useCallback(() => {
-    router.push('/models_evaluation');
-  }, []);
+  const { code } = router.query;
 
   return (
-    <div className="flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12">
+    <div className='flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12'>
       <Card
         title={
-          <>
-            <span>模型评估详情</span>
-            <Button type="link" onClick={() => goToList()}>
-              回到列表
-            </Button>
-          </>
+          <div className='flex justify-between'>
+            <div>
+              <span>模型评估详情</span>
+              <NavTo href='/models_evaluation'>回到列表</NavTo>
+            </div>
+            <div>
+              <NavTo href='/models_evaluation/datasets' openNewTab={true}>
+                查看评测数据
+              </NavTo>
+              <Button
+                type='link'
+                target='_blank'
+                rel='noopener noreferrer'
+                href={`${process.env.API_BASE_URL}/api/v1/evaluate/benchmark_result_download?evaluate_code=${code}`}
+              >
+                下载结果
+              </Button>
+            </div>
+          </div>
         }
         className={`w-full h-full flex flex-col ${styles['models-evaluation-detail']}`}
       >
         <EvaluationDetailContent />
       </Card>
     </div>
-  )
-}
+  );
+};
 
 const EvaluationDetailContent = () => {
-
   const router = useRouter();
   const { code } = router.query;
   const [loading, setLoading] = useState(true);
@@ -77,7 +86,7 @@ const EvaluationDetailContent = () => {
     try {
       setLoading(true);
       const [err, data] = await apiInterceptors(getBenchmarkResultDetail(evaluateCode));
-      
+
       if (err) {
         setError(err.message || '获取评测结果失败');
         return;
@@ -94,31 +103,31 @@ const EvaluationDetailContent = () => {
 
   if (router.isFallback) {
     return (
-      <div className="flex justify-center items-center h-full">
-        <Spin size="large" />
+      <div className='flex justify-center items-center h-full'>
+        <Spin size='large' />
       </div>
     );
   }
 
   if (loading) {
     return (
-      <div className="flex justify-center items-center h-full">
-        <Spin size="large" />
+      <div className='flex justify-center items-center h-full'>
+        <Spin size='large' />
       </div>
     );
   }
 
   if (error) {
     return (
-      <div className="flex justify-center items-center h-full">
-        <div className="text-red-500">{error}</div>
+      <div className='flex justify-center items-center h-full'>
+        <div className='text-red-500'>{error}</div>
       </div>
     );
   }
 
   if (!resultData) {
     return (
-      <div className="flex justify-center items-center h-full">
+      <div className='flex justify-center items-center h-full'>
         <div>暂无数据</div>
       </div>
     );
@@ -130,140 +139,132 @@ const EvaluationDetailContent = () => {
   const totalFailed = resultData.summaries.reduce((sum, item) => sum + item.failed, 0);
   const totalException = resultData.summaries.reduce((sum, item) => sum + item.exception, 0);
   const totalQuestions = totalRight + totalWrong + totalFailed + totalException;
-  
+
   // const overallAccuracy = totalQuestions > 0 ? totalRight / totalQuestions : 0;
   // const overallExecRate = totalQuestions > 0 ? (totalRight + totalWrong) / totalQuestions : 0;
 
   // 准备图表数据
-  const chartData: ChartData[] = resultData.summaries.map(item => [
-    { name: '可执行率', label: item.llmCode, value: item.execRate },
-    { name: '正确率', label: item.llmCode, value: item.accuracy }
-  ]).flat();
+  const chartData: ChartData[] = resultData.summaries
+    .map(item => [
+      { name: '可执行率', label: item.llmCode, value: item.execRate },
+      { name: '正确率', label: item.llmCode, value: item.accuracy },
+    ])
+    .flat();
 
   return (
     <>
       <Descriptions
         bordered
-        items={[{
-          key: '1',
-          label: '任务ID',
-          children: resultData.evaluate_code
-        }]}
+        items={[
+          {
+            key: '1',
+            label: '任务ID',
+            children: resultData.evaluate_code,
+          },
+        ]}
       />
-      <div className="mt-6">
-        <Row gutter={16} className="mb-4">
+      <div className='mt-6'>
+        <Row gutter={16} className='mb-4'>
           <Col span={4}>
-            <Statistic
-              title="模型数"
-              value={resultData.summaries?.length}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='模型数' value={resultData.summaries?.length} className='border rounded-lg p-4' />
           </Col>
           <Col span={4}>
-            <Statistic
-              title="总题数"
-              value={totalQuestions}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='总题数' value={totalQuestions} className='border rounded-lg p-4' />
           </Col>
           <Col span={4}>
-            <Statistic
-              title="正确题数"
-              value={totalRight}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='正确题数' value={totalRight} className='border rounded-lg p-4' />
           </Col>
           <Col span={4}>
-            <Statistic
-              title="错误题数"
-              value={totalWrong}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='错误题数' value={totalWrong} className='border rounded-lg p-4' />
           </Col>
           <Col span={4}>
-            <Statistic
-              title="失败题数"
-              value={totalFailed}
-              className="border rounded-lg p-4"
-            />
+            <Statistic title='失败题数' value={totalFailed} className='border rounded-lg p-4' />
           </Col>
         </Row>
       </div>
 
-      <ModelsTable
-        data={resultData.summaries ?? []}
-      />
+      <ModelsTable data={resultData.summaries ?? []} />
 
       <Tabs
         items={[
           {
             key: 'overview',
             label: '概览',
-            children: <BarChart data={chartData} height={400} />
-          }
+            children: <BarChart data={chartData} />,
+          },
         ]}
       />
     </>
-  )
+  );
 };
 
-const ModelsTable = ({ data }: {data: BenchmarkSummary[] }) => {
-  const columns = [{
-    title: '轮次',
-    dataIndex: 'roundId',
-    width: '12.5%',
-    key: 'roundId'
-  }, {
-    title: '模型',
-    dataIndex: 'llmCode',
-    width: '12.5%',
-    key: 'llmCode'
-  }, {
-    title: '题目数',
-    width: '12.5%',
-    key: 'total',
-    render: (record: any) => record.right + record.wrong + record.failed,
-  }, {
-    title: '正确题数',
-    dataIndex: 'right',
-    width: '12.5%',
-    key: 'right'
-  }, {
-    title: '错误题数',
-    dataIndex: 'wrong',
-    width: '12.5%',
-    key: 'wrong'
-  }, {
-    title: '失败题数',
-    dataIndex: 'failed',
-    width: '12.5%',
-    key: 'failed'
-  }, {
-    title: '正确率',
-    dataIndex: 'accuracy',
-    width: '12.5%',
-    key: 'accuracy',
-    render: (value: number) => {
-      return `${(value * 100).toFixed(2)}%`;
-    }
-  },{
-    title: '可执行率',
-    dataIndex: 'execRate',
-    width: '12.5%',
-    key: 'execRate',
-    render: (value: number) => {
-      return `${(value * 100).toFixed(2)}%`;
-    }
-  }];
+const ModelsTable = ({ data }: { data: BenchmarkSummary[] }) => { // summary table: one row per entry of `data`
+  const columns = [ // eight columns, each 12.5% wide
+    {
+      title: '轮次',
+      dataIndex: 'roundId',
+      width: '12.5%',
+      key: 'roundId',
+    },
+    {
+      title: '模型',
+      dataIndex: 'llmCode',
+      width: '12.5%',
+      key: 'llmCode',
+    },
+    {
+      title: '题目数',
+      width: '12.5%',
+      key: 'total',
+      render: (record: any) => record.right + record.wrong + record.failed, // NOTE(review): `exception` is not added here, unlike totalQuestions in EvaluationDetailContent — confirm intended
+    },
+    {
+      title: '正确题数',
+      dataIndex: 'right',
+      width: '12.5%',
+      key: 'right',
+    },
+    {
+      title: '错误题数',
+      dataIndex: 'wrong',
+      width: '12.5%',
+      key: 'wrong',
+    },
+    {
+      title: '失败题数',
+      dataIndex: 'failed',
+      width: '12.5%',
+      key: 'failed',
+    },
+    {
+      title: '正确率',
+      dataIndex: 'accuracy',
+      width: '12.5%',
+      key: 'accuracy',
+      render: (value: number) => {
+        return `${(value * 100).toFixed(2)}%`; // scaled by 100 and shown with two decimals; presumably a 0-1 fraction — TODO confirm
+      },
+    },
+    {
+      title: '可执行率',
+      dataIndex: 'execRate',
+      width: '12.5%',
+      key: 'execRate',
+      render: (value: number) => {
+        return `${(value * 100).toFixed(2)}%`; // same percentage formatting as the accuracy column
+      },
+    },
+  ];
 
   return (
     <Table
+      tableLayout='fixed'
       pagination={false}
-      className='w-full'
+      className={`w-full ${styles.table}`}
       columns={columns}
       dataSource={data}
     />
-  )
-}
+  );
+};
 
-export default EvaluationDetail;
\ No newline at end of file
+export default EvaluationDetail;
diff --git a/web/pages/models_evaluation/datasets/index.tsx b/web/pages/models_evaluation/datasets/index.tsx
new file mode 100644
index 000000000..d1a29eadc
--- /dev/null
+++ b/web/pages/models_evaluation/datasets/index.tsx
@@ -0,0 +1,262 @@
+import { apiInterceptors } from '@/client/api';
+import {
+  getBenchmarkDatasetTables,
+  getBenchmarkDatasets,
+  getBenchmarkTableRows,
+} from '@/client/api/models_evaluation/datasets';
+import { NavTo } from '@/components/models_evaluation/components/nav-to';
+import { Card, Spin, Table, Tree, TreeDataNode, Typography } from 'antd';
+import React, { Key, useEffect, useState } from 'react';
+import styles from '../styles.module.css';
+
+const { Title, Text } = Typography;
+
+// Data types used by this page (shapes assumed for the benchmark dataset API responses — TODO confirm).
+interface Dataset {
+  dataset_id: string; // used as the tree node key and passed on when the node's tables are loaded
+  name: string; // shown in the tree node title
+  tableCount: number; // shown next to the name in the tree node title
+}
+
+interface TableColumn { // element type of TableInfo.columns
+  name: string;
+  type: string;
+}
+
+interface TableInfo { // element type of the list resolved by fetchTables
+  name: string; // shown as the leaf title in the tree
+  rowCount: number; // not read in this file
+  columns: TableColumn[]; // not read in this file
+}
+
+interface TableRow { // one preview row; the keys of the first row become the table columns (see generateColumns)
+  [key: string]: any;
+}
+
+interface TableData { // shape kept in the `tableData` state
+  table: string; // not read in this file
+  limit: number; // not read in this file
+  rows: TableRow[]; // rendered as the preview table's dataSource
+}
+
+type CustomTreeDataNode = TreeDataNode & {
+  parent?: string; // key of the parent (dataset) node; set on the table leaves created in loadTreeData
+};
+
+const DatasetsForEvaluation = () => {
+  const [tableData, setTableData] = useState<TableData | null>(null);
+  const [loading, setLoading] = useState({
+    datasets: false,
+    tables: false,
+    tableData: false,
+  });
+  const [selectedDataset, setSelectedDataset] = useState<string | null>(null);
+  const [selectedTable, setSelectedTable] = useState<string | null>(null);
+  // Tree data: dataset nodes at the root; table leaves are attached when a dataset node is loaded
+  const [treeData, setTreeData] = useState<CustomTreeDataNode[]>([]);
+
+  // Load the dataset list once on mount and seed the tree with one non-selectable root node per dataset.
+  useEffect(() => {
+    async function init() {
+      // Defensive: treat a missing result (failed request) as an empty list instead of crashing on .map.
+      const result: Dataset[] = (await fetchDatasets()) ?? [];
+      setTreeData(
+        result.map((item: Dataset) => ({
+          title: `${item.name}(${item.tableCount}张表)`,
+          key: item.dataset_id,
+          selectable: false,
+        })),
+      );
+      // Keep the current selection if it still exists, otherwise default to the first dataset (null if none).
+      setSelectedDataset(prevState =>
+        prevState && result.some(item => item.dataset_id === prevState) ? prevState : (result[0]?.dataset_id ?? null),
+      );
+    }
+    init();
+  }, []);
+
+  // Fetch all benchmark datasets; always resolves to an array ([] on any failure, which is only logged).
+  const fetchDatasets = async (): Promise<Dataset[]> => {
+    try {
+      setLoading(prev => ({ ...prev, datasets: true }));
+      const [err, data] = await apiInterceptors(getBenchmarkDatasets());
+      if (err) {
+        console.error('获取数据集列表失败:', err);
+        return [];
+      }
+      return data || [];
+    } catch (err) {
+      console.error('获取数据集列表失败:', err);
+      return []; // never resolve to undefined: the mount effect maps over the result
+    } finally {
+      setLoading(prev => ({ ...prev, datasets: false }));
+    }
+  };
+
+  // Load the tables of `datasetId`; resolves to [] (error only logged) when the request fails or throws.
+  const fetchTables = async (datasetId: string): Promise<TableInfo[]> => {
+    try {
+      setLoading(prev => ({ ...prev, tables: true }));
+      // Every table-list load resets the currently selected table.
+      setSelectedTable(null);
+
+      const [requestError, tables] = await apiInterceptors(getBenchmarkDatasetTables(datasetId));
+      if (!requestError) {
+        return tables || [];
+      }
+
+      console.error('获取表列表失败:', requestError);
+      return [];
+    } catch (caught) {
+      console.error('获取表列表失败:', caught);
+      return [];
+    } finally {
+      setLoading(prev => ({ ...prev, tables: false }));
+    }
+  };
+
+  // Return a copy of `list` where every node whose key equals `key` (searched recursively) gets `children`.
+  const updateTreeData = (
+    list: CustomTreeDataNode[],
+    key: React.Key,
+    children: CustomTreeDataNode[],
+  ): CustomTreeDataNode[] => {
+    const patched: CustomTreeDataNode[] = [];
+    for (const node of list) {
+      if (node.key === key) {
+        // Target found: swap in the new children without descending further.
+        patched.push({ ...node, children });
+      } else if (node.children) {
+        // Not the target: keep looking inside its subtree.
+        patched.push({ ...node, children: updateTreeData(node.children, key, children) });
+      } else {
+        patched.push(node);
+      }
+    }
+    return patched;
+  };
+
+  // Tree `loadData` callback: fetch a dataset node's tables as leaf children; no-op if it already has children.
+  const loadTreeData = async ({ key, children }: any) => {
+    if (children) return;
+    const tables = await fetchTables(key);
+    // antd Tree keys must be unique tree-wide and table names can repeat across datasets: prefix the dataset id.
+    const leaves = tables.map(item => ({
+      title: item.name,
+      key: `${key}::${item.name}`,
+      parent: key, // back-pointer to the owning dataset node
+      isLeaf: true,
+    }));
+    setTreeData((prev: CustomTreeDataNode[]) => updateTreeData(prev, key, leaves));
+  };
+
+  // Tree `onSelect` callback: clicking the selected leaf again deselects it, and antd then passes empty arrays.
+  const onTableSelected = async (selectedKeys: Key[], { selectedNodes }: { selectedNodes: CustomTreeDataNode[] }) => {
+    const parent = selectedNodes[0]?.parent;
+    if (!parent || selectedKeys.length === 0) {
+      setSelectedTable(null); // nothing selected: the selection effect then clears the preview
+      return;
+    }
+    setSelectedDataset(parent);
+    setSelectedTable(String(selectedKeys[0]).slice(`${parent}::`.length)); // drop the dataset-id prefix
+  };
+
+  // Load the preview rows of `tableName` in `datasetId` into `tableData`; failures are only logged.
+  const fetchTableData = async (datasetId: string, tableName: string) => {
+    try {
+      setLoading(prev => ({ ...prev, tableData: true }));
+      // Drop the previous table's rows first so a failed request cannot leave them under the new table's name.
+      setTableData(null);
+      const [err, data] = await apiInterceptors(getBenchmarkTableRows(datasetId, tableName));
+
+      if (err) {
+        console.error('获取表数据失败:', err);
+        return;
+      }
+      setTableData(data || null);
+    } catch (err) {
+      console.error('获取表数据失败:', err);
+    } finally {
+      setLoading(prev => ({ ...prev, tableData: false }));
+    }
+  };
+
+  // Keep the preview in sync with the selection: fetch rows when both ids are set, otherwise clear it.
+  useEffect(() => {
+    const datasetId = selectedDataset;
+    const tableName = selectedTable;
+    if (datasetId && tableName) fetchTableData(datasetId, tableName);
+    else setTableData(null);
+  }, [selectedDataset, selectedTable]);
+
+  // Build antd column definitions from the keys of the first row; only the first column gets a fixed width.
+  const generateColumns = () => {
+    const rows = tableData ? tableData.rows : [];
+    if (rows.length === 0) return [];
+
+    return Object.keys(rows[0]).map((field, position) => ({
+      title: field,
+      dataIndex: field,
+      key: field,
+      width: position > 0 ? undefined : 100,
+    }));
+  };
+
+  return (
+    <div className='h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12'>
+      <Card
+        title={
+          <>
+            评测数据集
+            <NavTo href='/models_evaluation'>返回评测任务列表</NavTo>
+          </>
+        }
+        className={`w-full h-full flex-1 flex flex-col ${styles['page-card']}`}
+      >
+        <div className='flex h-full'>
+          {/* Left pane: dataset list */}
+          <div className='w-1/4 pr-4 border-r flex flex-col'>
+            <Title level={5} className='mb-4'>
+              数据集列表
+            </Title>
+            <div className='overflow-y-auto h-full'>
+              <Tree loadData={loadTreeData} treeData={treeData} onSelect={onTableSelected} />
+            </div>
+          </div>
+
+          {/* Right pane: table data */}
+          <div className='w-3/4 pl-4 flex flex-col'>
+            <div className='flex justify-between items-center mb-4'>
+              <Title level={5} className='mb-0'>
+                表数据<span className='font-normal text-sm'>（仅展示前10条数据）</span>
+              </Title>
+              {selectedTable && <Text type='secondary'>{selectedTable}</Text>}
+            </div>
+            <div className='overflow-y-auto h-full'>
+              {loading.tableData ? (
+                <div className='flex justify-center items-center h-full'>
+                  <Spin />
+                </div>
+              ) : tableData && tableData.rows.length > 0 ? (
+                <Table
+                  className={`w-full flex-auto ${styles.table}`}
+                  dataSource={tableData.rows}
+                  columns={generateColumns()}
+                  pagination={false}
+                  scroll={{ x: true }}
+                  size='small'
+                />
+              ) : selectedTable ? (
+                <Text type='secondary'>暂无数据</Text>
+              ) : (
+                <Text type='secondary'>请先选择一个表</Text>
+              )}
+            </div>
+          </div>
+        </div>
+      </Card>
+    </div>
+  );
+};
+
+export default DatasetsForEvaluation;
diff --git a/web/pages/models_evaluation/detail.tsx b/web/pages/models_evaluation/detail.tsx
deleted file mode 100644
index 3143809b4..000000000
--- a/web/pages/models_evaluation/detail.tsx
+++ /dev/null
@@ -1,17 +0,0 @@
-import { Card, Typography } from "antd";
-import React from "react";
-
-const { Title, Text } = Typography;
-
-const EvaluationDetail = () => {
-  return (
-    <div className="flex flex-col h-full w-full dark:bg-gradient-dark bg-gradient-light bg-cover bg-center px-6 py-2 pt-12">
-      <Card title="模型评估详情" className="w-full">
-        <Title level={4}>详情页面占位</Title>
-        <Text>这里是模型评估的详细信息页面</Text>
-      </Card>
-    </div>
-  );
-};
-
-export default EvaluationDetail;
\ No newline at end of file
diff --git a/web/pages/models_evaluation/index.tsx b/web/pages/models_evaluation/index.tsx
index f45f92f31..17c68dbae 100644
--- a/web/pages/models_evaluation/index.tsx
+++ b/web/pages/models_evaluation/index.tsx
@@ -1,13 +1,12 @@
-import { ConfigProvider } from "antd";
-import React, { useState } from "react";
+import { ConfigProvider } from 'antd';
+import { useState } from 'react';
 
-import { EvaluationHeader } from "./EvaluationHeader";
-import { TabKey } from "@/types/models_evaluation";
-import { EvaluationList } from "./EvaluationList";
-import { EvaluationProvider } from "./context/EvaluationContext";
+import { EvaluationHeader } from '@/components/models_evaluation/EvaluationHeader';
+import { EvaluationList } from '@/components/models_evaluation/EvaluationList';
+import { EvaluationProvider } from '@/components/models_evaluation/context/EvaluationContext';
+import { TabKey } from '@/types/models_evaluation';
 
 const ModelsEvaluation = () => {
-
   const [activeKey, setActiveKey] = useState<TabKey>('all');
   const [filterValue, setFilterValue] = useState<string>('');
 
@@ -31,15 +30,12 @@ const ModelsEvaluation = () => {
             onSearch={setFilterValue}
           />
           <div className='flex flex-col h-full w-full overflow-y-auto'>
-            <EvaluationList
-              filterValue={filterValue}
-              type={activeKey}
-            />
+            <EvaluationList filterValue={filterValue} type={activeKey} />
           </div>
         </div>
       </EvaluationProvider>
     </ConfigProvider>
-  )
-}
+  );
+};
 
-export default ModelsEvaluation;
\ No newline at end of file
+export default ModelsEvaluation;
diff --git a/web/pages/models_evaluation/styles.module.css b/web/pages/models_evaluation/styles.module.css
index 29e6d0036..4a267ce44 100644
--- a/web/pages/models_evaluation/styles.module.css
+++ b/web/pages/models_evaluation/styles.module.css
@@ -1,3 +1,12 @@
 .models-evaluation-detail :global .ant-card-body {
   overflow-y: auto;
+}
+
+.table :global table {
+  display: table;
+}
+
+.page-card :global .ant-card-body {
+  height: 100%;
+  overflow: hidden;
 }
\ No newline at end of file
diff --git a/web/types/models_evaluation.ts b/web/types/models_evaluation.ts
index 66846c807..d89a0786d 100644
--- a/web/types/models_evaluation.ts
+++ b/web/types/models_evaluation.ts
@@ -42,6 +42,7 @@ export interface EvaluationItem {
   log_info: null;
   gmt_create: string;
   gmt_modified: string;
+  round_time: number;
 }
 
 export interface EvaluationData {
@@ -64,7 +65,7 @@ export interface getBenchmarkTaskListRequest {
   page_size: number;
   filter_param?: string;
   sys_code?: string;
-};
+}
 
 // 新的创建评测任务请求类型
 export type createBenchmarkTaskRequest = {
@@ -72,4 +73,4 @@ export type createBenchmarkTaskRequest = {
   model_list: string[];
   temperature: number;
   max_tokens: number;
-};
\ No newline at end of file
+};