Support mtbench (#5025)

Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com>
2025-09-08 20:40:34 +00:00 · 2023-11-09 13:41:50 +08:00
parent f71e63b0f3
commit 239cd92eff
9 changed files with 312 additions and 13 deletions
--- a/applications/ColossalEval/colossal_eval/dataset/mtbench.py
+++ b/applications/ColossalEval/colossal_eval/dataset/mtbench.py
@@ -0,0 +1,72 @@
+import copy
+import json
+import os
+from collections import defaultdict
+from typing import Dict, List
+
+from colossal_eval.utils import get_json_list
+
+from colossalai.logging import DistributedLogger
+
+from .base import BaseDataset
+
+default_inference_kwargs = {  # per-category defaults; MTBenchDataset.load deep-copies this dict for each category
+    "calculate_loss": False,
+    "all_classes": None,
+    "language": "English",
+    "pretrain": False,
+    "max_new_tokens": 1024,  # NOTE(review): presumably the generation length cap per turn — confirm with consumer
+    "turns": 2,  # NOTE(review): presumably the number of conversation turns to run — confirm with consumer
+}
+
+
+class MTBenchDataset(BaseDataset):
+    """
+    Dataset class for mt_bench dataset.
+    Data source: https://github.com/lm-sys/FastChat/blob/main/fastchat/llm_judge/data/mt_bench/question.jsonl
+    This dataset class will convert the original dataset into the inference dataset.
+    """
+
+    def __init__(self, path, logger, few_shot):
+        # Set before loading; every MT-Bench sample carries a list of turns as its instruction.
+        self.multiturn = True
+        self.dataset = self.load(path, logger, few_shot)
+
+    @staticmethod
+    def load(path: str, logger: DistributedLogger, few_shot: bool) -> Dict:
+        """
+        Build the inference dataset from `question.jsonl` and `reference_answer/gpt-4.jsonl` under `path`.
+
+        Questions are grouped by category; the result has the shape
+        `{"test": {category: {"data": [...], "inference_kwargs": {...}}}}`.
+        `logger` and `few_shot` are accepted but not used here.
+        """
+        dataset = {"test": defaultdict(dict)}
+        categories = dataset["test"]
+
+        # Reference turns keyed by question id, taken from the first choice of each reference record.
+        ref_path = os.path.join(path, "reference_answer/gpt-4.jsonl")
+        reference = {ref["question_id"]: ref["choices"][0]["turns"] for ref in get_json_list(ref_path)}
+
+        with open(os.path.join(path, "question.jsonl"), "r", encoding="utf-8") as file:
+            for line in file:
+                if not line.strip():
+                    continue  # tolerate blank lines (e.g. a trailing newline); json.loads would raise on them
+                question = json.loads(line)
+                category = question["category"]
+                data_point = {
+                    "id": question["question_id"],
+                    "dataset": "mtbench",
+                    "split": "test",
+                    "category": category,
+                    "instruction": question["turns"],
+                    "input": "",
+                    "output": [],
+                    # Questions without a reference answer get one empty target per turn.
+                    "target": reference.get(question["question_id"], [""] * len(question["turns"])),
+                }
+                if category not in categories:
+                    categories[category] = {"data": [], "inference_kwargs": copy.deepcopy(default_inference_kwargs)}
+                categories[category]["data"].append(data_point)
+
+        return dataset