[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786)

* Add ColossalEval
* Delete evaluate in Chat

---------

Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com>
Co-authored-by: Tong Li <tong.li352711588@gmail.com>
2025-09-08 20:40:34 +00:00 · 2023-09-24 23:14:11 +08:00
parent 74aa7d964a
commit ce777853ae
60 changed files with 5314 additions and 2497 deletions
--- a/applications/ColossalEval/examples/gpt_evaluation/config/evaluation/config.json
+++ b/applications/ColossalEval/examples/gpt_evaluation/config/evaluation/config.json
@@ -0,0 +1,44 @@
+{
+  "language": "en",
+  "category": {
+    "brainstorming": {
+      "GPT": [
+        "language organization",
+        "relevance",
+        "creativity",
+        "practicality",
+        "reasonableness"
+      ]
+    },
+    "chat": {
+      "GPT": [
+        "language organization",
+        "naturalness",
+        "engagingness",
+        "fidelity"
+      ]
+    },
+    "generation": {
+      "GPT": [
+        "language organization",
+        "relevance",
+        "diversity"
+      ]
+    },
+    "open_qa": {
+      "GPT": [
+        "language organization",
+        "relevance",
+        "correctness"
+      ]
+    },
+    "roleplay": {
+      "GPT": [
+        "language organization",
+        "relevance",
+        "fidelity",
+        "creativity"
+      ]
+    }
+  }
+}
--- a/applications/ColossalEval/examples/gpt_evaluation/config/inference/config.json
+++ b/applications/ColossalEval/examples/gpt_evaluation/config/inference/config.json
@@ -0,0 +1,33 @@
+{
+  "model": [
+    {
+      "name": "model name",
+      "model_class": "HuggingFaceCausalLM",
+      "parameters": {
+        "path": "path to model",
+        "model_max_length": 4096,
+        "tokenizer_path": "",
+        "tokenizer_kwargs": {
+          "trust_remote_code": true
+        },
+        "peft_path": null,
+        "model_kwargs": {
+          "torch_dtype": "torch.float32",
+          "trust_remote_code": true
+        },
+        "prompt_template": "plain",
+        "batch_size": 4
+      }
+    }
+  ],
+  "dataset": [
+    {
+      "name": "colossal",
+      "dataset_class": "ColossalDataset",
+      "debug": false,
+      "few_shot": false,
+      "path": "../../configs/gpt_evaluation/data/eval_en_examples.json",
+      "save_path": "path to save converted dataset (inference_data/colossal.json)"
+    }
+  ]
+}