mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-08 20:40:34 +00:00
[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786)
* Add ColossalEval * Delete evaluate in Chat --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> Co-authored-by: Tong Li <tong.li352711588@gmail.com>
This commit is contained in:
@@ -0,0 +1,44 @@
{
    "language": "en",
    "category": {
        "brainstorming": {
            "GPT": [
                "language organization",
                "relevance",
                "creativity",
                "practicality",
                "reasonableness"
            ]
        },
        "chat": {
            "GPT": [
                "language organization",
                "naturalness",
                "engagingness",
                "fidelity"
            ]
        },
        "generation": {
            "GPT": [
                "language organization",
                "relevance",
                "diversity"
            ]
        },
        "open_qa": {
            "GPT": [
                "language organization",
                "relevance",
                "correctness"
            ]
        },
        "roleplay": {
            "GPT": [
                "language organization",
                "relevance",
                "fidelity",
                "creativity"
            ]
        }
    }
}
@@ -0,0 +1,33 @@
{
    "model": [
        {
            "name": "model name",
            "model_class": "HuggingFaceCausalLM",
            "parameters": {
                "path": "path to model",
                "model_max_length": 4096,
                "tokenizer_path": "",
                "tokenizer_kwargs": {
                    "trust_remote_code": true
                },
                "peft_path": null,
                "model_kwargs": {
                    "torch_dtype": "torch.float32",
                    "trust_remote_code": true
                },
                "prompt_template": "plain",
                "batch_size": 4
            }
        }
    ],
    "dataset": [
        {
            "name": "colossal",
            "dataset_class": "ColossalDataset",
            "debug": false,
            "few_shot": false,
            "path": "../../configs/gpt_evaluation/data/eval_en_examples.json",
            "save_path": "path to save converted dataset (inference_data/colossal.json)"
        }
    ]
}
Reference in New Issue
Block a user