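Fix default eval setting: make --eval-dataset optional (default None) and skip evaluation instead of raising when no eval dataset is provided (#6321)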

Co-authored-by: Tong Li <tong.li35271158@gmail.com>
This commit is contained in:
Tong Li 2025-05-22 11:52:41 +08:00 committed by GitHub
parent 32afa7bf29
commit de2ad3b206
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 11 additions and 6 deletions

View File

@ -1,4 +1,5 @@
import copy import copy
import os
import uuid import uuid
from typing import Any, Dict, Optional from typing import Any, Dict, Optional

View File

@ -149,7 +149,7 @@ class BaseProducer:
else: else:
raise ValueError(f"Unknown evaluation function type {evaluation_function_type}") raise ValueError(f"Unknown evaluation function type {evaluation_function_type}")
else: else:
raise ValueError("eval_dataset_config is not defined") print("No eval dataset provided, skip eval")
self.device = get_current_device() self.device = get_current_device()
# init backend # init backend

View File

@ -14,7 +14,7 @@ if __name__ == "__main__":
"-ed", "-ed",
"--eval-dataset", "--eval-dataset",
type=str, type=str,
default='{"eval task name":"data_eval.jsonl"}', default=None,
help="Evaluation dataset for each task, please use json format to specify the dataset for each task. \ help="Evaluation dataset for each task, please use json format to specify the dataset for each task. \
For example: {'task1':'data_eval_task1.jsonl', 'task2':'data_eval_task2.jsonl'}, the jsonl file should be in the same format as the training dataset. \ For example: {'task1':'data_eval_task1.jsonl', 'task2':'data_eval_task2.jsonl'}, the jsonl file should be in the same format as the training dataset. \
The key is the task name, and the value is the path to the jsonl file", The key is the task name, and the value is the path to the jsonl file",
@ -265,10 +265,14 @@ if __name__ == "__main__":
project_name=args.project, project_name=args.project,
save_interval=args.save_interval, save_interval=args.save_interval,
save_dir=os.path.join(args.save_dir, args.project.replace(" ", "_")), save_dir=os.path.join(args.save_dir, args.project.replace(" ", "_")),
eval_dataset_config={ eval_dataset_config=(
k: {"path": v, "max_length": args.max_prompt_tokens, "system_prompt": args.system_prompt} {
for k, v in json.loads(args.eval_dataset).items() k: {"path": v, "max_length": args.max_prompt_tokens, "system_prompt": args.system_prompt}
}, for k, v in json.loads(args.eval_dataset).items()
}
if args.eval_dataset
else None
),
eval_interval=args.eval_interval, eval_interval=args.eval_interval,
eval_save_dir=os.path.join(args.eval_save_dir, args.project.replace(" ", "_")), eval_save_dir=os.path.join(args.eval_save_dir, args.project.replace(" ", "_")),
eval_generation_config=eval_generation_config, eval_generation_config=eval_generation_config,