upgrade reward functions

This commit is contained in:
YeAnbang
2025-05-16 18:04:38 +08:00
parent 021914c565
commit 03b41d6fb5
3 changed files with 123 additions and 27 deletions

View File

@@ -198,6 +198,8 @@ if __name__ == "__main__":
"beta": args.kl_coeff, # KL penalty coefficient
"loss_variation": "sample_level",
"reward_fn_type": args.reward_type,
"max_length": args.max_new_tokens + args.max_prompt_tokens,
"max_new_tokens": args.max_new_tokens,
}
elif args.algo == "DAPO":
# DAPO variant settings
@@ -213,6 +215,7 @@ if __name__ == "__main__":
"loss_variation": "token_level",
"soft_over_length_punishment": True,
"max_length": args.max_new_tokens + args.max_prompt_tokens,
"max_new_tokens": args.max_new_tokens,
"cache_length": min(1024, int(args.max_new_tokens / 4)),
"filter_truncated_response": True,
"reward_fn_type": args.reward_type,