upgrade reward functions

This commit is contained in:
YeAnbang
2025-05-16 18:04:38 +08:00
parent 021914c565
commit 03b41d6fb5
3 changed files with 123 additions and 27 deletions

View File

@@ -127,7 +127,9 @@ class GRPOConsumer(BaseConsumer):
"answer_end": {"text": "</answer>", "num_occur": 1},
}
reward_model_kwargs = {
-k: v for k, v in grpo_config.items() if k in ["soft_over_length_punishment", "max_length", "cache_length"]
+k: v
+for k, v in grpo_config.items()
+if k in ["soft_over_length_punishment", "max_new_tokens", "cache_length"]
}
self.reward_model = VerifiableReward(
reward_fns=[