mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-06 11:32:10 +00:00
upgrade reward functions
This commit is contained in:
@@ -127,7 +127,9 @@ class GRPOConsumer(BaseConsumer):
|
||||
"answer_end": {"text": "</answer>", "num_occur": 1},
|
||||
}
|
||||
reward_model_kwargs = {
|
||||
- k: v for k, v in grpo_config.items() if k in ["soft_over_length_punishment", "max_length", "cache_length"]
|
||||
+ k: v
|
||||
+ for k, v in grpo_config.items()
|
||||
+ if k in ["soft_over_length_punishment", "max_new_tokens", "cache_length"]
|
||||
}
|
||||
self.reward_model = VerifiableReward(
|
||||
reward_fns=[
|
||||
|
Reference in New Issue
Block a user