This commit is contained in:
YeAnbang
2025-03-07 18:29:34 +08:00
parent eb6337f07f
commit 6a6634b6e8
10 changed files with 350 additions and 12 deletions

View File

@@ -19,8 +19,8 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
return reward
else:
reward += 1.0
-if gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower():
-reward = reward + 2.0
+# if gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower():
+# reward = reward + 2.0
return reward