Fix reward score: raise accuracy reward from 5.0 to 9.0 and name the format/accuracy score constants

2026-07-15 15:29:48 +00:00 · 2025-03-11 10:17:32 +08:00
parent 71a0181fce
commit abca66e69f
1 changed files with 6 additions and 4 deletions
--- a/applications/ColossalChat/coati/distributed/reward/reward_fn.py
+++ b/applications/ColossalChat/coati/distributed/reward/reward_fn.py
@@ -4,6 +4,8 @@ from .reward_utils import extract_solution, validate_response_structure


 def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
+    format_score = 1.0
+    acc_score = 9.0
    tokenizer = kwargs["tokenizer"]
    reward = torch.tensor(0.0)
    format_reward = torch.tensor(0.0)
@@ -20,16 +22,16 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):

    # Check format accuracy
    if format_valid:
-        format_reward += 1.0
-        reward += 1.0
+        format_reward += format_score
+        reward += format_score

    # Check answer accuracy
    if (
        final_answer is not None
        and gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower()
    ):
-        acc_reward += 5.0
-        reward += 5.0
+        acc_reward += acc_score
+        reward += acc_score

    return torch.tensor([reward, format_reward, acc_reward]).to(input_ids.device)