fix reward score

2025-12-10 13:43:20 +00:00 · 2025-03-11 10:17:32 +08:00
parent 71a0181fce
commit abca66e69f
1 changed file with 6 additions and 4 deletions
--- a/applications/ColossalChat/coati/distributed/reward/reward_fn.py
+++ b/applications/ColossalChat/coati/distributed/reward/reward_fn.py
@@ -4,6 +4,8 @@ from .reward_utils import extract_solution, validate_response_structure
 def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
+    format_score = 1.0
+    acc_score = 9.0
    tokenizer = kwargs["tokenizer"]
    reward = torch.tensor(0.0)
    format_reward = torch.tensor(0.0)
@@ -20,16 +22,16 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
    # Check format accuracy
    if format_valid:
-        format_reward += 1.0
+        format_reward += format_score
-        reward += 1.0
+        reward += format_score
    # Check answer accuracy
    if (
        final_answer is not None
        and gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower()
    ):
-        acc_reward += 5.0
+        acc_reward += acc_score
-        reward += 5.0
+        reward += acc_score
    return torch.tensor([reward, format_reward, acc_reward]).to(input_ids.device)