diff --git a/applications/ColossalChat/coati/distributed/reward/reward_fn.py b/applications/ColossalChat/coati/distributed/reward/reward_fn.py
index a0f92d8c4..1260645c9 100644
--- a/applications/ColossalChat/coati/distributed/reward/reward_fn.py
+++ b/applications/ColossalChat/coati/distributed/reward/reward_fn.py
@@ -3,14 +3,11 @@ import torch
 from .reward_utils import extract_solution, validate_response_structure
 
 
-def math_reward_fn(step, input_ids, gt_answer, response_idx, **kwargs):
+def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
     tokenizer = kwargs["tokenizer"]
     soft_over_length_punishment = kwargs["soft_over_length_punishment"]
-    format_score = 1.0
-    acc_score = 9.0
-    if step > 30:
-        format_score = 0.0
-        acc_score = 10.0
+    format_score = 0.0
+    acc_score = 10.0
     reward = torch.tensor(0.0)
     format_reward = torch.tensor(0.0)
     acc_reward = torch.tensor(0.0)
@@ -21,10 +18,8 @@ def math_reward_fn(step, input_ids, gt_answer, response_idx, **kwargs):
         max_length = kwargs.get("max_length", 1024 * 4)
         cache_length = kwargs.get("cache_length", 512)
         res_length = e.item() - s.item() + 1
-        if res_length >= max_length:
-            length_reward = -1.0 * 2
-        elif res_length > max_length - cache_length:
-            length_reward = ((max_length - cache_length) - res_length) / cache_length * 2
+        if max_length - cache_length < res_length < max_length:
+            length_reward = ((max_length - cache_length) - res_length) / cache_length * acc_score
 
     if gt_answer is None:
         return reward