diff --git a/applications/ColossalChat/coati/distributed/reward/reward_fn.py b/applications/ColossalChat/coati/distributed/reward/reward_fn.py index 6844d700a..14d340dc4 100644 --- a/applications/ColossalChat/coati/distributed/reward/reward_fn.py +++ b/applications/ColossalChat/coati/distributed/reward/reward_fn.py @@ -1,5 +1,4 @@ import torch -from math_verify import parse, verify from .reward_utils import extract_boxed_solution, extract_solution, validate_response_structure @@ -36,7 +35,11 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs): format_acc += 1 # Check answer accuracy, answer is considered correct if the answer is correct and the format is valid - if format_valid and final_answer is not None and verify(parse(gt_answer.strip()), parse(final_answer.strip())): + if ( + format_valid + and final_answer is not None + and gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower() + ): ans_acc += 1 reward += acc_score @@ -88,7 +91,7 @@ def boxed_math_reward_fn(input_ids, gt_answer, response_idx, **kwargs): reward += format_score # Check answer accuracy, answer is considered correct if the answer is correct and the format is valid - if format_valid and final_answer is not None and verify(parse(gt_answer.strip()), parse(final_answer.strip())): + if format_valid and final_answer is not None and gt_answer.strip().lower() == final_answer.strip().lower(): ans_acc += 1 reward += acc_score