mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-05 19:13:01 +00:00
Revert "upgrade reward math verification"
This reverts commit a6085ff676.
This commit is contained in:
@@ -1,5 +1,4 @@
|
|||||||
import torch
|
import torch
|
||||||
from math_verify import parse, verify
|
|
||||||
|
|
||||||
from .reward_utils import extract_boxed_solution, extract_solution, validate_response_structure
|
from .reward_utils import extract_boxed_solution, extract_solution, validate_response_structure
|
||||||
|
|
||||||
@@ -36,7 +35,11 @@ def math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
|
|||||||
format_acc += 1
|
format_acc += 1
|
||||||
|
|
||||||
# Check answer accuracy, answer is considered correct if the answer is correct and the format is valid
|
# Check answer accuracy, answer is considered correct if the answer is correct and the format is valid
|
||||||
if format_valid and final_answer is not None and verify(parse(gt_answer.strip()), parse(final_answer.strip())):
|
if (
|
||||||
|
format_valid
|
||||||
|
and final_answer is not None
|
||||||
|
and gt_answer.strip().replace(" ", "").lower() == final_answer.strip().replace(" ", "").lower()
|
||||||
|
):
|
||||||
ans_acc += 1
|
ans_acc += 1
|
||||||
reward += acc_score
|
reward += acc_score
|
||||||
|
|
||||||
@@ -88,7 +91,7 @@ def boxed_math_reward_fn(input_ids, gt_answer, response_idx, **kwargs):
|
|||||||
reward += format_score
|
reward += format_score
|
||||||
|
|
||||||
# Check answer accuracy, answer is considered correct if the answer is correct and the format is valid
|
# Check answer accuracy, answer is considered correct if the answer is correct and the format is valid
|
||||||
if format_valid and final_answer is not None and verify(parse(gt_answer.strip()), parse(final_answer.strip())):
|
if format_valid and final_answer is not None and gt_answer.strip().lower() == final_answer.strip().lower():
|
||||||
ans_acc += 1
|
ans_acc += 1
|
||||||
reward += acc_score
|
reward += acc_score
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user