update reward

This commit is contained in:
Tong Li 2025-03-10 14:19:10 +08:00
parent 754b16dfbf
commit 71a0181fce

View File

@@ -21,7 +21,7 @@ class VerifiableReward:
# Get batch size
bs = input_ids.size(0)
# Initialize reward
-rewards = torch.zeros(bs, device=input_ids.device)
+rewards = torch.zeros((bs, 3), device=input_ids.device)
# Loop through reward functions
for reward_fn in self.reward_fns: