Add simple GRPO

This commit is contained in:
Tong Li
2025-02-23 22:54:26 +08:00
committed by YeAnbang
parent fa1272f9f2
commit 40d601802d
8 changed files with 253 additions and 21 deletions

View File

@@ -210,6 +210,8 @@ class VLLMInferenceBackend(BaseInferenceBackend):
"action_log_probs": log_probs,
"action_mask": action_mask,
}
if "gt_answer" in kwargs:
data["gt_answer"] = kwargs["gt_answer"]
data = {k: v.to(get_current_device()) for k, v in data.items()}
return data