Add simple GRPO

This commit is contained in:
Tong Li
2025-02-23 22:54:26 +08:00
parent 8e6c9a4ab3
commit ffd3878a1e
8 changed files with 253 additions and 21 deletions

View File

@@ -210,6 +210,8 @@ class VLLMInferenceBackend(BaseInferenceBackend):
"action_log_probs": log_probs,
"action_mask": action_mask,
}
if "gt_answer" in kwargs:
data["gt_answer"] = kwargs["gt_answer"]
data = {k: v.to(get_current_device()) for k, v in data.items()}
return data