[chat]: add vf_coef argument for PPOTrainer (#3318)

2025-09-22 18:09:06 +00:00 · 2023-04-11 09:54:59 +08:00
parent 89fd10a1c9
commit e6a132a449
2 changed files with 5 additions and 1 deletion
--- a/applications/Chat/coati/models/loss.py
+++ b/applications/Chat/coati/models/loss.py
@@ -65,7 +65,7 @@ class ValueLoss(nn.Module):
        surr2 = (values - reward)**2
        loss = torch.max(surr1, surr2)
        loss = loss.mean()
-        return loss
+        return 0.5 * loss


 class PPOPtxActorLoss(nn.Module):