Mirror of https://github.com/hpcaitech/ColossalAI.git (last synced 2025-09-22 18:09:06 +00:00).
[chat]: add vf_coef argument for PPOTrainer (#3318)
This commit is contained in:
@@ -65,7 +65,7 @@ class ValueLoss(nn.Module):
|
||||
surr2 = (values - reward)**2
|
||||
loss = torch.max(surr1, surr2)
|
||||
loss = loss.mean()
|
||||
- return loss
|
||||
+ return 0.5 * loss
|
||||
|
||||
|
||||
class PPOPtxActorLoss(nn.Module):
|
||||
|
Reference in New Issue
Block a user