[chat]: add vf_coef argument for PPOTrainer (#3318)

This commit is contained in:
zhang-yi-chi
2023-04-11 09:54:59 +08:00
committed by GitHub
parent 89fd10a1c9
commit e6a132a449
2 changed files with 5 additions and 1 deletion

View File

@@ -65,7 +65,7 @@ class ValueLoss(nn.Module):
surr2 = (values - reward)**2
loss = torch.max(surr1, surr2)
loss = loss.mean()
- return loss
+ return 0.5 * loss
class PPOPtxActorLoss(nn.Module):