diff --git a/applications/ColossalChat/coati/distributed/grpo_consumer.py b/applications/ColossalChat/coati/distributed/grpo_consumer.py index 0e956ca41..51cdcf322 100644 --- a/applications/ColossalChat/coati/distributed/grpo_consumer.py +++ b/applications/ColossalChat/coati/distributed/grpo_consumer.py @@ -345,7 +345,7 @@ class GRPOConsumer(BaseConsumer): criterion=_criterion, optimizer=self.optimizer, return_loss=True, - return_outputs=True, + return_outputs=False, ) loss = policy_model_outputs["loss"]