diff --git a/applications/ColossalChat/coati/distributed/grpo_consumer.py b/applications/ColossalChat/coati/distributed/grpo_consumer.py index 20b03597a..ec07fe2b5 100644 --- a/applications/ColossalChat/coati/distributed/grpo_consumer.py +++ b/applications/ColossalChat/coati/distributed/grpo_consumer.py @@ -358,7 +358,7 @@ class GRPOConsumer(BaseConsumer): criterion=_criterion, optimizer=self.optimizer, return_loss=True, - return_outputs=True, + return_outputs=False, ) loss = policy_model_outputs["loss"]