From 061d8cb3b6485787e7c2f75868342b3a243c44e1 Mon Sep 17 00:00:00 2001 From: YeAnbang Date: Fri, 4 Apr 2025 10:11:11 +0800 Subject: [PATCH] remove unused code --- .../ColossalChat/coati/distributed/grpo_consumer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/applications/ColossalChat/coati/distributed/grpo_consumer.py b/applications/ColossalChat/coati/distributed/grpo_consumer.py index fbc06edc2..f4174261a 100644 --- a/applications/ColossalChat/coati/distributed/grpo_consumer.py +++ b/applications/ColossalChat/coati/distributed/grpo_consumer.py @@ -252,10 +252,6 @@ class GRPOConsumer(BaseConsumer): - (inputs["reference_action_log_probs"] - action_log_probs) - 1 ) - decode_tokens_100 = self.tokenizer.batch_decode( - input_ids_forward_micro_batch[:, -num_action:], - skip_special_tokens=False, - ) loss, skip_update, _ = self.policy_loss_fn( action_log_probs, action_log_probs, @@ -277,7 +273,7 @@ class GRPOConsumer(BaseConsumer): loss = policy_model_outputs["loss"] if self.booster.plugin.stage_manager.is_last_stage(): - # calculate kl + # calculate kl; as we cannot do this inside the callback, kl needs to be calculated again action_logits = policy_model_outputs["outputs"]["logits"] action_log_probs = calc_action_log_probs( action_logits / self.generate_config["temperature"],