update files

2025-08-15 14:43:13 +00:00 · 2025-04-30 11:33:23 +08:00 · 2025-04-30 11:33:23 +08:00 · 3454b10884
commit 3454b10884
parent bb8d370b44
1 changed files with 6 additions and 4 deletions
--- a/applications/ColossalChat/coati/distributed/producer.py
+++ b/applications/ColossalChat/coati/distributed/producer.py
@@ -132,19 +132,21 @@ class BaseProducer:
                    ):
                        self.model.llm.sleep()  # revict KV_cache to avoid OOM
                    # don't sync model for last iteration
-                    print(
-                        f"[P{self.producer_idx}] Sync model episode {episode} step {(i + 1) // self.num_microbatches - 1}"
-                    )
                    torch.cuda.empty_cache()
                    if self.consumer_pp_size > 1:
                        for i in range(self.consumer_pp_size):
-                            print(f"[P{self.producer_idx}] Sync model PP stage {i}")
+                            print(
+                                f"[P{self.producer_idx}] Sync model PP stage {i} episode {episode} step {(i + 1) // self.num_microbatches - 1}"
+                            )
                            state_dict = ray_broadcast_tensor_dict(
                                None, self.num_producers, device=self.device, group_name=f"sync_model_{i}"
                            )
                            self.load_state_dict(state_dict)
                    else:
+                        print(
+                            f"[P{self.producer_idx}] Sync model episode {episode} step {(i + 1) // self.num_microbatches - 1}"
+                        )
                        state_dict = ray_broadcast_tensor_dict(
                            None, self.num_producers, device=self.device, group_name="sync_model"
                        )