From 4516a4ed6aabf6cef4cd568605f7e77144570394 Mon Sep 17 00:00:00 2001 From: Tong Li Date: Thu, 15 Aug 2024 03:47:54 +0000 Subject: [PATCH] update sft --- applications/ColossalChat/coati/trainer/sft.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/applications/ColossalChat/coati/trainer/sft.py b/applications/ColossalChat/coati/trainer/sft.py index fb2f9a765..298fb30ee 100755 --- a/applications/ColossalChat/coati/trainer/sft.py +++ b/applications/ColossalChat/coati/trainer/sft.py @@ -200,8 +200,9 @@ class SFTTrainer(SLTrainer): ) loss = outputs["loss"] if dist.get_rank() == dist.get_world_size() - 1: - step_bar.set_postfix({"eval/loss": loss.item()}) - self.accumulative_meter.add("loss", loss.item()) + global_loss = all_reduce_mean(loss, self.booster) + step_bar.set_postfix({"eval/loss": global_loss.item()}) + self.accumulative_meter.add("loss", global_loss.item()) step_bar.update() if dist.get_rank() == dist.get_world_size() - 1: