Mirror of https://github.com/hpcaitech/ColossalAI.git
[ColossalChat] Hotfix for ColossalChat (#5910)
* add ignore and tiny llama
* fix path issue
* run style
* fix issue
* update bash
* fix ddp issue
* add Qwen 1.5 32B
@@ -102,7 +102,6 @@ class SFTTrainer(SLTrainer):
                batch_size = batch["input_ids"].size(0)
                outputs = self.model(batch["input_ids"], attention_mask=batch["attention_mask"], labels=batch["labels"])
                loss = outputs.loss
                step_bar.set_description(f"Epoch {epoch + 1}/{self.max_epochs} Loss: {loss.detach().cpu().item():.4f}")

                self.booster.backward(loss=loss, optimizer=self.optimizer)

@@ -115,6 +114,7 @@ class SFTTrainer(SLTrainer):
                self.optimizer.zero_grad()
                self.scheduler.step()

                step_bar.set_postfix({"train/loss": self.accumulative_meter.get("loss")})
                if self.writer:
                    self.writer.add_scalar("train/loss", self.accumulative_meter.get("loss"), self.num_train_step)
                    self.writer.add_scalar("train/lr", self.scheduler.get_last_lr()[0], self.num_train_step)
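For context, the lines touched by this hotfix follow the standard ColossalAI booster-driven training step: forward pass, `booster.backward`, optimizer/scheduler step, then metric logging. Below is a minimal, self-contained sketch of that loop shape. The function name `train_epoch` and the setup objects (model, optimizer, scheduler, dataloader, writer) are illustrative assumptions for the sketch, not the exact code in `SFTTrainer`.

```python
# Minimal sketch of the training-step pattern shown in the diff above.
# Assumes a ColossalAI Booster-wrapped model and optimizer; object names
# here are hypothetical stand-ins for the trainer's attributes.
from tqdm import tqdm


def train_epoch(model, optimizer, scheduler, booster, dataloader, writer=None, epoch=0, max_epochs=1):
    model.train()
    step_bar = tqdm(dataloader)
    for step, batch in enumerate(step_bar):
        # Forward pass: HuggingFace causal-LM models return .loss when labels are supplied.
        outputs = model(batch["input_ids"], attention_mask=batch["attention_mask"], labels=batch["labels"])
        loss = outputs.loss
        step_bar.set_description(f"Epoch {epoch + 1}/{max_epochs} Loss: {loss.detach().cpu().item():.4f}")

        # Backward goes through the booster so the active plugin (e.g. DDP, ZeRO)
        # can hook gradient synchronization, rather than calling loss.backward() directly.
        booster.backward(loss=loss, optimizer=optimizer)

        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()

        # Log per-step loss and the current learning rate.
        if writer:
            writer.add_scalar("train/loss", loss.detach().cpu().item(), step)
            writer.add_scalar("train/lr", scheduler.get_last_lr()[0], step)
```

Routing the backward pass through `booster.backward(loss=..., optimizer=...)` instead of `loss.backward()` is what lets the same trainer code run unchanged under the different parallelism plugins, which is relevant to the "fix ddp issue" item in this commit's message.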