From cd32236e53d565079fbbbeccf7bc7ef6ec7eafa1 Mon Sep 17 00:00:00 2001
From: YeAnbang <44796419+YeAnbang@users.noreply.github.com>
Date: Tue, 29 Jul 2025 16:56:52 +0800
Subject: [PATCH] [Fix] Add L2 Regularization (#6372)

* fix no L2 regularization error

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 applications/ColossalChat/coati/distributed/consumer.py     | 2 +-
 .../ColossalChat/coati/distributed/grpo_consumer.py         | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/applications/ColossalChat/coati/distributed/consumer.py b/applications/ColossalChat/coati/distributed/consumer.py
index e360392e7..ba7d882c9 100644
--- a/applications/ColossalChat/coati/distributed/consumer.py
+++ b/applications/ColossalChat/coati/distributed/consumer.py
@@ -365,7 +365,7 @@ class SimpleConsumer(BaseConsumer):
         self.model = AutoModelForCausalLM.from_pretrained(path, **model_config)
         self.model.train()
         self.model.gradient_checkpointing_enable()
-        self.optimizer = HybridAdam(self.model.parameters(), lr=1e-3)
+        self.optimizer = HybridAdam(self.model.parameters(), lr=1e-3, weight_decay=0.01)
         self.accum_loss = torch.zeros(1, device=self.device)
 
     def setup(self):
diff --git a/applications/ColossalChat/coati/distributed/grpo_consumer.py b/applications/ColossalChat/coati/distributed/grpo_consumer.py
index a3f1a1cbb..424d46098 100644
--- a/applications/ColossalChat/coati/distributed/grpo_consumer.py
+++ b/applications/ColossalChat/coati/distributed/grpo_consumer.py
@@ -72,7 +72,11 @@ class GRPOConsumer(BaseConsumer):
         self.policy_model = AutoModelForCausalLM.from_pretrained(path, **model_config)
         self.policy_model.train()
         self.policy_model.gradient_checkpointing_enable()
-        self.optimizer = HybridAdam(self.policy_model.parameters(), lr=grpo_config.get("lr", 1e-6))
+        self.optimizer = HybridAdam(
+            self.policy_model.parameters(),
+            lr=grpo_config.get("lr", 1e-6),
+            weight_decay=grpo_config.get("weight_decay", 0.01),
+        )
         self.accum_loss = torch.zeros(1, device=self.device)
         self.accum_kl = torch.zeros(1, device=self.device)
         self.accum_entropy = torch.zeros(1, device=self.device)