From 73bdfd88910efeecc4f09025773ecc58305aa494 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 14 Aug 2025 11:05:40 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 applications/ColossalChat/coati/experience_maker/naive.py | 4 +++-
 applications/ColossalChat/coati/trainer/kto.py            | 6 ++++--
 .../ColossalChat/examples/training_scripts/train_grpo.py  | 4 +---
 .../ColossalChat/examples/training_scripts/train_ppo.py   | 4 +---
 applications/ColossalChat/tests/test_train.sh             | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/applications/ColossalChat/coati/experience_maker/naive.py b/applications/ColossalChat/coati/experience_maker/naive.py
index 063655d02..e9c5fb521 100755
--- a/applications/ColossalChat/coati/experience_maker/naive.py
+++ b/applications/ColossalChat/coati/experience_maker/naive.py
@@ -119,7 +119,9 @@ class NaiveExperienceMaker(ExperienceMaker):
             generate_kwargs["stop_token_ids"] = stop_token_ids
         # Hack: manually initialize cache_position to address transformer version conflict
         if generate_kwargs.get("cache_position", None) is None and generate_kwargs.get("use_cache", False) is True:
-            generate_kwargs["cache_position"] = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=input_ids.device)
+            generate_kwargs["cache_position"] = torch.arange(
+                0, input_ids.shape[1], dtype=torch.long, device=input_ids.device
+            )
         torch.manual_seed(41)  # for tp, gurantee the same input for reward model
 
         if self.use_grpo and self.num_generation > 1:
diff --git a/applications/ColossalChat/coati/trainer/kto.py b/applications/ColossalChat/coati/trainer/kto.py
index 5a4656a74..f87bf53c4 100755
--- a/applications/ColossalChat/coati/trainer/kto.py
+++ b/applications/ColossalChat/coati/trainer/kto.py
@@ -193,12 +193,14 @@ class KTOTrainer(SLTrainer):
                 loss_mean = all_reduce_mean(tensor=loss)
                 chosen_reward_mean = chosen_rewards.mean()
                 chosen_rewards_list = [
-                    torch.tensor(0, dtype=chosen_reward_mean.dtype, device=loss.device) for _ in range(dist.get_world_size())
+                    torch.tensor(0, dtype=chosen_reward_mean.dtype, device=loss.device)
+                    for _ in range(dist.get_world_size())
                 ]
                 dist.all_gather(chosen_rewards_list, chosen_reward_mean)
                 rejected_reward_mean = rejected_rewards.mean()
                 rejected_rewards_list = [
-                    torch.tensor(0, dtype=rejected_reward_mean.dtype, device=loss.device) for _ in range(dist.get_world_size())
+                    torch.tensor(0, dtype=rejected_reward_mean.dtype, device=loss.device)
+                    for _ in range(dist.get_world_size())
                 ]
                 dist.all_gather(rejected_rewards_list, rejected_reward_mean)
                 chosen_rewards_list = [i for i in chosen_rewards_list if not i.isnan()]
diff --git a/applications/ColossalChat/examples/training_scripts/train_grpo.py b/applications/ColossalChat/examples/training_scripts/train_grpo.py
index 99e785086..eb5b89b80 100755
--- a/applications/ColossalChat/examples/training_scripts/train_grpo.py
+++ b/applications/ColossalChat/examples/training_scripts/train_grpo.py
@@ -89,9 +89,7 @@ def train(args):
         actor = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)
         if args.rm_pretrain:
             reward_model = RewardModel(args.rm_pretrain, trust_remote_code=True)
-        ref_model = AutoModelForCausalLM.from_pretrained(
-            args.pretrain, trust_remote_code=True
-        )
+        ref_model = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)
 
     if args.lora_config is not None:
         actor = convert_to_lora_module(actor, lora_config=lora_config)
diff --git a/applications/ColossalChat/examples/training_scripts/train_ppo.py b/applications/ColossalChat/examples/training_scripts/train_ppo.py
index 29d62a36f..74f2a73b5 100755
--- a/applications/ColossalChat/examples/training_scripts/train_ppo.py
+++ b/applications/ColossalChat/examples/training_scripts/train_ppo.py
@@ -102,9 +102,7 @@ def train(args):
             coordinator.print_on_master(msg="Flash-attention enabled successfully")
         else:
             actor = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)
-        ref_model = AutoModelForCausalLM.from_pretrained(
-            args.pretrain, trust_remote_code=True
-        )
+        ref_model = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)
         if not args.no_neural_reward_model:
             reward_model = RewardModel(args.rm_pretrain, trust_remote_code=True)
         critic = Critic(args.rm_pretrain)
diff --git a/applications/ColossalChat/tests/test_train.sh b/applications/ColossalChat/tests/test_train.sh
index 8665e6713..b70535291 100755
--- a/applications/ColossalChat/tests/test_train.sh
+++ b/applications/ColossalChat/tests/test_train.sh
@@ -631,7 +631,7 @@ for lora_rank in ${LORA_RANK[@]}; do
         done
     done
 done
-
+
 
 
 echo "[Test]: testing ORPO ..."
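
Below is a minimal standalone sketch of the cache_position workaround whose line the naive.py hunk re-wraps. Per the in-line comment, it works around a transformers version conflict by seeding cache_position with one index per prompt token whenever use_cache is requested and the caller supplied nothing. The helper name ensure_cache_position is illustrative and does not exist in the repository.

import torch


def ensure_cache_position(generate_kwargs: dict, input_ids: torch.Tensor) -> dict:
    # Only act when caching is requested and no cache_position was provided.
    if generate_kwargs.get("cache_position", None) is None and generate_kwargs.get("use_cache", False) is True:
        # One position index per prompt token, on the same device as the prompt.
        generate_kwargs["cache_position"] = torch.arange(
            0, input_ids.shape[1], dtype=torch.long, device=input_ids.device
        )
    return generate_kwargs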
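
The kto.py hunk likewise only re-wraps two list comprehensions, but the surrounding pattern is worth spelling out: each rank all-gathers its local mean reward, and NaN entries (ranks that saw no chosen or rejected samples in the batch) are dropped before the per-rank means are combined. A hedged sketch of that pattern, assuming torch.distributed is already initialized; the function name gather_valid_reward_means is hypothetical.

import torch
import torch.distributed as dist


def gather_valid_reward_means(local_rewards: torch.Tensor) -> torch.Tensor:
    # Mean reward on this rank; becomes NaN when the rank had no samples of this label.
    local_mean = local_rewards.mean()
    # Pre-allocate one zero scalar per rank with matching dtype and device.
    gathered = [
        torch.tensor(0, dtype=local_mean.dtype, device=local_mean.device)
        for _ in range(dist.get_world_size())
    ]
    dist.all_gather(gathered, local_mean)
    # Drop NaN contributions, then average what remains.
    valid = [r for r in gathered if not r.isnan()]
    return torch.stack(valid).mean() if valid else local_mean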