[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci

parent 99ba48fc40
commit 73bdfd8891

@@ -119,7 +119,9 @@ class NaiveExperienceMaker(ExperienceMaker):
         generate_kwargs["stop_token_ids"] = stop_token_ids
         # Hack: manually initialize cache_position to address transformer version conflict
         if generate_kwargs.get("cache_position", None) is None and generate_kwargs.get("use_cache", False) is True:
-            generate_kwargs["cache_position"] = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=input_ids.device)
+            generate_kwargs["cache_position"] = torch.arange(
+                0, input_ids.shape[1], dtype=torch.long, device=input_ids.device
+            )
         torch.manual_seed(41)  # for tp, guarantee the same input for reward model

         if self.use_grpo and self.num_generation > 1:
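
The hunk above only re-wraps an existing workaround: some transformers versions trip over a missing cache_position when use_cache is passed through generate_kwargs (the comment calls it a version conflict), so the experience maker pre-fills it with the prompt's token positions. Below is a minimal, standalone sketch of that guard; the fake input_ids and the kwarg values are placeholders for illustration, and in the real code path generate_kwargs is presumably handed on to the actor's generate call.

# Standalone sketch of the cache_position workaround shown in the diff above.
# input_ids and the generate_kwargs values are made up for illustration.
import torch

input_ids = torch.randint(0, 32000, (1, 16))  # stand-in prompt batch: 1 sequence, 16 tokens
generate_kwargs = {"use_cache": True, "max_new_tokens": 32}

# Same guard as the diff: only fill cache_position when KV caching is requested
# and the caller has not supplied one; positions are simply 0..prompt_len-1.
if generate_kwargs.get("cache_position", None) is None and generate_kwargs.get("use_cache", False) is True:
    generate_kwargs["cache_position"] = torch.arange(
        0, input_ids.shape[1], dtype=torch.long, device=input_ids.device
    )

print(generate_kwargs["cache_position"])  # tensor([ 0,  1, ..., 15])
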
@@ -193,12 +193,14 @@ class KTOTrainer(SLTrainer):
                 loss_mean = all_reduce_mean(tensor=loss)
                 chosen_reward_mean = chosen_rewards.mean()
                 chosen_rewards_list = [
-                    torch.tensor(0, dtype=chosen_reward_mean.dtype, device=loss.device) for _ in range(dist.get_world_size())
+                    torch.tensor(0, dtype=chosen_reward_mean.dtype, device=loss.device)
+                    for _ in range(dist.get_world_size())
                 ]
                 dist.all_gather(chosen_rewards_list, chosen_reward_mean)
                 rejected_reward_mean = rejected_rewards.mean()
                 rejected_rewards_list = [
-                    torch.tensor(0, dtype=rejected_reward_mean.dtype, device=loss.device) for _ in range(dist.get_world_size())
+                    torch.tensor(0, dtype=rejected_reward_mean.dtype, device=loss.device)
+                    for _ in range(dist.get_world_size())
                 ]
                 dist.all_gather(rejected_rewards_list, rejected_reward_mean)
                 chosen_rewards_list = [i for i in chosen_rewards_list if not i.isnan()]
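
The KTO hunk is likewise formatting-only, but the pattern it touches is worth spelling out: each rank computes a local mean reward, dist.all_gather collects one scalar per rank into a pre-allocated list, and NaN entries (presumably ranks whose batch had no valid samples of that type) are filtered out before any cross-rank averaging. The sketch below reproduces that pattern on its own; the gloo backend, the torchrun launch line, and the fabricated reward values are assumptions for illustration, not part of the trainer.

# Standalone sketch of the gather-and-filter pattern from the KTOTrainer hunk.
# Launch with e.g. `torchrun --nproc_per_node=2 gather_rewards.py` (file name is arbitrary).
# Reward values are fabricated; only the collective pattern mirrors the diff.
import torch
import torch.distributed as dist


def main():
    dist.init_process_group(backend="gloo")  # CPU-friendly backend for the sketch
    rank = dist.get_rank()

    # Pretend rank 1 saw no chosen samples this step, so its local mean is NaN.
    chosen_rewards = torch.tensor([0.3, 0.7]) if rank != 1 else torch.tensor([float("nan")])
    chosen_reward_mean = chosen_rewards.mean()

    # Pre-allocate one placeholder tensor per rank, then gather every rank's mean.
    chosen_rewards_list = [
        torch.tensor(0, dtype=chosen_reward_mean.dtype, device=chosen_reward_mean.device)
        for _ in range(dist.get_world_size())
    ]
    dist.all_gather(chosen_rewards_list, chosen_reward_mean)

    # Drop NaN entries (ranks without valid samples) before averaging across ranks.
    chosen_rewards_list = [r for r in chosen_rewards_list if not r.isnan()]
    if rank == 0 and chosen_rewards_list:
        print("global chosen reward mean:", torch.stack(chosen_rewards_list).mean().item())

    dist.destroy_process_group()


if __name__ == "__main__":
    main()
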
@@ -89,9 +89,7 @@ def train(args):
             actor = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)
             if args.rm_pretrain:
                 reward_model = RewardModel(args.rm_pretrain, trust_remote_code=True)
-            ref_model = AutoModelForCausalLM.from_pretrained(
-                args.pretrain, trust_remote_code=True
-            )
+            ref_model = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)

         if args.lora_config is not None:
             actor = convert_to_lora_module(actor, lora_config=lora_config)

@@ -102,9 +102,7 @@ def train(args):
             coordinator.print_on_master(msg="Flash-attention enabled successfully")
         else:
             actor = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)
-            ref_model = AutoModelForCausalLM.from_pretrained(
-                args.pretrain, trust_remote_code=True
-            )
+            ref_model = AutoModelForCausalLM.from_pretrained(args.pretrain, trust_remote_code=True)
             if not args.no_neural_reward_model:
                 reward_model = RewardModel(args.rm_pretrain, trust_remote_code=True)
             critic = Critic(args.rm_pretrain)
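
Both train(args) hunks simply collapse a from_pretrained call back onto one line. The surrounding setup is the usual RLHF arrangement: the actor and a reference model are two separate copies of the same pretrained checkpoint, while the reward model and critic are built from the RM checkpoint. Below is a hedged sketch of the actor/reference split using plain Hugging Face; the placeholder checkpoint name and the explicit freezing step are illustrative assumptions, and the ColossalAI RewardModel/Critic wrappers are not reproduced.

# Sketch of the actor / reference-model split seen in train(args).
# "gpt2" is a placeholder checkpoint; the real script reads args.pretrain.
from transformers import AutoModelForCausalLM

pretrain = "gpt2"

# Policy to be optimized, plus an identical copy kept as the frozen reference.
actor = AutoModelForCausalLM.from_pretrained(pretrain, trust_remote_code=True)
ref_model = AutoModelForCausalLM.from_pretrained(pretrain, trust_remote_code=True)

# Freezing the reference copy is an assumption about intent; this diff does not
# show how the real script handles it.
ref_model.eval()
for p in ref_model.parameters():
    p.requires_grad_(False)

print(sum(p.requires_grad for p in actor.parameters()), "trainable tensors in actor")
print(sum(p.requires_grad for p in ref_model.parameters()), "trainable tensors in ref_model")
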
@@ -631,7 +631,7 @@ for lora_rank in ${LORA_RANK[@]}; do
         done
     done
 done
-
+

 echo "[Test]: testing ORPO ..."
