fix style, add kto data sample

This commit is contained in:
YeAnbang
2024-07-18 08:38:56 +00:00
parent 845ea7214e
commit 544b7a38a1
11 changed files with 68 additions and 53 deletions

View File

@@ -180,7 +180,7 @@ class KTOTrainer(SLTrainer):
self.optimizer.zero_grad()
self.actor_scheduler.step()
# # sync
# sync
loss_mean = all_reduce_mean(tensor=loss)
chosen_rewards_mean = all_reduce_mean(tensor=chosen_rewards.mean())
rejected_rewards_mean = all_reduce_mean(tensor=rejected_rewards.mean())
@@ -297,7 +297,7 @@ class KTOTrainer(SLTrainer):
chosen_logprob, rejected_logprob, kl_logprob, ref_chosen_logprob, ref_rejected_logprob, ref_kl_logprob
)
# # sync
# sync
loss_mean = all_reduce_mean(tensor=loss)
chosen_rewards_mean = all_reduce_mean(tensor=chosen_rewards.mean())
rejected_rewards_mean = all_reduce_mean(tensor=rejected_rewards.mean())