[chatgpt] fix lora gemini conflict in RM training (#2984)

* fix lora bug

* polish

* fix lora gemini
Author: BlueRum
Date: 2023-03-03 15:58:16 +08:00
Committed by: GitHub
Parent: 19ad49fb3b
Commit: f5ca0397dd
3 changed files with 3 additions and 10 deletions

@@ -66,8 +66,6 @@ def train(args):
train_dataset = RewardDataset(train_data, tokenizer, max_len)
eval_dataset = RewardDataset(eval_data, tokenizer, max_len)
# batch_size here is expected to be C(k,2), where k is the number of responses per prompt
# limited by the format of the 'Dahoas/rm-static' dataset, it is better to use batch_size = 1
trainer = RewardModelTrainer(model=model,
strategy=strategy,
optim=optim,
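
For context on the batch-size comment above: pairwise reward-model training turns a prompt with k ranked responses into C(k,2) (chosen, rejected) comparison pairs, and 'Dahoas/rm-static' supplies a single chosen/rejected pair per prompt (k = 2), which is why a batch_size of 1 is suggested. Below is a minimal sketch of that pairing, using a hypothetical list of responses assumed to be ordered from best to worst; it is an illustration, not code from this commit.

from itertools import combinations

# Hypothetical example: k responses for one prompt, ordered from best to worst.
# Each pair of responses where one outranks the other forms a (chosen, rejected)
# comparison, so a single prompt contributes C(k, 2) items to the reward-model batch.
responses = ["answer A", "answer B", "answer C"]  # k = 3

pairs = list(combinations(responses, 2))  # earlier (better) response comes first in each pair
print(len(pairs))  # C(3, 2) = 3 pairs: (A, B), (A, C), (B, C)

With k = 2 the pairing collapses to a single comparison per prompt, matching the batch_size = 1 recommendation in the comment.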