diff --git a/applications/ChatGPT/chatgpt/nn/reward_model.py b/applications/ChatGPT/chatgpt/nn/reward_model.py
index baaa8b768..5108f61a6 100644
--- a/applications/ChatGPT/chatgpt/nn/reward_model.py
+++ b/applications/ChatGPT/chatgpt/nn/reward_model.py
@@ -23,7 +23,7 @@ class RewardModel(LoRAModule):
                  lora_rank: int = 0,
                  lora_train_bias: str = 'none') -> None:
         super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
-        self.body = model
+        self.model = model
         if value_head is not None:
             if value_head.out_features != 1:
                 raise ValueError("The value head of reward model's output dim should be 1!")
@@ -34,7 +34,7 @@ class RewardModel(LoRAModule):
         self.convert_to_lora()
 
     def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        outputs = self.body(sequences, attention_mask=attention_mask)
+        outputs = self.model(sequences, attention_mask=attention_mask)
         last_hidden_states = outputs['last_hidden_state']
         values = self.value_head(last_hidden_states)[:, :-1]
         value = values.mean(dim=1).squeeze(1)    # ensure shape is (B)
diff --git a/applications/ChatGPT/chatgpt/trainer/rm.py b/applications/ChatGPT/chatgpt/trainer/rm.py
index f9000eb7e..3286b8d8d 100644
--- a/applications/ChatGPT/chatgpt/trainer/rm.py
+++ b/applications/ChatGPT/chatgpt/trainer/rm.py
@@ -44,6 +44,8 @@ class RewardModelTrainer(ABC):
         self.eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size)
 
         self.model = strategy.setup_model(model)
+        if "DDP" in str(self.strategy):
+            self.model = self.model.module
         self.loss_fn = PairWiseLoss()
         self.optimizer = strategy.setup_optimizer(optim, self.model)
 
@@ -56,7 +58,7 @@
             # train
             if use_lora > 0:
                 print("Using Lora")
-                lora.mark_only_lora_as_trainable(self.model.body)
+                lora.mark_only_lora_as_trainable(self.model.model)
             else:
                 self.model.train()
diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/ChatGPT/examples/train_reward_model.py
index bf2071793..c17c6f393 100644
--- a/applications/ChatGPT/examples/train_reward_model.py
+++ b/applications/ChatGPT/examples/train_reward_model.py
@@ -61,8 +61,8 @@ def train(args):
 
     # prepare for data and dataset
     data = load_dataset(args.dataset)
-    train_data = data["train"].select(range(100))
-    eval_data = data['test'].select(range(5))
+    train_data = data["train"]
+    eval_data = data['test']
     train_dataset = RewardDataset(train_data, tokenizer, max_len)
     eval_dataset = RewardDataset(eval_data, tokenizer, max_len)
 
@@ -93,7 +93,7 @@ if __name__ == '__main__':
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--dataset', type=str, default='Dahoas/rm-static')
     parser.add_argument('--save_path', type=str, default='rm_ckpt.pth')
-    parser.add_argument('--max_epochs', type=int, default=10)
+    parser.add_argument('--max_epochs', type=int, default=1)
     parser.add_argument('--batch_size', type=int, default=4)
     parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
     args = parser.parse_args()
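
As an illustrative aside (not part of the patch): the `if "DDP" in str(self.strategy): self.model = self.model.module` hunk is needed because a distributed wrapper hides the inner module's attributes, so `self.model.model` (the backbone attribute renamed from `body`) would otherwise be unreachable when `lora.mark_only_lora_as_trainable` is called. Below is a minimal sketch of that unwrapping pattern. It assumes only that `torch` is installed; `TinyRewardModel` is a hypothetical stand-in, not the repo's `RewardModel`, and `nn.DataParallel` is used instead of DDP purely because it needs no process group while exposing the wrapped model the same way, via `.module`.

```python
# Sketch only: hypothetical stand-in classes, not the ColossalAI ChatGPT implementation.
import torch
import torch.nn as nn


class TinyRewardModel(nn.Module):
    """Hypothetical stand-in: a backbone stored as `self.model` plus a scalar value head."""

    def __init__(self) -> None:
        super().__init__()
        self.model = nn.Linear(8, 8)       # backbone attribute (renamed from `body` in the patch)
        self.value_head = nn.Linear(8, 1)  # reward head with out_features == 1

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (B, 8) -> (B,) reward values
        return self.value_head(self.model(x)).squeeze(-1)


# DataParallel stores the wrapped module under `.module`, just like a DDP wrapper would.
wrapped = nn.DataParallel(TinyRewardModel())

# The wrapper itself does not expose the inner model's attributes,
# so `wrapped.model` or `wrapped.value_head` would raise AttributeError.
print(hasattr(wrapped, "value_head"))    # False

# Unwrapping first (what `self.model = self.model.module` does in the trainer)
# makes the renamed backbone reachable again as `self.model.model`.
inner = wrapped.module
print(type(inner.model).__name__)        # Linear
print(inner(torch.randn(2, 8)).shape)    # torch.Size([2])
```

After unwrapping, `self.model.model` is exactly what `lora.mark_only_lora_as_trainable` receives in the second rm.py hunk, which is why the rename in reward_model.py and the DDP unwrap land in the same patch.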