From 9998d5ef64cb809e9858681f10b5307da1ff9196 Mon Sep 17 00:00:00 2001
From: Yuanchen <70520919+chengeharrison@users.noreply.github.com>
Date: Wed, 22 Mar 2023 19:09:39 +0800
Subject: [PATCH] [chatgpt]add reward model code for deberta (#3199)

Co-authored-by: Yuanchen Xu
---
 .../chatgpt/models/deberta/__init__.py       |  4 ++
 .../chatgpt/models/deberta/deberta_critic.py | 36 ++++++++++++++++++
 .../chatgpt/models/deberta/deberta_rm.py     | 37 +++++++++++++++++++
 .../ChatGPT/examples/requirements.txt        |  1 +
 applications/ChatGPT/examples/test_ci.sh     |  6 +++
 .../ChatGPT/examples/train_reward_model.py   |  9 ++++-
 applications/ChatGPT/examples/train_rm.sh    |  4 +-
 7 files changed, 93 insertions(+), 4 deletions(-)
 create mode 100644 applications/ChatGPT/chatgpt/models/deberta/__init__.py
 create mode 100644 applications/ChatGPT/chatgpt/models/deberta/deberta_critic.py
 create mode 100644 applications/ChatGPT/chatgpt/models/deberta/deberta_rm.py

diff --git a/applications/ChatGPT/chatgpt/models/deberta/__init__.py b/applications/ChatGPT/chatgpt/models/deberta/__init__.py
new file mode 100644
index 000000000..b66888f34
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/models/deberta/__init__.py
@@ -0,0 +1,4 @@
+from .deberta_critic import DebertaCritic
+from .deberta_rm import DebertaRM
+
+__all__ = ['DebertaCritic', 'DebertaRM']
diff --git a/applications/ChatGPT/chatgpt/models/deberta/deberta_critic.py b/applications/ChatGPT/chatgpt/models/deberta/deberta_critic.py
new file mode 100644
index 000000000..e84c1dbd8
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/models/deberta/deberta_critic.py
@@ -0,0 +1,36 @@
+from typing import Optional
+
+import torch.nn as nn
+from transformers import DebertaV2Config, DebertaV2Model
+
+from ..base import Critic
+
+
+class DebertaCritic(Critic):
+    """
+    Deberta Critic model.
+
+    Args:
+        pretrained (str): Pretrained model name or path.
+        config (DebertaV2Config): Model config.
+        checkpoint (bool): Enable gradient checkpointing.
+        lora_rank (int): Rank of the LoRA decomposition.
+        lora_train_bias (str): LoRA bias training mode.
+    """
+
+    def __init__(self,
+                 pretrained: Optional[str] = None,
+                 config: Optional[DebertaV2Config] = None,
+                 checkpoint: bool = False,
+                 lora_rank: int = 0,
+                 lora_train_bias: str = 'none') -> None:
+        if pretrained is not None:
+            model = DebertaV2Model.from_pretrained(pretrained)
+        elif config is not None:
+            model = DebertaV2Model(config)
+        else:
+            model = DebertaV2Model(DebertaV2Config())
+        if checkpoint:
+            model.gradient_checkpointing_enable()
+        value_head = nn.Linear(model.config.hidden_size, 1)
+        super().__init__(model, value_head, lora_rank, lora_train_bias)
diff --git a/applications/ChatGPT/chatgpt/models/deberta/deberta_rm.py b/applications/ChatGPT/chatgpt/models/deberta/deberta_rm.py
new file mode 100644
index 000000000..2448c879e
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/models/deberta/deberta_rm.py
@@ -0,0 +1,37 @@
+from typing import Optional
+
+import torch.nn as nn
+from transformers import DebertaV2Config, DebertaV2Model
+
+from ..base import RewardModel
+
+
+class DebertaRM(RewardModel):
+    """
+    Deberta Reward model.
+
+    Args:
+        pretrained (str): Pretrained model name or path.
+        config (DebertaV2Config): Model config.
+        checkpoint (bool): Enable gradient checkpointing.
+        lora_rank (int): Rank of the LoRA decomposition.
+        lora_train_bias (str): LoRA bias training mode.
+    """
+
+    def __init__(self,
+                 pretrained: str = None,
+                 config: Optional[DebertaV2Config] = None,
+                 checkpoint: bool = False,
+                 lora_rank: int = 0,
+                 lora_train_bias: str = 'none') -> None:
+        if pretrained is not None:
+            model = DebertaV2Model.from_pretrained(pretrained)
+        elif config is not None:
+            model = DebertaV2Model(config)
+        else:
+            model = DebertaV2Model(DebertaV2Config())
+        if checkpoint:
+            model.gradient_checkpointing_enable()
+        value_head = nn.Linear(model.config.hidden_size, 1)
+        value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1))
+        super().__init__(model, value_head, lora_rank, lora_train_bias)
diff --git a/applications/ChatGPT/examples/requirements.txt b/applications/ChatGPT/examples/requirements.txt
index 6c5dac292..40e6edc7e 100644
--- a/applications/ChatGPT/examples/requirements.txt
+++ b/applications/ChatGPT/examples/requirements.txt
@@ -1 +1,2 @@
 pandas>=1.4.1
+sentencepiece
diff --git a/applications/ChatGPT/examples/test_ci.sh b/applications/ChatGPT/examples/test_ci.sh
index abc43ab1e..1d05c4c58 100755
--- a/applications/ChatGPT/examples/test_ci.sh
+++ b/applications/ChatGPT/examples/test_ci.sh
@@ -88,4 +88,10 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
     --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base'\
     --test True --lora_rank 4
 
+torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
+    --pretrain 'microsoft/deberta-v3-large' --model 'deberta' \
+    --strategy colossalai_zero2 --loss_fn 'log_sig'\
+    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base'\
+    --test True --lora_rank 4
+
 rm -rf ${BASE}/rm_ckpt.pt
diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/ChatGPT/examples/train_reward_model.py
index 47dd988b8..a9c844b7b 100644
--- a/applications/ChatGPT/examples/train_reward_model.py
+++ b/applications/ChatGPT/examples/train_reward_model.py
@@ -8,12 +8,13 @@ from chatgpt.models.base import RewardModel
 from chatgpt.models.bloom import BLOOMRM
 from chatgpt.models.gpt import GPTRM
 from chatgpt.models.opt import OPTRM
+from chatgpt.models.deberta import DebertaRM
 from chatgpt.trainer import RewardModelTrainer
 from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
 from datasets import load_dataset
 from random import randint
 from torch.optim import Adam
-from transformers import AutoTokenizer, BloomTokenizerFast
+from transformers import AutoTokenizer, BloomTokenizerFast, DebertaV2Tokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 from colossalai.nn.optimizer import HybridAdam
@@ -39,6 +40,8 @@ def train(args):
         model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     elif args.model == 'gpt2':
         model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+    elif args.model == 'deberta':
+        model = DebertaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
@@ -54,6 +57,8 @@ def train(args):
         tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+    elif args.model == 'deberta':
+        tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-large')
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
     max_len = args.max_len
@@ -119,7 +124,7 @@ if __name__ == '__main__':
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                         default='naive')
-    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt'], default='bloom')
+    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
     parser.add_argument('--need_optim_ckpt', type=bool, default=False)
diff --git a/applications/ChatGPT/examples/train_rm.sh b/applications/ChatGPT/examples/train_rm.sh
index 981b7a15f..4f9f55b6b 100755
--- a/applications/ChatGPT/examples/train_rm.sh
+++ b/applications/ChatGPT/examples/train_rm.sh
@@ -1,7 +1,7 @@
 set_n_least_used_CUDA_VISIBLE_DEVICES 1
 
-python train_reward_model.py --pretrain '/home/lczht/data2/bloom-560m' \
-                             --model 'bloom' \
+python train_reward_model.py --pretrain 'microsoft/deberta-v3-large' \
+                             --model 'deberta' \
                              --strategy naive \
                              --loss_fn 'log_exp'\
                              --save_path 'rmstatic.pt' \