[chat] remove naive strategy and split colossalai strategy (#4094)

* feat: remove on_learn_epoch fn as not used

* revert: add _on_learn_epoch fn

* to: remove the use of NaiveStrategy

* test: remove NaiveStrategy tests

* feat: remove NaiveStrategy

* style: modify comments and params

* feat: split ColossalAIStrategy into LowLevelZeroStrategy and GeminiStrategy

* fix: remove naive

* fix: align with modified colossal strategy

* fix: fix ddp _try_init_dist arg
This commit is contained in:
Wenhao Chen
2023-06-29 18:11:00 +08:00
committed by GitHub
parent b03d64d010
commit edd75a59ea
25 changed files with 323 additions and 349 deletions

View File

@@ -6,7 +6,7 @@ from coati.experience_maker import Experience, NaiveExperienceMaker
from coati.models.base import Actor, Critic
from coati.models.loss import PolicyLoss, ValueLoss
from coati.trainer.callbacks import Callback
from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy, Strategy
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy, Strategy
from torch.optim import Adam
from colossalai.nn.optimizer import HybridAdam
@@ -85,7 +85,7 @@ class DetachedPPOTrainer(DetachedTrainer):
evaluator = TrainerPerformanceEvaluator(actor_numel, critic_numel)
callbacks = callbacks + [evaluator]
if isinstance(self.strategy, ColossalAIStrategy):
if isinstance(self.strategy, (LowLevelZeroStrategy, GeminiStrategy)):
self.actor_optim = HybridAdam(self.actor.parameters(), lr=1e-7)
self.critic_optim = HybridAdam(self.critic.parameters(), lr=1e-7)
else:

View File

@@ -1,6 +1,6 @@
import os
from typing import Any, Callable, Dict, List, Optional
from collections import OrderedDict
from typing import Any, Callable, Dict, List, Optional
import torch
import torch.distributed as dist
@@ -10,7 +10,7 @@ from coati.models.gpt import GPTRM, GPTActor, GPTCritic
from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
from coati.models.opt import OPTRM, OPTActor, OPTCritic
from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM
from coati.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
from coati.utils import prepare_llama_tokenizer_and_embedding
from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer
@@ -76,18 +76,16 @@ def get_reward_model_from_args(model: str, pretrained: str = None, config=None):
def get_strategy_from_args(strategy: str):
if strategy == 'naive':
strategy_ = NaiveStrategy()
elif strategy == 'ddp':
if strategy == 'ddp':
strategy_ = DDPStrategy()
elif strategy == 'colossalai_gemini':
strategy_ = ColossalAIStrategy(stage=3, placement_policy='cuda', initial_scale=2**5)
strategy_ = GeminiStrategy(placement_policy='cuda', initial_scale=2**5)
elif strategy == 'colossalai_zero2':
strategy_ = ColossalAIStrategy(stage=2, placement_policy='cuda')
strategy_ = LowLevelZeroStrategy(stage=2, placement_policy='cuda')
elif strategy == 'colossalai_gemini_cpu':
strategy_ = ColossalAIStrategy(stage=3, placement_policy='cpu', initial_scale=2**5)
strategy_ = GeminiStrategy(placement_policy='cpu', initial_scale=2**5)
elif strategy == 'colossalai_zero2_cpu':
strategy_ = ColossalAIStrategy(stage=2, placement_policy='cpu')
strategy_ = LowLevelZeroStrategy(stage=2, placement_policy='cpu')
else:
raise ValueError(f'Unsupported strategy "{strategy}"')
return strategy_