Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-25 19:55:03 +00:00
update examples and sphinx docs for the new api (#63)
@@ -16,8 +16,8 @@ def build_from_config(module, config: dict):
         of the return object
     :type config: dict
     :raises AssertionError: Raises an AssertionError if `module` is not a class
-    :return: An object of :class:`module`
-    :rtype: :class:`module`
+    :return: An object of interest
+    :rtype: Object
     """
     assert inspect.isclass(module), 'module must be a class'
     return module(**config)
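For readers unfamiliar with this helper: build_from_config simply checks that `module` is a class and instantiates it with the config entries as keyword arguments, as the body above shows. A minimal sketch, assuming build_from_config is importable from colossalai.builder; the layer class and values are arbitrary examples, not part of the commit:

    import torch.nn as nn
    from colossalai.builder import build_from_config

    cfg = dict(in_features=16, out_features=4)
    layer = build_from_config(nn.Linear, cfg)   # equivalent to nn.Linear(**cfg)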
@@ -62,8 +62,8 @@ def build_layer(config):
     :param config: A python dict or a :class:`colossalai.context.Config` object
         containing information used in the construction of the return object
     :type config: dict or :class:`colossalai.context.Config`
-    :return: An object of :class:`nn.Module`
-    :rtype: :class:`nn.Module`
+    :return: An object of :class:`torch.nn.Module`
+    :rtype: :class:`torch.nn.Module`
     """
     return build_from_registry(config, LAYERS)

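build_layer, build_loss and build_model below all follow the same registry convention: the config's 'type' entry names a class registered in the corresponding registry (LAYERS, LOSSES, MODELS), and the remaining entries are forwarded to that class's constructor. A sketch under that assumption, where MyCustomLayer is a made-up layer registered by the user, not something shipped with the library:

    import torch.nn as nn
    from colossalai.builder import build_layer
    from colossalai.registry import LAYERS

    @LAYERS.register_module
    class MyCustomLayer(nn.Module):
        def __init__(self, dim):
            super().__init__()
            self.proj = nn.Linear(dim, dim)

        def forward(self, x):
            return self.proj(x)

    # 'type' selects the registered class, the rest become constructor kwargs
    layer = build_layer(dict(type='MyCustomLayer', dim=64))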
@@ -75,8 +75,8 @@ def build_loss(config):
     :param config: A python dict or a :class:`colossalai.context.Config` object
         containing information used in the construction of the return object
     :type config: dict or :class:`colossalai.context.Config`
-    :return: An object of :class:`torch.autograd.Function`
-    :rtype: :class:`torch.autograd.Function`
+    :return: An object of :class:`torch.nn.modules.loss._Loss`
+    :rtype: :class:`torch.nn.modules.loss._Loss`
     """
     return build_from_registry(config, LOSSES)

@@ -87,8 +87,8 @@ def build_model(config):
     :param config: A python dict or a :class:`colossalai.context.Config` object
         containing information used in the construction of the return object
     :type config: dict or :class:`colossalai.context.Config`
-    :return: An object of :class:`nn.Module`
-    :rtype: :class:`nn.Module`
+    :return: An object of :class:`torch.nn.Module`
+    :rtype: :class:`torch.nn.Module`
     """
     return build_from_registry(config, MODELS)

@@ -134,8 +134,8 @@ def build_gradient_handler(config, model, optimizer):
     :type model: :class:`nn.Module`
     :param optimizer: An optimizer object containing parameters for the gradient handler
     :type optimizer: :class:`torch.optim.Optimizer`
-    :return: An object of :class:`BaseGradientHandler`
-    :rtype: :class:`BaseGradientHandler`
+    :return: An object of :class:`colossalai.engine.BaseGradientHandler`
+    :rtype: :class:`colossalai.engine.BaseGradientHandler`
     """
     config_ = config.copy()
     config_['model'] = model
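As the body above shows, build_gradient_handler copies the config and injects the model and optimizer into it before dispatching to the registry, so the handler class receives them as constructor arguments; build_hooks and build_data_sampler below follow the same pattern with the trainer and dataset. A hedged sketch, where 'DataParallelGradientHandler' is assumed to be one of the registered handler names:

    import torch
    from colossalai.builder import build_gradient_handler

    model = torch.nn.Linear(8, 8)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    handler = build_gradient_handler(dict(type='DataParallelGradientHandler'), model, optimizer)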
@@ -151,8 +151,8 @@ def build_hooks(config, trainer):
     :type config: dict or :class:`colossalai.context.Config`
     :param trainer: A :class:`Trainer` object containing parameters for the hook
     :type trainer: :class:`Trainer`
-    :return: An object of :class:`BaseHook`
-    :rtype: :class:`BaseHook`
+    :return: An object of :class:`colossalai.trainer.hooks.BaseHook`
+    :rtype: :class:`colossalai.trainer.hooks.BaseHook`
     """
     config_ = config.copy()
     config_['trainer'] = trainer
@@ -182,8 +182,8 @@ def build_data_sampler(config, dataset):
     :param dataset: An object of :class:`torch.utils.data.Dataset` containing information
         used in the construction of the return object
     :type dataset: :class:`torch.utils.data.Dataset`
-    :return: An object of :class:`colossalai.nn.data.sampler.BaseSampler`
-    :rtype: :class:`colossalai.nn.data.sampler.BaseSampler`
+    :return: An object of :class:`colossalai.utils.data_sampler.BaseSampler`
+    :rtype: :class:`colossalai.utils.data_sampler.BaseSampler`
     """
     config_ = config.copy()
     config_['dataset'] = dataset
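A corresponding sketch for build_data_sampler; the registered name 'DataParallelSampler' and its keyword arguments are assumptions, and the sampler would normally be built after the ColossalAI distributed context has been launched:

    import torch
    from torch.utils.data import TensorDataset
    from colossalai.builder import build_data_sampler

    dataset = TensorDataset(torch.randn(100, 8), torch.randint(0, 2, (100,)))
    sampler = build_data_sampler(dict(type='DataParallelSampler', shuffle=True), dataset)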
@@ -200,10 +200,6 @@ def build_lr_scheduler(config, optimizer):
     :param optimizer: An optimizer object containing parameters for the learning rate
         scheduler
     :type optimizer: :class:`torch.optim.Optimizer`
-    :param total_steps: Number of total steps of the learning rate scheduler
-    :type total_steps: int
-    :param num_steps_per_epoch: number of steps per epoch of the learning rate scheduler
-    :type num_steps_per_epoch: int
     :return: An object of :class:`torch.optim.lr_scheduler`
     :rtype: :class:`torch.optim.lr_scheduler`
     """
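The four removed lines drop total_steps and num_steps_per_epoch from the docstring, matching the signature in the hunk header, which now only takes the config and the optimizer; any step counts travel inside the config itself. A hedged sketch, where the scheduler name 'CosineAnnealingLR' and its total_steps entry are assumptions about what is registered in the LR scheduler registry:

    import torch
    from colossalai.builder import build_lr_scheduler

    model = torch.nn.Linear(8, 8)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    lr_scheduler = build_lr_scheduler(dict(type='CosineAnnealingLR', total_steps=1000), optimizer)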
@@ -151,6 +151,28 @@ def _partition_balanced(weights, pipeline_parallel_size, num_chunks):


+class PipelineModelInitializer():
+    """An initializer to split the model into different stages for pipeline parallelism.
+
+    An example of the model config is shown below. The class VisionTransformerFromConfig should
+    inherit colossalai.nn.model.ModelFromConfig to allow this initializer to build the model from a sequence
+    of layer configurations.
+
+    model_config = dict(
+        type='VisionTransformerFromConfig',
+        embedding_cfg=dict(...),
+        ...
+    )
+
+    :param config: configuration of the model
+    :type config: dict
+    :param num_chunks: the number of chunks you want to have on the current stage. This value should be 1
+        in most cases unless you are using virtual pipeline parallelism.
+    :type num_chunks: int
+    :param verbose: whether to print the logs
+    :type verbose: bool
+    """
+
     def __init__(self, config, num_chunks, verbose=False):
         self.num_chunks = num_chunks
         self.ori_model = build_model(config)
@@ -161,6 +183,13 @@ class PipelineModelInitializer():
         self._logger.info(f"The total length of layers is {layer_length}", ranks=[0])

     def initialize(self, partition_method='parameter'):
+        """Initialize the model object from the config passed
+
+        :param partition_method: this parameter determines how you want to split your model layers into stages,
+            you can set it as 'layer' or 'parameter'
+        :type partition_method: str
+
+        """
         # Some space for initializing communication groups
         self._interval = None
         self._partition_layers(method=partition_method)
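Putting the two hunks together, the intended workflow appears to be: construct the initializer from a model config and the number of chunks, then call initialize() with a partition method. A sketch under those assumptions; the import path, the elided layer configs and the exact return value of initialize() are not spelled out in this diff:

    from colossalai.builder import PipelineModelInitializer

    model_config = dict(type='VisionTransformerFromConfig')  # plus the embedding/attention/... layer configs shown in the class docstring
    initializer = PipelineModelInitializer(model_config, num_chunks=1, verbose=True)
    model = initializer.initialize(partition_method='parameter')  # 'layer' splits by layer count instead of parameter count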
@@ -183,7 +212,7 @@ class PipelineModelInitializer():
             # print_rank_0(param_counts)
             self.parts = _partition_balanced(param_counts, pipeline_parallel_size, self.num_chunks)
         else:
-            assert method == 'layer', "Method should be a pre-set string"
+            raise ValueError("Method should be a pre-set string in [layer, parameter]")

         # Display the partition
         if gpc.get_global_rank() == 0 and self.verbose:
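For context on what _partition_balanced is being asked to do in the 'parameter' branch above: it takes the per-layer parameter counts and splits the layer sequence into pipeline_parallel_size * num_chunks contiguous parts of roughly equal total weight. The toy function below only illustrates that idea with a naive cut at multiples of the average weight; it is not the algorithm used in the repository:

    def naive_balanced_partition(weights, num_parts):
        # Cut the sequence of weights where the running sum crosses multiples of total / num_parts.
        total = sum(weights)
        target = total / num_parts
        parts, current, acc = [], [], 0.0
        for i, w in enumerate(weights):
            current.append(i)
            acc += w
            if acc >= target * (len(parts) + 1) and len(parts) < num_parts - 1:
                parts.append(current)
                current = []
        parts.append(current)
        return parts

    print(naive_balanced_partition([2, 2, 3, 3, 2, 2], num_parts=2))  # [[0, 1, 2], [3, 4, 5]]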