mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-22 09:59:38 +00:00
Refactored docstring to google style
This commit is contained in:
@@ -15,8 +15,12 @@ class BaseSchedule(ABC):
|
||||
"""A basic helper class to control the process of training or evaluation.
|
||||
It is mainly composed of forward_backward_step for gradient backward and
|
||||
optimizer_step for parameters update.
|
||||
For the convenience to enable FP16, we aggreate all codes that contain the
|
||||
For the convenience to enable FP16, we aggregate all codes that contain the
|
||||
control of FP16 in class schedule.
|
||||
|
||||
Args:
|
||||
batch_data_process_func (Callable, optional): The preprocessing function which receives a batch of data,
|
||||
and it will be executed in load_batch.
|
||||
"""
|
||||
|
||||
def __init__(self, batch_data_process_func: Callable = None):
|
||||
@@ -46,13 +50,12 @@ class BaseSchedule(ABC):
|
||||
"""Loads a batch from data iterator. It returns the data and labels which are
|
||||
already on the same GPU as the model.
|
||||
|
||||
:param data_iter: Data iterator from which get a batch of data
|
||||
:type data_iter: DataIter
|
||||
:param to_gpu: Whether the data should be moved to GPU
|
||||
:type to_gpu: bool, optional
|
||||
Args:
|
||||
data_iter (Iterable): Data iterator from which to get a batch of data, obtained by calling iter(dataloader).
|
||||
to_gpu (bool, optional): Whether the data should be moved to GPU
|
||||
|
||||
:return: (data, label)
|
||||
:rtype: (:class:`Tensor`, :class:`torch.Tensor`)
|
||||
Returns:
|
||||
Tuple (:class:`Tensor`, :class:`torch.Tensor`): A tuple of (data, label).
|
||||
"""
|
||||
if data_iter is None:
|
||||
raise RuntimeError('Dataloader is not defined.')
|
||||
@@ -87,16 +90,12 @@ class BaseSchedule(ABC):
|
||||
):
|
||||
"""The process function over a batch of dataset for training or evaluation.
|
||||
|
||||
:param engine: Colossalai training engine
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param data_iter: Data iterator from which get a batch of data
|
||||
:type data_iter: DataIter
|
||||
:param forward_only: If True, the process won't include backward
|
||||
:type forward_only: bool
|
||||
:param return_loss: If False, the loss won't be returned
|
||||
:type return_loss: bool, optional
|
||||
:param return_output_label: If False, the output and label won't be returned
|
||||
:type return_output_label: bool, optional
|
||||
Args:
|
||||
engine (colossalai.engine.Engine): Colossalai engine for training and inference.
|
||||
data_iter (Iterable): Data iterator from which to get a batch of data, obtained by calling iter(dataloader).
|
||||
forward_only (bool): If True, the process won't include backward.
|
||||
return_loss (bool, optional): If False, the loss won't be returned.
|
||||
return_output_label (bool, optional): If False, the output and label won't be returned.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
@@ -15,6 +15,10 @@ class NonPipelineSchedule(BaseSchedule):
|
||||
During one process, it loads a batch of dataset and feeds it to the model.
|
||||
After getting the output and calculating the loss, it will use :meth:`step`
|
||||
to update the parameters if it is in training mode.
|
||||
|
||||
Args:
|
||||
batch_data_process_func (Callable, optional): The preprocessing function which receives a batch of data,
|
||||
and it will be executed in load_batch.
|
||||
"""
|
||||
|
||||
def forward_backward_step(self,
|
||||
@@ -23,22 +27,19 @@ class NonPipelineSchedule(BaseSchedule):
|
||||
forward_only: bool = False,
|
||||
return_loss: bool = True,
|
||||
return_output_label: bool = True):
|
||||
"""The process function that loads loads a batch of dataset and feeds it to the model.
|
||||
"""The process function that loads a batch of dataset and feeds it to the model.
|
||||
The returned labels and loss will be None if :attr:`return_loss` is False.
|
||||
|
||||
:param engine: Model for training and inference
|
||||
:param data_iter: Data iterator of the dataloader, e.g. iter(dataloader)
|
||||
:param forward_only: If True, the model is run for the forward pass, else back propagation will be executed
|
||||
:param return_loss: Loss will be returned if True
|
||||
:param return_output_label: Output and label will be returned if True
|
||||
:type engine: Iterator
|
||||
:type data_iter: Iterator
|
||||
:type forward_only: bool, optional
|
||||
:type return_loss: bool, optional
|
||||
:type return_output_label: bool, optional
|
||||
Args:
|
||||
engine (colossalai.engine.Engine): Colossalai engine for training and inference.
|
||||
data_iter (Iterable): Dataloader in the form of an iterator, obtained by calling iter(dataloader).
|
||||
forward_only (bool, optional):
|
||||
If True, the model is run for the forward pass, else back propagation will be executed.
|
||||
return_loss (bool, optional): Loss will be returned if True.
|
||||
return_output_label (bool, optional): Output and label will be returned if True.
|
||||
|
||||
:return: (output, label, loss)
|
||||
:rtype: Tuple[:class:`torch.Tensor`]
|
||||
Returns:
|
||||
Tuple[:class:`torch.Tensor`]: A tuple of (output, label, loss), loss and label could be None.
|
||||
"""
|
||||
assert forward_only or return_loss, \
|
||||
"The argument 'return_loss' has to be True when 'forward_only' is False, but got False."
|
||||
|
@@ -41,14 +41,13 @@ class PipelineSchedule(BaseSchedule):
|
||||
It uses non-interleaved 1F1B strategy. Other properties are similar to
|
||||
:class:`NonPipelineSchedule`.
|
||||
|
||||
:param num_microbatches: The number of microbatches
|
||||
:type num_microbatches: int
|
||||
:param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
|
||||
:type batch_data_process_func: Callable, optional
|
||||
:param tensor_shape: Specified shape in pipeline communication
|
||||
:type tensor_shape: torch.Size, optional
|
||||
:param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
|
||||
:type scatter_gather_tensors: bool, optional
|
||||
Args:
|
||||
num_microbatches (int): The number of microbatches.
|
||||
batch_data_process_func (Callable, optional):
|
||||
The preprocessing function which receives a batch of data, and it will be executed in `load_batch`.
|
||||
tensor_shape (torch.Size, optional): Specified shape in pipeline communication.
|
||||
scatter_gather_tensors (bool, optional):
|
||||
If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@@ -131,19 +130,14 @@ class PipelineSchedule(BaseSchedule):
|
||||
is obtained from data_iterator, otherwise the passed-in input_tensor is used.
|
||||
Returns output tensor. This is a helper function and can be ignored by users.
|
||||
|
||||
:param engine: Your engine object
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param input_tensor: Input tensor for this pipeline stage
|
||||
:type input_tensor: :class:`torch.Tensor`
|
||||
:param return_tensors: A list of tensors to return
|
||||
:type return_tensors: List[:class:`torch.Tensor`]
|
||||
:param return_output_label: Whether returns output labels
|
||||
:type return_output_label: bool, optional
|
||||
:param accum_loss: Where accumulated loss stores
|
||||
:type accum_loss: optional
|
||||
|
||||
:return: output or the loss value of the current pipeline stage
|
||||
:rtype: :class:`torch.Tensor`
|
||||
Args:
|
||||
engine (colossalai.engine.Engine): Colossalai engine for training and inference.
|
||||
input_tensor (:class:`torch.Tensor`): Input tensor for this pipeline stage.
|
||||
return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.
|
||||
return_output_label (bool, optional): Whether to return output labels.
|
||||
accum_loss (optional): Where the accumulated loss is stored.
|
||||
Returns:
|
||||
:class:`torch.Tensor`: output or the loss value of the current pipeline stage.
|
||||
"""
|
||||
data, label = self.load_micro_batch()
|
||||
output_tensor = self._call_engine(engine.model, input_tensor, data)
|
||||
@@ -173,17 +167,14 @@ class PipelineSchedule(BaseSchedule):
|
||||
Returns the gradients with respect to the input tensor (None if first stage).
|
||||
This is a helper function and can be ignored by users.
|
||||
|
||||
:param engine: your engine object
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param input_tensor: input tensor for this pipeline stage
|
||||
:type input_tensor: :class:`torch.Tensor`
|
||||
:param output_tensor: output tensor for this pipeline stage
|
||||
:type output_tensor: :class:`torch.Tensor`
|
||||
:param output_tensor_grad: gradient of output tensor for this pipeline stage
|
||||
:type output_tensor_grad: :class:`torch.Tensor`
|
||||
Args:
|
||||
engine (colossalai.engine.Engine): Colossalai engine for training and inference.
|
||||
input_tensor (:class:`torch.Tensor`): input tensor for this pipeline stage.
|
||||
output_tensor (:class:`torch.Tensor`): output tensor for this pipeline stage.
|
||||
output_tensor_grad (:class:`torch.Tensor`): gradient of output tensor for this pipeline stage.
|
||||
|
||||
:return: gradient of input tensor
|
||||
:rtype: :class:`torch.Tensor`
|
||||
Returns:
|
||||
:class:`torch.Tensor`: gradient of input tensor.
|
||||
"""
|
||||
|
||||
# Retain the grad on the input_tensor.
|
||||
@@ -207,19 +198,16 @@ class PipelineSchedule(BaseSchedule):
|
||||
"""Runs non-interleaved 1F1B schedule, with communication between pipeline stages.
|
||||
Returns a tuple with losses if the last stage, an empty tuple otherwise.
|
||||
|
||||
:param engine: Your engine object
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param data_iter: Dataloader as the form of an iterator, obtained by calling iter(dataloader)
|
||||
:type data_iter: Iterable
|
||||
:param forward_only: Whether run forward step only. Default is false. If true, no backward will be run.
|
||||
:type forward_only: bool
|
||||
:param return_loss: Whether returns the loss value. Default is true.
|
||||
:type return_loss: bool
|
||||
:param return_output_label: If False, the output and label won't be returned
|
||||
:type return_output_label: bool
|
||||
Args:
|
||||
engine (colossalai.engine.Engine): Colossalai engine for training and inference.
|
||||
data_iter (Iterable): Dataloader in the form of an iterator, obtained by calling iter(dataloader).
|
||||
forward_only (bool, optional):
|
||||
Whether to run the forward step only. Default is false. If true, no backward will be run.
|
||||
return_loss (bool, optional): Whether to return the loss value. Default is true.
|
||||
return_output_label (bool, optional): If False, the output and label won't be returned.
|
||||
|
||||
:return: (output, label, loss)
|
||||
:rtype: Tuple[:class:`torch.Tensor`]
|
||||
Returns:
|
||||
Tuple[:class:`torch.Tensor`]: A tuple of (output, label, loss), loss and label could be None.
|
||||
"""
|
||||
|
||||
assert forward_only or return_loss, \
|
||||
@@ -354,16 +342,14 @@ class InterleavedPipelineSchedule(PipelineSchedule):
|
||||
It uses interleaved 1F1B strategy. Other properties are similar to
|
||||
:class:`NonPipelineSchedule`.
|
||||
|
||||
:param num_microbatches: The number of microbatches
|
||||
:type num_microbatches: int
|
||||
:param num_model_chunks: The number of model chunks
|
||||
:type num_model_chunks: int
|
||||
:param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
|
||||
:type batch_data_process_func: Callable, optional
|
||||
:param tensor_shape: Specified shape in pipeline communication
|
||||
:type tensor_shape: torch.Size, optional
|
||||
:param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
|
||||
:type scatter_gather_tensors: bool, optional
|
||||
Args:
|
||||
num_microbatches (int): The number of microbatches.
|
||||
num_model_chunks (int): The number of model chunks.
|
||||
batch_data_process_func (Callable, optional):
|
||||
The preprocessing function which receives a batch of data, and it will be executed in `load_batch`.
|
||||
tensor_shape (torch.Size, optional): Specified shape in pipeline communication.
|
||||
scatter_gather_tensors (bool, optional):
|
||||
If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization.
|
||||
"""
|
||||
assert num_microbatches % gpc.get_world_size(ParallelMode.PIPELINE) == 0, \
|
||||
'num_microbatches must be an integer multiple of pipeline parallel world size'
|
||||
@@ -408,6 +394,16 @@ class InterleavedPipelineSchedule(PipelineSchedule):
|
||||
"""Forward step for passed-in model. If it is the first stage, the input tensor
|
||||
is obtained from data_iterator, otherwise the passed-in input_tensor is used.
|
||||
Returns output tensor. This is a helper function and can be ignored by users.
|
||||
|
||||
Args:
|
||||
engine (colossalai.engine.Engine): Colossalai engine for training and inference.
|
||||
model_chunk_id (int): The id of model chunks.
|
||||
input_tensor (:class:`torch.Tensor`): Input tensor for this pipeline stage.
|
||||
return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.
|
||||
return_output_label (bool, optional): Whether to return output labels.
|
||||
accum_loss (optional): Where the accumulated loss is stored.
|
||||
Returns:
|
||||
:class:`torch.Tensor`: output or the loss value of the current pipeline stage.
|
||||
"""
|
||||
data, label = self.load_micro_batch(model_chunk_id)
|
||||
output_tensor = self._call_engine(engine.model[model_chunk_id], input_tensor, data)
|
||||
@@ -435,18 +431,17 @@ class InterleavedPipelineSchedule(PipelineSchedule):
|
||||
"""Run interleaved 1F1B schedule (model split into model chunks), with
|
||||
communication between pipeline stages as needed.
|
||||
|
||||
Returns dictionary with losses if the last stage, empty dict otherwise.
|
||||
Args:
|
||||
engine (colossalai.engine.Engine): Colossalai engine for training and inference.
|
||||
data_iter (Iterable): Dataloader in the form of an iterator, obtained by calling iter(dataloader).
|
||||
forward_only (bool, optional):
|
||||
Whether to run the forward step only. Default is false. If true, no backward will be run.
|
||||
return_loss (bool, optional): Whether to return the loss value. Default is true.
|
||||
return_output_label (bool, optional): If False, the output and label won't be returned.
|
||||
|
||||
:param engine: Your engine object
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param data_iter: Dataloader as the form of an iterator, obtained by calling iter(dataloader)
|
||||
:type data_iter: Iterable
|
||||
:param forward_only: Whether run forward step only. Default is false. If true, no backward will be run.
|
||||
:type forward_only: bool
|
||||
:param return_loss: Whether returns the loss value. Default is true.
|
||||
:type return_loss: bool
|
||||
:param return_output_label: If False, the output and label won't be returned
|
||||
:type return_output_label: bool
|
||||
Returns:
|
||||
Tuple[:class:`torch.Tensor`]: A tuple of (output, label, loss), loss and label could be None.
|
||||
The loss would be returned only in the last stage.
|
||||
"""
|
||||
assert forward_only or return_loss, \
|
||||
'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'
|
||||
|
Reference in New Issue
Block a user