Fixed docstring in colossalai (#171)

This commit is contained in:
HELSON
2022-01-21 10:44:30 +08:00
committed by GitHub
parent e2089c5c15
commit 0f8c7f9804
77 changed files with 983 additions and 603 deletions

View File

@@ -2,4 +2,4 @@ from ._base_schedule import BaseSchedule
from ._pipeline_schedule import PipelineSchedule, InterleavedPipelineSchedule
from ._non_pipeline_schedule import NonPipelineSchedule
__all__ = ['BaseSchedule', 'PipelineSchedule', 'NonPipelineSchedule', 'InterleavedPipelineSchedule']
__all__ = ['BaseSchedule', 'NonPipelineSchedule', 'PipelineSchedule', 'InterleavedPipelineSchedule']

View File

@@ -5,12 +5,13 @@ from abc import ABC, abstractmethod
import torch
from typing import Iterable, Callable
from typing import Iterable, Callable
from .._base_engine import Engine
from colossalai.logging import get_dist_logger
from colossalai.utils import get_current_device
from colossalai.nn.layer import split_batch
class BaseSchedule(ABC):
"""A basic helper class to control the process of training or evaluation.
It mainly consists of forward_backward_step for gradient backward and
@@ -46,6 +47,11 @@ class BaseSchedule(ABC):
"""Loads a batch from data iterator. It returns the data and labels which are
already on the same GPU as the model.
:param data_iter: Data iterator from which to get a batch of data
:type data_iter: DataIter
:param to_gpu: Whether the data should be moved to GPU
:type to_gpu: bool, optional
:return: (data, label)
:rtype: (:class:`torch.Tensor`, :class:`torch.Tensor`)
"""
@@ -62,13 +68,12 @@ class BaseSchedule(ABC):
if isinstance(data, torch.Tensor):
self.batch_size = data.size(0)
else:
self.batch_size = next(iter(data.values())).size(0)
self.batch_size = next(iter(data.values())).size(0)
data, label = split_batch(data), split_batch(label)
if to_gpu:
return self._move_to_device(data), self._move_to_device(label)
return data, label
def pre_processing(self, engine: Engine):
"""To perform actions before running the schedule.
"""
@@ -85,11 +90,15 @@ class BaseSchedule(ABC):
"""The process function over a batch of dataset for training or evaluation.
:param engine: Colossalai training engine
:param inputs: input data
:param labels: ground truth
:type engine: colossalai.engine.Engine
:param data_iter: Data iterator from which to get a batch of data
:type data_iter: DataIter
:param forward_only: If True, the process won't include backward
:type forward_only: bool
:param return_loss: If False, the loss won't be returned
:type return_loss: bool, optional
:param return_output_label: If False, the output and label won't be returned
:type return_output_label: bool, optional
"""
pass
@@ -105,7 +114,7 @@ class BaseSchedule(ABC):
assert isinstance(outputs, (torch.Tensor, list, tuple)
), f'Expect output of model is (torch.Tensor, list, tuple), got {type(outputs)}'
if isinstance(outputs, torch.Tensor):
outputs = (outputs, )
outputs = (outputs,)
if isinstance(labels, torch.Tensor):
return engine.criterion(*outputs, labels)
else:

View File

@@ -15,10 +15,6 @@ class NonPipelineSchedule(BaseSchedule):
During one process, it loads a batch of dataset and feeds it to the model.
After getting the output and calculating the loss, it will use :meth:`step`
to update the parameters if it is in training mode.
:param amp_type: The type of automatic mixed precision
:param amp_config: The configuration of automatic mixed precision
:type amp_type: AMP_TYPE
:type amp_config: dict
"""
def forward_backward_step(self,
@@ -29,6 +25,7 @@ class NonPipelineSchedule(BaseSchedule):
return_output_label: bool = True):
"""The process function that loads loads a batch of dataset and feeds it to the model.
The returned labels and loss will be None if :attr:`return_loss` is False.
:param engine: Model for training and inference
:param data_iter: Data iterator of the dataloader, e.g. iter(dataloader)
:param forward_only: If True, the model is run for the forward pass, else back propagation will be executed

View File

@@ -44,9 +44,11 @@ class PipelineSchedule(BaseSchedule):
:param num_microbatches: The number of microbatches
:type num_microbatches: int
:param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
:type batch_data_process_func: Callable
:type batch_data_process_func: Callable, optional
:param tensor_shape: Specified shape in pipeline communication
:type tensor_shape: torch.Size, optional
:param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
:type scatter_gather_tensors: bool
:type scatter_gather_tensors: bool, optional
"""
def __init__(self,
@@ -130,12 +132,16 @@ class PipelineSchedule(BaseSchedule):
is obtained from data_iterator, otherwise the passed-in input_tensor is used.
Returns output tensor. This is a helper function and can be ignored by users.
:param engine: your engine object
:param engine: Your engine object
:type engine: colossalai.engine.Engine
:param input_tensor: input tensor for this pipeline stage
:param input_tensor: Input tensor for this pipeline stage
:type input_tensor: :class:`torch.Tensor`
:param return_tensors: a list of tensors to return
:param return_tensors: A list of tensors to return
:type return_tensors: List[:class:`torch.Tensor`]
:param return_output_label: Whether to return output labels
:type return_output_label: bool, optional
:param accum_loss: Where the accumulated loss is stored
:type accum_loss: optional
:return: output or the loss value of the current pipeline stage
:rtype: :class:`torch.Tensor`
@@ -205,13 +211,13 @@ class PipelineSchedule(BaseSchedule):
"""Runs non-interleaved 1F1B schedule, with communication between pipeline stages.
Returns a tuple with losses if the last stage, an empty tuple otherwise.
:param engine: your engine object
:param engine: Your engine object
:type engine: colossalai.engine.Engine
:param data_iter: dataloader as the form of an iterator, obtained by calling iter(dataloader)
:param data_iter: Dataloader in the form of an iterator, obtained by calling iter(dataloader)
:type data_iter: Iterable
:param forward_only: whether run forward step only. Default is false. If true, no backward will be run.
:param forward_only: Whether to run the forward step only. Default is false. If true, no backward will be run.
:type forward_only: bool
:param return_loss: whether returns the loss value. Default is true.
:param return_loss: Whether to return the loss value. Default is true.
:type return_loss: bool
:param return_output_label: If False, the output and label won't be returned
:type return_output_label: bool
@@ -357,9 +363,11 @@ class InterleavedPipelineSchedule(PipelineSchedule):
:param num_model_chunks: The number of model chunks
:type num_model_chunks: int
:param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
:type batch_data_process_func: Callable
:type batch_data_process_func: Callable, optional
:param tensor_shape: Specified shape in pipeline communication
:type tensor_shape: torch.Size, optional
:param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
:type scatter_gather_tensors: bool
:type scatter_gather_tensors: bool, optional
"""
assert num_microbatches % gpc.get_world_size(ParallelMode.PIPELINE) == 0, \
'num_microbatches must be an integer multiple of pipeline parallel world size'
@@ -425,7 +433,19 @@ class InterleavedPipelineSchedule(PipelineSchedule):
"""Run interleaved 1F1B schedule (model split into model chunks), with
communication between pipeline stages as needed.
Returns dictionary with losses if the last stage, empty dict otherwise."""
Returns dictionary with losses if the last stage, empty dict otherwise.
:param engine: Your engine object
:type engine: colossalai.engine.Engine
:param data_iter: Dataloader in the form of an iterator, obtained by calling iter(dataloader)
:type data_iter: Iterable
:param forward_only: Whether to run the forward step only. Default is false. If true, no backward will be run.
:type forward_only: bool
:param return_loss: Whether to return the loss value. Default is true.
:type return_loss: bool
:param return_output_label: If False, the output and label won't be returned
:type return_output_label: bool
"""
assert forward_only or return_loss, \
'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'
self.load_batch(data_iter)