mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-23 18:39:56 +00:00
Fixed docstring in colossalai (#171)
This commit is contained in:
@@ -2,4 +2,4 @@ from ._base_schedule import BaseSchedule
|
||||
from ._pipeline_schedule import PipelineSchedule, InterleavedPipelineSchedule
|
||||
from ._non_pipeline_schedule import NonPipelineSchedule
|
||||
|
||||
__all__ = ['BaseSchedule', 'PipelineSchedule', 'NonPipelineSchedule', 'InterleavedPipelineSchedule']
|
||||
__all__ = ['BaseSchedule', 'NonPipelineSchedule', 'PipelineSchedule', 'InterleavedPipelineSchedule']
|
||||
|
@@ -5,12 +5,13 @@ from abc import ABC, abstractmethod
|
||||
|
||||
import torch
|
||||
|
||||
from typing import Iterable, Callable
|
||||
from typing import Iterable, Callable
|
||||
from .._base_engine import Engine
|
||||
from colossalai.logging import get_dist_logger
|
||||
from colossalai.utils import get_current_device
|
||||
from colossalai.nn.layer import split_batch
|
||||
|
||||
|
||||
class BaseSchedule(ABC):
|
||||
"""A basic helper class to control the process of training or evaluation.
|
||||
It mainly consists of forward_backward_step for gradient backward and
|
||||
@@ -46,6 +47,11 @@ class BaseSchedule(ABC):
|
||||
"""Loads a batch from data iterator. It returns the data and labels which are
|
||||
already on the same GPU as the model.
|
||||
|
||||
:param data_iter: Data iterator from which to get a batch of data
|
||||
:type data_iter: DataIter
|
||||
:param to_gpu: Whether the data should be moved to GPU
|
||||
:type to_gpu: bool, optional
|
||||
|
||||
:return: (data, label)
|
||||
:rtype: (:class:`torch.Tensor`, :class:`torch.Tensor`)
|
||||
"""
|
||||
@@ -62,13 +68,12 @@ class BaseSchedule(ABC):
|
||||
if isinstance(data, torch.Tensor):
|
||||
self.batch_size = data.size(0)
|
||||
else:
|
||||
self.batch_size = next(iter(data.values())).size(0)
|
||||
self.batch_size = next(iter(data.values())).size(0)
|
||||
data, label = split_batch(data), split_batch(label)
|
||||
if to_gpu:
|
||||
return self._move_to_device(data), self._move_to_device(label)
|
||||
return data, label
|
||||
|
||||
|
||||
def pre_processing(self, engine: Engine):
|
||||
"""To perform actions before running the schedule.
|
||||
"""
|
||||
@@ -85,11 +90,15 @@ class BaseSchedule(ABC):
|
||||
"""The process function over a batch of dataset for training or evaluation.
|
||||
|
||||
:param engine: Colossalai training engine
|
||||
:param inputs: input data
|
||||
:param labels: ground truth
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param data_iter: Data iterator from which to get a batch of data
|
||||
:type data_iter: DataIter
|
||||
:param forward_only: If True, the process won't include backward
|
||||
:type forward_only: bool
|
||||
:param return_loss: If False, the loss won't be returned
|
||||
:type return_loss: bool, optional
|
||||
:param return_output_label: If False, the output and label won't be returned
|
||||
:type return_output_label: bool, optional
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -105,7 +114,7 @@ class BaseSchedule(ABC):
|
||||
assert isinstance(outputs, (torch.Tensor, list, tuple)
|
||||
), f'Expect output of model is (torch.Tensor, list, tuple), got {type(outputs)}'
|
||||
if isinstance(outputs, torch.Tensor):
|
||||
outputs = (outputs, )
|
||||
outputs = (outputs,)
|
||||
if isinstance(labels, torch.Tensor):
|
||||
return engine.criterion(*outputs, labels)
|
||||
else:
|
||||
|
@@ -15,10 +15,6 @@ class NonPipelineSchedule(BaseSchedule):
|
||||
During one process, it loads a batch of dataset and feeds it to the model.
|
||||
After getting the output and calculating the loss, it will use :meth:`step`
|
||||
to update the parameters if it is in training mode.
|
||||
:param amp_type: The type of automatic mixed precision
|
||||
:param amp_config: The configuration of automatic mixed precision
|
||||
:type amp_type: AMP_TYPE
|
||||
:type amp_config: dict
|
||||
"""
|
||||
|
||||
def forward_backward_step(self,
|
||||
@@ -29,6 +25,7 @@ class NonPipelineSchedule(BaseSchedule):
|
||||
return_output_label: bool = True):
|
||||
"""The process function that loads a batch of dataset and feeds it to the model.
|
||||
The returned labels and loss will be None if :attr:`return_loss` is False.
|
||||
|
||||
:param engine: Model for training and inference
|
||||
:param data_iter: Data iterator of the dataloader, e.g. iter(dataloader)
|
||||
:param forward_only: If True, the model is run for the forward pass, else back propagation will be executed
|
||||
|
@@ -44,9 +44,11 @@ class PipelineSchedule(BaseSchedule):
|
||||
:param num_microbatches: The number of microbatches
|
||||
:type num_microbatches: int
|
||||
:param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
|
||||
:type batch_data_process_func: Callable
|
||||
:type batch_data_process_func: Callable, optional
|
||||
:param tensor_shape: Specified shape in pipeline communication
|
||||
:type tensor_shape: torch.Size, optional
|
||||
:param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
|
||||
:type scatter_gather_tensors: bool
|
||||
:type scatter_gather_tensors: bool, optional
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@@ -130,12 +132,16 @@ class PipelineSchedule(BaseSchedule):
|
||||
is obtained from data_iterator, otherwise the passed-in input_tensor is used.
|
||||
Returns output tensor. This is a helper function and can be ignored by users.
|
||||
|
||||
:param engine: your engine object
|
||||
:param engine: Your engine object
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param input_tensor: input tensor for this pipeline stage
|
||||
:param input_tensor: Input tensor for this pipeline stage
|
||||
:type input_tensor: :class:`torch.Tensor`
|
||||
:param return_tensors: a list of tensors to return
|
||||
:param return_tensors: A list of tensors to return
|
||||
:type return_tensors: List[:class:`torch.Tensor`]
|
||||
:param return_output_label: Whether to return output labels
|
||||
:type return_output_label: bool, optional
|
||||
:param accum_loss: Where the accumulated loss is stored
|
||||
:type accum_loss: optional
|
||||
|
||||
:return: output or the loss value of the current pipeline stage
|
||||
:rtype: :class:`torch.Tensor`
|
||||
@@ -205,13 +211,13 @@ class PipelineSchedule(BaseSchedule):
|
||||
"""Runs non-interleaved 1F1B schedule, with communication between pipeline stages.
|
||||
Returns a tuple with losses if the last stage, an empty tuple otherwise.
|
||||
|
||||
:param engine: your engine object
|
||||
:param engine: Your engine object
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param data_iter: dataloader as the form of an iterator, obtained by calling iter(dataloader)
|
||||
:param data_iter: Dataloader in the form of an iterator, obtained by calling iter(dataloader)
|
||||
:type data_iter: Iterable
|
||||
:param forward_only: whether run forward step only. Default is false. If true, no backward will be run.
|
||||
:param forward_only: Whether to run the forward step only. Default is false. If true, no backward will be run.
|
||||
:type forward_only: bool
|
||||
:param return_loss: whether returns the loss value. Default is true.
|
||||
:param return_loss: Whether to return the loss value. Default is true.
|
||||
:type return_loss: bool
|
||||
:param return_output_label: If False, the output and label won't be returned
|
||||
:type return_output_label: bool
|
||||
@@ -357,9 +363,11 @@ class InterleavedPipelineSchedule(PipelineSchedule):
|
||||
:param num_model_chunks: The number of model chunks
|
||||
:type num_model_chunks: int
|
||||
:param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
|
||||
:type batch_data_process_func: Callable
|
||||
:type batch_data_process_func: Callable, optional
|
||||
:param tensor_shape: Specified shape in pipeline communication
|
||||
:type tensor_shape: torch.Size, optional
|
||||
:param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
|
||||
:type scatter_gather_tensors: bool
|
||||
:type scatter_gather_tensors: bool, optional
|
||||
"""
|
||||
assert num_microbatches % gpc.get_world_size(ParallelMode.PIPELINE) == 0, \
|
||||
'num_microbatches must be an integer multiple of pipeline parallel world size'
|
||||
@@ -425,7 +433,19 @@ class InterleavedPipelineSchedule(PipelineSchedule):
|
||||
"""Run interleaved 1F1B schedule (model split into model chunks), with
|
||||
communication between pipeline stages as needed.
|
||||
|
||||
Returns dictionary with losses if the last stage, empty dict otherwise."""
|
||||
Returns dictionary with losses if the last stage, empty dict otherwise.
|
||||
|
||||
:param engine: Your engine object
|
||||
:type engine: colossalai.engine.Engine
|
||||
:param data_iter: Dataloader in the form of an iterator, obtained by calling iter(dataloader)
|
||||
:type data_iter: Iterable
|
||||
:param forward_only: Whether to run the forward step only. Default is false. If true, no backward will be run.
|
||||
:type forward_only: bool
|
||||
:param return_loss: Whether to return the loss value. Default is true.
|
||||
:type return_loss: bool
|
||||
:param return_output_label: If False, the output and label won't be returned
|
||||
:type return_output_label: bool
|
||||
"""
|
||||
assert forward_only or return_loss, \
|
||||
'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'
|
||||
self.load_batch(data_iter)
|
||||
|
Reference in New Issue
Block a user