Fixed docstring in colossalai (#171)

2025-09-23 18:39:56 +00:00 · 2022-01-21 10:44:30 +08:00
parent e2089c5c15
commit 0f8c7f9804
77 changed files with 983 additions and 603 deletions
--- a/colossalai/engine/schedule/__init__.py
+++ b/colossalai/engine/schedule/__init__.py
@@ -2,4 +2,4 @@ from ._base_schedule import BaseSchedule
 from ._pipeline_schedule import PipelineSchedule, InterleavedPipelineSchedule
 from ._non_pipeline_schedule import NonPipelineSchedule

-__all__ = ['BaseSchedule', 'PipelineSchedule', 'NonPipelineSchedule', 'InterleavedPipelineSchedule']
+__all__ = ['BaseSchedule', 'NonPipelineSchedule', 'PipelineSchedule', 'InterleavedPipelineSchedule']
--- a/colossalai/engine/schedule/_base_schedule.py
+++ b/colossalai/engine/schedule/_base_schedule.py
@@ -5,12 +5,13 @@ from abc import ABC, abstractmethod

 import torch

-from typing import Iterable,  Callable
+from typing import Iterable, Callable
 from .._base_engine import Engine
 from colossalai.logging import get_dist_logger
 from colossalai.utils import get_current_device
 from colossalai.nn.layer import split_batch

+
 class BaseSchedule(ABC):
    """A basic helper class to control the process of training or evaluation.
    It mainly composes of forward_backward_step for gradient backward and
@@ -46,6 +47,11 @@ class BaseSchedule(ABC):
        """Loads a batch from data iterator. It returns the data and labels which are
        already in the same GPU as where the model's.

+        :param data_iter: Data iterator from which to get a batch of data
+        :type data_iter: DataIter
+        :param to_gpu: Whether the data should be moved to GPU
+        :type to_gpu: bool, optional
+
        :return: (data, label)
        :rtype: (:class:`Tensor`, :class:`torch.Tensor`)
        """
@@ -62,13 +68,12 @@ class BaseSchedule(ABC):
        if isinstance(data, torch.Tensor):
            self.batch_size = data.size(0)
        else:
-            self.batch_size = next(iter(data.values())).size(0)    
+            self.batch_size = next(iter(data.values())).size(0)
        data, label = split_batch(data), split_batch(label)
        if to_gpu:
            return self._move_to_device(data), self._move_to_device(label)
        return data, label

-
    def pre_processing(self, engine: Engine):
        """To perform actions before running the schedule.
        """
@@ -85,11 +90,15 @@ class BaseSchedule(ABC):
        """The process function over a batch of dataset for training or evaluation.

        :param engine: Colossalai training engine
-        :param inputs: input data
-        :param labels: ground truth
+        :type engine: colossalai.engine.Engine
+        :param data_iter: Data iterator from which to get a batch of data
+        :type data_iter: DataIter
        :param forward_only: If True, the process won't include backward
+        :type forward_only: bool
        :param return_loss: If False, the loss won't be returned
+        :type return_loss: bool, optional
        :param return_output_label: If False, the output and label won't be returned
+        :type return_output_label: bool, optional
        """
        pass

@@ -105,7 +114,7 @@ class BaseSchedule(ABC):
        assert isinstance(outputs, (torch.Tensor, list, tuple)
                          ), f'Expect output of model is (torch.Tensor, list, tuple), got {type(outputs)}'
        if isinstance(outputs, torch.Tensor):
-            outputs = (outputs, )
+            outputs = (outputs,)
        if isinstance(labels, torch.Tensor):
            return engine.criterion(*outputs, labels)
        else:
--- a/colossalai/engine/schedule/_non_pipeline_schedule.py
+++ b/colossalai/engine/schedule/_non_pipeline_schedule.py
@@ -15,10 +15,6 @@ class NonPipelineSchedule(BaseSchedule):
    During one process, it loads a batch of dataset and feeds it to the model.
    After getting the output and calculating the loss, it will use :meth:`step`
    to update the parameters if it is in training mode.
-    :param amp_type: The type of automatic mixed precision
-    :param amp_config: The configuration of automatic mixed procision
-    :type amp_type: AMP_TYPE
-    :type amp_config: dict
    """

    def forward_backward_step(self,
@@ -29,6 +25,7 @@ class NonPipelineSchedule(BaseSchedule):
                              return_output_label: bool = True):
        """The process function that loads loads a batch of dataset and feeds it to the model.
        The returned labels and loss will None if :attr:`return_loss` is False.
+
        :param engine: Model for training and inference
        :param data_iter: Data iterator of the dataloader, e.g. iter(dataloader)
        :param forward_only: If True, the model is run for the forward pass, else back propagation will be executed
--- a/colossalai/engine/schedule/_pipeline_schedule.py
+++ b/colossalai/engine/schedule/_pipeline_schedule.py
@@ -44,9 +44,11 @@ class PipelineSchedule(BaseSchedule):
    :param num_microbatches: The number of microbatches
    :type num_microbatches: int
    :param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
-    :type batch_data_process_func: Callable
+    :type batch_data_process_func: Callable, optional
+    :param tensor_shape: Specified shape in pipeline communication
+    :type tensor_shape: torch.Size, optional
    :param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
-    :type scatter_gather_tensors: bool
+    :type scatter_gather_tensors: bool, optional
    """

    def __init__(self,
@@ -130,12 +132,16 @@ class PipelineSchedule(BaseSchedule):
        is obtained from data_iterator, otherwise the passed-in input_tensor is used.
        Returns output tensor. This is a helper function and can be ignored by users.

-        :param engine: your engine object
+        :param engine: Your engine object
        :type engine: colossalai.engine.Engine
-        :param input_tensor: input tensor for this pipeline stage
+        :param input_tensor: Input tensor for this pipeline stage
        :type input_tensor: :class:`torch.Tensor`
-        :param return_tensors: a list of tensors to return
+        :param return_tensors: A list of tensors to return
        :type return_tensors: List[:class:`torch.Tensor`]
+        :param return_output_label: Whether to return the output and label
+        :type return_output_label: bool, optional
+        :param accum_loss: Where the accumulated loss is stored
+        :type accum_loss: optional

        :return: output or the loss value of the current pipeline stage
        :rtype: :class:`torch.Tensor`
@@ -205,13 +211,13 @@ class PipelineSchedule(BaseSchedule):
        """Runs non-interleaved 1F1B schedule, with communication between pipeline stages.
        Returns a tuple with losses if the last stage, an empty tuple otherwise.

-        :param engine: your engine object
+        :param engine: Your engine object
        :type engine: colossalai.engine.Engine
-        :param data_iter: dataloader as the form of an iterator, obtained by calling iter(dataloader)
+        :param data_iter: Dataloader in the form of an iterator, obtained by calling iter(dataloader)
        :type data_iter: Iterable
-        :param forward_only: whether run forward step only. Default is false. If true, no backward will be run.
+        :param forward_only: Whether to run the forward step only. Default is false. If true, no backward will be run.
        :type forward_only: bool
-        :param return_loss: whether returns the loss value. Default is true.
+        :param return_loss: Whether to return the loss value. Default is true.
        :type return_loss: bool
        :param return_output_label: If False, the output and label won't be returned
        :type return_output_label: bool
@@ -357,9 +363,11 @@ class InterleavedPipelineSchedule(PipelineSchedule):
        :param num_model_chunks: The number of model chunks
        :type num_model_chunks: int
        :param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
-        :type batch_data_process_func: Callable
+        :type batch_data_process_func: Callable, optional
+        :param tensor_shape: Specified shape in pipeline communication
+        :type tensor_shape: torch.Size, optional
        :param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
-        :type scatter_gather_tensors: bool
+        :type scatter_gather_tensors: bool, optional
        """
        assert num_microbatches % gpc.get_world_size(ParallelMode.PIPELINE) == 0, \
            'num_microbatches must be an integer multiple of pipeline parallel world size'
@@ -425,7 +433,19 @@ class InterleavedPipelineSchedule(PipelineSchedule):
        """Run interleaved 1F1B schedule (model split into model chunks), with
        communication between pipeline stages as needed.

-        Returns dictionary with losses if the last stage, empty dict otherwise."""
+        Returns dictionary with losses if the last stage, empty dict otherwise.
+
+        :param engine: Your engine object
+        :type engine: colossalai.engine.Engine
+        :param data_iter: Dataloader in the form of an iterator, obtained by calling iter(dataloader)
+        :type data_iter: Iterable
+        :param forward_only: Whether to run the forward step only. Default is false. If true, no backward will be run.
+        :type forward_only: bool
+        :param return_loss: Whether to return the loss value. Default is true.
+        :type return_loss: bool
+        :param return_output_label: If False, the output and label won't be returned
+        :type return_output_label: bool
+        """
        assert forward_only or return_loss, \
            'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'
        self.load_batch(data_iter)