Refactored docstring to google style

2025-09-22 09:59:38 +00:00 · 2022-03-25 13:02:39 +08:00
parent 53b1b6e340
commit ec5086c49c
94 changed files with 3389 additions and 2982 deletions
--- a/colossalai/engine/schedule/_base_schedule.py
+++ b/colossalai/engine/schedule/_base_schedule.py
@@ -15,8 +15,12 @@ class BaseSchedule(ABC):
    """A basic helper class to control the process of training or evaluation.
    It mainly composes of forward_backward_step for gradient backward and
    optimizer_step for parameters update.
-    For the convenience to enable FP16, we aggreate all codes that contain the
+    For the convenience of enabling FP16, we aggregate all code that contains the
    control of FP16 in class schedule.
+
+    Args:
+        batch_data_process_func (Callable, optional): The preprocessing function which receives a batch of data,
+            and it will be executed in load_batch.
    """

    def __init__(self, batch_data_process_func: Callable = None):
@@ -46,13 +50,12 @@ class BaseSchedule(ABC):
        """Loads a batch from data iterator. It returns the data and labels which are
        already in the same GPU as where the model's.

-        :param data_iter: Data iterator from which get a batch of data
-        :type data_iter: DataIter
-        :param to_gpu: Whether the data should be moved to GPU
-        :type to_gpu: bool, optional
+        Args:
+            data_iter (Iterable): Data iterator from which to get a batch of data, obtained by calling iter(dataloader).
+            to_gpu (bool, optional): Whether the data should be moved to GPU.

-        :return: (data, label)
-        :rtype: (:class:`Tensor`, :class:`torch.Tensor`)
+        Returns:
+            Tuple (:class:`Tensor`, :class:`torch.Tensor`): A tuple of (data, label).
        """
        if data_iter is None:
            raise RuntimeError('Dataloader is not defined.')
@@ -87,16 +90,12 @@ class BaseSchedule(ABC):
                              ):
        """The process function over a batch of dataset for training or evaluation.

-        :param engine: Colossalai training engine
-        :type engine: colossalai.engine.Engine
-        :param data_iter: Data iterator from which get a batch of data
-        :type data_iter: DataIter
-        :param forward_only: If True, the process won't include backward
-        :type forward_only: bool
-        :param return_loss: If False, the loss won't be returned
-        :type return_loss: bool, optional
-        :param return_output_label: If False, the output and label won't be returned
-        :type return_output_label: bool, optional
+        Args:
+            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            data_iter (Iterable): Data iterator from which to get a batch of data, obtained by calling iter(dataloader).
+            forward_only (bool): If True, the process won't include backward.
+            return_loss (bool, optional): If False, the loss won't be returned.
+            return_output_label (bool, optional): If False, the output and label won't be returned.
        """
        pass

--- a/colossalai/engine/schedule/_non_pipeline_schedule.py
+++ b/colossalai/engine/schedule/_non_pipeline_schedule.py
@@ -15,6 +15,10 @@ class NonPipelineSchedule(BaseSchedule):
    During one process, it loads a batch of dataset and feeds it to the model.
    After getting the output and calculating the loss, it will use :meth:`step`
    to update the parameters if it is in training mode.
+
+    Args:
+        batch_data_process_func (Callable, optional): The preprocessing function which receives a batch of data,
+            and it will be executed in load_batch.
    """

    def forward_backward_step(self,
@@ -23,22 +27,19 @@ class NonPipelineSchedule(BaseSchedule):
                              forward_only: bool = False,
                              return_loss: bool = True,
                              return_output_label: bool = True):
-        """The process function that loads loads a batch of dataset and feeds it to the model.
+        """The process function that loads a batch of dataset and feeds it to the model.
        The returned labels and loss will None if :attr:`return_loss` is False.

-        :param engine: Model for training and inference
-        :param data_iter: Data iterator of the dataloader, e.g. iter(dataloader)
-        :param forward_only: If True, the model is run for the forward pass, else back propagation will be executed
-        :param return_loss: Loss will be returned if True
-        :param return_output_label: Output and label will be returned if True
-        :type engine: Iterator
-        :type data_iter: Iterator
-        :type forward_only: bool, optional
-        :type return_loss: bool, optional
-        :type return_output_label: bool, optional
+        Args:
+            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            data_iter (Iterable): Dataloader in the form of an iterator, obtained by calling iter(dataloader).
+            forward_only (bool, optional):
+                If True, the model is run for the forward pass, else back propagation will be executed.
+            return_loss (bool, optional): Loss will be returned if True.
+            return_output_label (bool, optional): Output and label will be returned if True.

-        :return: (output, label, loss)
-        :rtype: Tuple[:class:`torch.Tensor`]
+        Returns:
+            Tuple[:class:`torch.Tensor`]: A tuple of (output, label, loss); loss and label could be None.
        """
        assert forward_only or return_loss, \
            "The argument 'return_loss' has to be True when 'forward_only' is False, but got False."
--- a/colossalai/engine/schedule/_pipeline_schedule.py
+++ b/colossalai/engine/schedule/_pipeline_schedule.py
@@ -41,14 +41,13 @@ class PipelineSchedule(BaseSchedule):
    It uses non-interleaved 1F1B strategy. Other properties are similar as
    :class:`NonPipelineSchedule`.

-    :param num_microbatches: The number of microbatches
-    :type num_microbatches: int
-    :param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
-    :type batch_data_process_func: Callable, optional
-    :param tensor_shape: Specified shape in pipeline communication
-    :type tensor_shape: torch.Size, optional
-    :param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
-    :type scatter_gather_tensors: bool, optional
+    Args:
+        num_microbatches (int): The number of microbatches.
+        batch_data_process_func (Callable, optional):
+            The preprocessing function which receives a batch of data, and it will be executed in `load_batch`.
+        tensor_shape (torch.Size, optional): Specified shape in pipeline communication.
+        scatter_gather_tensors (bool, optional):
+            If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization.
    """

    def __init__(self,
@@ -131,19 +130,14 @@ class PipelineSchedule(BaseSchedule):
        is obtained from data_iterator, otherwise the passed-in input_tensor is used.
        Returns output tensor. This is a helper function and can be ignored by users.

-        :param engine: Your engine object
-        :type engine: colossalai.engine.Engine
-        :param input_tensor: Input tensor for this pipeline stage
-        :type input_tensor: :class:`torch.Tensor`
-        :param return_tensors: A list of tensors to return
-        :type return_tensors: List[:class:`torch.Tensor`]
-        :param return_output_label: Whether returns output labels
-        :type return_output_label: bool, optional
-        :param accum_loss: Where accumulated loss stores
-        :type  accum_loss: optional
-
-        :return: output or the loss value of the current pipeline stage
-        :rtype: :class:`torch.Tensor`
+        Args:
+            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            input_tensor (:class:`torch.Tensor`): Input tensor for this pipeline stage.
+            return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.
+            return_output_label (bool, optional): Whether to return the output and label.
+            accum_loss (optional): Where the accumulated loss is stored.
+        Returns:
+            :class:`torch.Tensor`: output or the loss value of the current pipeline stage.
        """
        data, label = self.load_micro_batch()
        output_tensor = self._call_engine(engine.model, input_tensor, data)
@@ -173,17 +167,14 @@ class PipelineSchedule(BaseSchedule):
        Returns the gradients with respect to the input tensor (None if first stage).
        This is a helper function and can be ignored by users.

-        :param engine: your engine object
-        :type engine: colossalai.engine.Engine
-        :param input_tensor: input tensor for this pipeline stage
-        :type input_tensor: :class:`torch.Tensor`
-        :param output_tensor: output tensor for this pipeline stage
-        :type output_tensor: :class:`torch.Tensor`
-        :param output_tensor_grad: gradient of output tensor for this pipeline stage
-        :type output_tensor_grad: :class:`torch.Tensor`
+        Args:
+            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            input_tensor (:class:`torch.Tensor`): Input tensor for this pipeline stage.
+            output_tensor (:class:`torch.Tensor`): Output tensor for this pipeline stage.
+            output_tensor_grad (:class:`torch.Tensor`): Gradient of output tensor for this pipeline stage.

-        :return: gradient of input tensor
-        :rtype: :class:`torch.Tensor`
+        Returns:
+            :class:`torch.Tensor`: Gradient of input tensor.
        """

        # Retain the grad on the input_tensor.
@@ -207,19 +198,16 @@ class PipelineSchedule(BaseSchedule):
        """Runs non-interleaved 1F1B schedule, with communication between pipeline stages.
        Returns a tuple with losses if the last stage, an empty tuple otherwise.

-        :param engine: Your engine object
-        :type engine: colossalai.engine.Engine
-        :param data_iter: Dataloader as the form of an iterator, obtained by calling iter(dataloader)
-        :type data_iter: Iterable
-        :param forward_only: Whether run forward step only. Default is false. If true, no backward will be run.
-        :type forward_only: bool
-        :param return_loss: Whether returns the loss value. Default is true.
-        :type return_loss: bool
-        :param return_output_label: If False, the output and label won't be returned
-        :type return_output_label: bool
+        Args:
+            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            data_iter (Iterable): Dataloader in the form of an iterator, obtained by calling iter(dataloader).
+            forward_only (bool, optional):
+                Whether to run the forward step only. Default is False. If True, no backward pass will be run.
+            return_loss (bool, optional): Whether to return the loss value. Default is True.
+            return_output_label (bool, optional): If False, the output and label won't be returned.

-        :return: (output, label, loss)
-        :rtype: Tuple[:class:`torch.Tensor`]
+        Returns:
+            Tuple[:class:`torch.Tensor`]: A tuple of (output, label, loss); loss and label could be None.
        """

        assert forward_only or return_loss, \
@@ -354,16 +342,14 @@ class InterleavedPipelineSchedule(PipelineSchedule):
        It uses interleaved 1F1B strategy. Other properties are similar as
        :class:`NonPipelineSchedule`.

-        :param num_microbatches: The number of microbatches
-        :type num_microbatches: int
-        :param num_model_chunks: The number of model chunks
-        :type num_model_chunks: int
-        :param batch_data_process_func: The preprocessing function which receives a batch of data, and it will be executed in `load_batch`
-        :type batch_data_process_func: Callable, optional
-        :param tensor_shape: Specified shape in pipeline communication
-        :type tensor_shape: torch.Size, optional
-        :param scatter_gather_tensors: If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization
-        :type scatter_gather_tensors: bool, optional
+        Args:
+            num_microbatches (int): The number of microbatches.
+            num_model_chunks (int): The number of model chunks.
+            batch_data_process_func (Callable, optional):
+                The preprocessing function which receives a batch of data, and it will be executed in `load_batch`.
+            tensor_shape (torch.Size, optional): Specified shape in pipeline communication.
+            scatter_gather_tensors (bool, optional):
+                If set to `True`, communication will be reduced over pipeline when using 1D tensor parallelization.
        """
        assert num_microbatches % gpc.get_world_size(ParallelMode.PIPELINE) == 0, \
            'num_microbatches must be an integer multiple of pipeline parallel world size'
@@ -408,6 +394,16 @@ class InterleavedPipelineSchedule(PipelineSchedule):
        """Forward step for passed-in model. If it is the first stage, the input tensor 
        is obtained from data_iterator, otherwise the passed-in input_tensor is used.
        Returns output tensor. This is a helper function and can be ignored by users.
+
+        Args:
+            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            model_chunk_id (int): The id of the model chunk.
+            input_tensor (:class:`torch.Tensor`): Input tensor for this pipeline stage.
+            return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.
+            return_output_label (bool, optional): Whether to return the output and label.
+            accum_loss (optional): Where the accumulated loss is stored.
+        Returns:
+            :class:`torch.Tensor`: output or the loss value of the current pipeline stage.
        """
        data, label = self.load_micro_batch(model_chunk_id)
        output_tensor = self._call_engine(engine.model[model_chunk_id], input_tensor, data)
@@ -435,18 +431,17 @@ class InterleavedPipelineSchedule(PipelineSchedule):
        """Run interleaved 1F1B schedule (model split into model chunks), with
        communication between pipeline stages as needed.

-        Returns dictionary with losses if the last stage, empty dict otherwise.
+        Args:
+            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            data_iter (Iterable): Dataloader in the form of an iterator, obtained by calling iter(dataloader).
+            forward_only (bool, optional):
+                Whether to run the forward step only. Default is False. If True, no backward pass will be run.
+            return_loss (bool, optional): Whether to return the loss value. Default is True.
+            return_output_label (bool, optional): If False, the output and label won't be returned.

-        :param engine: Your engine object
-        :type engine: colossalai.engine.Engine
-        :param data_iter: Dataloader as the form of an iterator, obtained by calling iter(dataloader)
-        :type data_iter: Iterable
-        :param forward_only: Whether run forward step only. Default is false. If true, no backward will be run.
-        :type forward_only: bool
-        :param return_loss: Whether returns the loss value. Default is true.
-        :type return_loss: bool
-        :param return_output_label: If False, the output and label won't be returned
-        :type return_output_label: bool
+        Returns:
+            Tuple[:class:`torch.Tensor`]: A tuple of (output, label, loss); loss and label could be None.
+                The loss would be returned only in the last stage.
        """
        assert forward_only or return_loss, \
            'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'