update examples and sphinx docs for the new api (#63)
@@ -57,38 +57,61 @@ class Engine:
    @property
    def model(self):
        """model attached to the engine"""
        return self._model

    @property
    def optimizer(self):
        """optimizer attached to the engine"""
        return self._optimizer

    @property
    def criterion(self):
        """criterion attached to the engine"""
        return self._criterion

    @property
    def schedule(self):
        """schedule attached to the engine"""
        return self._schedule

    def zero_grad(self):
        """Set the gradients of the parameters to zero."""
        self.optimizer.zero_grad()

    def step(self):
        """Execute a parameter update."""
        self._all_reduce_gradients()
        self.optimizer.clip_grad_norm(self.model, self._clip_grad_norm)
        self.optimizer.step()

    def backward(self, loss: Tensor):
        """Start backward propagation given the loss value computed by a loss function.

        :param loss: loss value computed by a loss function
        :type loss: :class:`torch.Tensor`
        """
        return self.optimizer.backward(loss)

    def backward_by_grad(self, tensor, grad):
        """Start backward propagation given the gradient of the output tensor.

        :param tensor: output tensor
        :type tensor: :class:`torch.Tensor`
        :param grad: gradient passed back to the output
        :type grad: :class:`torch.Tensor`
        """
        return self.optimizer.backward_by_grad(tensor, grad)

    def calc_loss(self, *args, **kwargs):
        """Compute the loss value.

        :return: the loss value
        :rtype: :class:`torch.Tensor`
        """
        return self.criterion(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        """Run the forward step for the model.

        :return: output of the model
        :rtype: Tuple[:class:`torch.Tensor`] or :class:`torch.Tensor`
        """
        return self.model(*args, **kwargs)

    def _all_reduce_gradients(self):
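For orientation, a minimal training-loop sketch against the Engine API above; the `engine` and `train_dataloader` objects are assumed for illustration and are not part of this commit.

for data, label in train_dataloader:
    engine.zero_grad()                       # delegates to optimizer.zero_grad()
    output = engine(data)                    # Engine.__call__ -> self.model(...)
    loss = engine.calc_loss(output, label)   # criterion attached to the engine
    engine.backward(loss)                    # optimizer-aware backward pass
    engine.step()                            # all-reduce, grad clip, optimizer.step()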
@@ -48,7 +48,7 @@ class BaseSchedule(ABC):
        already on the same GPU as the model.

        :return: (data, label)
-       :rtype: (Tensor, Tensor)
+       :rtype: (:class:`Tensor`, :class:`torch.Tensor`)
        """
        if data_iter is None:
            raise RuntimeError('Dataloader is not defined.')
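A hedged sketch of the loader contract this docstring describes, in plain PyTorch; the `load_batch` helper below is illustrative, not the exact implementation in this repository.

import torch

def load_batch(data_iter, device=torch.device('cuda')):
    # Pull one (data, label) pair and move both to the model's device.
    if data_iter is None:
        raise RuntimeError('Dataloader is not defined.')
    data, label = next(data_iter)
    return data.to(device), label.to(device)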
@@ -38,7 +38,9 @@ class NonPipelineSchedule(BaseSchedule):
        :type data_iter: Iterator
        :type forward_only: bool, optional
        :type return_loss: bool, optional

        :return: (output, label, loss)
        :rtype: Tuple[:class:`torch.Tensor`]
        """
        assert forward_only or return_loss, \
            "The argument 'return_loss' has to be True when 'forward_only' is False, but got False."
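A usage sketch consistent with the documented signature; the method name `forward_backward_step` and the surrounding objects are assumptions inferred from the docstring, not verified against this commit.

output, label, loss = schedule.forward_backward_step(
    engine, iter(train_dataloader), forward_only=False, return_loss=True)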
@@ -133,6 +133,16 @@ class PipelineSchedule(BaseSchedule):
        """Forward step for the passed-in model. If it is the first stage, the input tensor
        is obtained from the data iterator; otherwise, the passed-in input_tensor is used.
        Returns the output tensor. This is a helper function and can be ignored by users.

        :param engine: your engine object
        :type engine: colossalai.engine.Engine
        :param input_tensor: input tensor for this pipeline stage
        :type input_tensor: :class:`torch.Tensor`
        :param return_tensors: a list of tensors to return
        :type return_tensors: List[:class:`torch.Tensor`]

        :return: output or the loss value of the current pipeline stage
        :rtype: :class:`torch.Tensor`
        """

        if input_tensor is None:
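A toy illustration of the forward-step contract in plain PyTorch; the stage module and data below are assumed stand-ins, not the actual pipeline code.

import torch

stage = torch.nn.Linear(4, 4)                # stand-in for this stage's layers
data_iter = iter([torch.randn(2, 4)])

input_tensor = None                          # None marks the first stage
if input_tensor is None:
    input_tensor = next(data_iter)           # first stage: read a micro-batch
output_tensor = stage(input_tensor)          # later stages reuse the received activation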
@@ -162,6 +172,18 @@ class PipelineSchedule(BaseSchedule):
        output_tensor_grad is None, otherwise it is the gradients with respect to the stage's output tensor.
        Returns the gradients with respect to the input tensor (None if first stage).
        This is a helper function and can be ignored by users.

        :param engine: your engine object
        :type engine: colossalai.engine.Engine
        :param input_tensor: input tensor for this pipeline stage
        :type input_tensor: :class:`torch.Tensor`
        :param output_tensor: output tensor for this pipeline stage
        :type output_tensor: :class:`torch.Tensor`
        :param output_tensor_grad: gradient of the output tensor for this pipeline stage
        :type output_tensor_grad: :class:`torch.Tensor`

        :return: gradient of the input tensor
        :rtype: :class:`torch.Tensor`
        """

        # Retain the grad on the input_tensor.
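A toy illustration, in plain PyTorch rather than the ColossalAI code, of the backward-step contract: given the gradient of this stage's output, compute the gradient of its input to hand to the previous stage.

import torch

stage = torch.nn.Linear(4, 4)
input_tensor = torch.randn(2, 4, requires_grad=True)
input_tensor.retain_grad()                   # retain the grad on the input tensor
output_tensor = stage(input_tensor)

output_tensor_grad = torch.randn_like(output_tensor)   # pretend it arrived from downstream
torch.autograd.backward(output_tensor, grad_tensors=output_tensor_grad)
input_tensor_grad = input_tensor.grad        # would be None on the first stage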
@@ -189,7 +211,17 @@ class PipelineSchedule(BaseSchedule):
        """Runs a non-interleaved 1F1B schedule, with communication between pipeline stages.
        Returns a tuple with losses if this is the last stage, an empty tuple otherwise.

        :param engine: your engine object
        :type engine: colossalai.engine.Engine
        :param data_iter: dataloader in the form of an iterator, obtained by calling iter(dataloader)
        :type data_iter: Iterable
        :param forward_only: whether to run only the forward step. Default is False. If True, no backward pass will be run.
        :type forward_only: bool
        :param return_loss: whether to return the loss value. Default is True.
        :type return_loss: bool

        :return: (output, label, loss)
        :rtype: Tuple[:class:`torch.Tensor`]
        """

        assert forward_only or return_loss, \
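For readers new to 1F1B, the overall shape of a non-interleaved schedule is sketched below; this is the standard warmup / steady-state / cooldown structure, given for illustration rather than quoted from this commit.

def run_1f1b(num_microbatches, num_warmup, fwd, bwd):
    # Warmup: forward-only steps that fill the pipeline.
    for _ in range(num_warmup):
        fwd()
    # Steady state: one forward followed by one backward per micro-batch (1F1B).
    for _ in range(num_microbatches - num_warmup):
        fwd()
        bwd()
    # Cooldown: drain the remaining backward steps.
    for _ in range(num_warmup):
        bwd()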