update examples and sphinx docs for the new api (#63)
@@ -57,38 +57,61 @@ class Engine:
    @property
    def model(self):
        """model attached to the engine"""
        return self._model

    @property
    def optimizer(self):
        """optimizer attached to the engine"""
        return self._optimizer

    @property
    def criterion(self):
        """criterion attached to the engine"""
        return self._criterion

    @property
    def schedule(self):
        """schedule attached to the engine"""
        return self._schedule

    def zero_grad(self):
        """Set the gradients of the parameters to zero."""
        self.optimizer.zero_grad()

    def step(self):
        """Execute a parameter update."""
        self._all_reduce_gradients()
        self.optimizer.clip_grad_norm(self.model, self._clip_grad_norm)
        self.optimizer.step()

    def backward(self, loss: Tensor):
        """Start backward propagation given the loss value computed by a loss function.

        :param loss: loss value computed by a loss function
        :type loss: :class:`torch.Tensor`
        """
        return self.optimizer.backward(loss)

    def backward_by_grad(self, tensor, grad):
        """Start backward propagation given the gradient of the output tensor.

        :param tensor: output tensor
        :type tensor: :class:`torch.Tensor`
        :param grad: gradient passed back to the output
        :type grad: :class:`torch.Tensor`
        """
        return self.optimizer.backward_by_grad(tensor, grad)

    def calc_loss(self, *args, **kwargs):
        """Compute the loss value.

        :return: the loss value
        :rtype: :class:`torch.Tensor`
        """
        return self.criterion(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        """Run the forward step for the model.

        :return: output of the model
        :rtype: Tuple[:class:`torch.Tensor`] or :class:`torch.Tensor`
        """
        return self.model(*args, **kwargs)

    def _all_reduce_gradients(self):
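For orientation, a minimal training-loop sketch against the Engine API above; the `engine` and `train_dataloader` objects are assumed for illustration and are not part of this commit.

for data, label in train_dataloader:
    engine.zero_grad()                       # delegates to optimizer.zero_grad()
    output = engine(data)                    # Engine.__call__ -> self.model(...)
    loss = engine.calc_loss(output, label)   # criterion attached to the engine
    engine.backward(loss)                    # optimizer-aware backward pass
    engine.step()                            # all-reduce, grad clip, optimizer.step()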
@@ -48,7 +48,7 @@ class BaseSchedule(ABC):
        already on the same GPU as the model.

        :return: (data, label)
-       :rtype: (Tensor, Tensor)
+       :rtype: (:class:`Tensor`, :class:`torch.Tensor`)
        """
        if data_iter is None:
            raise RuntimeError('Dataloader is not defined.')
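A hedged sketch of the loader contract this docstring describes, in plain PyTorch; the `load_batch` helper below is illustrative, not the exact implementation in this repository.

import torch

def load_batch(data_iter, device=torch.device('cuda')):
    # Pull one (data, label) pair and move both to the model's device.
    if data_iter is None:
        raise RuntimeError('Dataloader is not defined.')
    data, label = next(data_iter)
    return data.to(device), label.to(device)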
@@ -38,7 +38,9 @@ class NonPipelineSchedule(BaseSchedule):
        :type data_iter: Iterator
        :type forward_only: bool, optional
        :type return_loss: bool, optional

        :return: (output, label, loss)
        :rtype: Tuple[:class:`torch.Tensor`]
        """
        assert forward_only or return_loss, \
            "The argument 'return_loss' has to be True when 'forward_only' is False, but got False."
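A usage sketch consistent with the documented signature; the method name `forward_backward_step` and the surrounding objects are assumptions inferred from the docstring, not verified against this commit.

output, label, loss = schedule.forward_backward_step(
    engine, iter(train_dataloader), forward_only=False, return_loss=True)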
@@ -133,6 +133,16 @@ class PipelineSchedule(BaseSchedule):
        """Forward step for the passed-in model. If it is the first stage, the input tensor
        is obtained from the data iterator; otherwise, the passed-in input_tensor is used.
        Returns the output tensor. This is a helper function and can be ignored by users.

        :param engine: your engine object
        :type engine: colossalai.engine.Engine
        :param input_tensor: input tensor for this pipeline stage
        :type input_tensor: :class:`torch.Tensor`
        :param return_tensors: a list of tensors to return
        :type return_tensors: List[:class:`torch.Tensor`]

        :return: output or the loss value of the current pipeline stage
        :rtype: :class:`torch.Tensor`
        """

        if input_tensor is None:
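A toy illustration of the forward-step contract in plain PyTorch; the stage module and data below are assumed stand-ins, not the actual pipeline code.

import torch

stage = torch.nn.Linear(4, 4)                # stand-in for this stage's layers
data_iter = iter([torch.randn(2, 4)])

input_tensor = None                          # None marks the first stage
if input_tensor is None:
    input_tensor = next(data_iter)           # first stage: read a micro-batch
output_tensor = stage(input_tensor)          # later stages reuse the received activation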
@@ -162,6 +172,18 @@ class PipelineSchedule(BaseSchedule):
        output_tensor_grad is None, otherwise it is the gradients with respect to the stage's output tensor.
        Returns the gradients with respect to the input tensor (None if first stage).
        This is a helper function and can be ignored by users.

        :param engine: your engine object
        :type engine: colossalai.engine.Engine
        :param input_tensor: input tensor for this pipeline stage
        :type input_tensor: :class:`torch.Tensor`
        :param output_tensor: output tensor for this pipeline stage
        :type output_tensor: :class:`torch.Tensor`
        :param output_tensor_grad: gradient of the output tensor for this pipeline stage
        :type output_tensor_grad: :class:`torch.Tensor`

        :return: gradient of the input tensor
        :rtype: :class:`torch.Tensor`
        """

        # Retain the grad on the input_tensor.
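A toy illustration, in plain PyTorch rather than the ColossalAI code, of the backward-step contract: given the gradient of this stage's output, compute the gradient of its input to hand to the previous stage.

import torch

stage = torch.nn.Linear(4, 4)
input_tensor = torch.randn(2, 4, requires_grad=True)
input_tensor.retain_grad()                   # retain the grad on the input tensor
output_tensor = stage(input_tensor)

output_tensor_grad = torch.randn_like(output_tensor)   # pretend it arrived from downstream
torch.autograd.backward(output_tensor, grad_tensors=output_tensor_grad)
input_tensor_grad = input_tensor.grad        # would be None on the first stage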
@@ -189,7 +211,17 @@ class PipelineSchedule(BaseSchedule):
        """Runs a non-interleaved 1F1B schedule, with communication between pipeline stages.
        Returns a tuple with losses if this is the last stage, an empty tuple otherwise.

        :param engine: your engine object
        :type engine: colossalai.engine.Engine
        :param data_iter: dataloader in the form of an iterator, obtained by calling iter(dataloader)
        :type data_iter: Iterable
        :param forward_only: whether to run only the forward step. Default is False. If True, no backward pass will be run.
        :type forward_only: bool
        :param return_loss: whether to return the loss value. Default is True.
        :type return_loss: bool

        :return: (output, label, loss)
        :rtype: Tuple[:class:`torch.Tensor`]
        """

        assert forward_only or return_loss, \
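For readers new to 1F1B, the overall shape of a non-interleaved schedule is sketched below; this is the standard warmup / steady-state / cooldown structure, given for illustration rather than quoted from this commit.

def run_1f1b(num_microbatches, num_warmup, fwd, bwd):
    # Warmup: forward-only steps that fill the pipeline.
    for _ in range(num_warmup):
        fwd()
    # Steady state: one forward followed by one backward per micro-batch (1F1B).
    for _ in range(num_microbatches - num_warmup):
        fwd()
        bwd()
    # Cooldown: drain the remaining backward steps.
    for _ in range(num_warmup):
        bwd()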