ColossalAI/colossalai/legacy/nn/_ops/loss.py

from typing import Optional

import torch
import torch.nn.functional as F

from colossalai.legacy.nn.loss.loss_1d import VocabParallelCrossEntropyLoss1D
from colossalai.tensor import ColoTensor, ColoTensorSpec
from colossalai.tensor.op_wrapper import colo_op_impl

from ._utils import GeneralTensor, convert_to_colo_tensor


# colo_op_impl registers this function as the ColoTensor override for F.cross_entropy,
# so F.cross_entropy calls whose arguments include a ColoTensor are dispatched here.
@colo_op_impl(F.cross_entropy)
def colo_cross_entropy(input_tensor: GeneralTensor,
                       target: GeneralTensor,
                       weight: Optional[GeneralTensor] = None,
                       size_average: Optional[bool] = None,
                       ignore_index: int = -100,
                       reduce: Optional[bool] = None,
                       reduction: str = "mean",
                       label_smoothing: float = 0.0):
    # At least one argument must already be a ColoTensor so a process group can be inferred.
    assert isinstance(weight, ColoTensor) or isinstance(target, ColoTensor) or isinstance(input_tensor, ColoTensor)
    pg = input_tensor.get_process_group() if isinstance(input_tensor, ColoTensor) else target.get_process_group()
    weight = convert_to_colo_tensor(weight, pg)
    target = convert_to_colo_tensor(target, pg)
    input_tensor = convert_to_colo_tensor(input_tensor, pg)
    if input_tensor.is_replicate():    # Input is gathered
        assert target.is_replicate() and (weight is None or weight.is_replicate()), \
            "Target tensor and weight tensor both should be complete"
        output = F.cross_entropy(input_tensor,
                                 target,
                                 weight=weight,
                                 size_average=size_average,
                                 ignore_index=ignore_index,
                                 reduce=reduce,
                                 reduction=reduction,
                                 label_smoothing=label_smoothing)
        return ColoTensor.from_torch_tensor(output, ColoTensorSpec(pg))
    elif input_tensor.has_compute_spec():    # Single Model Parallel Applied
        if input_tensor.is_shard_1dcol():
            # Logits are sharded along the vocabulary (last) dimension, so use the
            # vocab-parallel cross entropy, which reduces across the TP process group.
            assert weight is None, "Current TP cross entropy loss function doesn't support passing weight tensor in"
            assert target.is_replicate(), "Target tensor should be complete in TP cross entropy loss function"
            output = VocabParallelCrossEntropyLoss1D()(input_tensor,
                                                       target,
                                                       process_group=input_tensor.process_group.tp_process_group())
            return ColoTensor.from_torch_tensor(output, ColoTensorSpec(pg))
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
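
Usage note (a minimal sketch, not part of the file above): because colo_op_impl hooks F.cross_entropy, the override is invoked transparently once the arguments are ColoTensors. The ProcessGroup import path and the shapes below are assumptions for illustration, and an initialized distributed context (e.g. via colossalai.launch) is assumed.

# Hypothetical usage sketch: replicated logits and labels on a default process group.
import torch
import torch.nn.functional as F

from colossalai.tensor import ColoTensor, ColoTensorSpec, ProcessGroup  # import path assumed

pg = ProcessGroup()  # default/world process group (requires torch.distributed to be initialized)
logits = ColoTensor.from_torch_tensor(torch.randn(8, 1000), ColoTensorSpec(pg))
labels = ColoTensor.from_torch_tensor(torch.randint(0, 1000, (8,)), ColoTensorSpec(pg))

# Both arguments are replicated ColoTensors, so this call is dispatched to
# colo_cross_entropy above and takes the plain F.cross_entropy path.
loss = F.cross_entropy(logits, labels)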