Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-03 10:06:44 +00:00
[moe] merge moe into main (#4978)
* update moe module
* support openmoe
colossalai/moe/loss.py (new file, 78 lines)
@@ -0,0 +1,78 @@
import torch.nn as nn
from torch.nn.modules.loss import _Loss

from colossalai.moe.manager import MOE_MANAGER


class MoeCrossEntropyLoss(_Loss):
    r"""torch.nn.CrossEntropyLoss with an added auxiliary loss.

    Args:
        input (:class:`torch.tensor`): Predicted unnormalized scores (often referred to as logits).
        target (:class:`torch.tensor`): Ground truth class indices or class probabilities.
        aux_weight (float, optional): Weight of the auxiliary loss in the total loss. Defaults to 0.01.

    The ``args`` and ``kwargs`` should include the parameters below:
    ::

        weight (Tensor, optional)
        size_average (bool, optional)
        ignore_index (int, optional)
        reduce (bool, optional)
        reduction (str, optional)
        label_smoothing (float, optional)

    More details about ``args``, ``kwargs`` and ``torch.nn.functional.cross_entropy`` can be found in
    `Cross_entropy <https://pytorch.org/docs/stable/generated/torch.nn.functional.cross_entropy.html#torch.nn.functional.cross_entropy>`_.
    """

    def __init__(self, aux_weight: float = 0.01, *args, **kwargs):
        super().__init__()
        self.loss = nn.CrossEntropyLoss(*args, **kwargs)
        self.aux_weight = aux_weight

    def forward(self, *args):
        """
        The ``args`` should at least include the parameters below:
        ::

            input (:class:`torch.tensor`): Predicted unnormalized scores (often referred to as logits).
            target (:class:`torch.tensor`): Ground truth class indices or class probabilities.

        More details about ``args``, ``kwargs`` and ``torch.nn.functional.cross_entropy`` can be found in
        `Cross_entropy <https://pytorch.org/docs/stable/generated/torch.nn.functional.cross_entropy.html#torch.nn.functional.cross_entropy>`_.
        """
        main_loss = self.loss(*args)
        # Auxiliary loss accumulated by the global MOE_MANAGER during the forward pass.
        aux_loss = MOE_MANAGER.get_loss()
        return main_loss + self.aux_weight * aux_loss


class MoeLoss(_Loss):
    """A wrapper that adds an auxiliary loss to any loss module.

    Args:
        aux_weight (float): Weight of the auxiliary loss in the total loss.
        loss_fn (``Callable``): Loss function class; it is instantiated as ``loss_fn(*args, **kwargs)``.
        args (list): Args for the loss function.
        kwargs (dict): Kwargs for the loss function.
    """

    def __init__(self, aux_weight: float, loss_fn, *args, **kwargs):
        super().__init__()
        self.loss_fn = loss_fn(*args, **kwargs)
        self.aux_weight = aux_weight

    def forward(self, *args, **kwargs):
        """
        The ``args`` and ``kwargs`` should at least include the parameters below:
        ::

            input (:class:`torch.tensor`): Predicted unnormalized scores (often referred to as logits).
            target (:class:`torch.tensor`): Ground truth class indices or class probabilities.

        Note:
            The ``args`` and ``kwargs`` may vary depending on the loss function used.
        """
        main_loss = self.loss_fn(*args, **kwargs)
        # Auxiliary loss accumulated by the global MOE_MANAGER during the forward pass.
        aux_loss = MOE_MANAGER.get_loss()
        return main_loss + self.aux_weight * aux_loss
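
A minimal usage sketch for MoeCrossEntropyLoss above. It assumes the model's MoE layers have already run in the same step, so that MOE_MANAGER holds their accumulated auxiliary loss; the tensor shapes and the label_smoothing value are illustrative, not part of the commit:

    import torch
    from colossalai.moe.loss import MoeCrossEntropyLoss

    # Extra keyword arguments are forwarded to nn.CrossEntropyLoss.
    criterion = MoeCrossEntropyLoss(aux_weight=0.01, label_smoothing=0.1)

    logits = torch.randn(8, 10, requires_grad=True)  # (batch, num_classes), illustrative
    targets = torch.randint(0, 10, (8,))

    # In a real training step, MoE layers would have registered their
    # auxiliary losses with MOE_MANAGER before this call.
    loss = criterion(logits, targets)
    loss.backward()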
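
A similar sketch for MoeLoss, wrapping nn.MSELoss as an illustrative choice. Because MoeLoss instantiates loss_fn(*args, **kwargs) internally, the loss class itself is passed in, together with its constructor arguments:

    import torch
    import torch.nn as nn
    from colossalai.moe.loss import MoeLoss

    # Pass the loss class (not an instance); "reduction" is forwarded to nn.MSELoss.
    criterion = MoeLoss(aux_weight=0.01, loss_fn=nn.MSELoss, reduction="mean")

    pred = torch.randn(4, 16, requires_grad=True)  # illustrative shapes
    target = torch.randn(4, 16)

    loss = criterion(pred, target)  # main MSE loss + 0.01 * auxiliary MoE loss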