[builder] MOE builder (#2277)

2026-05-08 23:05:41 +00:00 · 2023-01-03 20:29:39 +08:00
parent 26e171af6c
commit 16cc8e6aa7
6 changed files with 60 additions and 20 deletions
--- a/colossalai/kernel/init.py
+++ b/colossalai/kernel/init.py
@@ -24,7 +24,19 @@ except ImportError:
    from colossalai.kernel.op_builder import ScaledSoftmaxBuilder
    scaled_upper_triang_masked_softmax = ScaledSoftmaxBuilder().load()

+# Use the ``moe`` extension from ``colossalai._C`` when it can be imported;
+# otherwise obtain it through ``MOEBuilder().load()``.
+try:
+    from colossalai._C import moe
+except ImportError:
+    from colossalai.kernel.op_builder import MOEBuilder
+    moe = MOEBuilder().load()
+
 __all__ = [
-    "fused_optim", "cpu_optim", "multihead_attention", "LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention",
-    "scaled_upper_triang_masked_softmax"
+    "fused_optim",
+    "cpu_optim",
+    "multihead_attention",
+    "moe",
+    "LayerNorm",
+    "FusedScaleMaskSoftmax",
+    "MultiHeadAttention",
+    "scaled_upper_triang_masked_softmax",
 ]
--- a/colossalai/kernel/op_builder/init.py
+++ b/colossalai/kernel/op_builder/init.py
@@ -1,6 +1,7 @@
 from .cpu_adam import CPUAdamBuilder
 from .fused_optim import FusedOptimBuilder
+from .moe import MOEBuilder
 from .multi_head_attn import MultiHeadAttnBuilder
 from .scaled_upper_triang_masked_softmax import ScaledSoftmaxBuilder

-__all__ = ['CPUAdamBuilder', 'FusedOptimBuilder', 'MultiHeadAttnBuilder', 'ScaledSoftmaxBuilder']
+__all__ = ['CPUAdamBuilder', 'FusedOptimBuilder', 'MultiHeadAttnBuilder', 'ScaledSoftmaxBuilder', 'MOEBuilder']
--- a/colossalai/kernel/op_builder/builder.py
+++ b/colossalai/kernel/op_builder/builder.py
@@ -1,12 +1,12 @@
 import os
 import re
-import sys
 from pathlib import Path
+from typing import List

 import torch


-def get_cuda_cc_flag():
+def get_cuda_cc_flag() -> List:
    """get_cuda_cc_flag

    cc flag for your GPU arch
--- a/colossalai/kernel/op_builder/moe.py
+++ b/colossalai/kernel/op_builder/moe.py
@@ -0,0 +1,33 @@
+import os
+
+from .builder import Builder, get_cuda_cc_flag
+
+
+class MOEBuilder(Builder):
+    """Describe how to build the ``moe`` extension.
+
+    Supplies the include directories, source files and compiler flags for
+    the sources located under ``cuda_native/csrc``.
+    """
+
+    def __init__(self):
+        # Both attributes are assigned before the base initializer runs;
+        # presumably Builder.__init__ reads them -- confirm against Builder.
+        self.base_dir = "cuda_native/csrc"
+        self.name = 'moe'
+        super().__init__()
+
+    def include_dirs(self):
+        """Return the include directories, each mapped through ``colossalai_src_path``."""
+        ret = [
+            os.path.join(self.base_dir, "includes"),
+            self.get_cuda_home_include(),
+            os.path.join(self.base_dir, "kernels", "include"),
+        ]
+        return [self.colossalai_src_path(path) for path in ret]
+
+    def sources_files(self):
+        """Return the C++/CUDA source files, each mapped through ``colossalai_src_path``."""
+        ret = [os.path.join(self.base_dir, fname) for fname in ['moe_cuda.cpp', 'moe_cuda_kernel.cu']]
+        return [self.colossalai_src_path(path) for path in ret]
+
+    def cxx_flags(self):
+        """Return the flags for the C++ compiler."""
+        return ['-O3', '-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']
+
+    def nvcc_flags(self):
+        """Return the flags for nvcc.
+
+        Order: optimisation flags, the fixed extra CUDA flags, then whatever
+        ``get_cuda_cc_flag()`` yields.
+        """
+        return [
+            '-O3',
+            '--use_fast_math',
+            '-U__CUDA_NO_HALF_OPERATORS__',
+            '-U__CUDA_NO_HALF_CONVERSIONS__',
+            '--expt-relaxed-constexpr',
+            '--expt-extended-lambda',
+            *get_cuda_cc_flag(),
+        ]