mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2026-05-08 23:05:41 +00:00
[builder] MOE builder (#2277)
This commit is contained in:
@@ -24,7 +24,19 @@ except ImportError:
|
||||
from colossalai.kernel.op_builder import ScaledSoftmaxBuilder
|
||||
scaled_upper_triang_masked_softmax = ScaledSoftmaxBuilder().load()
|
||||
|
||||
# Prefer the pre-compiled extension shipped as colossalai._C; if it was not
# built at install time, fall back to JIT-compiling the MoE kernels through
# the op builder (first use pays the compilation cost).
try:
    from colossalai._C import moe
except ImportError:
    from colossalai.kernel.op_builder import MOEBuilder

    moe = MOEBuilder().load()
|
||||
__all__ = [
|
||||
"fused_optim", "cpu_optim", "multihead_attention", "LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention",
|
||||
"scaled_upper_triang_masked_softmax"
|
||||
"fused_optim",
|
||||
"cpu_optim",
|
||||
"multihead_attention",
|
||||
"moe",
|
||||
"LayerNorm",
|
||||
"FusedScaleMaskSoftmax",
|
||||
"MultiHeadAttention",
|
||||
"scaled_upper_triang_masked_softmax",
|
||||
]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Re-export every op builder so callers can do
# `from colossalai.kernel.op_builder import MOEBuilder` etc.
from .cpu_adam import CPUAdamBuilder
from .fused_optim import FusedOptimBuilder
from .moe import MOEBuilder
from .multi_head_attn import MultiHeadAttnBuilder
from .scaled_upper_triang_masked_softmax import ScaledSoftmaxBuilder

# Keep __all__ in sync with the imports above; the pre-MoE list (without
# 'MOEBuilder') is superseded by this one.
__all__ = ['CPUAdamBuilder', 'FusedOptimBuilder', 'MultiHeadAttnBuilder', 'ScaledSoftmaxBuilder', 'MOEBuilder']
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
def get_cuda_cc_flag():
|
||||
def get_cuda_cc_flag() -> List:
|
||||
"""get_cuda_cc_flag
|
||||
|
||||
cc flag for your GPU arch
|
||||
|
||||
33
colossalai/kernel/op_builder/moe.py
Normal file
33
colossalai/kernel/op_builder/moe.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import os
|
||||
|
||||
from .builder import Builder, get_cuda_cc_flag
|
||||
|
||||
|
||||
class MOEBuilder(Builder):
    """JIT builder for the MoE (mixture-of-experts) CUDA extension.

    Compiles ``moe_cuda.cpp`` and ``moe_cuda_kernel.cu`` from the
    ``cuda_native/csrc`` source tree and loads the result as the ``moe``
    extension module.
    """

    def __init__(self):
        # Relative paths; Builder resolves them against the colossalai
        # source tree via colossalai_src_path().
        self.base_dir = "cuda_native/csrc"
        self.name = 'moe'
        super().__init__()

    def include_dirs(self):
        """Return absolute include directories for the extension build."""
        # Fix: dropped the dead `ret = []` that was immediately overwritten.
        ret = [
            os.path.join(self.base_dir, "includes"),
            self.get_cuda_home_include(),
            os.path.join(self.base_dir, "kernels", "include"),
        ]
        return [self.colossalai_src_path(path) for path in ret]

    def sources_files(self):
        """Return absolute paths of the C++/CUDA sources to compile."""
        ret = [os.path.join(self.base_dir, fname) for fname in ['moe_cuda.cpp', 'moe_cuda_kernel.cu']]
        return [self.colossalai_src_path(path) for path in ret]

    def cxx_flags(self):
        # Version guards mirror the other builders in this package.
        return ['-O3', '-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']

    def nvcc_flags(self):
        extra_cuda_flags = [
            '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '--expt-relaxed-constexpr',
            '--expt-extended-lambda'
        ]
        # Append -gencode flags for the compute capabilities detected at
        # build time (see get_cuda_cc_flag in builder.py).
        extra_cuda_flags.extend(get_cuda_cc_flag())
        return ['-O3', '--use_fast_math'] + extra_cuda_flags
|
||||
Reference in New Issue
Block a user