[setup] support pre-build and jit-build of cuda kernels (#2374)

* [setup] support pre-build and jit-build of cuda kernels

* polish code

Author: Frank Lee
Date: 2023-01-06 20:50:26 +08:00
Committed by: GitHub
Parent: 12c8bf38d7
Commit: 40d376c566
36 changed files with 414 additions and 390 deletions


@@ -1,42 +1,7 @@
from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention

try:
    from colossalai._C import fused_optim
except ImportError:
    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
    fused_optim = FusedOptimBuilder().load()

try:
    from colossalai._C import cpu_optim
except ImportError:
    from colossalai.kernel.op_builder import CPUAdamBuilder
    cpu_optim = CPUAdamBuilder().load()

try:
    from colossalai._C import multihead_attention
except ImportError:
    from colossalai.kernel.op_builder import MultiHeadAttnBuilder
    multihead_attention = MultiHeadAttnBuilder().load()

try:
    from colossalai._C import scaled_upper_triang_masked_softmax
except ImportError:
    from colossalai.kernel.op_builder import ScaledSoftmaxBuilder
    scaled_upper_triang_masked_softmax = ScaledSoftmaxBuilder().load()

try:
    from colossalai._C import moe
except ImportError:
    from colossalai.kernel.op_builder import MOEBuilder
    moe = MOEBuilder().load()

__all__ = [
    "fused_optim",
    "cpu_optim",
    "multihead_attention",
    "moe",
    "LayerNorm",
    "FusedScaleMaskSoftmax",
    "MultiHeadAttention",
    "scaled_upper_triang_masked_softmax",
]
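
The hunk above removes these try/except fallbacks from the kernel package's __init__ module. The underlying pattern is: import the pre-built extension from colossalai._C when the wheel shipped with compiled kernels, otherwise JIT-build the kernel through its op builder. Below is a minimal sketch of that pattern, assuming only an op-builder object exposing a .load() method as shown in the diff and that the compiled kernels are importable under colossalai._C; the helper name load_or_build is illustrative, not part of the library.

import importlib


def load_or_build(ext_name: str, builder_factory):
    """Return the pre-built kernel module if present, else compile it just-in-time."""
    try:
        # Pre-build path: the extension was compiled at install/setup time.
        return importlib.import_module(f"colossalai._C.{ext_name}")
    except ImportError:
        # JIT-build path: compile the CUDA kernel on first use via the op builder.
        return builder_factory().load()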


@@ -135,7 +135,8 @@ class MultiHeadAttention(nn.Module):
        # Load cuda modules if needed
        global colossal_multihead_attention
        if colossal_multihead_attention is None:
            from colossalai.kernel import multihead_attention
            from colossalai.kernel.op_builder import MultiHeadAttnBuilder
            multihead_attention = MultiHeadAttnBuilder().load()
            colossal_multihead_attention = multihead_attention
        # create the layer in cuda kernels.
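
This hunk swaps MultiHeadAttention's eager import from colossalai.kernel for a lazy load through the op builder: the kernel is compiled (or loaded) only when the first layer is constructed, and the result is cached in a module-level global. A small standalone sketch of that lazy-loading idiom, assuming the MultiHeadAttnBuilder().load() call shown in the diff; the function and variable names below are illustrative, not the library's API.

_multihead_attention_kernel = None  # module-level cache, mirrors colossal_multihead_attention


def get_multihead_attention_kernel():
    """JIT-build the fused attention kernel on first use and reuse it afterwards."""
    global _multihead_attention_kernel
    if _multihead_attention_kernel is None:
        # Deferred import keeps kernel compilation off the module-import path.
        from colossalai.kernel.op_builder import MultiHeadAttnBuilder
        _multihead_attention_kernel = MultiHeadAttnBuilder().load()
    return _multihead_attention_kernel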