Mirror of https://github.com/hpcaitech/ColossalAI.git
[setup] support pre-build and jit-build of cuda kernels (#2374)
* [setup] support pre-build and jit-build of cuda kernels
* polish code
* polish code
* polish code
* polish code
* polish code
* polish code
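As the title says, each CUDA extension can now either be pre-built at install time or JIT-built on first use. The shape of the fallback is simple: try the compiled module under colossalai._C, and if it is missing, ask the corresponding op builder to compile and load it. The first hunk below removes these fallbacks from the kernel package's top-level imports, so that importing the package no longer forces a build, and the second hunk moves the JIT path to where the kernel is actually needed. A minimal sketch of the fallback idea, with a hypothetical load_kernel helper that is not part of ColossalAI:

```python
# Illustrative only: the pre-build / JIT-build fallback in one helper.
# `load_kernel` and its signature are assumptions, not ColossalAI API.
import importlib


def load_kernel(prebuilt_name: str, builder_cls):
    """Prefer a kernel compiled at install time; otherwise JIT-build it via its op builder."""
    try:
        # Pre-built path: the compiled extension already exists under colossalai._C.
        return importlib.import_module(f"colossalai._C.{prebuilt_name}")
    except ImportError:
        # JIT path: compile the kernel now and return the loaded module.
        return builder_cls().load()
```

With such a helper, fused_optim = load_kernel("fused_optim", FusedOptimBuilder) would behave like the try/except blocks in the hunk below.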
@@ -1,42 +1,7 @@
from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention

try:
    from colossalai._C import fused_optim
except:
    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
    fused_optim = FusedOptimBuilder().load()

try:
    from colossalai._C import cpu_optim
except ImportError:
    from colossalai.kernel.op_builder import CPUAdamBuilder
    cpu_optim = CPUAdamBuilder().load()

try:
    from colossalai._C import multihead_attention
except ImportError:
    from colossalai.kernel.op_builder import MultiHeadAttnBuilder
    multihead_attention = MultiHeadAttnBuilder().load()

try:
    from colossalai._C import scaled_upper_triang_masked_softmax
except ImportError:
    from colossalai.kernel.op_builder import ScaledSoftmaxBuilder
    scaled_upper_triang_masked_softmax = ScaledSoftmaxBuilder().load()

try:
    from colossalai._C import moe
except ImportError:
    from colossalai.kernel.op_builder import MOEBuilder
    moe = MOEBuilder().load()

__all__ = [
    "fused_optim",
    "cpu_optim",
    "multihead_attention",
    "moe",
    "LayerNorm",
    "FusedScaleMaskSoftmax",
    "MultiHeadAttention",
    "scaled_upper_triang_masked_softmax",
]
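The op builders used above and in the next hunk wrap PyTorch's JIT extension machinery. The real classes in colossalai.kernel.op_builder carry their own source lists, include paths, and compiler flags; the sketch below only shows the general shape on top of torch.utils.cpp_extension.load, with placeholder class and file names.

```python
# Rough sketch of what an op builder's load() can boil down to. The class name,
# source paths, and flags are placeholders, not the real op_builder code.
import os

from torch.utils.cpp_extension import load


class ExampleKernelBuilder:
    NAME = "example_kernel"

    def sources(self):
        # Hypothetical location of the C++/CUDA sources shipped with the package.
        csrc = os.path.join(os.path.dirname(__file__), "csrc")
        return [
            os.path.join(csrc, "example_kernel.cpp"),
            os.path.join(csrc, "example_kernel_cuda.cu"),
        ]

    def load(self, verbose=False):
        # torch.utils.cpp_extension.load compiles the sources into a Python
        # extension on first call and reuses the cached build afterwards.
        return load(
            name=self.NAME,
            sources=self.sources(),
            extra_cflags=["-O3"],
            extra_cuda_cflags=["-O3", "--use_fast_math"],
            verbose=verbose,
        )
```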
@@ -135,7 +135,8 @@ class MultiHeadAttention(nn.Module):
        # Load cuda modules if needed
        global colossal_multihead_attention
        if colossal_multihead_attention is None:
            from colossalai.kernel import multihead_attention
            from colossalai.kernel.op_builder import MultiHeadAttnBuilder
            multihead_attention = MultiHeadAttnBuilder().load()
            colossal_multihead_attention = multihead_attention

        # create the layer in cuda kernels.
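This hunk drops the import of multihead_attention from colossalai.kernel in favor of loading the kernel directly through MultiHeadAttnBuilder, cached in the module-level colossal_multihead_attention global so the JIT compilation runs at most once and only when a MultiHeadAttention layer is actually constructed. Reduced to its essentials, the lazy load-and-cache pattern looks like this (illustrative, not the actual multihead_attention.py code):

```python
# Lazy load-and-cache: build the kernel on first use, reuse it afterwards.
_multihead_attention_kernel = None


def _get_kernel():
    global _multihead_attention_kernel
    if _multihead_attention_kernel is None:
        # Imported lazily so that merely importing the package need not trigger a JIT build.
        from colossalai.kernel.op_builder import MultiHeadAttnBuilder
        _multihead_attention_kernel = MultiHeadAttnBuilder().load()
    return _multihead_attention_kernel
```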