[npu] use extension for op builder (#5172)

* update extension

* update cpu adam

* update is

* add doc for cpu adam

* update kernel

* update commit

* update flash

* update memory efficient

* update flash attn

* update flash attention loader

* update api

* fix

* update doc

* update example time limit

* revert change

* fix doc

* remove useless kernel

* fix

* not use warning

* update

* update
Author: Xuanlei Zhao
Date:   2024-01-08 11:39:16 +08:00
Committed by: GitHub
Parent: d6df19bae7
Commit: dd2c28a323

35 changed files with 1067 additions and 274 deletions


@@ -6,7 +6,8 @@ import torch.distributed as dist
 from torch import nn
 from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
 from torch.distributed import ProcessGroup, get_world_size
-from colossalai.utils.device import get_current_device, get_rng_state, set_rng_state, manual_seed
+from colossalai.utils.device import get_current_device, get_rng_state, manual_seed, set_rng_state
@@ -280,21 +281,3 @@ def create_randomizer_with_offset(
     Randomizer.increment_index()

     return Randomizer(seed=base_seed)
-
-
-def get_attention_kernel():
-    """
-    Get the attention kernel based on the device type.
-    """
-    from colossalai.kernel.cuda_native import AttnMaskType
-
-    if torch.cuda.is_available():
-        from colossalai.kernel.cuda_native import ColoAttention as AttentionKernel
-    else:
-        try:
-            torch.npu.is_available()
-            from colossalai.kernel.npu import NPUColoAttention as AttentionKernel
-        except:
-            raise Exception("No available device for attention kernel!")
-
-    return AttnMaskType, AttentionKernel
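
For reference, below is a minimal sketch of the device-dispatch pattern the removed `get_attention_kernel` helper implemented, with the NPU availability check and error handling tightened (explicit `hasattr` guard and no bare `except`). It reuses the module paths and class names shown in the diff; the `_npu_available` helper and the error message are illustrative, and this is not the extension-based loader that replaces the helper in this commit.

```python
import torch


def _npu_available() -> bool:
    # torch.npu is registered by the torch_npu plugin; guard the attribute
    # access so the check degrades gracefully on builds without NPU support.
    return hasattr(torch, "npu") and torch.npu.is_available()


def get_attention_kernel():
    """Select an attention kernel for the current device.

    Returns a (mask-type enum, kernel class) pair, mirroring the helper
    removed in this commit.
    """
    from colossalai.kernel.cuda_native import AttnMaskType

    if torch.cuda.is_available():
        from colossalai.kernel.cuda_native import ColoAttention as AttentionKernel
    elif _npu_available():
        from colossalai.kernel.npu import NPUColoAttention as AttentionKernel
    else:
        raise RuntimeError("No CUDA or NPU device available for the attention kernel!")

    return AttnMaskType, AttentionKernel
```

Keeping the kernel imports inside the function defers loading device-specific modules until a kernel is actually requested, which is the same lazy-import behavior the removed helper relied on.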