[npu] use extension for op builder (#5172)

* update extension

* update cpu adam

* update is

* add doc for cpu adam

* update kernel

* update commit

* update flash

* update memory efficient

* update flash attn

* update flash attention loader

* update api

* fix

* update doc

* update example time limit

* reverse change

* fix doc

* remove useless kernel

* fix

* not use warning

* update

* update
Author: Xuanlei Zhao
Date: 2024-01-08 11:39:16 +08:00
Committed by: GitHub
Parent: d6df19bae7
Commit: dd2c28a323
35 changed files with 1067 additions and 274 deletions


@@ -14,7 +14,7 @@ from colossalai.shardformer.modeling.chatglm2_6b.modeling_chatglm import ChatGLM
 def get_flash_core_attention_forward():
-    from colossalai.kernel.cuda_native import AttnMaskType, ColoAttention
+    from colossalai.kernel import AttnMaskType, ColoAttention
     from .chatglm2_6b.modeling_chatglm import CoreAttention
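
For orientation, a minimal sketch of the import migration this hunk performs for downstream code; only the relocated import path is taken from the diff, and the constructor/forward signatures of ColoAttention are deliberately not shown because the hunk does not confirm them:

# Old path (pre-PR), served from the cuda_native namespace:
# from colossalai.kernel.cuda_native import AttnMaskType, ColoAttention

# New path (post-PR), exposed via the extension-based op builder:
from colossalai.kernel import AttnMaskType, ColoAttention

# The class and enum names are unchanged; callers only need to update
# the import location.
print(AttnMaskType, ColoAttention)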