[Inference] Kernel: no pad rotary embedding (#5252)

* fix bugs
* comment
* use more accurate atol
* fix
2025-09-06 19:40:28 +00:00 · 2024-01-11 16:24:54 +08:00
parent d40eb26029
commit fded91d049
3 changed files with 207 additions and 0 deletions
--- a/colossalai/kernel/triton/__init__.py
+++ b/colossalai/kernel/triton/__init__.py
@@ -11,6 +11,7 @@ if HAS_TRITON:
    from .context_attn_unpad import context_attention_unpadded
    from .fused_layernorm import layer_norm
    from .gptq_triton import gptq_fused_linear_triton
+    from .no_pad_rotary_embedding import rotary_embedding
    from .softmax import softmax

    __all__ = [
@@ -18,4 +19,5 @@ if HAS_TRITON:
        "softmax",
        "layer_norm",
        "gptq_fused_linear_triton",
+        "rotary_embedding",
    ]