Mirror of https://github.com/hpcaitech/ColossalAI.git
[kernel] Add triton kernel for context attention (FAv2) without padding (#5192)
* add context attn unpadded triton kernel
* test compatibility
* kv cache copy (testing)
* fix k/v cache copy
* fix kv cache copy and test
* fix boundary of block ptrs
* add support for GQA/MQA and testing
* fix import statement

Co-authored-by: Round Heng <yuanhengzhao@Rounds-MacBook-Pro.local>
Committed by: FrankLeeeee
Parent: 4df8876fca
Commit: 07b5283b6a
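For context, below is a minimal PyTorch reference of what an unpadded (FlashAttention-v2-style) context attention kernel computes: variable-length prompts are packed along a single token dimension and delimited by cumulative sequence offsets, and GQA/MQA is handled by letting several query heads share one k/v head. The function and argument names (context_attention_reference, cu_seqlens) are illustrative assumptions borrowed from FlashAttention conventions, not the signature of context_attention_unpadded itself, which is defined in context_attn_unpad.py.

import torch

def context_attention_reference(q, k, v, cu_seqlens):
    """Causal attention over packed variable-length sequences (reference sketch).

    q: [total_tokens, num_heads, head_dim]
    k, v: [total_tokens, num_kv_heads, head_dim] (num_kv_heads divides num_heads for GQA/MQA)
    cu_seqlens: [batch + 1] cumulative token offsets, e.g. tensor([0, 3, 8]) for lengths 3 and 5
    """
    num_heads, head_dim = q.shape[1], q.shape[2]
    group = num_heads // k.shape[1]  # query heads served by each k/v head
    out = torch.empty_like(q)
    for i in range(cu_seqlens.numel() - 1):
        s, e = cu_seqlens[i].item(), cu_seqlens[i + 1].item()
        qi = q[s:e].transpose(0, 1)  # [num_heads, seq_len, head_dim]
        # Expand shared k/v heads so each query head has a matching k/v head
        ki = k[s:e].repeat_interleave(group, dim=1).transpose(0, 1)
        vi = v[s:e].repeat_interleave(group, dim=1).transpose(0, 1)
        scores = qi @ ki.transpose(-1, -2) / head_dim**0.5
        # Causal mask: each token attends only to itself and earlier tokens
        causal = torch.triu(torch.ones(e - s, e - s, dtype=torch.bool, device=q.device), diagonal=1)
        scores = scores.masked_fill(causal, float("-inf"))
        out[s:e] = (scores.softmax(dim=-1) @ vi).transpose(0, 1)
    return out

Because sequences are packed rather than padded to a common length, no compute is wasted on padding tokens; the Triton kernel fuses this per-sequence loop into one launch.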
colossalai/kernel/triton/__init__.py
@@ -8,11 +8,13 @@ except ImportError:
 
 # There may exist import error even if we have triton installed.
 if HAS_TRITON:
+    from .context_attn_unpad import context_attention_unpadded
     from .fused_layernorm import layer_norm
     from .gptq_triton import gptq_fused_linear_triton
     from .softmax import softmax
 
     __all__ = [
+        "context_attention_unpadded",
         "softmax",
         "layer_norm",
         "gptq_fused_linear_triton",
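The hunk above extends the package's optional-import guard: the new kernel is only imported and exported when Triton imported cleanly. A hedged sketch of how a caller might consume that guard follows; the import path assumes the __init__.py shown above lives at colossalai/kernel/triton, and the fallback branch is a hypothetical placeholder, not code from this commit.

try:
    from colossalai.kernel.triton import context_attention_unpadded
    HAS_TRITON_ATTN = True
except ImportError:
    # Either triton is not installed or the kernel failed to import
    HAS_TRITON_ATTN = False

def run_context_attention(q, k, v, *args, **kwargs):
    if HAS_TRITON_ATTN:
        return context_attention_unpadded(q, k, v, *args, **kwargs)
    raise NotImplementedError("Triton not installed; provide a PyTorch fallback here.")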
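The commit message also mentions a k/v cache copy with block pointers. As a rough reference for what such a copy does, the sketch below scatters packed K/V into a block-paged cache addressed by a block table; all shapes and names here are illustrative assumptions, since the commit's actual kernel and cache layout are defined elsewhere in the PR.

import torch

def copy_kv_to_paged_cache_reference(k, v, k_cache, v_cache, cu_seqlens, block_tables, block_size):
    """Scatter packed K/V into a block-paged cache (reference sketch, assumed layout).

    k, v: [total_tokens, num_kv_heads, head_dim] packed prompt keys/values
    k_cache, v_cache: [num_blocks, block_size, num_kv_heads, head_dim]
    block_tables: [batch, max_blocks_per_seq] physical block id per logical block
    """
    for i in range(cu_seqlens.numel() - 1):
        s, e = cu_seqlens[i].item(), cu_seqlens[i + 1].item()
        for tok in range(e - s):
            blk = block_tables[i, tok // block_size].item()  # logical -> physical block
            off = tok % block_size                           # slot within the block
            k_cache[blk, off] = k[s + tok]
            v_cache[blk, off] = v[s + tok]

The "fix boundary of block ptrs" bullet suggests the Triton version of this copy had to clamp its block pointers at sequence boundaries, i.e. the partial last block of each sequence.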