[Inference] Kernel: no pad rotary embedding (#5252)

* fix bugs

* comment

* use more accurate atol

* fix

This commit is contained in:
Jianghai
2024-01-11 16:24:54 +08:00
committed by FrankLeeeee
parent d40eb26029
commit fded91d049
3 changed files with 207 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ if HAS_TRITON:
from .context_attn_unpad import context_attention_unpadded
from .fused_layernorm import layer_norm
from .gptq_triton import gptq_fused_linear_triton
from .no_pad_rotary_embedding import rotary_embedding
from .softmax import softmax
__all__ = [
@@ -18,4 +19,5 @@ if HAS_TRITON:
"softmax",
"layer_norm",
"gptq_fused_linear_triton",
"rotary_embedding",
]