Revert "[Inference] Adapt to Fused rotary (#5348)" (#5373)

This reverts commit 9f4ab2eb92.
This commit is contained in:
Frank Lee
2024-02-07 14:27:04 +08:00
committed by GitHub
parent 9f4ab2eb92
commit 8106ede07f
5 changed files with 22 additions and 161 deletions

View File

@@ -75,6 +75,7 @@ def copy_kv_to_blocked_cache(
block_size = k_cache.size(-2)
num_warps = 8 if head_dim > 128 else 4
grid = (bsz, num_kv_heads)
_copy_to_kvcache_seqlen1_kernel[grid](
k,