mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-07 20:10:17 +00:00
[Inference] Adapt to Fused rotary (#5348)
* revise rotary embedding
* remove useless print
* adapt
* fix
* add
* fix
* modeling
* fix
* fix
* fix
This commit is contained in:
@@ -75,7 +75,6 @@ def copy_kv_to_blocked_cache(
|
||||
block_size = k_cache.size(-2)
|
||||
|
||||
num_warps = 8 if head_dim > 128 else 4
|
||||
|
||||
grid = (bsz, num_kv_heads)
|
||||
_copy_to_kvcache_seqlen1_kernel[grid](
|
||||
k,
|
||||
|
Reference in New Issue
Block a user