mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-06 11:32:10 +00:00
[kernel] Add KV cache copy kernel during decoding (#5261)
* add kv copy triton kernel during decoding stage * add pytest and fix kernel * fix test utilities * revise kernel config * add benchmark for kvcache copy
This commit is contained in:
@@ -31,7 +31,7 @@ def copy_to_cache(source, cache, lengths, block_tables, type: str = "prefill"):
|
||||
1, 2, 0
|
||||
)
|
||||
elif type == "decoding":
|
||||
assert len(source[0]) == 1, "seq_len should be equal to 1 when decoding."
|
||||
assert source.size(1) == 1, "seq_len should be equal to 1 when decoding."
|
||||
source = source.squeeze(1)
|
||||
slot_idx = (lengths + block_size - 1) % block_size
|
||||
for i in range(bsz):
|
||||
@@ -314,4 +314,4 @@ class PagedAttention:
|
||||
):
|
||||
return self.pad_decoding_forward(
|
||||
q.unsqueeze(1), k.unsqueeze(1), v.unsqueeze(1), k_cache, v_cache, lengths, block_tables
|
||||
)
|
||||
)
|
||||
|
Reference in New Issue
Block a user