Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-11 22:10:37 +00:00)
[Inference/Feat] Add kvcache quantization support for FlashDecoding (#5656)
@@ -174,13 +174,13 @@ void context_kv_cache_memcpy(
      key.scalar_type(),
      "context_kv_cache_memcpy",
      apply_context_kv_cache_memcpy<scalar_t>(
          key,
          value,
          key_cache,
          value_cache,
          sequence_lengths,
          cu_seqlens,
          block_tables,
          max_seq_len_in_batch
      );)
 }
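The hunk above shows the arguments handed to a dtype-dispatch wrapper (its macro name falls outside the extracted lines): key.scalar_type() selects the concrete C++ type, and apply_context_kv_cache_memcpy<scalar_t> launches the templated copy kernel. Below is a minimal sketch of how such a dispatch could be extended for a quantized KV cache, with the storage dtype of key_cache decoupled from the activation dtype. The ATen dispatch macro is real, but the second template parameter, the uint8 cache path, and the stub launcher are assumptions made for illustration; they are not the code introduced by PR #5656.

// Sketch under stated assumptions: a two-level dtype dispatch so the KV cache
// may be stored in a quantized 8-bit format while activations stay fp32/fp16/bf16.
// The cache_t template parameter and the Byte-typed cache check are hypothetical.
#include <torch/extension.h>

// Stub standing in for the real CUDA launcher; the actual kernel copies
// (and, on the quantized path, de/quantizes) key/value into the paged
// key_cache/value_cache blocks addressed through block_tables.
template <typename scalar_t, typename cache_t>
void apply_context_kv_cache_memcpy(at::Tensor& key, at::Tensor& value,
                                   at::Tensor& key_cache, at::Tensor& value_cache,
                                   at::Tensor& sequence_lengths,
                                   at::Tensor& cu_seqlens, at::Tensor& block_tables,
                                   int max_seq_len_in_batch) {
  TORCH_CHECK(key.sizes() == value.sizes(), "key/value shape mismatch");
}

void context_kv_cache_memcpy_sketch(at::Tensor& key, at::Tensor& value,
                                    at::Tensor& key_cache, at::Tensor& value_cache,
                                    at::Tensor& sequence_lengths,
                                    at::Tensor& cu_seqlens, at::Tensor& block_tables,
                                    int max_seq_len_in_batch) {
  // A Byte-typed cache tensor is taken here to mean quantized (8-bit) KV storage.
  const bool quantized_cache =
      key_cache.scalar_type() == at::ScalarType::Byte;

  AT_DISPATCH_FLOATING_TYPES_AND2(
      at::ScalarType::Half, at::ScalarType::BFloat16, key.scalar_type(),
      "context_kv_cache_memcpy", [&] {
        if (quantized_cache) {
          // Quantized path: cache elements stored as uint8_t.
          apply_context_kv_cache_memcpy<scalar_t, uint8_t>(
              key, value, key_cache, value_cache, sequence_lengths,
              cu_seqlens, block_tables, max_seq_len_in_batch);
        } else {
          // Unquantized path: cache keeps the activation dtype.
          apply_context_kv_cache_memcpy<scalar_t, scalar_t>(
              key, value, key_cache, value_cache, sequence_lengths,
              cu_seqlens, block_tables, max_seq_len_in_batch);
        }
      });
}

The point of dispatching on both the activation dtype and the cache dtype is that a single launcher template covers every combination, so supporting a new cache format means adding one branch rather than duplicating the copy logic for each activation type.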