[Inference/Feat] Add kvcache quantization support for FlashDecoding (#5656)

Authored by 傅剑寒 on 2024-04-26 19:40:37 +08:00; committed by GitHub
parent 5be590b99e
commit 8ccb6714e7
5 changed files with 482 additions and 174 deletions


@@ -174,13 +174,13 @@ void context_kv_cache_memcpy(
     key.scalar_type(),
     "context_kv_cache_memcpy",
     apply_context_kv_cache_memcpy<scalar_t>(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        sequence_lengths,
-        cu_seqlens,
-        block_tables,
-        max_seq_len_in_batch
-    );)
+        key,
+        value,
+        key_cache,
+        value_cache,
+        sequence_lengths,
+        cu_seqlens,
+        block_tables,
+        max_seq_len_in_batch
+    );)
 }
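
Note on the pattern above: the call is the last argument of an AT_DISPATCH-style macro, which is why the hunk ends in the unusual ");)" sequence — ");" closes the spliced-in call and ")" closes the macro invocation. Below is a minimal, self-contained C++ sketch of that dispatch pattern, not the PR's actual code: the macro name DISPATCH_FLOAT_HALF_AND_BFLOAT, the second CacheT template parameter, and the uint8_t quantized-cache type are all assumptions for illustration, since the hunk only shows the call site.

// Sketch only -- hypothetical stand-ins, not ColossalAI's real macros/types.
#include <cstdint>
#include <stdexcept>

enum class ScalarType { Float, Half, BFloat16 };

struct half_t   { uint16_t bits; };  // stand-in for at::Half
struct bfloat16 { uint16_t bits; };  // stand-in for at::BFloat16

// AT_DISPATCH-style macro: switch on the runtime dtype, bind the alias
// scalar_t in each branch, then splice in the caller-supplied expression.
#define DISPATCH_FLOAT_HALF_AND_BFLOAT(DTYPE, NAME, ...)                         \
  switch (DTYPE) {                                                               \
    case ScalarType::Float:    { using scalar_t = float;    __VA_ARGS__ break; } \
    case ScalarType::Half:     { using scalar_t = half_t;   __VA_ARGS__ break; } \
    case ScalarType::BFloat16: { using scalar_t = bfloat16; __VA_ARGS__ break; } \
    default: throw std::runtime_error(NAME ": unsupported dtype");               \
  }

// With kv-cache quantization, the in-flight dtype (T) and the cache dtype
// (CacheT) can differ -- e.g. half keys/values written into a byte-sized
// quantized cache -- so the wrapper plausibly takes two type parameters.
// The actual CUDA kernel launch is elided.
template <typename T, typename CacheT>
void apply_context_kv_cache_memcpy(/* key, value, caches, ... elided */) {}

void context_kv_cache_memcpy(ScalarType key_dtype, bool quantized_cache) {
  if (quantized_cache) {
    DISPATCH_FLOAT_HALF_AND_BFLOAT(
        key_dtype,
        "context_kv_cache_memcpy",
        apply_context_kv_cache_memcpy<scalar_t, uint8_t>(
            /* arguments elided */
        );)
  } else {
    DISPATCH_FLOAT_HALF_AND_BFLOAT(
        key_dtype,
        "context_kv_cache_memcpy",
        apply_context_kv_cache_memcpy<scalar_t, scalar_t>(
            /* arguments elided */
        );)
  }
}

The design point the sketch illustrates: because the macro re-expands its last argument once per dtype branch with a fresh scalar_t alias, a single call site can compile a separate kernel instantiation for each supported input type, and separating the cache element type from the input type is what lets a quantized (smaller) kv-cache coexist with full-precision attention inputs.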