mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-11 22:10:37 +00:00
[Inference/Feat] Feat quant kvcache step2 (#5674)
This commit is contained in:
@@ -372,7 +372,7 @@ void flash_decoding_attention(
|
||||
|
||||
TORCH_CHECK(query.scalar_type() == at::ScalarType::Float || query.scalar_type() == at::ScalarType::Half || query.scalar_type() == at::ScalarType::BFloat16,
|
||||
"Dtype of query should be float, half or bfloat16!");
|
||||
TORCH_CHECK(key_cache.scalar_type() == at::ScalarType::Byte || key_cache.scalar_type() == key_cache.scalar_type(),
|
||||
TORCH_CHECK(key_cache.scalar_type() == at::ScalarType::Byte || key_cache.scalar_type() == query.scalar_type(),
|
||||
"Dtype of query and kvcache should be the same unless dtype of kvcache is fp8!");
|
||||
|
||||
if(key_cache.scalar_type() == at::ScalarType::Byte)
|
||||
|
Reference in New Issue
Block a user