[Inference/Feat] Add kvcache quant support for fused_rotary_embedding_cache_copy (#5680)

This commit is contained in:
傅剑寒
2024-04-30 18:33:53 +08:00
committed by GitHub
parent 5cd75ce4c7
commit ef8e4ffe31
7 changed files with 226 additions and 125 deletions

View File

@@ -94,6 +94,10 @@ COLOSSAL_CAST_FUNCTOR_SPECIALIZATION(float, __nv_bfloat16, DEVICE,
STMTS_WRAPPER({
return __float2bfloat16_rn(val);
}))
// Cast specialization for the (__nv_bfloat16 -> float) type pair: the supplied
// body returns __bfloat162float(val), CUDA's bfloat16-to-float conversion
// intrinsic. It is the reverse direction of the (float -> __nv_bfloat16)
// specialization, which returns __float2bfloat16_rn(val).
// NOTE(review): `val` and the DEVICE argument are interpreted by the
// COLOSSAL_CAST_FUNCTOR_SPECIALIZATION macro, whose definition is not visible
// here. Presumably `val` names the input operand and DEVICE selects a
// device-side function qualifier; confirm against the macro definition.
COLOSSAL_CAST_FUNCTOR_SPECIALIZATION(__nv_bfloat16, float, DEVICE,
STMTS_WRAPPER({
return __bfloat162float(val);
}))
COLOSSAL_CAST_FUNCTOR_SPECIALIZATION(float4, dtype::bfloat164, DEVICE,
STMTS_WRAPPER({
dtype::bfloat164 dst;