[Inference/Feat] Add kvcache quantization support for FlashDecoding (#5656)

This commit is contained in:
傅剑寒
2024-04-26 19:40:37 +08:00
committed by GitHub
parent 5be590b99e
commit 8ccb6714e7
5 changed files with 482 additions and 174 deletions

View File

@@ -15,21 +15,6 @@
namespace colossalAI {
namespace funcs {
// Zero-fills `dst` by overwriting its storage one 32-bit word at a time.
// Assumes sizeof(T) is a nonzero multiple of 4 — TODO confirm callers only
// instantiate this with word-aligned vector types (WORDS would be 0 for
// smaller T and the loop would never write anything).
template <typename T>
inline __device__ void zero(T& dst) {
  constexpr int WORDS = sizeof(T) / 4;
  // The union views T's object representation as an array of 32-bit words,
  // so the compiler can emit plain word stores instead of per-byte writes.
  union {
    T raw;
    uint32_t words[WORDS];
  } tmp;
#pragma unroll
  for (int ii = 0; ii < WORDS; ii++) {
    tmp.words[ii] = 0u;
  }
  // Copy the all-zero representation back into the caller's object.
  dst = tmp.raw;
}
// Note(LiuYang): Serves as a lookup table of which unary operations are
// already supported.
enum class UnaryOpType { kLog2Ceil = 0, kAbs, kSum };