[Inference/Feat] Add kvcache quantization support for FlashDecoding (#5656)

This commit is contained in:
傅剑寒
2024-04-26 19:40:37 +08:00
committed by GitHub
parent 5be590b99e
commit 8ccb6714e7
5 changed files with 482 additions and 174 deletions

View File

@@ -15,21 +15,6 @@
namespace colossalAI {
namespace funcs {
// Zero-fills `dst` by overwriting its storage one 32-bit word at a time.
// Assumes sizeof(T) is a nonzero multiple of 4 — TODO confirm callers only
// instantiate this with word-aligned vector types (WORDS would be 0 for
// smaller T and the loop would never write anything).
template <typename T>
inline __device__ void zero(T& dst) {
  constexpr int WORDS = sizeof(T) / 4;
  // The union views T's object representation as an array of 32-bit words,
  // so the compiler can emit plain word stores instead of per-byte writes.
  union {
    T raw;
    uint32_t words[WORDS];
  } tmp;
#pragma unroll
  for (int ii = 0; ii < WORDS; ii++) {
    tmp.words[ii] = 0u;
  }
  // Copy the all-zero representation back into the caller's object.
  dst = tmp.raw;
}
// Note(LiuYang): Serves as a lookup table of which unary operations are
// already supported.
enum class UnaryOpType { kLog2Ceil = 0, kAbs, kSum };