[Inference/SpecDec] Support GLIDE Drafter Model (#5455)

* add glide-llama policy and modeling * update glide modeling, compatible with transformers 4.36.2 * revise glide llama modeling/usage * fix issues of glimpsing large kv * revise the way params are re-loaded for glide drafter * fix drafter and engine tests * enable convert to glide with strict=False * revise glide llama modeling * revise vicuna prompt template * revise drafter and tests * apply usage of glide model in engine
2025-09-06 03:20:52 +00:00 · 2024-04-01 21:54:24 +08:00
parent 912e24b2aa
commit d85d91435a
10 changed files with 722 additions and 82 deletions
--- a/colossalai/inference/spec/struct.py
+++ b/colossalai/inference/spec/struct.py
@@ -27,3 +27,29 @@ class DrafterOutput:
        if self.past_key_values is not None:
            assert isinstance(self.past_key_values, tuple), "Past key values should be a tuple"
            assert all([isinstance(past_key_value, tuple) for past_key_value in self.past_key_values])
+
+
+@dataclass
+class GlideInput:
+    """Dataclass for Glide Models (e.g. `colossalai/inference/modeling/models/glide_llama.py`).
+    Packs the data that is used when glimpsing the KV caches of the main model; every field defaults to None.
+
+    Args:
+        block_tables (torch.Tensor): [num_seqs, max_blocks_per_seq] The block table of the KV caches.
+        large_k_cache (torch.Tensor): [num_blocks, num_kv_heads, block_size, head_size]
+            Blocked key cache of the main model.
+        large_v_cache (torch.Tensor): Blocked value cache of the main model. It has the same shape as the k cache.
+        sequence_lengths (torch.Tensor): [num_seqs] Sequence lengths of the current batch.
+    """
+
+    block_tables: torch.Tensor = None
+    large_k_cache: torch.Tensor = None
+    large_v_cache: torch.Tensor = None
+    sequence_lengths: torch.Tensor = None
+
+    @property
+    def glimpse_ready(self):  # True only when none of the four fields is still None
+        return all(
+            attr is not None
+            for attr in [self.block_tables, self.large_k_cache, self.large_v_cache, self.sequence_lengths]
+        )