[Fix] Fix spec-dec Glide LlamaModel for compatibility with transformers (#5837)

* fix glide llama model

* revise
This commit is contained in:
Yuanheng Zhao
2024-06-19 15:37:53 +08:00
committed by GitHub
parent fd1dc417d8
commit 7b249c76e5
4 changed files with 7 additions and 1 deletions

View File

@@ -466,6 +466,7 @@ class InferenceEngine:
self.k_cache[-1], # use kv caches of the last layer
self.v_cache[-1],
batch.get_sequence_lengths(),
n_spec_tokens=self.n_spec_tokens,
)
drafter_out = self.drafter.speculate(