Fix bugs in attention.py and request_handler.py

This commit is contained in:
yuehuayingxueluo
2024-01-08 12:35:06 +08:00
committed by FrankLeeeee
parent bfd9b1b494
commit 47e53eaa1c
6 changed files with 208 additions and 60 deletions

View File

@@ -214,9 +214,6 @@ class InferenceEngine:
List[str]: Decoded finished sequences generated by one step.
"""
if self.verbose:
self.logger.info("Running generation step")
output_list = []
batch = self.request_handler.schedule()
@@ -224,6 +221,7 @@ class InferenceEngine:
batch,
self.k_cahce,
self.v_cache,
padding_id=self.tokenizer.pad_token_id,
)
logits = logits[:, -1, :]