Mirror of https://github.com/hpcaitech/ColossalAI.git
fix bugs in attention.py and request_handler.py
Committed by FrankLeeeee
Parent: bfd9b1b494
Commit: 47e53eaa1c
@@ -214,9 +214,6 @@ class InferenceEngine:
            List[str]: Decoded finished sequences generated by one step.
        """

        if self.verbose:
            self.logger.info("Running generation step")

        output_list = []
        batch = self.request_handler.schedule()

@@ -224,6 +221,7 @@ class InferenceEngine:
            batch,
            self.k_cahce,
            self.v_cache,
            padding_id=self.tokenizer.pad_token_id,
        )

        logits = logits[:, -1, :]
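The hunk above ends by keeping only the final position of the logits: in causal decoding, that position alone predicts the next token for every sequence in the batch. A minimal, self-contained sketch of that slice, using toy shapes and plain torch rather than the engine's own code:

import torch

# Toy shapes, chosen only for illustration.
batch_size, seq_len, vocab_size = 2, 5, 32
logits = torch.randn(batch_size, seq_len, vocab_size)  # what a model forward pass returns

last_logits = logits[:, -1, :]                   # (batch_size, vocab_size), as in the diff
next_tokens = torch.argmax(last_logits, dim=-1)  # greedy pick of the next token per sequence
print(next_tokens.shape)                         # torch.Size([2])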
@@ -110,6 +110,10 @@ class RequestHandler:
            self.prefill_batch.init_batch(self.running_list.prefill)
            return self.prefill_batch

        if not self.running_batch.is_empty:
            for seq in self.running_batch.sequences_set:
                self.cache_manager.allocate_token_from_block_table(seq.block_table, seq.sentence_len)

        return self.running_batch

    def add_sequence(self, req: Sequence):
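The scheduling change above prioritizes prefill work and, once only decoding sequences remain, allocates one more cache slot per running sequence before returning the running batch. A simplified, self-contained sketch of that control flow; ToySequence, ToyCacheManager, and the standalone schedule function are illustrative stand-ins, not the real RequestHandler API:

from dataclasses import dataclass
from typing import List

@dataclass
class ToySequence:
    block_table: List[int]
    sentence_len: int

class ToyCacheManager:
    def allocate_token_from_block_table(self, block_table: List[int], sentence_len: int) -> None:
        # Stand-in for KV-cache block allocation: record one more occupied slot.
        block_table.append(sentence_len)

def schedule(prefill: List[ToySequence], running: List[ToySequence], cache: ToyCacheManager) -> List[ToySequence]:
    # Prefill requests take priority: if any are waiting, they form the next batch.
    if prefill:
        return prefill
    # Otherwise reserve one more cache slot for every running (decoding) sequence.
    for seq in running:
        cache.allocate_token_from_block_table(seq.block_table, seq.sentence_len)
    return running

# Usage: with no prefill work pending, each running sequence gets one more slot per step.
running = [ToySequence(block_table=[], sentence_len=4)]
print(schedule([], running, ToyCacheManager()))  # [ToySequence(block_table=[4], sentence_len=4)]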