mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-08 20:40:34 +00:00
[Inference] Optimize and Refactor Inference Batching/Scheduling (#5367)
* add kvcache manager funcs for batching
* add batch bucket for batching
* revise RunningList struct in handler
* add kvcache/batch funcs for compatibility
* use new batching methods
* fix indexing bugs
* revise abort logic
* use cpu seq lengths/block tables
* rm unused attr in Sequence
* fix type conversion/default arg
* add and revise pytests
* revise pytests, rm unused tests
* rm unused statements
* fix pop finished indexing issue
* fix: use index in batch when retrieving inputs/update seqs
* use dict instead of odict in batch struct
* arg type hinting
* fix make compress
* refine comments
* fix: pop_n_seqs to pop the first n seqs
* add check in request handler
* remove redundant conversion
* fix test for request handler
* fix pop method in batch bucket
* fix prefill adding
This commit is contained in:
@@ -148,6 +148,20 @@ def check_cache_manager(test_config):
|
||||
cache_manager.clear_all()
|
||||
assert cache_manager.num_available_blocks == num_blocks
|
||||
|
||||
for cache_block in cache_manager._cache_blocks:
|
||||
assert cache_block.available_space == block_size
|
||||
|
||||
# Mock batch operations (Prefill/Decoding updates)
|
||||
context_lengths = torch.tensor([max_input_length, max_input_length - 1])
|
||||
block_tables = torch.tensor(
|
||||
[[-1 for _ in range(cache_manager.max_blocks_per_sequence)] for _ in range(2)], dtype=torch.int32
|
||||
)
|
||||
cache_manager.allocate_context_from_block_tables(block_tables, context_lengths)
|
||||
cache_manager.allocate_tokens_from_block_tables(block_tables, context_lengths)
|
||||
cache_manager.free_block_tables(block_tables)
|
||||
for cache_block in cache_manager._cache_blocks:
|
||||
assert cache_block.available_space == block_size
|
||||
|
||||
|
||||
def run_dist(rank, world_size, port):
|
||||
colossalai.launch(config={}, rank=rank, world_size=world_size, port=port, host="localhost")
|
||||
|
Reference in New Issue
Block a user