mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-06 11:32:10 +00:00
[Inference] Add readme (roadmap) and fulfill request handler (#5147)
* request handler * add readme --------- Co-authored-by: CjhHa1 <cjh18671720497outlook.com>
This commit is contained in:
@@ -1,10 +1,48 @@
|
||||
from typing import List
|
||||
|
||||
|
||||
class RequestHandler:
    """
    RequestHandler is the core for handling existing requests and updating the current batch.
    During the generation process, the schedule function is called each iteration to update
    the current batch.

    Args:
        cache_config: Configuration used to initialize and manage the kv cache.
    """

    def __init__(self, cache_config) -> None:
        self.cache_config = cache_config
        self._init_cache()
        # Requests that have been added but not yet moved to the running list.
        self.waiting_list: List["Reqseq"] = []
        # Nothing in this class populates the running list yet.
        self.running_list: List["Reqseq"] = []

    def _init_cache(self):
        """
        Initialize the cache manager with cache config.

        Placeholder: currently does nothing.
        """

    def schedule(self):
        """
        The main logic of request handler.

        Placeholder: currently does nothing and returns None.
        """

    def add_sequence(self, reqseq: "Reqseq"):
        """
        Add the request to waiting list.

        Args:
            reqseq: The request to append to the end of the waiting list.
        """
        self.waiting_list.append(reqseq)

    def abort_sequence(self, seq_id: str):
        """
        Abort the request. #TODO :implement this

        Currently only calls ``_find_sequence`` and discards the result; no
        list is modified.

        Args:
            seq_id: Identifier of the request to abort.
        """
        self._find_sequence(seq_id)
        return

    def _find_sequence(self, seq_id: str) -> "Reqseq":
        """
        Find the request by seq_id.

        Placeholder: the lookup is not implemented yet, so this currently
        returns None regardless of ``seq_id``.
        """

    def check_unfinished_seqs(self) -> bool:
        """
        Report whether any request is still waiting or running.

        Returns:
            True if the waiting list or the running list is non-empty,
            False otherwise.
        """
        # `a or b` yields one of the operands (a list); coerce it so the
        # return value matches the annotated bool.
        return bool(self.waiting_list or self.running_list)
|
||||
|
Reference in New Issue
Block a user