[Inference] Add readme (roadmap) and fulfill request handler (#5147)

* request handler
* add readme

---------

Co-authored-by: CjhHa1 <cjh18671720497outlook.com>
2025-09-06 11:32:10 +00:00 · 2023-12-01 17:31:31 +08:00
parent 4cf4682e70
commit 56e75eeb06
3 changed files with 67 additions and 3 deletions
--- a/colossalai/inference/core/request_handler.py
+++ b/colossalai/inference/core/request_handler.py
@@ -1,10 +1,48 @@
+from typing import List
+
+
 class RequestHandler:
+    """
+    RequestHandler is the core component for handling existing requests and updating the current batch.
+    During the generation process, the schedule function is called on each iteration to update the current batch.
+
+    Args:
+        cache_config: Configuration for initializing and managing the KV cache.
+    """
+
    def __init__(self, cache_config) -> None:
        self.cache_config = cache_config
        self._init_cache()
+        self.waiting_list: List["Reqseq"] = []
+        self.running_list: List["Reqseq"] = []

    def _init_cache(self):
-        pass
+        """
+        Initialize the cache manager with cache config.
+        """

-    def schedule(self, request):
-        pass
+    def schedule(self):
+        """
+        The main logic of request handler.
+        """
+
+    def add_sequence(self, reqseq: "Reqseq"):
+        """
+        Add the request to the waiting list.
+        """
+        self.waiting_list.append(reqseq)
+
+    def abort_sequence(self, seq_id: str):
+        """
+        Abort the request. TODO: implement this.
+        """
+        self._find_sequence(seq_id)
+        return
+
+    def _find_sequence(self, seq_id: str) -> "Reqseq":
+        """
+        Find the request by seq_id.
+        """
+
+    def check_unfinished_seqs(self) -> bool:
+        return bool(self.waiting_list or self.running_list)  # coerce: a bare `or` returns one of the lists, not the annotated bool