[Inference] ADD async and sync Api server using FastAPI (#5396)

* add api server

* fix

* add

* add completion service and fix bug

* add generation config

* revise shardformer

* fix bugs

* add docstrings and fix some bugs

* fix bugs and add choices for prompt template
This commit is contained in:
Jianghai
2024-03-01 14:47:36 +08:00
committed by CjhHa1
parent d482922035
commit 69cd7e069d
13 changed files with 789 additions and 25 deletions

View File

@@ -1,10 +1,10 @@
"""
Our config contains various options for inference optimization; it is a unified API that wraps all the configurations for inference.
"""
import dataclasses
import logging
from dataclasses import dataclass
from typing import Optional, Union
from typing import Any, Dict, Optional, Union
import torch
import torch.distributed as dist
@@ -214,3 +214,18 @@ class InferenceConfig:
meta_config[type] = getattr(model_config, type)
return GenerationConfig.from_dict(meta_config)
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> "InferenceConfig":
    """Create an InferenceConfig from a plain dictionary.

    Args:
        config_dict: Mapping of field names to values. Keys that do not
            correspond to a declared dataclass field are ignored, so callers
            may pass a superset (e.g. a full CLI/arg namespace dump).

    Returns:
        InferenceConfig: A new config populated from ``config_dict``; fields
        absent from the dict fall back to their declared defaults.
    """
    # Keep only keys that match declared dataclass fields.
    field_names = {field.name for field in dataclasses.fields(cls)}
    inference_config_args = {key: value for key, value in config_dict.items() if key in field_names}
    # Let the dataclass constructor fill in defaults for missing fields.
    # NOTE: the previous `getattr(cls, attr)` fallback raised AttributeError
    # for fields declared with `default_factory` (or with no default), since
    # the dataclass machinery leaves no class attribute for those.
    return cls(**inference_config_args)