[Inference] ADD async and sync Api server using FastAPI (#5396)

* add api server

* fix

* add

* add completion service and fix bug

* add generation config

* revise shardformer

* fix bugs

* add docstrings and fix some bugs

* fix bugs and add choices for prompt template
This commit is contained in:
Jianghai
2024-03-01 14:47:36 +08:00
committed by CjhHa1
parent d482922035
commit 69cd7e069d
13 changed files with 789 additions and 25 deletions

View File

@@ -1,10 +1,10 @@
"""
Our config contains various options for inference optimization; it is a unified API that wraps all the configurations for inference.
"""
import dataclasses
import logging
from dataclasses import dataclass
from typing import Optional, Union
from typing import Any, Dict, Optional, Union
import torch
import torch.distributed as dist
@@ -214,3 +214,18 @@ class InferenceConfig:
meta_config[type] = getattr(model_config, type)
return GenerationConfig.from_dict(meta_config)
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> "InferenceConfig":
    """Create an InferenceConfig from a plain dictionary.

    Args:
        config_dict: Mapping of field names to values. Keys that do not
            correspond to a declared dataclass field are ignored, so callers
            may pass a superset (e.g. a full CLI/arg namespace dump).

    Returns:
        InferenceConfig: A new config populated from ``config_dict``; fields
        absent from the dict fall back to their declared defaults.
    """
    # Keep only keys that match declared dataclass fields.
    field_names = {field.name for field in dataclasses.fields(cls)}
    inference_config_args = {key: value for key, value in config_dict.items() if key in field_names}
    # Let the dataclass constructor fill in defaults for missing fields.
    # NOTE: the previous `getattr(cls, attr)` fallback raised AttributeError
    # for fields declared with `default_factory` (or with no default), since
    # the dataclass machinery leaves no class attribute for those.
    return cls(**inference_config_args)