diff --git a/pilot/model/base.py b/pilot/model/base.py
index 48480b94b..841046bd0 100644
--- a/pilot/model/base.py
+++ b/pilot/model/base.py
@@ -54,6 +54,8 @@ class ModelOutput:
     model_context: Dict = None
     finish_reason: str = None
     usage: Dict[str, Any] = None
+    metrics: Dict[str, Any] = None
+    """Some metrics for model inference"""
 
     def to_dict(self) -> Dict:
         return asdict(self)
diff --git a/pilot/model/cluster/base.py b/pilot/model/cluster/base.py
index 36c4779b8..45e46ab3e 100644
--- a/pilot/model/cluster/base.py
+++ b/pilot/model/cluster/base.py
@@ -21,6 +21,8 @@ class PromptRequest(BaseModel):
     context_len: int = None
     echo: bool = True
     span_id: str = None
+    metrics: bool = False
+    """Whether to return metrics of inference"""
 
 
 class EmbeddingsRequest(BaseModel):