From 5734f1c11610dcf07203bd16e65a7de9f369dd87 Mon Sep 17 00:00:00 2001 From: aries_ckt <916701291@qq.com> Date: Thu, 8 Aug 2024 14:45:37 +0800 Subject: [PATCH] fix: evaluation compute() add query parameter --- dbgpt/rag/evaluation/answer.py | 7 ++++--- dbgpt/rag/evaluation/retriever.py | 6 ++++++ dbgpt/serve/agent/evaluation/evaluation_metric.py | 4 ++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/dbgpt/rag/evaluation/answer.py b/dbgpt/rag/evaluation/answer.py index 5934872f2..119437064 100644 --- a/dbgpt/rag/evaluation/answer.py +++ b/dbgpt/rag/evaluation/answer.py @@ -17,7 +17,7 @@ from dbgpt.core.interface.llm import LLMClient, ModelRequest logger = logging.getLogger(__name__) ANSWER_RELEVANCY_EVALUATE_PROMPT_TEMPLATE = """ -你是一个DBA智能答疑专家, 你的任务是根据用户的问题和已经相关的文档给问答的答案进行严格的打分. +你是一个智能答疑专家, 你的任务是根据用户的问题和已经相关的文档给问答的答案进行严格的打分. 你将会得到以下输入信息: - 用户的问题 @@ -69,17 +69,18 @@ class LLMEvaluationMetric(EvaluationMetric): self._model_name = model_name self._prompt_template = prompt_template - async def compute( # type: ignore # noqa + async def compute( self, - query: str, prediction: str, contexts: Optional[Sequence[str]] = None, + query: Optional[str] = None, ) -> EvaluationResult: """Compute the evaluation metric. Args: prediction(List[str]): The retrieved chunks from the retriever. contexts(Sequence[str]): The contexts from dataset. + query(Optional[str]): The query text. Returns: BaseEvaluationResult: The evaluation result. diff --git a/dbgpt/rag/evaluation/retriever.py b/dbgpt/rag/evaluation/retriever.py index c1d26fc41..c0e75082c 100644 --- a/dbgpt/rag/evaluation/retriever.py +++ b/dbgpt/rag/evaluation/retriever.py @@ -34,12 +34,14 @@ class RetrieverSimilarityMetric(RetrieverEvaluationMetric): self, prediction: List[str], contexts: Optional[Sequence[str]] = None, + query: Optional[str] = None, ) -> BaseEvaluationResult: """Compute the evaluation metric. Args: prediction(List[str]): The retrieved chunks from the retriever. 
contexts(Sequence[str]): The contexts from dataset. + query(Optional[str]): The query text. Returns: BaseEvaluationResult: The evaluation result. @@ -81,12 +83,14 @@ class RetrieverMRRMetric(RetrieverEvaluationMetric): self, prediction: List[str], contexts: Optional[Sequence[str]] = None, + query: Optional[str] = None, ) -> BaseEvaluationResult: """Compute MRR metric. Args: prediction(Optional[List[str]]): The retrieved chunks from the retriever. contexts(Optional[List[str]]): The contexts from dataset. + query(Optional[str]): The query text. Returns: BaseEvaluationResult: The evaluation result. The score is the reciprocal rank of the first relevant chunk. @@ -119,12 +123,14 @@ class RetrieverHitRateMetric(RetrieverEvaluationMetric): self, prediction: List[str], contexts: Optional[Sequence[str]] = None, + query: Optional[str] = None, ) -> BaseEvaluationResult: """Compute HitRate metric. Args: prediction(Optional[List[str]]): The retrieved chunks from the retriever. contexts(Optional[List[str]]): The contexts from dataset. + query(Optional[str]): The query text. Returns: BaseEvaluationResult: The evaluation result. """ diff --git a/dbgpt/serve/agent/evaluation/evaluation_metric.py b/dbgpt/serve/agent/evaluation/evaluation_metric.py index f1c9f3013..c85d2dc85 100644 --- a/dbgpt/serve/agent/evaluation/evaluation_metric.py +++ b/dbgpt/serve/agent/evaluation/evaluation_metric.py @@ -28,12 +28,14 @@ class AppLinkMetric(EvaluationMetric[str, str], ABC): self, prediction: Optional[str] = None, contexts: Optional[str] = None, + query: Optional[str] = None, **kwargs: Any, ) -> BaseEvaluationResult: """Compute Intent metric. Args: prediction(Optional[str]): The retrieved chunks from the retriever. contexts(Optional[str]): The contexts from dataset. + query(Optional[str]): The query text. Returns: BaseEvaluationResult: The evaluation result. 
""" @@ -78,12 +80,14 @@ class IntentMetric(EvaluationMetric[str, str], ABC): self, prediction: Optional[str] = None, contexts: Optional[str] = None, + query: Optional[str] = None, **kwargs: Any, ) -> BaseEvaluationResult: """Compute Intent metric. Args: prediction(Optional[str]): The retrieved chunks from the retriever. contexts(Optional[str]): The contexts from dataset. + query(Optional[str]): The query text. Returns: BaseEvaluationResult: The evaluation result. """