Merge remote-tracking branch 'origin/feat/dev-0.6' into feat/dev-0.6

yhjun1026
2024-08-08 17:33:11 +08:00
4 changed files with 22 additions and 4 deletions

View File

@@ -92,30 +92,34 @@ class EvaluationMetric(ABC, Generic[P, C]):
         self,
         prediction: P,
         contexts: Optional[Sequence[C]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(P): The prediction data.
             contexts(Optional[Sequence[C]]): The context data.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
         """
         return await asyncio.get_running_loop().run_in_executor(
-            None, self.sync_compute, prediction, contexts
+            None, self.sync_compute, prediction, contexts, query
         )

     def sync_compute(
         self,
         prediction: P,
         contexts: Optional[Sequence[C]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(P): The prediction data.
             contexts(Optional[Sequence[C]]): The factual data.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
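
With this hunk, the async compute path simply forwards the new optional query argument to sync_compute through run_in_executor, so a metric implementation only has to accept it in sync_compute. A minimal sketch of a custom metric under that contract (the import path and the BaseEvaluationResult field names are assumptions, not taken from this diff):

import asyncio
from typing import Optional, Sequence

# Assumed import path; adjust to wherever EvaluationMetric is defined in the tree.
from dbgpt.core.interface.evaluation import BaseEvaluationResult, EvaluationMetric


class QueryMentionMetric(EvaluationMetric[str, str]):
    """Toy metric: scores 1.0 when the prediction repeats the query text."""

    def sync_compute(
        self,
        prediction: str,
        contexts: Optional[Sequence[str]] = None,
        query: Optional[str] = None,
    ) -> BaseEvaluationResult:
        score = 1.0 if query and query in prediction else 0.0
        # Field names are assumed from the surrounding docstrings.
        return BaseEvaluationResult(prediction=prediction, contexts=contexts, score=score)


async def main() -> None:
    metric = QueryMentionMetric()
    # compute() hands query through to sync_compute() via the executor.
    result = await metric.compute("DB-GPT evaluation run", contexts=None, query="DB-GPT")
    print(result.score)


asyncio.run(main())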
@@ -151,6 +155,7 @@ class FunctionMetric(EvaluationMetric[P, C], Generic[P, C]):
         self,
         prediction: P,
         context: Optional[Sequence[C]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric."""
         return self.func(prediction, context)
@@ -171,6 +176,7 @@ class ExactMatchMetric(EvaluationMetric[str, str]):
         self,
         prediction: str,
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric."""
         if self._ignore_case:
@@ -208,6 +214,7 @@ class SimilarityMetric(EvaluationMetric[str, str]):
         self,
         prediction: str,
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric."""
         if not contexts:
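
Because query defaults to None in every signature touched here, existing callers of the built-in metrics keep working unchanged and passing the question is opt-in. A short caller-side sketch (assuming ExactMatchMetric can be constructed with its defaults and that the result exposes a score field):

import asyncio

from dbgpt.core.interface.evaluation import ExactMatchMetric  # assumed import path


async def main() -> None:
    metric = ExactMatchMetric()
    # Old-style call, no query:
    plain = await metric.compute("Beijing", contexts=["Beijing"])
    # New optional keyword:
    with_query = await metric.compute(
        "Beijing", contexts=["Beijing"], query="What is the capital of China?"
    )
    print(plain.score, with_query.score)


asyncio.run(main())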

View File

@@ -17,7 +17,7 @@ from dbgpt.core.interface.llm import LLMClient, ModelRequest
 logger = logging.getLogger(__name__)


 ANSWER_RELEVANCY_EVALUATE_PROMPT_TEMPLATE = """
-你是一个DBA智能答疑专家, 你的任务是根据用户的问题和已经相关的文档给问答的答案进行严格的打分.
+你是一个智能答疑专家, 你的任务是根据用户的问题和已经相关的文档给问答的答案进行严格的打分.
 你将会得到以下输入信息:
 - 用户的问题
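
For reference, the edited opening line of ANSWER_RELEVANCY_EVALUATE_PROMPT_TEMPLATE translates roughly to: "You are an intelligent Q&A expert; your task is to strictly score the answer based on the user's question and the related documents." The change only drops the leading "DBA" qualifier; the lines that follow ("You will receive the following inputs: - the user's question ...") are unchanged.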
@@ -69,17 +69,18 @@ class LLMEvaluationMetric(EvaluationMetric):
         self._model_name = model_name
         self._prompt_template = prompt_template

-    async def compute(  # type: ignore # noqa
+    async def compute(
         self,
-        query: str,
         prediction: str,
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> EvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(List[str]): The retrieved chunks from the retriever.
             contexts(Sequence[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
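
Note that query moves from being the required first positional parameter of LLMEvaluationMetric.compute to an optional trailing keyword, so any caller that previously passed the question positionally needs updating. A hedged sketch of the adjusted call, with metric standing for an already-constructed LLMEvaluationMetric and the helper names purely illustrative:

from typing import List


async def score_answer(metric, question: str, answer: str, docs: List[str]):
    # Before this commit: await metric.compute(question, answer, docs)
    # After it: the prediction comes first and the question is an optional keyword.
    return await metric.compute(prediction=answer, contexts=docs, query=question)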

View File

@@ -34,12 +34,14 @@ class RetrieverSimilarityMetric(RetrieverEvaluationMetric):
         self,
         prediction: List[str],
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(List[str]): The retrieved chunks from the retriever.
             contexts(Sequence[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
@@ -81,12 +83,14 @@ class RetrieverMRRMetric(RetrieverEvaluationMetric):
         self,
         prediction: List[str],
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute MRR metric.

         Args:
             prediction(Optional[List[str]]): The retrieved chunks from the retriever.
             contexts(Optional[List[str]]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
             The score is the reciprocal rank of the first relevant chunk.
@@ -119,12 +123,14 @@ class RetrieverHitRateMetric(RetrieverEvaluationMetric):
         self,
         prediction: List[str],
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute HitRate metric.

         Args:
             prediction(Optional[List[str]]): The retrieved chunks from the retriever.
             contexts(Optional[List[str]]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
         """

View File

@@ -28,12 +28,14 @@ class AppLinkMetric(EvaluationMetric[str, str], ABC):
         self,
         prediction: Optional[str] = None,
         contexts: Optional[str] = None,
+        query: Optional[str] = None,
         **kwargs: Any,
     ) -> BaseEvaluationResult:
         """Compute Intent metric.
         Args:
             prediction(Optional[str]): The retrieved chunks from the retriever.
             contexts(Optional[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
         """
@@ -78,12 +80,14 @@ class IntentMetric(EvaluationMetric[str, str], ABC):
         self,
         prediction: Optional[str] = None,
         contexts: Optional[str] = None,
+        query: Optional[str] = None,
         **kwargs: Any,
     ) -> BaseEvaluationResult:
         """Compute Intent metric.
         Args:
             prediction(Optional[str]): The retrieved chunks from the retriever.
             contexts(Optional[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
         """