Mirror of https://github.com/csunny/DB-GPT.git
Merge remote-tracking branch 'origin/feat/dev-0.6' into feat/dev-0.6
@@ -92,30 +92,34 @@ class EvaluationMetric(ABC, Generic[P, C]):
         self,
         prediction: P,
         contexts: Optional[Sequence[C]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(P): The prediction data.
             contexts(Optional[Sequence[C]]): The context data.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
         """
         return await asyncio.get_running_loop().run_in_executor(
-            None, self.sync_compute, prediction, contexts
+            None, self.sync_compute, prediction, contexts, query
         )

     def sync_compute(
         self,
         prediction: P,
         contexts: Optional[Sequence[C]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(P): The prediction data.
             contexts(Optional[Sequence[C]]): The factual data.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
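The mechanical point in this hunk is that run_in_executor only forwards positional arguments, so the new query parameter has to be appended to the call explicitly or it is silently dropped. A standalone sketch of the same offload pattern (plain asyncio, not DB-GPT code; the toy scoring function and names are illustrative):

import asyncio
from typing import Optional, Sequence


def sync_score(
    prediction: str,
    contexts: Optional[Sequence[str]] = None,
    query: Optional[str] = None,
) -> float:
    # Toy scoring: 1.0 if the prediction appears in any context, else 0.0.
    if not contexts:
        return 0.0
    return 1.0 if any(prediction in c for c in contexts) else 0.0


async def score(
    prediction: str,
    contexts: Optional[Sequence[str]] = None,
    query: Optional[str] = None,
) -> float:
    loop = asyncio.get_running_loop()
    # run_in_executor passes extra positional arguments through to the
    # callable; omitting ``query`` here would silently drop it, which is
    # what the changed call in the hunk above avoids.
    return await loop.run_in_executor(None, sync_score, prediction, contexts, query)


if __name__ == "__main__":
    result = asyncio.run(score("Paris", ["Paris is in France"], query="Where is Paris?"))
    print(result)  # 1.0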
@@ -151,6 +155,7 @@ class FunctionMetric(EvaluationMetric[P, C], Generic[P, C]):
         self,
         prediction: P,
         context: Optional[Sequence[C]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric."""
         return self.func(prediction, context)
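Note that FunctionMetric accepts the new query argument but still calls self.func(prediction, context), so the wrapped callable never sees the query. A standalone sketch of that behaviour (the wrapper class below is illustrative, not the real FunctionMetric):

from typing import Callable, Optional, Sequence


class FuncMetricSketch:
    """Illustrative wrapper mirroring the two-argument call in the hunk above."""

    def __init__(self, func: Callable[[str, Optional[Sequence[str]]], float]) -> None:
        self.func = func

    def compute(
        self,
        prediction: str,
        context: Optional[Sequence[str]] = None,
        query: Optional[str] = None,
    ) -> float:
        # ``query`` is accepted for interface compatibility but not forwarded.
        return self.func(prediction, context)


metric = FuncMetricSketch(lambda pred, ctx: float(bool(ctx and pred in ctx)))
print(metric.compute("chunk-a", ["chunk-a"], query="ignored"))  # 1.0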
@@ -171,6 +176,7 @@ class ExactMatchMetric(EvaluationMetric[str, str]):
         self,
         prediction: str,
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric."""
         if self._ignore_case:
@@ -208,6 +214,7 @@ class SimilarityMetric(EvaluationMetric[str, str]):
         self,
         prediction: str,
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric."""
         if not contexts:
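The guard on contexts above suggests the metric short-circuits when no reference data is supplied. A minimal stand-in sketch of that shape; the real SimilarityMetric presumably scores the prediction against the contexts with an embedding model, and difflib is used here only to keep the example self-contained:

from difflib import SequenceMatcher
from typing import Optional, Sequence


def similarity_score(
    prediction: str,
    contexts: Optional[Sequence[str]] = None,
    query: Optional[str] = None,
) -> float:
    # Mirror the guard from the hunk: no contexts means nothing to compare.
    if not contexts:
        return 0.0
    # Stand-in similarity: best string-level ratio against any context.
    return max(SequenceMatcher(None, prediction, c).ratio() for c in contexts)


print(round(similarity_score("Paris is the capital of France",
                             ["The capital of France is Paris"]), 2))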
@@ -17,7 +17,7 @@ from dbgpt.core.interface.llm import LLMClient, ModelRequest
 logger = logging.getLogger(__name__)

 ANSWER_RELEVANCY_EVALUATE_PROMPT_TEMPLATE = """
-你是一个DBA智能答疑专家, 你的任务是根据用户的问题和已经相关的文档给问答的答案进行严格的打分.
+你是一个智能答疑专家, 你的任务是根据用户的问题和已经相关的文档给问答的答案进行严格的打分.

 你将会得到以下输入信息:
 - 用户的问题
@@ -69,17 +69,18 @@ class LLMEvaluationMetric(EvaluationMetric):
         self._model_name = model_name
         self._prompt_template = prompt_template

-    async def compute(  # type: ignore # noqa
+    async def compute(
         self,
-        query: str,
         prediction: str,
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> EvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(List[str]): The retrieved chunks from the retriever.
             contexts(Sequence[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
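This is the breaking change in the commit: query moves from a required first positional parameter to an optional keyword at the end, and the "# type: ignore" is no longer needed because the override now matches the base-class order. A hypothetical call-site sketch (the stub class below is illustrative, not the real LLMEvaluationMetric):

import asyncio
from typing import Optional, Sequence


class StubMetric:
    """Stand-in with the *new* parameter order from the diff."""

    async def compute(
        self,
        prediction: str,
        contexts: Optional[Sequence[str]] = None,
        query: Optional[str] = None,
    ) -> float:
        return 1.0 if query and contexts else 0.0


async def main() -> None:
    metric = StubMetric()
    # Call sites that used to pass the question first and positionally, e.g.
    # ``compute(query, prediction, contexts)``, must be updated; passing
    # ``query`` as a keyword keeps them robust to further reordering.
    score = await metric.compute(
        "Paris", contexts=["Paris is in France"], query="Where is Paris?"
    )
    print(score)


asyncio.run(main())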
@@ -34,12 +34,14 @@ class RetrieverSimilarityMetric(RetrieverEvaluationMetric):
         self,
         prediction: List[str],
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute the evaluation metric.

         Args:
             prediction(List[str]): The retrieved chunks from the retriever.
             contexts(Sequence[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.

         Returns:
             BaseEvaluationResult: The evaluation result.
@@ -81,12 +83,14 @@ class RetrieverMRRMetric(RetrieverEvaluationMetric):
         self,
         prediction: List[str],
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute MRR metric.

         Args:
             prediction(Optional[List[str]]): The retrieved chunks from the retriever.
             contexts(Optional[List[str]]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
                 The score is the reciprocal rank of the first relevant chunk.
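The docstring above pins down the scoring rule: the reciprocal rank of the first retrieved chunk that is relevant. A minimal standalone sketch of that rule (treating "relevant" as set membership in the ground-truth contexts, which is an assumption about how the real metric matches chunks):

from typing import List, Optional, Sequence


def mrr_score(
    prediction: List[str],
    contexts: Optional[Sequence[str]] = None,
    query: Optional[str] = None,
) -> float:
    """Reciprocal rank of the first retrieved chunk found in the contexts."""
    if not prediction or not contexts:
        return 0.0
    relevant = set(contexts)
    for rank, chunk in enumerate(prediction, start=1):
        if chunk in relevant:
            return 1.0 / rank
    return 0.0


# The first relevant chunk sits at rank 2, so the score is 0.5.
print(mrr_score(["chunk-a", "chunk-b", "chunk-c"], contexts=["chunk-b"]))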
@@ -119,12 +123,14 @@ class RetrieverHitRateMetric(RetrieverEvaluationMetric):
         self,
         prediction: List[str],
         contexts: Optional[Sequence[str]] = None,
+        query: Optional[str] = None,
     ) -> BaseEvaluationResult:
         """Compute HitRate metric.

         Args:
             prediction(Optional[List[str]]): The retrieved chunks from the retriever.
             contexts(Optional[List[str]]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
         """
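For comparison, a hit-rate sketch under one common definition, the fraction of ground-truth contexts that appear among the retrieved chunks; whether the real RetrieverHitRateMetric uses this exact definition or a binary any-hit variant is an assumption:

from typing import List, Optional, Sequence


def hit_rate_score(
    prediction: List[str],
    contexts: Optional[Sequence[str]] = None,
    query: Optional[str] = None,
) -> float:
    """Fraction of ground-truth contexts that were retrieved (assumed definition)."""
    if not contexts:
        return 0.0
    retrieved = set(prediction)
    hits = sum(1 for c in contexts if c in retrieved)
    return hits / len(contexts)


# One of two ground-truth chunks was retrieved, so the score is 0.5.
print(hit_rate_score(["chunk-a", "chunk-b"], contexts=["chunk-b", "chunk-d"]))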
@@ -28,12 +28,14 @@ class AppLinkMetric(EvaluationMetric[str, str], ABC):
         self,
         prediction: Optional[str] = None,
         contexts: Optional[str] = None,
+        query: Optional[str] = None,
         **kwargs: Any,
     ) -> BaseEvaluationResult:
         """Compute Intent metric.
         Args:
             prediction(Optional[str]): The retrieved chunks from the retriever.
             contexts(Optional[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
         """
@@ -78,12 +80,14 @@ class IntentMetric(EvaluationMetric[str, str], ABC):
         self,
         prediction: Optional[str] = None,
         contexts: Optional[str] = None,
+        query: Optional[str] = None,
         **kwargs: Any,
     ) -> BaseEvaluationResult:
         """Compute Intent metric.
         Args:
             prediction(Optional[str]): The retrieved chunks from the retriever.
             contexts(Optional[str]): The contexts from dataset.
+            query:(Optional[str]) The query text.
         Returns:
             BaseEvaluationResult: The evaluation result.
         """
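In the two app-level metrics the new parameter is inserted before **kwargs: Any, so existing callers that already pass extra keyword arguments keep working, while query can now also be supplied explicitly. A small sketch of that compatibility property (the function below is illustrative, not DB-GPT code; the extra keyword name is hypothetical):

from typing import Any, Optional


def compute_intent(
    prediction: Optional[str] = None,
    contexts: Optional[str] = None,
    query: Optional[str] = None,
    **kwargs: Any,
) -> dict:
    # Extra keyword arguments still land in **kwargs, exactly as before the change.
    return {"prediction": prediction, "query": query, "extra": kwargs}


# Pre-existing call style keeps working; ``query`` simply stays None.
print(compute_intent(prediction="app_link", session_id="abc-123"))
# New call style can pass the question through explicitly.
print(compute_intent(prediction="app_link", query="open the sales dashboard"))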