feat(rag): Support rag retriever evaluation (#1291)

This commit is contained in:
Fangyin Cheng
2024-03-14 13:06:57 +08:00
committed by GitHub
parent cd2dcc253c
commit adaa68eb00
34 changed files with 1452 additions and 67 deletions

View File

@@ -0,0 +1,61 @@
"""Evaluation operators."""
import asyncio
from typing import Any, List, Optional
from dbgpt.core.awel import JoinOperator
from dbgpt.core.interface.evaluation import EvaluationMetric, EvaluationResult
from dbgpt.core.interface.llm import LLMClient
from ..chunk import Chunk
class RetrieverEvaluatorOperator(JoinOperator[List[EvaluationResult]]):
    """Join operator that scores retrieved chunks with a set of metrics.

    The operator registers ``_do_evaluation`` as the combine function of the
    underlying ``JoinOperator``; every configured metric is computed for the
    joined inputs and yields one ``EvaluationResult``.
    """

    def __init__(
        self,
        evaluation_metrics: List[EvaluationMetric],
        llm_client: Optional[LLMClient] = None,
        **kwargs,
    ):
        """Create a new RetrieverEvaluatorOperator.

        Args:
            evaluation_metrics(List[EvaluationMetric]): The metrics to compute
                for each evaluated query.
            llm_client(Optional[LLMClient]): An optional LLM client. It is only
                stored on the instance; this class does not call it itself.
            **kwargs: Extra keyword arguments forwarded to ``JoinOperator``.
        """
        # Attributes are set before the base initializer runs, which receives
        # the bound evaluation method as its combine function.
        self.llm_client = llm_client
        self.evaluation_metrics = evaluation_metrics
        super().__init__(combine_function=self._do_evaluation, **kwargs)

    async def _do_evaluation(
        self,
        query: str,
        prediction: List[Chunk],
        contexts: List[str],
        raw_dataset: Any = None,
    ) -> List[EvaluationResult]:
        """Run every configured metric against one query's retrieval output.

        Args:
            query(str): The query string.
            prediction(List[Chunk]): The retrieved chunks from the retriever.
            contexts(List[str]): The contexts from dataset. A single string is
                accepted as well and is wrapped into a one-element list.
            raw_dataset(Any): The raw data(single row) from dataset.

        Returns:
            List[EvaluationResult]: One result per metric, in the same order
                as ``self.evaluation_metrics``.
        """
        # A bare string would otherwise be handed to the metrics as-is.
        reference_contexts = [contexts] if isinstance(contexts, str) else contexts
        retrieved_texts = [item.content for item in prediction]

        # asyncio.gather returns values in the order the awaitables were
        # passed, so the outcomes line up with the metric list below.
        outcomes = await asyncio.gather(
            *(
                evaluator.compute(retrieved_texts, reference_contexts)
                for evaluator in self.evaluation_metrics
            )
        )

        return [
            EvaluationResult(
                query=query,
                prediction=prediction,
                score=outcome.score,
                contexts=reference_contexts,
                passing=outcome.passing,
                raw_dataset=raw_dataset,
                metric_name=evaluator.name,
            )
            for outcome, evaluator in zip(outcomes, self.evaluation_metrics)
        ]