This commit is contained in:
William Fu-Hinthorn
2023-06-30 12:34:42 -07:00
parent 8b385861a2
commit 97841f4cfd
2 changed files with 25 additions and 8 deletions

View File

@@ -5,20 +5,21 @@ LangChain primitives such as language models and chains.
Some common use cases for evaluation include:
- Grading the accuracy of a response against ground truth answers: QAEvalChain
- Comparing the output of two models: PairwiseStringEvalChain
- Judging the efficacy of an agent's tool usage: TrajectoryEvalChain
- Checking whether an output complies with a set of criteria: CriteriaEvalChain
- Grading the accuracy of a response against ground truth answers: :class:`langchain.evaluation.qa.eval_chain.QAEvalChain`
- Comparing the output of two models: :class:`langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain`
- Judging the efficacy of an agent's tool usage: :class:`langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain`
- Checking whether an output complies with a set of criteria: :class:`langchain.evaluation.criteria.eval_chain.CriteriaEvalChain`
This module also contains low-level APIs for creating custom evaluators for
specific evaluation tasks. These include:
- StringEvaluator: Evaluates an output string against a reference and/or input context.
- PairwiseStringEvaluator: Evaluates two strings against each other.
- :class:`langchain.evaluation.schema.StringEvaluator`: Evaluates an output string against a reference and/or input context.
- :class:`langchain.evaluation.schema.PairwiseStringEvaluator`: Evaluates two strings against each other.
For loading evaluators and LangChain's HuggingFace datasets, you can use the
load_evaluators and load_dataset functions, respectively.
"""
:func:`langchain.evaluation.loading.load_evaluators` and :func:`langchain.evaluation.loading.load_dataset` functions, respectively.
""" # noqa: E501
from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain
from langchain.evaluation.comparison import PairwiseStringEvalChain
from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain

View File

@@ -0,0 +1,16 @@
"""Test the loading function for evaluators."""
import pytest
from langchain.evaluation.loading import EvaluatorType, load_evaluators
from tests.unit_tests.llms.fake_chat_model import FakeChatModel
@pytest.mark.parametrize("evaluator_type", EvaluatorType)
def test_load_evaluators(evaluator_type: EvaluatorType) -> None:
    """Verify every evaluator type loads cleanly with a fake chat model."""
    llm = FakeChatModel()
    # Loading by enum member should succeed without touching a real LLM.
    load_evaluators([evaluator_type], llm=llm)
    # Loading by the enum's raw string value must behave the same way.
    load_evaluators([evaluator_type.value], llm=llm)  # type: ignore