diff --git a/langchain/evaluation/__init__.py b/langchain/evaluation/__init__.py index 4bceb9782dd..ae4fdb6e124 100644 --- a/langchain/evaluation/__init__.py +++ b/langchain/evaluation/__init__.py @@ -5,20 +5,21 @@ LangChain primitives such as language models and chains. Some common use cases for evaluation include: -- Grading the accuracy of a response against ground truth answers: QAEvalChain -- Comparing the output of two models: PairwiseStringEvalChain -- Judging the efficacy of an agent's tool usage: TrajectoryEvalChain -- Checking whether an output complies with a set of criteria: CriteriaEvalChain +- Grading the accuracy of a response against ground truth answers: :class:`langchain.evaluation.qa.eval_chain.QAEvalChain` +- Comparing the output of two models: :class:`langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain` +- Judging the efficacy of an agent's tool usage: :class:`langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain` +- Checking whether an output complies with a set of criteria: :class:`langchain.evaluation.criteria.eval_chain.CriteriaEvalChain` This module also contains low-level APIs for creating custom evaluators for specific evaluation tasks. These include: -- StringEvaluator: Evaluates an output string against a reference and/or input context. -- PairwiseStringEvaluator: Evaluates two strings against each other. + +- :class:`langchain.evaluation.schema.StringEvaluator`: Evaluates an output string against a reference and/or input context. +- :class:`langchain.evaluation.schema.PairwiseStringEvaluator`: Evaluates two strings against each other. For loading evaluators and LangChain's HuggingFace datasets, you can use the -load_evaluators and load_dataset functions, respectively. -""" +:func:`langchain.evaluation.loading.load_evaluators` and :func:`langchain.evaluation.loading.load_dataset` functions, respectively. 
+""" # noqa: E501 from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain from langchain.evaluation.comparison import PairwiseStringEvalChain from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain diff --git a/tests/unit_tests/evaluation/test_loading.py b/tests/unit_tests/evaluation/test_loading.py new file mode 100644 index 00000000000..27c538d8b3b --- /dev/null +++ b/tests/unit_tests/evaluation/test_loading.py @@ -0,0 +1,16 @@ +"""Test the loading function for evaluators.""" + +import pytest + +from langchain.evaluation.loading import EvaluatorType, load_evaluators +from tests.unit_tests.llms.fake_chat_model import FakeChatModel + + +@pytest.mark.parametrize("evaluator_type", EvaluatorType) +def test_load_evaluators(evaluator_type: EvaluatorType) -> None: +    """Test loading evaluators.""" +    fake_llm = FakeChatModel() +    load_evaluators([evaluator_type], llm=fake_llm) + +    # Test as string +    load_evaluators([evaluator_type.value], llm=fake_llm)  # type: ignore