This commit is contained in:
William Fu-Hinthorn
2023-06-30 12:34:42 -07:00
parent 8b385861a2
commit 97841f4cfd
2 changed files with 25 additions and 8 deletions

View File

@@ -5,20 +5,21 @@ LangChain primitives such as language models and chains.
Some common use cases for evaluation include:
- Grading the accuracy of a response against ground truth answers: QAEvalChain
- Comparing the output of two models: PairwiseStringEvalChain
- Judging the efficacy of an agent's tool usage: TrajectoryEvalChain
- Checking whether an output complies with a set of criteria: CriteriaEvalChain
- Grading the accuracy of a response against ground truth answers: :class:`langchain.evaluation.qa.eval_chain.QAEvalChain`
- Comparing the output of two models: :class:`langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain`
- Judging the efficacy of an agent's tool usage: :class:`langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain`
- Checking whether an output complies with a set of criteria: :class:`langchain.evaluation.criteria.eval_chain.CriteriaEvalChain`
This module also contains low-level APIs for creating custom evaluators for
specific evaluation tasks. These include:
- StringEvaluator: Evaluates an output string against a reference and/or input context.
- PairwiseStringEvaluator: Evaluates two strings against each other.
- :class:`langchain.evaluation.schema.StringEvaluator`: Evaluates an output string against a reference and/or input context.
- :class:`langchain.evaluation.schema.PairwiseStringEvaluator`: Evaluates two strings against each other.
For loading evaluators and LangChain's HuggingFace datasets, you can use the
load_evaluators and load_dataset functions, respectively.
"""
:func:`langchain.evaluation.loading.load_evaluators` and :func:`langchain.evaluation.loading.load_dataset` functions, respectively.
""" # noqa: E501
from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain
from langchain.evaluation.comparison import PairwiseStringEvalChain
from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain

View File

@@ -0,0 +1,16 @@
"""Test the loading function for evaluators."""
import pytest
from langchain.evaluation.loading import EvaluatorType, load_evaluators
from tests.unit_tests.llms.fake_chat_model import FakeChatModel
@pytest.mark.parametrize("evaluator_type", EvaluatorType)
def test_load_evaluators(evaluator_type: EvaluatorType) -> None:
    """Verify every evaluator type loads cleanly with a fake chat model."""
    llm = FakeChatModel()
    # Loading by enum member should succeed without touching a real LLM.
    load_evaluators([evaluator_type], llm=llm)
    # Loading by the enum's raw string value must behave the same way.
    load_evaluators([evaluator_type.value], llm=llm)  # type: ignore