mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-23 20:23:59 +00:00
docs
This commit is contained in:
@@ -5,20 +5,21 @@ LangChain primitives such as language models and chains.

 Some common use cases for evaluation include:

-- Grading the accuracy of a response against ground truth answers: QAEvalChain
-- Comparing the output of two models: PairwiseStringEvalChain
-- Judging the efficacy of an agent's tool usage: TrajectoryEvalChain
-- Checking whether an output complies with a set of criteria: CriteriaEvalChain
+- Grading the accuracy of a response against ground truth answers: :class:`langchain.evaluation.qa.eval_chain.QAEvalChain`
+- Comparing the output of two models: :class:`langchain.evaluation.comparison.eval_chain.PairwiseStringEvalChain`
+- Judging the efficacy of an agent's tool usage: :class:`langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain`
+- Checking whether an output complies with a set of criteria: :class:`langchain.evaluation.criteria.eval_chain.CriteriaEvalChain`

 This module also contains low-level APIs for creating custom evaluators for
 specific evaluation tasks. These include:

-- StringEvaluator: Evaluates an output string against a reference and/or input context.
-- PairwiseStringEvaluator: Evaluates two strings against each other.
+- :class:`langchain.evaluation.schema.StringEvaluator`: Evaluates an output string against a reference and/or input context.
+- :class:`langchain.evaluation.schema.PairwiseStringEvaluator`: Evaluates two strings against each other.

 For loading evaluators and LangChain's HuggingFace datasets, you can use the
-load_evaluators and load_dataset functions, respectively.
-"""
+:func:`langchain.evaluation.loading.load_evaluators` and :func:`langchain.evaluation.loading.load_datasets` functions, respectively.
+"""  # noqa: E501
 from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain
 from langchain.evaluation.comparison import PairwiseStringEvalChain
 from langchain.evaluation.criteria.eval_chain import CriteriaEvalChain
tests/unit_tests/evaluation/test_loading.py — 16 lines, new file
@@ -0,0 +1,16 @@
"""Test the loading function for evaluators."""

import pytest

from langchain.evaluation.loading import EvaluatorType, load_evaluators
from tests.unit_tests.llms.fake_chat_model import FakeChatModel


# Parametrizing over the EvaluatorType enum runs one test case per evaluator
# kind, so a newly-added enum member is automatically covered.
@pytest.mark.parametrize("evaluator_type", EvaluatorType)
def test_load_evaluators(evaluator_type: EvaluatorType) -> None:
    """Smoke-test that every evaluator type can be constructed.

    Uses a fake chat model so no network calls or API keys are needed;
    the test only asserts that loading does not raise.
    """
    fake_llm = FakeChatModel()
    load_evaluators([evaluator_type], llm=fake_llm)

    # Loading should also accept the enum's string value, not just the member.
    load_evaluators([evaluator_type.value], llm=fake_llm)  # type: ignore
|
||||
Reference in New Issue
Block a user