mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 23:54:14 +00:00
fix evaluation parse test (#8859)
# What - fix evaluation parse test <!-- Thank you for contributing to LangChain! Replace this comment with: - Description: Fix evaluation parse test - Issue: None - Dependencies: None - Tag maintainer: @baskaryan - Twitter handle: @MLOpsJ Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. Maintainer responsibilities: - General / Misc / if you don't know who to tag: @baskaryan - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev - Models / Prompts: @hwchase17, @baskaryan - Memory: @hwchase17 - Agents / Tools / Toolkits: @hinthornw - Tracing / Callbacks: @agola11 - Async: @agola11 If no one reviews your PR within a few days, feel free to @-mention the same people again. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md -->
This commit is contained in:
parent
40096c73cd
commit
ab47557db3
@ -100,14 +100,14 @@ class PairwiseStringResultOutputParser(BaseOutputParser[dict]):
|
|||||||
"""
|
"""
|
||||||
return "pairwise_string_result"
|
return "pairwise_string_result"
|
||||||
|
|
||||||
def parse(self, text: str) -> Any:
|
def parse(self, text: str) -> Dict[str, Any]:
|
||||||
"""Parse the output text.
|
"""Parse the output text.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text (str): The output text to parse.
|
text (str): The output text to parse.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Any: The parsed output.
|
Dict: The parsed output.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If the verdict is invalid.
|
ValueError: If the verdict is invalid.
|
||||||
|
@ -65,14 +65,14 @@ class CriteriaResultOutputParser(BaseOutputParser[dict]):
|
|||||||
def _type(self) -> str:
|
def _type(self) -> str:
|
||||||
return "criteria_result"
|
return "criteria_result"
|
||||||
|
|
||||||
def parse(self, text: str) -> Any:
|
def parse(self, text: str) -> Dict[str, Any]:
|
||||||
"""Parse the output text.
|
"""Parse the output text.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text (str): The output text to parse.
|
text (str): The output text to parse.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Any: The parsed output.
|
Dict: The parsed output.
|
||||||
"""
|
"""
|
||||||
parsed = text.strip().rsplit("\n", maxsplit=1)
|
parsed = text.strip().rsplit("\n", maxsplit=1)
|
||||||
if len(parsed) == 1:
|
if len(parsed) == 1:
|
||||||
|
@ -8,6 +8,7 @@ import pytest
|
|||||||
from langchain.evaluation.comparison.eval_chain import (
|
from langchain.evaluation.comparison.eval_chain import (
|
||||||
LabeledPairwiseStringEvalChain,
|
LabeledPairwiseStringEvalChain,
|
||||||
PairwiseStringEvalChain,
|
PairwiseStringEvalChain,
|
||||||
|
PairwiseStringResultOutputParser,
|
||||||
resolve_pairwise_criteria,
|
resolve_pairwise_criteria,
|
||||||
)
|
)
|
||||||
from langchain.evaluation.criteria.eval_chain import Criteria
|
from langchain.evaluation.criteria.eval_chain import Criteria
|
||||||
@ -27,6 +28,45 @@ def test_resolve_criteria_list_enum() -> None:
|
|||||||
assert set(val.keys()) == set(c.value for c in list(Criteria))
|
assert set(val.keys()) == set(c.value for c in list(Criteria))
|
||||||
|
|
||||||
|
|
||||||
|
def test_PairwiseStringResultOutputParser_parse() -> None:
|
||||||
|
output_parser = PairwiseStringResultOutputParser()
|
||||||
|
text = """I like pie better than cake.
|
||||||
|
[[A]]"""
|
||||||
|
got = output_parser.parse(text)
|
||||||
|
want = {
|
||||||
|
"reasoning": "I like pie better than cake.",
|
||||||
|
"value": "A",
|
||||||
|
"score": 1,
|
||||||
|
}
|
||||||
|
assert got.get("reasoning") == want["reasoning"]
|
||||||
|
assert got.get("value") == want["value"]
|
||||||
|
assert got.get("score") == want["score"]
|
||||||
|
|
||||||
|
text = """I like cake better than pie.
|
||||||
|
[[B]]"""
|
||||||
|
got = output_parser.parse(text)
|
||||||
|
want = {
|
||||||
|
"reasoning": "I like cake better than pie.",
|
||||||
|
"value": "B",
|
||||||
|
"score": 0,
|
||||||
|
}
|
||||||
|
assert got.get("reasoning") == want["reasoning"]
|
||||||
|
assert got.get("value") == want["value"]
|
||||||
|
assert got.get("score") == want["score"]
|
||||||
|
|
||||||
|
text = """I like cake and pie.
|
||||||
|
[[C]]"""
|
||||||
|
got = output_parser.parse(text)
|
||||||
|
want = {
|
||||||
|
"reasoning": "I like cake and pie.",
|
||||||
|
"value": None,
|
||||||
|
"score": 0.5,
|
||||||
|
}
|
||||||
|
assert got.get("reasoning") == want["reasoning"]
|
||||||
|
assert got.get("value") == want["value"]
|
||||||
|
assert got.get("score") == want["score"]
|
||||||
|
|
||||||
|
|
||||||
def test_pairwise_string_comparison_chain() -> None:
|
def test_pairwise_string_comparison_chain() -> None:
|
||||||
llm = FakeLLM(
|
llm = FakeLLM(
|
||||||
queries={
|
queries={
|
||||||
|
@ -7,6 +7,7 @@ from langchain.evaluation.criteria.eval_chain import (
|
|||||||
_SUPPORTED_CRITERIA,
|
_SUPPORTED_CRITERIA,
|
||||||
Criteria,
|
Criteria,
|
||||||
CriteriaEvalChain,
|
CriteriaEvalChain,
|
||||||
|
CriteriaResultOutputParser,
|
||||||
LabeledCriteriaEvalChain,
|
LabeledCriteriaEvalChain,
|
||||||
)
|
)
|
||||||
from langchain.evaluation.schema import StringEvaluator
|
from langchain.evaluation.schema import StringEvaluator
|
||||||
@ -23,6 +24,34 @@ def test_resolve_criteria_str() -> None:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_CriteriaResultOutputParser_parse() -> None:
|
||||||
|
output_parser = CriteriaResultOutputParser()
|
||||||
|
text = """Here is my step-by-step reasoning for the given criteria:
|
||||||
|
The criterion is: "Do you like cake?" I like cake.
|
||||||
|
Y"""
|
||||||
|
got = output_parser.parse(text)
|
||||||
|
want = {
|
||||||
|
"reasoning": """Here is my step-by-step reasoning for the given criteria:
|
||||||
|
The criterion is: "Do you like cake?" I like cake.""",
|
||||||
|
"value": "Y",
|
||||||
|
"score": 1,
|
||||||
|
}
|
||||||
|
assert got.get("reasoning") == want["reasoning"]
|
||||||
|
assert got.get("value") == want["value"]
|
||||||
|
assert got.get("score") == want["score"]
|
||||||
|
|
||||||
|
text = "Y"
|
||||||
|
got = output_parser.parse(text)
|
||||||
|
want = {
|
||||||
|
"reasoning": "",
|
||||||
|
"value": "Y",
|
||||||
|
"score": 1,
|
||||||
|
}
|
||||||
|
assert got.get("reasoning") == want["reasoning"]
|
||||||
|
assert got.get("value") == want["value"]
|
||||||
|
assert got.get("score") == want["score"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("criterion", list(Criteria))
|
@pytest.mark.parametrize("criterion", list(Criteria))
|
||||||
def test_resolve_criteria_enum(criterion: Criteria) -> None:
|
def test_resolve_criteria_enum(criterion: Criteria) -> None:
|
||||||
assert CriteriaEvalChain.resolve_criteria(criterion) == {
|
assert CriteriaEvalChain.resolve_criteria(criterion) == {
|
||||||
|
Loading…
Reference in New Issue
Block a user