mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 07:35:18 +00:00
fix evaluation parse test (#8859)
# What - fix evaluation parse test <!-- Thank you for contributing to LangChain! Replace this comment with: - Description: Fix evaluation parse test - Issue: None - Dependencies: None - Tag maintainer: @baskaryan - Twitter handle: @MLOpsJ Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. Maintainer responsibilities: - General / Misc / if you don't know who to tag: @baskaryan - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev - Models / Prompts: @hwchase17, @baskaryan - Memory: @hwchase17 - Agents / Tools / Toolkits: @hinthornw - Tracing / Callbacks: @agola11 - Async: @agola11 If no one reviews your PR within a few days, feel free to @-mention the same people again. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md -->
This commit is contained in:
parent
40096c73cd
commit
ab47557db3
@ -100,14 +100,14 @@ class PairwiseStringResultOutputParser(BaseOutputParser[dict]):
|
||||
"""
|
||||
return "pairwise_string_result"
|
||||
|
||||
def parse(self, text: str) -> Any:
|
||||
def parse(self, text: str) -> Dict[str, Any]:
|
||||
"""Parse the output text.
|
||||
|
||||
Args:
|
||||
text (str): The output text to parse.
|
||||
|
||||
Returns:
|
||||
Any: The parsed output.
|
||||
Dict: The parsed output.
|
||||
|
||||
Raises:
|
||||
ValueError: If the verdict is invalid.
|
||||
|
@ -65,14 +65,14 @@ class CriteriaResultOutputParser(BaseOutputParser[dict]):
|
||||
def _type(self) -> str:
|
||||
return "criteria_result"
|
||||
|
||||
def parse(self, text: str) -> Any:
|
||||
def parse(self, text: str) -> Dict[str, Any]:
|
||||
"""Parse the output text.
|
||||
|
||||
Args:
|
||||
text (str): The output text to parse.
|
||||
|
||||
Returns:
|
||||
Any: The parsed output.
|
||||
Dict: The parsed output.
|
||||
"""
|
||||
parsed = text.strip().rsplit("\n", maxsplit=1)
|
||||
if len(parsed) == 1:
|
||||
|
@ -8,6 +8,7 @@ import pytest
|
||||
from langchain.evaluation.comparison.eval_chain import (
|
||||
LabeledPairwiseStringEvalChain,
|
||||
PairwiseStringEvalChain,
|
||||
PairwiseStringResultOutputParser,
|
||||
resolve_pairwise_criteria,
|
||||
)
|
||||
from langchain.evaluation.criteria.eval_chain import Criteria
|
||||
@ -27,6 +28,45 @@ def test_resolve_criteria_list_enum() -> None:
|
||||
assert set(val.keys()) == set(c.value for c in list(Criteria))
|
||||
|
||||
|
||||
def test_PairwiseStringResultOutputParser_parse() -> None:
    """Check the fields produced when parsing each pairwise verdict marker.

    Each case feeds the parser a line of reasoning followed by a verdict
    marker on its own line (``[[A]]``, ``[[B]]`` or ``[[C]]``) and compares
    the ``reasoning``, ``value`` and ``score`` entries of the result with
    the expected values.
    """
    parser = PairwiseStringResultOutputParser()

    # (raw model output, expected parsed fields), checked in this order.
    cases = [
        (
            """I like pie better than cake.
[[A]]""",
            {
                "reasoning": "I like pie better than cake.",
                "value": "A",
                "score": 1,
            },
        ),
        (
            """I like cake better than pie.
[[B]]""",
            {
                "reasoning": "I like cake better than pie.",
                "value": "B",
                "score": 0,
            },
        ),
        (
            """I like cake and pie.
[[C]]""",
            {
                "reasoning": "I like cake and pie.",
                "value": None,
                "score": 0.5,
            },
        ),
    ]

    for raw_text, expected in cases:
        result = parser.parse(raw_text)
        # Compare field by field via .get() rather than whole-dict equality.
        for field in ("reasoning", "value", "score"):
            assert result.get(field) == expected[field]
|
||||
|
||||
|
||||
def test_pairwise_string_comparison_chain() -> None:
|
||||
llm = FakeLLM(
|
||||
queries={
|
||||
|
@ -7,6 +7,7 @@ from langchain.evaluation.criteria.eval_chain import (
|
||||
_SUPPORTED_CRITERIA,
|
||||
Criteria,
|
||||
CriteriaEvalChain,
|
||||
CriteriaResultOutputParser,
|
||||
LabeledCriteriaEvalChain,
|
||||
)
|
||||
from langchain.evaluation.schema import StringEvaluator
|
||||
@ -23,6 +24,34 @@ def test_resolve_criteria_str() -> None:
|
||||
}
|
||||
|
||||
|
||||
def test_CriteriaResultOutputParser_parse() -> None:
    """Check the fields produced when parsing criteria-evaluation output.

    Two inputs are exercised: multi-line reasoning followed by a final
    ``Y`` line, and a bare ``Y`` with no reasoning at all. For each, the
    ``reasoning``, ``value`` and ``score`` entries of the parsed result
    are compared with the expected values.
    """
    parser = CriteriaResultOutputParser()

    # (raw model output, expected parsed fields), checked in this order.
    cases = [
        (
            """Here is my step-by-step reasoning for the given criteria:
The criterion is: "Do you like cake?" I like cake.
Y""",
            {
                "reasoning": """Here is my step-by-step reasoning for the given criteria:
The criterion is: "Do you like cake?" I like cake.""",
                "value": "Y",
                "score": 1,
            },
        ),
        (
            "Y",
            {
                "reasoning": "",
                "value": "Y",
                "score": 1,
            },
        ),
    ]

    for raw_text, expected in cases:
        result = parser.parse(raw_text)
        # Compare field by field via .get() rather than whole-dict equality.
        for field in ("reasoning", "value", "score"):
            assert result.get(field) == expected[field]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("criterion", list(Criteria))
|
||||
def test_resolve_criteria_enum(criterion: Criteria) -> None:
|
||||
assert CriteriaEvalChain.resolve_criteria(criterion) == {
|
||||
|
Loading…
Reference in New Issue
Block a user