mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-15 22:44:36 +00:00
Move json and xml parsers to core (#15026)
<!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - **Description:** a description of the change, - **Issue:** the issue # it fixes if applicable, - **Dependencies:** any dependencies required for this change, - **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. -->
This commit is contained in:
@@ -11,6 +11,8 @@ EXPECTED_ALL = [
|
||||
"StrOutputParser",
|
||||
"BaseTransformOutputParser",
|
||||
"BaseCumulativeTransformOutputParser",
|
||||
"SimpleJsonOutputParser",
|
||||
"XMLOutputParser",
|
||||
]
|
||||
|
||||
|
||||
|
488
libs/core/tests/unit_tests/output_parsers/test_json.py
Normal file
488
libs/core/tests/unit_tests/output_parsers/test_json.py
Normal file
@@ -0,0 +1,488 @@
|
||||
import json
|
||||
from typing import Any, AsyncIterator, Iterator, Tuple
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core.output_parsers.json import (
|
||||
SimpleJsonOutputParser,
|
||||
parse_json_markdown,
|
||||
parse_partial_json,
|
||||
)
|
||||
|
||||
GOOD_JSON = """```json
|
||||
{
|
||||
"foo": "bar"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_NEW_LINES = """
|
||||
|
||||
```json
|
||||
{
|
||||
"foo": "bar"
|
||||
}
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
JSON_WITH_NEW_LINES_INSIDE = """```json
|
||||
{
|
||||
|
||||
"foo": "bar"
|
||||
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_NEW_LINES_EVERYWHERE = """
|
||||
|
||||
```json
|
||||
|
||||
{
|
||||
|
||||
"foo": "bar"
|
||||
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
TICKS_WITH_NEW_LINES_EVERYWHERE = """
|
||||
|
||||
```
|
||||
|
||||
{
|
||||
|
||||
"foo": "bar"
|
||||
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
|
||||
{
|
||||
"foo": "```bar```"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "```bar\n<div id="1" class=\"value\">\n\ttext\n</div>```"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{"foo": "bar", "bar": "foo"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_PYTHON_DICT = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": {"foo": "bar", "bar": "foo"}
|
||||
}
|
||||
```"""
|
||||
|
||||
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON = """```json
|
||||
{
|
||||
"action": "Final Answer",
|
||||
"action_input": "{\\"foo\\": \\"bar\\", \\"bar\\": \\"foo\\"}"
|
||||
}
|
||||
```"""
|
||||
|
||||
NO_TICKS = """{
|
||||
"foo": "bar"
|
||||
}"""
|
||||
|
||||
NO_TICKS_WHITE_SPACE = """
|
||||
{
|
||||
"foo": "bar"
|
||||
}
|
||||
"""
|
||||
|
||||
TEXT_BEFORE = """Thought: I need to use the search tool
|
||||
|
||||
Action:
|
||||
```
|
||||
{
|
||||
"foo": "bar"
|
||||
}
|
||||
```"""
|
||||
|
||||
TEXT_AFTER = """```
|
||||
{
|
||||
"foo": "bar"
|
||||
}
|
||||
```
|
||||
This should do the trick"""
|
||||
|
||||
TEXT_BEFORE_AND_AFTER = """Action: Testing
|
||||
|
||||
```
|
||||
{
|
||||
"foo": "bar"
|
||||
}
|
||||
```
|
||||
This should do the trick"""
|
||||
|
||||
TEST_CASES = [
|
||||
GOOD_JSON,
|
||||
JSON_WITH_NEW_LINES,
|
||||
JSON_WITH_NEW_LINES_INSIDE,
|
||||
JSON_WITH_NEW_LINES_EVERYWHERE,
|
||||
TICKS_WITH_NEW_LINES_EVERYWHERE,
|
||||
NO_TICKS,
|
||||
NO_TICKS_WHITE_SPACE,
|
||||
TEXT_BEFORE,
|
||||
TEXT_AFTER,
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("json_string", TEST_CASES)
def test_parse_json(json_string: str) -> None:
    """Each entry of TEST_CASES must parse to the same single-key dict."""
    expected = {"foo": "bar"}
    assert parse_json_markdown(json_string) == expected
|
||||
|
||||
|
||||
def test_parse_json_with_code_blocks() -> None:
    """Triple backticks inside a JSON string value must be kept verbatim."""
    simple = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK)
    assert simple == {"foo": "```bar```"}

    # Same idea, but the embedded value also carries newlines, a tab and
    # quoted HTML attributes.
    expected_action = {
        "action": "Final Answer",
        "action_input": '```bar\n<div id="1" class="value">\n\ttext\n</div>```',
    }
    nested = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)
    assert nested == expected_action
|
||||
|
||||
|
||||
TEST_CASES_ESCAPED_QUOTES = [
|
||||
JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
|
||||
JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("json_string", TEST_CASES_ESCAPED_QUOTES)
def test_parse_nested_json_with_escaped_quotes(json_string: str) -> None:
    """JSON embedded as a string value must be returned as that string."""
    expected = {
        "action": "Final Answer",
        "action_input": '{"foo": "bar", "bar": "foo"}',
    }
    assert parse_json_markdown(json_string) == expected
|
||||
|
||||
|
||||
def test_parse_json_with_python_dict() -> None:
    """A nested object under ``action_input`` must be parsed into a dict."""
    expected = {
        "action": "Final Answer",
        "action_input": {"foo": "bar", "bar": "foo"},
    }
    assert parse_json_markdown(JSON_WITH_PYTHON_DICT) == expected
|
||||
|
||||
|
||||
TEST_CASES_PARTIAL = [
|
||||
('{"foo": "bar", "bar": "foo"}', '{"foo": "bar", "bar": "foo"}'),
|
||||
('{"foo": "bar", "bar": "foo', '{"foo": "bar", "bar": "foo"}'),
|
||||
('{"foo": "bar", "bar": "foo}', '{"foo": "bar", "bar": "foo}"}'),
|
||||
('{"foo": "bar", "bar": "foo[', '{"foo": "bar", "bar": "foo["}'),
|
||||
('{"foo": "bar", "bar": "foo\\"', '{"foo": "bar", "bar": "foo\\""}'),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("json_strings", TEST_CASES_PARTIAL)
def test_parse_partial_json(json_strings: Tuple[str, str]) -> None:
    """Compare ``parse_partial_json`` on a fragment with its completed form.

    Each parameter is a ``(fragment, completed)`` pair; the completed text is
    decoded with ``json.loads`` to obtain the expected value.
    """
    truncated, completed = json_strings
    assert parse_partial_json(truncated) == json.loads(completed)
|
||||
|
||||
|
||||
STREAMED_TOKENS = """
|
||||
{
|
||||
|
||||
"
|
||||
setup
|
||||
":
|
||||
"
|
||||
Why
|
||||
did
|
||||
the
|
||||
bears
|
||||
start
|
||||
a
|
||||
band
|
||||
called
|
||||
Bears
|
||||
Bears
|
||||
Bears
|
||||
?
|
||||
"
|
||||
,
|
||||
"
|
||||
punchline
|
||||
":
|
||||
"
|
||||
Because
|
||||
they
|
||||
wanted
|
||||
to
|
||||
play
|
||||
bear
|
||||
-y
|
||||
good
|
||||
music
|
||||
!
|
||||
"
|
||||
,
|
||||
"
|
||||
audience
|
||||
":
|
||||
[
|
||||
"
|
||||
Haha
|
||||
"
|
||||
,
|
||||
"
|
||||
So
|
||||
funny
|
||||
"
|
||||
]
|
||||
|
||||
}
|
||||
""".splitlines()
|
||||
|
||||
EXPECTED_STREAMED_JSON = [
|
||||
{},
|
||||
{"setup": ""},
|
||||
{"setup": "Why"},
|
||||
{"setup": "Why did"},
|
||||
{"setup": "Why did the"},
|
||||
{"setup": "Why did the bears"},
|
||||
{"setup": "Why did the bears start"},
|
||||
{"setup": "Why did the bears start a"},
|
||||
{"setup": "Why did the bears start a band"},
|
||||
{"setup": "Why did the bears start a band called"},
|
||||
{"setup": "Why did the bears start a band called Bears"},
|
||||
{"setup": "Why did the bears start a band called Bears Bears"},
|
||||
{"setup": "Why did the bears start a band called Bears Bears Bears"},
|
||||
{"setup": "Why did the bears start a band called Bears Bears Bears ?"},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted to",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted to play",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted to play bear",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted to play bear -y",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted to play bear -y good",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted to play bear -y good music",
|
||||
},
|
||||
{
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"punchline": "Because they wanted to play bear -y good music !",
|
||||
},
|
||||
{
|
||||
"punchline": "Because they wanted to play bear -y good music !",
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"audience": [],
|
||||
},
|
||||
{
|
||||
"punchline": "Because they wanted to play bear -y good music !",
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"audience": [""],
|
||||
},
|
||||
{
|
||||
"punchline": "Because they wanted to play bear -y good music !",
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"audience": ["Haha"],
|
||||
},
|
||||
{
|
||||
"punchline": "Because they wanted to play bear -y good music !",
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"audience": ["Haha", ""],
|
||||
},
|
||||
{
|
||||
"punchline": "Because they wanted to play bear -y good music !",
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"audience": ["Haha", "So"],
|
||||
},
|
||||
{
|
||||
"punchline": "Because they wanted to play bear -y good music !",
|
||||
"setup": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
"audience": ["Haha", "So funny"],
|
||||
},
|
||||
]
|
||||
|
||||
EXPECTED_STREAMED_JSON_DIFF = [
|
||||
[{"op": "replace", "path": "", "value": {}}],
|
||||
[{"op": "add", "path": "/setup", "value": ""}],
|
||||
[{"op": "replace", "path": "/setup", "value": "Why"}],
|
||||
[{"op": "replace", "path": "/setup", "value": "Why did"}],
|
||||
[{"op": "replace", "path": "/setup", "value": "Why did the"}],
|
||||
[{"op": "replace", "path": "/setup", "value": "Why did the bears"}],
|
||||
[{"op": "replace", "path": "/setup", "value": "Why did the bears start"}],
|
||||
[{"op": "replace", "path": "/setup", "value": "Why did the bears start a"}],
|
||||
[{"op": "replace", "path": "/setup", "value": "Why did the bears start a band"}],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/setup",
|
||||
"value": "Why did the bears start a band called",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/setup",
|
||||
"value": "Why did the bears start a band called Bears",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/setup",
|
||||
"value": "Why did the bears start a band called Bears Bears",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/setup",
|
||||
"value": "Why did the bears start a band called Bears Bears Bears",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/setup",
|
||||
"value": "Why did the bears start a band called Bears Bears Bears ?",
|
||||
}
|
||||
],
|
||||
[{"op": "add", "path": "/punchline", "value": ""}],
|
||||
[{"op": "replace", "path": "/punchline", "value": "Because"}],
|
||||
[{"op": "replace", "path": "/punchline", "value": "Because they"}],
|
||||
[{"op": "replace", "path": "/punchline", "value": "Because they wanted"}],
|
||||
[{"op": "replace", "path": "/punchline", "value": "Because they wanted to"}],
|
||||
[{"op": "replace", "path": "/punchline", "value": "Because they wanted to play"}],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/punchline",
|
||||
"value": "Because they wanted to play bear",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/punchline",
|
||||
"value": "Because they wanted to play bear -y",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/punchline",
|
||||
"value": "Because they wanted to play bear -y good",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/punchline",
|
||||
"value": "Because they wanted to play bear -y good music",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"op": "replace",
|
||||
"path": "/punchline",
|
||||
"value": "Because they wanted to play bear -y good music !",
|
||||
}
|
||||
],
|
||||
[{"op": "add", "path": "/audience", "value": []}],
|
||||
[{"op": "add", "path": "/audience/0", "value": ""}],
|
||||
[{"op": "replace", "path": "/audience/0", "value": "Haha"}],
|
||||
[{"op": "add", "path": "/audience/1", "value": ""}],
|
||||
[{"op": "replace", "path": "/audience/1", "value": "So"}],
|
||||
[{"op": "replace", "path": "/audience/1", "value": "So funny"}],
|
||||
]
|
||||
|
||||
|
||||
def test_partial_text_json_output_parser() -> None:
    """Stream STREAMED_TOKENS through the parser and check every output."""

    def input_iter(_: Any) -> Iterator[str]:
        # The chain input is ignored; the tokens come from the fixture.
        yield from STREAMED_TOKENS

    chain = input_iter | SimpleJsonOutputParser()
    streamed = list(chain.stream(None))

    assert streamed == EXPECTED_STREAMED_JSON
|
||||
|
||||
|
||||
def test_partial_text_json_output_parser_diff() -> None:
    """Stream STREAMED_TOKENS with ``diff=True`` and check every output.

    The expected values are lists of ``op``/``path``/``value`` entries.
    """

    def input_iter(_: Any) -> Iterator[str]:
        # The chain input is ignored; the tokens come from the fixture.
        yield from STREAMED_TOKENS

    chain = input_iter | SimpleJsonOutputParser(diff=True)
    streamed = list(chain.stream(None))

    assert streamed == EXPECTED_STREAMED_JSON_DIFF
|
||||
|
||||
|
||||
async def test_partial_text_json_output_parser_async() -> None:
    """Async variant: feed STREAMED_TOKENS via ``astream`` and check outputs."""

    async def input_iter(_: Any) -> AsyncIterator[str]:
        # The chain input is ignored; the tokens come from the fixture.
        for piece in STREAMED_TOKENS:
            yield piece

    chain = input_iter | SimpleJsonOutputParser()

    collected = []
    async for parsed in chain.astream(None):
        collected.append(parsed)

    assert collected == EXPECTED_STREAMED_JSON
|
||||
|
||||
|
||||
async def test_partial_text_json_output_parser_diff_async() -> None:
    """Async variant with ``diff=True``: check every output of ``astream``.

    The expected values are lists of ``op``/``path``/``value`` entries.
    """

    async def input_iter(_: Any) -> AsyncIterator[str]:
        # The chain input is ignored; the tokens come from the fixture.
        for piece in STREAMED_TOKENS:
            yield piece

    chain = input_iter | SimpleJsonOutputParser(diff=True)

    collected = []
    async for patch in chain.astream(None):
        collected.append(patch)

    assert collected == EXPECTED_STREAMED_JSON_DIFF
|
49
libs/core/tests/unit_tests/output_parsers/test_xml_parser.py
Normal file
49
libs/core/tests/unit_tests/output_parsers/test_xml_parser.py
Normal file
@@ -0,0 +1,49 @@
|
||||
"""Test XMLOutputParser"""
|
||||
import pytest
|
||||
|
||||
from langchain_core.output_parsers.xml import XMLOutputParser
|
||||
|
||||
DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<foo>
|
||||
<bar>
|
||||
<baz></baz>
|
||||
<baz>slim.shady</baz>
|
||||
</bar>
|
||||
<baz>tag</baz>
|
||||
</foo>"""
|
||||
|
||||
DEF_RESULT_EXPECTED = {
|
||||
"foo": [
|
||||
{"bar": [{"baz": None}, {"baz": "slim.shady"}]},
|
||||
{"baz": "tag"},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "result",
    [DEF_RESULT_ENCODING, DEF_RESULT_ENCODING[DEF_RESULT_ENCODING.find("\n") :]],
)
def test_xml_output_parser(result: str) -> None:
    """Parse the sample XML in one call, then again character by character.

    The second parameter is the same document with its first line (the XML
    declaration) cut off.
    """
    parser = XMLOutputParser()

    # One-shot parsing is expected to return the whole nested structure.
    assert DEF_RESULT_EXPECTED == parser.parse(result)

    # ``iter(result)`` hands the text to ``transform`` one character at a
    # time; three separate chunks are expected back.
    expected_chunks = [
        {"foo": [{"bar": [{"baz": None}]}]},
        {"foo": [{"bar": [{"baz": "slim.shady"}]}]},
        {"foo": [{"baz": "tag"}]},
    ]
    streamed = list(parser.transform(iter(result)))
    assert streamed == expected_chunks
|
||||
|
||||
|
||||
@pytest.mark.parametrize("result", ["foo></foo>", "<foo></foo", "foo></foo", "foofoo"])
def test_xml_output_parser_fail(result: str) -> None:
    """Test XMLOutputParser where complete output is not in XML format.

    Each parameter is text with a missing or broken angle bracket; ``parse``
    is expected to raise ``ValueError`` whose message mentions
    "Could not parse output".
    """

    xml_parser = XMLOutputParser()

    with pytest.raises(ValueError) as e:
        xml_parser.parse(result)
    # ``e`` is pytest's ExceptionInfo wrapper, not the exception; check the
    # message of the raised error itself via ``e.value`` so the assertion does
    # not depend on how pytest renders the wrapper.
    assert "Could not parse output" in str(e.value)
|
Reference in New Issue
Block a user