mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-12 21:11:43 +00:00
core[patch]: XMLOutputParser fix to handle changes to xml standard library (#19612)
Newest python micro releases broke streaming in the XMLOutputParser. This fixes the parsing code to work with trailing junk after the XML content.
This commit is contained in:
@@ -1,10 +1,12 @@
|
||||
"""Test XMLOutputParser"""
|
||||
from typing import AsyncIterator, Iterable
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.output_parsers.xml import XMLOutputParser
|
||||
|
||||
DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
DATA = """
|
||||
<foo>
|
||||
<bar>
|
||||
<baz></baz>
|
||||
@@ -13,6 +15,25 @@ DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<baz>tag</baz>
|
||||
</foo>"""
|
||||
|
||||
WITH_XML_HEADER = f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
{DATA}"""
|
||||
|
||||
|
||||
IN_XML_TAGS_WITH_XML_HEADER = f"""
|
||||
```xml
|
||||
{WITH_XML_HEADER}
|
||||
```
|
||||
"""
|
||||
|
||||
IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK = f"""
|
||||
Some random text
|
||||
```xml
|
||||
{WITH_XML_HEADER}
|
||||
```
|
||||
More random text
|
||||
"""
|
||||
|
||||
|
||||
DEF_RESULT_EXPECTED = {
|
||||
"foo": [
|
||||
{"bar": [{"baz": None}, {"baz": "slim.shady"}]},
|
||||
@@ -24,23 +45,13 @@ DEF_RESULT_EXPECTED = {
|
||||
@pytest.mark.parametrize(
|
||||
"result",
|
||||
[
|
||||
DEF_RESULT_ENCODING,
|
||||
DEF_RESULT_ENCODING[DEF_RESULT_ENCODING.find("\n") :],
|
||||
f"""
|
||||
```xml
|
||||
{DEF_RESULT_ENCODING}
|
||||
```
|
||||
""",
|
||||
f"""
|
||||
Some random text
|
||||
```xml
|
||||
{DEF_RESULT_ENCODING}
|
||||
```
|
||||
More random text
|
||||
""",
|
||||
DATA, # has no xml header
|
||||
WITH_XML_HEADER,
|
||||
IN_XML_TAGS_WITH_XML_HEADER,
|
||||
IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK,
|
||||
],
|
||||
)
|
||||
def test_xml_output_parser(result: str) -> None:
|
||||
async def test_xml_output_parser(result: str) -> None:
|
||||
"""Test XMLOutputParser."""
|
||||
|
||||
xml_parser = XMLOutputParser()
|
||||
@@ -48,12 +59,23 @@ def test_xml_output_parser(result: str) -> None:
|
||||
xml_result = xml_parser.parse(result)
|
||||
assert DEF_RESULT_EXPECTED == xml_result
|
||||
|
||||
# TODO(Eugene): Fix this test for newer python version
|
||||
# assert list(xml_parser.transform(iter(result))) == [
|
||||
# {"foo": [{"bar": [{"baz": None}]}]},
|
||||
# {"foo": [{"bar": [{"baz": "slim.shady"}]}]},
|
||||
# {"foo": [{"baz": "tag"}]},
|
||||
# ]
|
||||
assert list(xml_parser.transform(iter(result))) == [
|
||||
{"foo": [{"bar": [{"baz": None}]}]},
|
||||
{"foo": [{"bar": [{"baz": "slim.shady"}]}]},
|
||||
{"foo": [{"baz": "tag"}]},
|
||||
]
|
||||
|
||||
async def _as_iter(iterable: Iterable[str]) -> AsyncIterator[str]:
|
||||
for item in iterable:
|
||||
yield item
|
||||
|
||||
chunks = [chunk async for chunk in xml_parser.atransform(_as_iter(result))]
|
||||
|
||||
assert list(chunks) == [
|
||||
{"foo": [{"bar": [{"baz": None}]}]},
|
||||
{"foo": [{"bar": [{"baz": "slim.shady"}]}]},
|
||||
{"foo": [{"baz": "tag"}]},
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("result", ["foo></foo>", "<foo></foo", "foo></foo", "foofoo"])
|
||||
|
Reference in New Issue
Block a user