core[patch]: Reverting changes with defusedXML (#19604)

DefusedXML is causing parsing errors on previously functional code with
the 0.7.x versions. These do not seem to support newer versions of Python
well. 0.8.x has only been released as an rc, so we're not going to use
it in the core package.
This commit is contained in:
Eugene Yurtsev
2024-03-26 15:13:09 -04:00
committed by GitHub
parent 9ea2a9b0c1
commit 8bc5cdccee
4 changed files with 12 additions and 71 deletions

View File

@@ -1,6 +1,4 @@
"""Test XMLOutputParser"""
from typing import AsyncIterator
import pytest
from langchain_core.exceptions import OutputParserException
@@ -42,29 +40,19 @@ More random text
""",
],
)
async def test_xml_output_parser(result: str) -> None:
def test_xml_output_parser(result: str) -> None:
"""Test XMLOutputParser."""
xml_parser = XMLOutputParser()
assert DEF_RESULT_EXPECTED == xml_parser.parse(result)
assert DEF_RESULT_EXPECTED == (await xml_parser.aparse(result))
xml_result = xml_parser.parse(result)
assert DEF_RESULT_EXPECTED == xml_result
assert list(xml_parser.transform(iter(result))) == [
{"foo": [{"bar": [{"baz": None}]}]},
{"foo": [{"bar": [{"baz": "slim.shady"}]}]},
{"foo": [{"baz": "tag"}]},
]
async def _as_iter(string: str) -> AsyncIterator[str]:
for c in string:
yield c
chunks = [chunk async for chunk in xml_parser.atransform(_as_iter(result))]
assert chunks == [
{"foo": [{"bar": [{"baz": None}]}]},
{"foo": [{"bar": [{"baz": "slim.shady"}]}]},
{"foo": [{"baz": "tag"}]},
]
@pytest.mark.parametrize("result", ["foo></foo>", "<foo></foo", "foo></foo", "foofoo"])
def test_xml_output_parser_fail(result: str) -> None:
@@ -75,27 +63,3 @@ def test_xml_output_parser_fail(result: str) -> None:
with pytest.raises(OutputParserException) as e:
xml_parser.parse(result)
assert "Failed to parse" in str(e)
MALICIOUS_XML = """<?xml version="1.0"?>
<!DOCTYPE lolz [<!ENTITY lol "lol"><!ELEMENT lolz (#PCDATA)>
<!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
<!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
<!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
<!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
<!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
<!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<lolz>&lol9;</lolz>"""
async def tests_billion_laughs_attack() -> None:
parser = XMLOutputParser()
with pytest.raises(OutputParserException):
parser.parse(MALICIOUS_XML)
with pytest.raises(OutputParserException):
await parser.aparse(MALICIOUS_XML)