mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-18 16:16:33 +00:00
core[patch]: Patch XML vulnerability in XMLOutputParser (CVE-2024-1455) (#19653)
Patch potential XML vulnerability CVE-2024-1455. This patches a potential XML vulnerability in the XMLOutputParser in langchain-core. In some situations, the vulnerability could lead to a denial-of-service attack. At risk are users who: 1) are running older distributions of Python that ship an older version of libexpat; 2) are using XMLOutputParser with an agent; 3) accept inputs from untrusted sources with this agent (e.g., an endpoint on the web that allows an untrusted user to interact with the parser).
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
"""Test XMLOutputParser"""
|
||||
import importlib
|
||||
from typing import AsyncIterator, Iterable
|
||||
|
||||
import pytest
|
||||
@@ -42,24 +43,12 @@ DEF_RESULT_EXPECTED = {
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"result",
|
||||
[
|
||||
DATA, # has no xml header
|
||||
WITH_XML_HEADER,
|
||||
IN_XML_TAGS_WITH_XML_HEADER,
|
||||
IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK,
|
||||
],
|
||||
)
|
||||
async def test_xml_output_parser(result: str) -> None:
|
||||
"""Test XMLOutputParser."""
|
||||
async def _test_parser(parser: XMLOutputParser, content: str) -> None:
|
||||
"""Test parser."""
|
||||
xml_content = parser.parse(content)
|
||||
assert DEF_RESULT_EXPECTED == xml_content
|
||||
|
||||
xml_parser = XMLOutputParser()
|
||||
|
||||
xml_result = xml_parser.parse(result)
|
||||
assert DEF_RESULT_EXPECTED == xml_result
|
||||
|
||||
assert list(xml_parser.transform(iter(result))) == [
|
||||
assert list(parser.transform(iter(content))) == [
|
||||
{"foo": [{"bar": [{"baz": None}]}]},
|
||||
{"foo": [{"bar": [{"baz": "slim.shady"}]}]},
|
||||
{"foo": [{"baz": "tag"}]},
|
||||
@@ -69,7 +58,7 @@ async def test_xml_output_parser(result: str) -> None:
|
||||
for item in iterable:
|
||||
yield item
|
||||
|
||||
chunks = [chunk async for chunk in xml_parser.atransform(_as_iter(result))]
|
||||
chunks = [chunk async for chunk in parser.atransform(_as_iter(content))]
|
||||
|
||||
assert list(chunks) == [
|
||||
{"foo": [{"bar": [{"baz": None}]}]},
|
||||
@@ -78,12 +67,72 @@ async def test_xml_output_parser(result: str) -> None:
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "content",
    [
        DATA,  # has no xml header
        WITH_XML_HEADER,
        IN_XML_TAGS_WITH_XML_HEADER,
        IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK,
    ],
)
async def test_xml_output_parser(content: str) -> None:
    """Run the shared parser checks with ``parser="xml"``.

    Each parametrized ``content`` variant is handed to ``_test_parser``
    together with a freshly constructed ``XMLOutputParser``.
    """
    await _test_parser(XMLOutputParser(parser="xml"), content)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    importlib.util.find_spec("defusedxml") is None,
    reason="defusedxml is not installed",
)
@pytest.mark.parametrize(
    "content",
    [
        DATA,  # has no xml header
        WITH_XML_HEADER,
        IN_XML_TAGS_WITH_XML_HEADER,
        IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK,
    ],
)
async def test_xml_output_parser_defused(content: str) -> None:
    """Run the shared parser checks with ``parser="defusedxml"``.

    The whole test is skipped when the ``defusedxml`` package cannot be
    found; otherwise each ``content`` variant goes through ``_test_parser``.
    """
    await _test_parser(XMLOutputParser(parser="defusedxml"), content)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("result", ["foo></foo>", "<foo></foo", "foo></foo", "foofoo"])
def test_xml_output_parser_fail(result: str) -> None:
    """Test XMLOutputParser where complete output is not in XML format.

    Every parametrized ``result`` is malformed XML, so ``parse`` is expected
    to raise ``OutputParserException`` with a "Failed to parse" message.
    """
    # Only the explicit parser="xml" construction is kept; the earlier bare
    # XMLOutputParser() assignment was overwritten before it was ever used.
    xml_parser = XMLOutputParser(parser="xml")

    with pytest.raises(OutputParserException) as e:
        xml_parser.parse(result)

    # Check the raised exception itself. str() of the ExceptionInfo wrapper
    # only exposes the message through its repr.
    assert "Failed to parse" in str(e.value)
|
||||
|
||||
|
||||
# "Billion laughs" payload: lol1..lol9 each reference the previous entity ten
# times, so fully expanding &lol9; would yield 10**9 copies of "lol".
MALICIOUS_XML = """<?xml version="1.0"?>
<!DOCTYPE lolz [<!ENTITY lol "lol"><!ELEMENT lolz (#PCDATA)>
<!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
<!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
<!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
<!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
<!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
<!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<lolz>&lol9;</lolz>"""
|
||||
|
||||
|
||||
async def tests_billion_laughs_attack() -> None:
    """Verify the malicious payload is rejected by both parse and aparse.

    Testing with the standard XML parser since it's safe to use in newer
    versions of Python.
    """
    xml_parser = XMLOutputParser(parser="xml")

    # Synchronous entry point.
    with pytest.raises(OutputParserException):
        xml_parser.parse(MALICIOUS_XML)

    # Asynchronous entry point.
    with pytest.raises(OutputParserException):
        await xml_parser.aparse(MALICIOUS_XML)
|
||||
|
Reference in New Issue
Block a user