core[patch]: Patch XML vulnerability in XMLOutputParser (CVE-2024-1455) (#19653)

Patch potential XML vulnerability CVE-2024-1455

This patches a potential XML vulnerability in the XMLOutputParser in
langchain-core. The vulnerability in some situations could lead to a
denial of service attack.

At risk are users that:

1) Are running older distributions of Python that ship an older version of
libexpat
2) Are using XMLOutputParser with an agent
3) Accept inputs from untrusted sources with this agent (e.g., an endpoint
on the web that allows an untrusted user to interact with the parser)
This commit is contained in:
Eugene Yurtsev
2024-03-27 12:41:52 -04:00
committed by GitHub
parent 7042934b5f
commit e8339b1d83
2 changed files with 212 additions and 140 deletions

View File

@@ -1,4 +1,5 @@
"""Test XMLOutputParser"""
import importlib
from typing import AsyncIterator, Iterable
import pytest
@@ -42,24 +43,12 @@ DEF_RESULT_EXPECTED = {
}
@pytest.mark.parametrize(
"result",
[
DATA, # has no xml header
WITH_XML_HEADER,
IN_XML_TAGS_WITH_XML_HEADER,
IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK,
],
)
async def test_xml_output_parser(result: str) -> None:
"""Test XMLOutputParser."""
async def _test_parser(parser: XMLOutputParser, content: str) -> None:
"""Test parser."""
xml_content = parser.parse(content)
assert DEF_RESULT_EXPECTED == xml_content
xml_parser = XMLOutputParser()
xml_result = xml_parser.parse(result)
assert DEF_RESULT_EXPECTED == xml_result
assert list(xml_parser.transform(iter(result))) == [
assert list(parser.transform(iter(content))) == [
{"foo": [{"bar": [{"baz": None}]}]},
{"foo": [{"bar": [{"baz": "slim.shady"}]}]},
{"foo": [{"baz": "tag"}]},
@@ -69,7 +58,7 @@ async def test_xml_output_parser(result: str) -> None:
for item in iterable:
yield item
chunks = [chunk async for chunk in xml_parser.atransform(_as_iter(result))]
chunks = [chunk async for chunk in parser.atransform(_as_iter(content))]
assert list(chunks) == [
{"foo": [{"bar": [{"baz": None}]}]},
@@ -78,12 +67,72 @@ async def test_xml_output_parser(result: str) -> None:
]
@pytest.mark.parametrize(
    "content",
    [
        DATA,  # has no xml header
        WITH_XML_HEADER,
        IN_XML_TAGS_WITH_XML_HEADER,
        IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK,
    ],
)
async def test_xml_output_parser(content: str) -> None:
    """Run the shared parser checks with the ``xml`` parser option.

    Args:
        content: One of the parametrized XML payloads to parse.
    """
    await _test_parser(XMLOutputParser(parser="xml"), content)
@pytest.mark.skipif(
    importlib.util.find_spec("defusedxml") is None,
    reason="defusedxml is not installed",
)
@pytest.mark.parametrize(
    "content",
    [
        DATA,  # has no xml header
        WITH_XML_HEADER,
        IN_XML_TAGS_WITH_XML_HEADER,
        IN_XML_TAGS_WITH_HEADER_AND_TRAILING_JUNK,
    ],
)
async def test_xml_output_parser_defused(content: str) -> None:
    """Run the shared parser checks with the ``defusedxml`` parser option.

    Skipped when the ``defusedxml`` package cannot be found.

    Args:
        content: One of the parametrized XML payloads to parse.
    """
    await _test_parser(XMLOutputParser(parser="defusedxml"), content)
@pytest.mark.parametrize("result", ["foo></foo>", "<foo></foo", "foo></foo", "foofoo"])
def test_xml_output_parser_fail(result: str) -> None:
    """Test XMLOutputParser where complete output is not in XML format.

    Args:
        result: A malformed XML string that ``parse`` is expected to reject.

    The test passes when ``parse`` raises ``OutputParserException`` and the
    exception text contains "Failed to parse".
    """
    xml_parser = XMLOutputParser(parser="xml")

    with pytest.raises(OutputParserException) as e:
        xml_parser.parse(result)

    # Check the raised exception itself (``e.value``), not the string form of
    # pytest's ExceptionInfo wrapper.
    assert "Failed to parse" in str(e.value)
# Nested entity-expansion ("billion laughs") payload: each entity lolN expands
# to ten copies of the previous one, so the single &lol9; reference in the
# document body would expand to 10**9 copies of "lol".
MALICIOUS_XML = """<?xml version="1.0"?>
<!DOCTYPE lolz [<!ENTITY lol "lol"><!ELEMENT lolz (#PCDATA)>
<!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
<!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
<!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
<!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
<!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
<!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
<!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<lolz>&lol9;</lolz>"""
async def tests_billion_laughs_attack() -> None:
    """Check that MALICIOUS_XML is rejected by both ``parse`` and ``aparse``."""
    # Testing with standard XML parser since it's safe to use in
    # newer versions of Python
    xml_parser = XMLOutputParser(parser="xml")

    # Synchronous entry point.
    with pytest.raises(OutputParserException):
        xml_parser.parse(MALICIOUS_XML)

    # Asynchronous entry point.
    with pytest.raises(OutputParserException):
        await xml_parser.aparse(MALICIOUS_XML)