mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-13 22:59:05 +00:00
core[patch]: fix xml output parser transform (#19530)
The previous PR passed the _parser argument to ET.XMLPullParser, which apparently is not meant to be used by user code and causes non-deterministic failures on CI when testing the transform and atransform methods. Reverting this change temporarily.
This commit is contained in:
parent
e6952b04d5
commit
56f4c5459b
@ -1,7 +1,6 @@
|
||||
import re
|
||||
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
|
||||
from xml.etree import ElementTree as ET
|
||||
from xml.etree.ElementTree import TreeBuilder
|
||||
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.messages import BaseMessage
|
||||
@ -61,13 +60,7 @@ class XMLOutputParser(BaseTransformOutputParser):
|
||||
def _transform(
|
||||
self, input: Iterator[Union[str, BaseMessage]]
|
||||
) -> Iterator[AddableDict]:
|
||||
# Imports are temporarily placed here to avoid issue with caching on CI
|
||||
# likely if you're reading this you can move them to the top of the file
|
||||
from defusedxml.ElementTree import DefusedXMLParser # type: ignore[import]
|
||||
|
||||
parser = ET.XMLPullParser(
|
||||
["start", "end"], _parser=DefusedXMLParser(target=TreeBuilder())
|
||||
)
|
||||
parser = ET.XMLPullParser(["start", "end"])
|
||||
xml_start_re = re.compile(r"<[a-zA-Z:_]")
|
||||
xml_started = False
|
||||
current_path: List[str] = []
|
||||
@ -117,12 +110,7 @@ class XMLOutputParser(BaseTransformOutputParser):
|
||||
async def _atransform(
|
||||
self, input: AsyncIterator[Union[str, BaseMessage]]
|
||||
) -> AsyncIterator[AddableDict]:
|
||||
# Imports are temporarily placed here to avoid issue with caching on CI
|
||||
# likely if you're reading this you can move them to the top of the file
|
||||
from defusedxml.ElementTree import DefusedXMLParser # type: ignore[import]
|
||||
|
||||
_parser = DefusedXMLParser(target=TreeBuilder())
|
||||
parser = ET.XMLPullParser(["start", "end"], _parser=_parser)
|
||||
parser = ET.XMLPullParser(["start", "end"])
|
||||
xml_start_re = re.compile(r"<[a-zA-Z:_]")
|
||||
xml_started = False
|
||||
current_path: List[str] = []
|
||||
|
@ -1,6 +1,5 @@
|
||||
"""Test XMLOutputParser"""
|
||||
from typing import AsyncIterator
|
||||
from xml.etree.ElementTree import ParseError
|
||||
|
||||
import pytest
|
||||
|
||||
@ -100,17 +99,3 @@ async def tests_billion_laughs_attack() -> None:
|
||||
|
||||
with pytest.raises(OutputParserException):
|
||||
await parser.aparse(MALICIOUS_XML)
|
||||
|
||||
with pytest.raises(ParseError):
|
||||
# Right now raises undefined entity error
|
||||
assert list(parser.transform(iter(MALICIOUS_XML))) == [
|
||||
{"foo": [{"bar": [{"baz": None}]}]}
|
||||
]
|
||||
|
||||
async def _as_iter(string: str) -> AsyncIterator[str]:
|
||||
for c in string:
|
||||
yield c
|
||||
|
||||
with pytest.raises(ParseError):
|
||||
chunks = [chunk async for chunk in parser.atransform(_as_iter(MALICIOUS_XML))]
|
||||
assert chunks == [{"foo": [{"bar": [{"baz": None}]}]}]
|
||||
|
Loading…
Reference in New Issue
Block a user