mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 06:14:37 +00:00
Strips leading/trailing whitespace before parsing xml (#12297)
**Description:** When LLMs emit leading or trailing whitespace around XML output (when using `XMLOutputParser`), the parser would raise a `ValueError: Could not parse output: ...`. However, leading and trailing whitespace is "ignorable" in the sense of the XML standard, so it is now stripped before parsing. **Issue:** I did not find a related issue. **Dependencies:** None **Tag maintainer:** **Twitter handle:** donatoaz Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. Done, updated unit test and ran `make docker_test`.
This commit is contained in:
parent
3da1a65fa0
commit
d9f1bcf366
@ -22,6 +22,8 @@ class XMLOutputParser(BaseOutputParser):
|
|||||||
encoding_match = self.encoding_matcher.search(text)
|
encoding_match = self.encoding_matcher.search(text)
|
||||||
if encoding_match:
|
if encoding_match:
|
||||||
text = encoding_match.group(2)
|
text = encoding_match.group(2)
|
||||||
|
|
||||||
|
text = text.strip()
|
||||||
if (text.startswith("<") or text.startswith("\n<")) and (
|
if (text.startswith("<") or text.startswith("\n<")) and (
|
||||||
text.endswith(">") or text.endswith(">\n")
|
text.endswith(">") or text.endswith(">\n")
|
||||||
):
|
):
|
||||||
|
@ -4,7 +4,7 @@ import pytest
|
|||||||
from langchain.output_parsers.xml import XMLOutputParser
|
from langchain.output_parsers.xml import XMLOutputParser
|
||||||
|
|
||||||
DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?>
|
DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<foo>
|
<foo>
|
||||||
<bar>
|
<bar>
|
||||||
<baz></baz>
|
<baz></baz>
|
||||||
<baz>slim.shady</baz>
|
<baz>slim.shady</baz>
|
||||||
|
Loading…
Reference in New Issue
Block a user