core: Fix Exception handling in XMLOutputParser (#19126)

- **Description:** 
  - Exception handling in `XMLOutputParser`
    1. Add exception handling at `root = ET.fromstring(text)`, which raises
       `ET.ParseError`
    2. Fix the exception class (use the one commonly used in the `BaseOutputParser` class)
  - AS-IS: raises `ValueError`, `ET.ParseError` without handling
    ```python
    # langchain_core/output_parsers/xml.py

        text = text.strip()
        if (text.startswith("<") or text.startswith("\n<")) and (
            text.endswith(">") or text.endswith(">\n")
        ):
            root = ET.fromstring(text)
            return self._root_to_dict(root)
        else:
            raise ValueError(f"Could not parse output: {text}")
    ```
  - TO-BE: raise `OutputParserException`
    ```python
    # langchain_core/output_parsers/xml.py

        text = text.strip()
        if (text.startswith("<") or text.startswith("\n<")) and (
            text.endswith(">") or text.endswith(">\n")
        ):
            try:
                root = ET.fromstring(text)
                return self._root_to_dict(root)

            except ET.ParseError:
                raise OutputParserException(f"Could not parse output: {text}")

        else:
            raise OutputParserException(f"Could not parse output: {text}")

    ``` 
- **Issue:** #19107  
- **Dependencies:** None
This commit is contained in:
Kangmoon Seo 2024-03-19 13:08:32 +09:00 committed by GitHub
parent 24a0a4472a
commit 07de4abe70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 7 deletions

View File

@ -2,6 +2,7 @@ import re
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import BaseMessage from langchain_core.messages import BaseMessage
from langchain_core.output_parsers.transform import BaseTransformOutputParser from langchain_core.output_parsers.transform import BaseTransformOutputParser
from langchain_core.runnables.utils import AddableDict from langchain_core.runnables.utils import AddableDict
@ -44,13 +45,13 @@ class XMLOutputParser(BaseTransformOutputParser):
text = encoding_match.group(2) text = encoding_match.group(2)
text = text.strip() text = text.strip()
if (text.startswith("<") or text.startswith("\n<")) and ( try:
text.endswith(">") or text.endswith(">\n")
):
root = ET.fromstring(text) root = ET.fromstring(text)
return self._root_to_dict(root) return self._root_to_dict(root)
else:
raise ValueError(f"Could not parse output: {text}") except ET.ParseError as e:
msg = f"Failed to parse XML format from completion {text}. Got: {e}"
raise OutputParserException(msg, llm_output=text) from e
def _transform( def _transform(
self, input: Iterator[Union[str, BaseMessage]] self, input: Iterator[Union[str, BaseMessage]]

View File

@ -1,6 +1,7 @@
"""Test XMLOutputParser""" """Test XMLOutputParser"""
import pytest import pytest
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers.xml import XMLOutputParser from langchain_core.output_parsers.xml import XMLOutputParser
DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?> DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?>
@ -59,6 +60,6 @@ def test_xml_output_parser_fail(result: str) -> None:
xml_parser = XMLOutputParser() xml_parser = XMLOutputParser()
with pytest.raises(ValueError) as e: with pytest.raises(OutputParserException) as e:
xml_parser.parse(result) xml_parser.parse(result)
assert "Could not parse output" in str(e) assert "Failed to parse" in str(e)