core: Fix Exception handling in XMLOutputParser (#19126)

- **Description:** 
  - Exception handling in `XMLOutputParser`
    1. Add exception handling around `root = ET.fromstring(text)`, which raises `ET.ParseError`.
    2. Switch to the exception class commonly used in `BaseOutputParser` classes (`OutputParserException`).
  - AS-IS: raises `ValueError`, or lets `ET.ParseError` propagate unhandled
    ```python
    # langchain_core/output_parsers/xml.py

        text = text.strip()
        if (text.startswith("<") or text.startswith("\n<")) and (
            text.endswith(">") or text.endswith(">\n")
        ):
            root = ET.fromstring(text)
            return self._root_to_dict(root)
        else:
            raise ValueError(f"Could not parse output: {text}")
    ```
  - TO-BE: raise `OutputParserException`
    ```python
    # langchain_core/output_parsers/xml.py

        text = text.strip()
        if (text.startswith("<") or text.startswith("\n<")) and (
            text.endswith(">") or text.endswith(">\n")
        ):
            try:
                root = ET.fromstring(text)
                return self._root_to_dict(root)

            except ET.ParseError:
                raise OutputParserException(f"Could not parse output: {text}")

        else:
            raise OutputParserException(f"Could not parse output: {text}")

    ``` 
- **Issue:** #19107  
- **Dependencies:** None
This commit is contained in:
Kangmoon Seo 2024-03-19 13:08:32 +09:00 committed by GitHub
parent 24a0a4472a
commit 07de4abe70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 7 deletions

View File

@ -2,6 +2,7 @@ import re
import xml.etree.ElementTree as ET
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers.transform import BaseTransformOutputParser
from langchain_core.runnables.utils import AddableDict
@ -44,13 +45,13 @@ class XMLOutputParser(BaseTransformOutputParser):
text = encoding_match.group(2)
text = text.strip()
if (text.startswith("<") or text.startswith("\n<")) and (
text.endswith(">") or text.endswith(">\n")
):
try:
root = ET.fromstring(text)
return self._root_to_dict(root)
else:
raise ValueError(f"Could not parse output: {text}")
except ET.ParseError as e:
msg = f"Failed to parse XML format from completion {text}. Got: {e}"
raise OutputParserException(msg, llm_output=text) from e
def _transform(
self, input: Iterator[Union[str, BaseMessage]]

View File

@ -1,6 +1,7 @@
"""Test XMLOutputParser"""
import pytest
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers.xml import XMLOutputParser
DEF_RESULT_ENCODING = """<?xml version="1.0" encoding="UTF-8"?>
@ -59,6 +60,6 @@ def test_xml_output_parser_fail(result: str) -> None:
xml_parser = XMLOutputParser()
with pytest.raises(ValueError) as e:
with pytest.raises(OutputParserException) as e:
xml_parser.parse(result)
assert "Could not parse output" in str(e)
assert "Failed to parse" in str(e)