diff --git a/libs/core/langchain_core/output_parsers/xml.py b/libs/core/langchain_core/output_parsers/xml.py index 3df9750f8e0..4fef349c498 100644 --- a/libs/core/langchain_core/output_parsers/xml.py +++ b/libs/core/langchain_core/output_parsers/xml.py @@ -142,6 +142,20 @@ class XMLOutputParser(BaseTransformOutputParser): """Parse an output using xml format.""" tags: Optional[list[str]] = None + """Tags to tell the LLM to expect in the XML output. + + Note this may not be perfect depending on the LLM implementation. + + For example, with tags=["foo", "bar", "baz"]: + 1. A well-formatted XML instance: + "<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>" + + 2. A badly-formatted XML instance (missing closing tag for 'bar'): + "<foo>\n <bar>\n </foo>" + + 3. A badly-formatted XML instance (unexpected 'tag' element): + "<foo>\n <tag>\n </tag>\n</foo>" + """ encoding_matcher: re.Pattern = re.compile( r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL )