From 18824b5761d73ab409c2ee7ec1e1023e3198685b Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Thu, 3 Jul 2025 18:38:36 -0400 Subject: [PATCH] x --- .../langchain/agents/output_parsers/xml.py | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/libs/langchain/langchain/agents/output_parsers/xml.py b/libs/langchain/langchain/agents/output_parsers/xml.py index cb487bc739e..566baec400d 100644 --- a/libs/langchain/langchain/agents/output_parsers/xml.py +++ b/libs/langchain/langchain/agents/output_parsers/xml.py @@ -66,17 +66,35 @@ class XMLAgentOutputParser(AgentOutputParser): """ def parse(self, text: str) -> Union[AgentAction, AgentFinish]: - if "" in text: - tool, tool_input = text.split("") - _tool = tool.split("")[1] - _tool_input = tool_input.split("")[1] - if "" in _tool_input: - _tool_input = _tool_input.split("")[0] - # Unescape custom delimiters in tool name and input + # Check for tool invocation first + tool_matches = re.findall(r"(.*?)", text, re.DOTALL) + if tool_matches: + if len(tool_matches) != 1: + raise ValueError( + f"Malformed tool invocation: expected exactly one block, " + f"but found {len(tool_matches)}." + ) + _tool = tool_matches[0] + + # Match optional tool input + input_matches = re.findall( + r"(.*?)", text, re.DOTALL + ) + if len(input_matches) > 1: + raise ValueError( + f"Malformed tool invocation: expected at most one " + f"block, but found {len(input_matches)}." + ) + _tool_input = input_matches[0] if input_matches else "" + + # Unescape if minimal escape format is used if self.escape_format == "minimal": _tool = _unescape(_tool) _tool_input = _unescape(_tool_input) + return AgentAction(tool=_tool, tool_input=_tool_input, log=text) + + # Check for final answer elif "" in text and "" in text: matches = re.findall(r"(.*?)", text, re.DOTALL) if len(matches) != 1: @@ -86,6 +104,9 @@ class XMLAgentOutputParser(AgentOutputParser): ) raise ValueError(msg) answer = matches[0] + # Unescape custom delimiters in final answer + if self.escape_format == "minimal": + answer = _unescape(answer) return AgentFinish(return_values={"output": answer}, log=text) else: msg = (