From 622bf12c2ebe1fafb1932084ad842b98bbb7b190 Mon Sep 17 00:00:00 2001 From: hmasdev <73353463+hmasdev@users.noreply.github.com> Date: Tue, 7 Nov 2023 00:53:14 +0900 Subject: [PATCH] fix regex pattern of structured output parser (#12929) - **Description:** fix the regex pattern of [StructuredChatOutputParser](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/structured_chat/output_parser.py#L18) and add unit tests for the code change. - **Issue:** #12158 #12922 - **Dependencies:** None - **Tag maintainer:** - **Twitter handle:** @hmdev3 - **NOTE:** This PR conflicts with #7495. After #7495 is merged, I am going to update this PR. --- .../agents/structured_chat/output_parser.py | 2 +- .../unit_tests/agents/test_structured_chat.py | 62 ++++++++++++++++++- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/libs/langchain/langchain/agents/structured_chat/output_parser.py b/libs/langchain/langchain/agents/structured_chat/output_parser.py index 3049a531fab..ab5d449bfcf 100644 --- a/libs/langchain/langchain/agents/structured_chat/output_parser.py +++ b/libs/langchain/langchain/agents/structured_chat/output_parser.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) class StructuredChatOutputParser(AgentOutputParser): """Output parser for the structured chat agent.""" - pattern = re.compile(r"```(?:json)?\n(.*?)```", re.DOTALL) + pattern = re.compile(r"```(?:json\s+)?(\W.*?)```", re.DOTALL) def get_format_instructions(self) -> str: return FORMAT_INSTRUCTIONS diff --git a/libs/langchain/tests/unit_tests/agents/test_structured_chat.py b/libs/langchain/tests/unit_tests/agents/test_structured_chat.py index 356c3d87e2d..24020df0fe7 100644 --- a/libs/langchain/tests/unit_tests/agents/test_structured_chat.py +++ b/libs/langchain/tests/unit_tests/agents/test_structured_chat.py @@ -2,7 +2,7 @@ from typing import Tuple from langchain.agents.structured_chat.output_parser import StructuredChatOutputParser -from langchain.schema import 
AgentAction +from langchain.schema import AgentAction, AgentFinish output_parser = StructuredChatOutputParser() @@ -11,8 +11,10 @@ def get_action_and_input(text: str) -> Tuple[str, str]: output = output_parser.parse(text) if isinstance(output, AgentAction): return output.tool, str(output.tool_input) + elif isinstance(output, AgentFinish): + return output.return_values["output"], output.log else: - return "Final Answer", output.return_values["output"] + raise ValueError("Unexpected output type") def test_parse_with_language() -> None: @@ -45,3 +47,59 @@ def test_parse_without_language() -> None: action, action_input = get_action_and_input(llm_output) assert action == "foo" assert action_input == "bar" + + +def test_parse_with_language_and_spaces() -> None: + llm_output = """I can use the `foo` tool to achieve the goal. + + Action: + ```json + + { + "action": "foo", + "action_input": "bar" + } + ``` + """ + action, action_input = get_action_and_input(llm_output) + assert action == "foo" + assert action_input == "bar" + + +def test_parse_without_language_without_a_new_line() -> None: + llm_output = """I can use the `foo` tool to achieve the goal. + + Action: + ```{"action": "foo", "action_input": "bar"}``` + """ + action, action_input = get_action_and_input(llm_output) + assert action == "foo" + assert action_input == "bar" + + +def test_parse_with_language_without_a_new_line() -> None: + llm_output = """I can use the `foo` tool to achieve the goal. + + Action: + ```json{"action": "foo", "action_input": "bar"}``` + """ + # TODO: How should this be handled? + output, log = get_action_and_input(llm_output) + assert output == llm_output + assert log == llm_output + + +def test_parse_case_matched_and_final_answer() -> None: + llm_output = """I can use the `foo` tool to achieve the goal. 
+ + Action: + ```json + { + "action": "Final Answer", + "action_input": "This is the final answer" + } + ``` + """ + output, log = get_action_and_input(llm_output) + assert output == "This is the final answer" + assert log == llm_output