From 622bf12c2ebe1fafb1932084ad842b98bbb7b190 Mon Sep 17 00:00:00 2001
From: hmasdev <73353463+hmasdev@users.noreply.github.com>
Date: Tue, 7 Nov 2023 00:53:14 +0900
Subject: [PATCH] fix regex pattern of structured output parser (#12929)

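- **Description:** Fix the regex pattern of
[StructuredChatOutputParser](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/structured_chat/output_parser.py#L18)
so that a fenced block is also matched when the `json` tag is followed by
extra whitespace or when there is no newline after the opening fence, and
add unit tests for the code change.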
- **Issue:** #12158 #12922
- **Dependencies:** None
- **Tag maintainer:**
- **Twitter handle:** @hmdev3
- **NOTE:** This PR conflicts with #7495. After #7495 is merged, I will
update this PR.
---
 .../agents/structured_chat/output_parser.py   |  2 +-
 .../unit_tests/agents/test_structured_chat.py | 62 ++++++++++++++++++-
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/libs/langchain/langchain/agents/structured_chat/output_parser.py b/libs/langchain/langchain/agents/structured_chat/output_parser.py
index 3049a531fab..ab5d449bfcf 100644
--- a/libs/langchain/langchain/agents/structured_chat/output_parser.py
+++ b/libs/langchain/langchain/agents/structured_chat/output_parser.py
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 class StructuredChatOutputParser(AgentOutputParser):
     """Output parser for the structured chat agent."""
 
-    pattern = re.compile(r"```(?:json)?\n(.*?)```", re.DOTALL)
+    pattern = re.compile(r"```(?:json\s+)?(\W.*?)```", re.DOTALL)
 
     def get_format_instructions(self) -> str:
         return FORMAT_INSTRUCTIONS
diff --git a/libs/langchain/tests/unit_tests/agents/test_structured_chat.py b/libs/langchain/tests/unit_tests/agents/test_structured_chat.py
index 356c3d87e2d..24020df0fe7 100644
--- a/libs/langchain/tests/unit_tests/agents/test_structured_chat.py
+++ b/libs/langchain/tests/unit_tests/agents/test_structured_chat.py
@@ -2,7 +2,7 @@
 from typing import Tuple
 
 from langchain.agents.structured_chat.output_parser import StructuredChatOutputParser
-from langchain.schema import AgentAction
+from langchain.schema import AgentAction, AgentFinish
 
 output_parser = StructuredChatOutputParser()
 
@@ -11,8 +11,10 @@ def get_action_and_input(text: str) -> Tuple[str, str]:
     output = output_parser.parse(text)
     if isinstance(output, AgentAction):
         return output.tool, str(output.tool_input)
+    elif isinstance(output, AgentFinish):
+        return output.return_values["output"], output.log
     else:
-        return "Final Answer", output.return_values["output"]
+        raise ValueError("Unexpected output type")
 
 
 def test_parse_with_language() -> None:
@@ -45,3 +47,59 @@ def test_parse_without_language() -> None:
     action, action_input = get_action_and_input(llm_output)
     assert action == "foo"
     assert action_input == "bar"
+
+
+def test_parse_with_language_and_spaces() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```json     
+
+    {
+      "action": "foo",
+      "action_input": "bar"
+    }
+    ```
+    """
+    action, action_input = get_action_and_input(llm_output)
+    assert action == "foo"
+    assert action_input == "bar"
+
+
+def test_parse_without_language_without_a_new_line() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```{"action": "foo", "action_input": "bar"}```
+    """
+    action, action_input = get_action_and_input(llm_output)
+    assert action == "foo"
+    assert action_input == "bar"
+
+
+def test_parse_with_language_without_a_new_line() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```json{"action": "foo", "action_input": "bar"}```
+    """
+    # TODO: Decide how a `json` tag directly followed by JSON should be handled.
+    output, log = get_action_and_input(llm_output)
+    assert output == llm_output
+    assert log == llm_output
+
+
+def test_parse_case_matched_and_final_answer() -> None:
+    llm_output = """I can use the `foo` tool to achieve the goal.
+
+    Action:
+    ```json
+    {
+      "action": "Final Answer",
+      "action_input": "This is the final answer"
+    }
+    ```
+    """
+    output, log = get_action_and_input(llm_output)
+    assert output == "This is the final answer"
+    assert log == llm_output