From ec090745a6f41793b7c72e1710efdd0a00a8dbec Mon Sep 17 00:00:00 2001
From: Nuno Campos <nuno@langchain.dev>
Date: Fri, 29 Dec 2023 09:59:21 -0800
Subject: [PATCH] Improve markdown list parser (#15295)

- do not match text after a `-` in the middle of a sentence

<!-- Thank you for contributing to LangChain!

Please title your PR "<package>: <description>", where <package> is
whichever of langchain, community, core, experimental, etc. is being
modified.

Replace this entire comment with:
  - **Description:** a description of the change,
  - **Issue:** the issue # it fixes if applicable,
  - **Dependencies:** any dependencies required for this change,
  - **Twitter handle:** we announce bigger features on Twitter. If your PR
    gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` from the root
of the package you've modified to check this locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc: https://python.langchain.com/docs/contributing/

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in the
`docs/docs/integrations` directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->
---
 libs/core/langchain_core/output_parsers/list.py             | 6 +++---
 libs/core/tests/unit_tests/output_parsers/test_json.py      | 1 +
 .../tests/unit_tests/output_parsers/test_list_parser.py     | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/libs/core/langchain_core/output_parsers/list.py b/libs/core/langchain_core/output_parsers/list.py
index ac9f6a5f2bf..36b9f7e9b77 100644
--- a/libs/core/langchain_core/output_parsers/list.py
+++ b/libs/core/langchain_core/output_parsers/list.py
@@ -154,18 +154,18 @@ class NumberedListOutputParser(ListOutputParser):
 class MarkdownListOutputParser(ListOutputParser):
     """Parse a markdown list."""
 
-    pattern = r"-\s([^\n]+)"
+    pattern = r"^\s*[-*]\s([^\n]+)$"
 
     def get_format_instructions(self) -> str:
         return "Your response should be a markdown list, " "eg: `- foo\n- bar\n- baz`"
 
     def parse(self, text: str) -> List[str]:
         """Parse the output of an LLM call."""
-        return re.findall(self.pattern, text)
+        return re.findall(self.pattern, text, re.MULTILINE)
 
     def parse_iter(self, text: str) -> Iterator[re.Match]:
         """Parse the output of an LLM call."""
-        return re.finditer(self.pattern, text)
+        return re.finditer(self.pattern, text, re.MULTILINE)
 
     @property
     def _type(self) -> str:
diff --git a/libs/core/tests/unit_tests/output_parsers/test_json.py b/libs/core/tests/unit_tests/output_parsers/test_json.py
index 8b2bc7d29a4..89362057445 100644
--- a/libs/core/tests/unit_tests/output_parsers/test_json.py
+++ b/libs/core/tests/unit_tests/output_parsers/test_json.py
@@ -147,6 +147,7 @@ TEST_CASES = [
     NO_TICKS_WHITE_SPACE,
     TEXT_BEFORE,
     TEXT_AFTER,
+    TEXT_BEFORE_AND_AFTER,
 ]
 
 
diff --git a/libs/core/tests/unit_tests/output_parsers/test_list_parser.py b/libs/core/tests/unit_tests/output_parsers/test_list_parser.py
index 4e2f506264b..710f9e52cee 100644
--- a/libs/core/tests/unit_tests/output_parsers/test_list_parser.py
+++ b/libs/core/tests/unit_tests/output_parsers/test_list_parser.py
@@ -51,7 +51,7 @@ def test_numbered_list() -> None:
         "For example: \n\n1. foo\n\n2. bar\n\n3. baz"
     )
 
-    text2 = "Items:\n\n1. apple\n\n2. banana\n\n3. cherry"
+    text2 = "Items:\n\n1. apple\n\n    2. banana\n\n3. cherry"
 
     text3 = "No items in the list."
 
@@ -82,11 +82,11 @@ def test_numbered_list() -> None:
 def test_markdown_list() -> None:
     parser = MarkdownListOutputParser()
     text1 = (
-        "Your response should be a numbered list with each item on a new line."
+        "Your response should be a numbered - not a list item - list with each item on a new line."  # noqa: E501
         "For example: \n- foo\n- bar\n- baz"
     )
 
-    text2 = "Items:\n- apple\n- banana\n- cherry"
+    text2 = "Items:\n- apple\n     - banana\n- cherry"
 
     text3 = "No items in the list."