From 6d3485e79858d14ee1b2a99f737ae46a5342d5c9 Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Fri, 25 Aug 2023 18:11:47 +0000 Subject: [PATCH 1/8] fixed regex to match sources for all cases, also includes source --- libs/langchain/langchain/chains/qa_with_sources/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/langchain/chains/qa_with_sources/base.py b/libs/langchain/langchain/chains/qa_with_sources/base.py index d0408059a8e..2c75e60e36c 100644 --- a/libs/langchain/langchain/chains/qa_with_sources/base.py +++ b/libs/langchain/langchain/chains/qa_with_sources/base.py @@ -124,7 +124,7 @@ class BaseQAWithSourcesChain(Chain, ABC): answer, sources = re.split( r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE )[:2] - sources = re.split(r"\n", sources)[0].strip() + sources = re.split(r"\n", sources)[0] else: sources = "" return answer, sources From b1f649bca5f35e84bfb64d70760428286a37a4bf Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Sat, 26 Aug 2023 00:10:52 +0000 Subject: [PATCH 2/8] fixed issue with white space and added unit tests --- libs/langchain/langchain/chains/qa_with_sources/base.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libs/langchain/langchain/chains/qa_with_sources/base.py b/libs/langchain/langchain/chains/qa_with_sources/base.py index 2c75e60e36c..c7d0b76ab51 100644 --- a/libs/langchain/langchain/chains/qa_with_sources/base.py +++ b/libs/langchain/langchain/chains/qa_with_sources/base.py @@ -121,10 +121,8 @@ class BaseQAWithSourcesChain(Chain, ABC): def _split_sources(self, answer: str) -> Tuple[str, str]: """Split sources from answer.""" if re.search(r"SOURCES?[:\s]", answer, re.IGNORECASE): - answer, sources = re.split( - r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE - )[:2] - sources = re.split(r"\n", sources)[0] + answer, sources = re.split(r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE)[:2] + sources = re.split(r"\n", sources)[0].strip() else: sources = "" return answer, sources From 8a4670e1276cbcacae437fe33e41bb40a6a756b0 Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Sat, 26 Aug 2023 00:17:54 +0000 Subject: [PATCH 3/8] updated formatting changes --- libs/langchain/langchain/chains/qa_with_sources/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/chains/qa_with_sources/base.py b/libs/langchain/langchain/chains/qa_with_sources/base.py index c7d0b76ab51..d0408059a8e 100644 --- a/libs/langchain/langchain/chains/qa_with_sources/base.py +++ b/libs/langchain/langchain/chains/qa_with_sources/base.py @@ -121,7 +121,9 @@ class BaseQAWithSourcesChain(Chain, ABC): def _split_sources(self, answer: str) -> Tuple[str, str]: """Split sources from answer.""" if re.search(r"SOURCES?[:\s]", answer, re.IGNORECASE): - answer, sources = re.split(r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE)[:2] + answer, sources = re.split( + r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE + )[:2] sources = re.split(r"\n", sources)[0].strip() else: sources = "" From 64eb5a6082d506bc794f352c03102f72c3c5a34d Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Tue, 29 Aug 2023 03:44:14 +0000 Subject: [PATCH 4/8] removed unnecessary white space in regex that breaks qa with sources chain --- libs/langchain/langchain/chains/qa_with_sources/base.py | 2 +- .../tests/unit_tests/chains/test_qa_with_sources.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/chains/qa_with_sources/base.py b/libs/langchain/langchain/chains/qa_with_sources/base.py index d0408059a8e..e6a07c37630 100644 --- a/libs/langchain/langchain/chains/qa_with_sources/base.py +++ b/libs/langchain/langchain/chains/qa_with_sources/base.py @@ -122,7 +122,7 @@ class BaseQAWithSourcesChain(Chain, ABC): """Split sources from answer.""" if re.search(r"SOURCES?[:\s]", answer, re.IGNORECASE): answer, sources = re.split( - r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE + r"SOURCES?[:]|QUESTION:\s", answer, flags=re.IGNORECASE )[:2] sources = re.split(r"\n", sources)[0].strip() else: diff --git a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py index 56daf006859..c93d202bae0 100644 --- a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py +++ b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py @@ -27,6 +27,11 @@ from tests.unit_tests.llms.fake_llm import FakeLLM "This Agreement is governed by English law.\n", "28-pl", ), + ( + "According to the sources, the agreement is governed by English law.\nSource: 28-pl", + "According to the sources, the agreement is governed by English law.\n", + "28-pl", + ), ( "This Agreement is governed by English law.\n" "SOURCES: 28-pl\n\n" From 23ef836b48bbbc328813a0e48e64ac35bbaf5b5d Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Tue, 29 Aug 2023 04:18:33 +0000 Subject: [PATCH 5/8] matches colon and any number of white spaces after colon --- libs/langchain/langchain/chains/qa_with_sources/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/chains/qa_with_sources/base.py b/libs/langchain/langchain/chains/qa_with_sources/base.py index e6a07c37630..79b083de0b7 100644 --- a/libs/langchain/langchain/chains/qa_with_sources/base.py +++ b/libs/langchain/langchain/chains/qa_with_sources/base.py @@ -120,9 +120,9 @@ class BaseQAWithSourcesChain(Chain, ABC): def _split_sources(self, answer: str) -> Tuple[str, str]: """Split sources from answer.""" - if re.search(r"SOURCES?[:\s]", answer, re.IGNORECASE): + if re.search(r"SOURCES?[:]\s", answer, re.IGNORECASE): answer, sources = re.split( - r"SOURCES?[:]|QUESTION:\s", answer, flags=re.IGNORECASE + r"SOURCES?[:]\s|QUESTION:\s", answer, flags=re.IGNORECASE )[:2] sources = re.split(r"\n", sources)[0].strip() else: From dd10cf945c9d2d823fee1b77665252d03eb665b9 Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Tue, 29 Aug 2023 14:15:59 +0000 Subject: [PATCH 6/8] fixed minor linting issues --- libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py index c93d202bae0..3d34206f06d 100644 --- a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py +++ b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py @@ -28,7 +28,8 @@ from tests.unit_tests.llms.fake_llm import FakeLLM "28-pl", ), ( - "According to the sources, the agreement is governed by English law.\nSource: 28-pl", + "According to the sources, the agreement is governed by English law.\n" + "Source: 28-pl", "According to the sources, the agreement is governed by English law.\n", "28-pl", ), From b31475c6223fb02f6e512ad77dee01249ab4a699 Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Tue, 29 Aug 2023 23:13:31 +0000 Subject: [PATCH 7/8] minor updates to regex --- libs/langchain/langchain/chains/qa_with_sources/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/chains/qa_with_sources/base.py b/libs/langchain/langchain/chains/qa_with_sources/base.py index 79b083de0b7..3e3023ce2b2 100644 --- a/libs/langchain/langchain/chains/qa_with_sources/base.py +++ b/libs/langchain/langchain/chains/qa_with_sources/base.py @@ -120,9 +120,9 @@ class BaseQAWithSourcesChain(Chain, ABC): def _split_sources(self, answer: str) -> Tuple[str, str]: """Split sources from answer.""" - if re.search(r"SOURCES?[:]\s", answer, re.IGNORECASE): + if re.search(r"SOURCES?:", answer, re.IGNORECASE): answer, sources = re.split( - r"SOURCES?[:]\s|QUESTION:\s", answer, flags=re.IGNORECASE + r"SOURCES?:|QUESTION:\s", answer, flags=re.IGNORECASE )[:2] sources = re.split(r"\n", sources)[0].strip() else: From 56a0165a4ecdf74c51b359026d862e03ded4a876 Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Tue, 29 Aug 2023 23:37:54 +0000 Subject: [PATCH 8/8] cleaned up unit test example --- .../langchain/tests/unit_tests/chains/test_qa_with_sources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py index 3d34206f06d..825284e2949 100644 --- a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py +++ b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py @@ -28,9 +28,9 @@ from tests.unit_tests.llms.fake_llm import FakeLLM "28-pl", ), ( - "According to the sources, the agreement is governed by English law.\n" + "According to the sources the agreement is governed by English law.\n" "Source: 28-pl", - "According to the sources, the agreement is governed by English law.\n", + "According to the sources the agreement is governed by English law.\n", "28-pl", ), (