From 0da5803f5ae54c9a7ba651f3e90e5eacf8b28d26 Mon Sep 17 00:00:00 2001 From: Nikhil Suresh Date: Fri, 25 Aug 2023 20:10:33 -0500 Subject: [PATCH] fixed regex to match sources for all cases, also includes source (#9775) - Description: Updated the regex to match the sources marker case-insensitively (SOURCES, sources, Sources) and to also accept the singular form (SOURCE, source, Source). - Issue: https://github.com/langchain-ai/langchain/issues/9774 - Dependencies: N/A --- .../langchain/chains/qa_with_sources/base.py | 8 +++++--- .../unit_tests/chains/test_qa_with_sources.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/libs/langchain/langchain/chains/qa_with_sources/base.py b/libs/langchain/langchain/chains/qa_with_sources/base.py index db07d24b233..d0408059a8e 100644 --- a/libs/langchain/langchain/chains/qa_with_sources/base.py +++ b/libs/langchain/langchain/chains/qa_with_sources/base.py @@ -120,9 +120,11 @@ class BaseQAWithSourcesChain(Chain, ABC): def _split_sources(self, answer: str) -> Tuple[str, str]: """Split sources from answer.""" - if re.search(r"SOURCES:\s", answer): - answer, sources = re.split(r"SOURCES:\s|QUESTION:\s", answer)[:2] - sources = re.split(r"\n", sources)[0] + if re.search(r"SOURCES?[:\s]", answer, re.IGNORECASE): + answer, sources = re.split( + r"SOURCES?[:\s]|QUESTION:\s", answer, flags=re.IGNORECASE + )[:2] + sources = re.split(r"\n", sources)[0].strip() else: sources = "" return answer, sources diff --git a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py index e69d9b5cd11..56daf006859 100644 --- a/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py +++ b/libs/langchain/tests/unit_tests/chains/test_qa_with_sources.py @@ -12,6 +12,21 @@ from tests.unit_tests.llms.fake_llm import FakeLLM "This Agreement is governed by English law.\n", "28-pl", ), + ( + "This Agreement is governed by English law.\nSources: 28-pl", + "This Agreement is governed by English law.\n", 
"28-pl", + ), + ( + "This Agreement is governed by English law.\nsource: 28-pl", + "This Agreement is governed by English law.\n", + "28-pl", + ), + ( + "This Agreement is governed by English law.\nSource: 28-pl", + "This Agreement is governed by English law.\n", + "28-pl", + ), ( "This Agreement is governed by English law.\n" "SOURCES: 28-pl\n\n"