fixed regex to match sources for all cases, also includes source (#9775)

- Description: Updated the regex to match the sources marker in any letter
case (SOURCES, sources, Sources) and in its singular form (source, Source).
  - Issue: https://github.com/langchain-ai/langchain/issues/9774
  - Dependencies: N/A
This commit is contained in:
Nikhil Suresh 2023-08-25 20:10:33 -05:00 committed by GitHub
parent a28eea5767
commit 0da5803f5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 3 deletions

View File

@ -120,9 +120,11 @@ class BaseQAWithSourcesChain(Chain, ABC):
def _split_sources(self, answer: str) -> Tuple[str, str]:
    """Split sources from answer.

    The sources marker is the word ``SOURCE`` or ``SOURCES`` in any letter
    case, immediately followed by a colon (``SOURCES:``, ``Sources:``,
    ``source:``, ...).

    Args:
        answer: Raw text to split.

    Returns:
        A ``(answer, sources)`` tuple. When a marker is present, ``answer``
        is the text before the first delimiter and ``sources`` is the first
        line of the text after it, stripped of surrounding whitespace.
        When no marker is present, ``answer`` is returned unchanged and
        ``sources`` is the empty string.
    """
    # The colon is mandatory: accepting bare whitespace after the keyword
    # would split on ordinary prose such as "the source of the data".
    if re.search(r"SOURCES?:", answer, re.IGNORECASE):
        # A "QUESTION:" section also acts as a delimiter; only the first
        # two pieces of the split are kept, anything after is discarded.
        answer, sources = re.split(
            r"SOURCES?:|QUESTION:\s", answer, flags=re.IGNORECASE
        )[:2]
        # Keep only the first line of the sources section.
        sources = re.split(r"\n", sources)[0].strip()
    else:
        sources = ""
    return answer, sources

View File

@ -12,6 +12,21 @@ from tests.unit_tests.llms.fake_llm import FakeLLM
"This Agreement is governed by English law.\n",
"28-pl",
),
(
"This Agreement is governed by English law.\nSources: 28-pl",
"This Agreement is governed by English law.\n",
"28-pl",
),
(
"This Agreement is governed by English law.\nsource: 28-pl",
"This Agreement is governed by English law.\n",
"28-pl",
),
(
"This Agreement is governed by English law.\nSource: 28-pl",
"This Agreement is governed by English law.\n",
"28-pl",
),
(
"This Agreement is governed by English law.\n"
"SOURCES: 28-pl\n\n"