mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 23:54:14 +00:00
Fix MultiQueryRetriever breaking Embeddings with empty lines (#21093)
Fix MultiQueryRetriever breaking Embeddings with empty lines

The LLM output can contain blank lines, which the parser passed through as empty query strings (note the `''` entry in `queries` below):

```
[chain/end] [1:chain:ConversationalRetrievalChain > 2:retriever:Retriever > 3:retriever:Retriever > 4:chain:LLMChain] [2.03s] Exiting Chain run with output:
[outputs]
> /workspaces/Sfeir/sncf/metabot-backend/.venv/lib/python3.11/site-packages/langchain/retrievers/multi_query.py(116)_aget_relevant_documents()
-> if self.include_original:
(Pdb) queries
['## Alternative questions for "Hello, tell me about phones?":', '', '1. **What are the latest trends in smartphone technology?** (Focuses on recent advancements)', '2. **How has the mobile phone industry evolved over the years?** (Historical perspective)', '3. **What are the different types of phones available in the market, and which one is best for me?** (Categorization and recommendation)']
```

Example of failure on VertexAIEmbeddings:

```
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
    status = StatusCode.INVALID_ARGUMENT
    details = "The text content is empty."
    debug_error_string = "UNKNOWN:Error received from peer ipv4:142.250.184.234:443 {created_time:"2024-04-30T09:57:45.625698408+00:00", grpc_status:3, grpc_message:"The text content is empty."}"
```

Fixes: #15959

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
5affbada61
commit
9c3da11910
@ -153,7 +153,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" def parse(self, text: str) -> List[str]:\n",
|
" def parse(self, text: str) -> List[str]:\n",
|
||||||
" lines = text.strip().split(\"\\n\")\n",
|
" lines = text.strip().split(\"\\n\")\n",
|
||||||
" return lines\n",
|
" return list(filter(None, lines)) # Remove empty lines\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"output_parser = LineListOutputParser()\n",
|
"output_parser = LineListOutputParser()\n",
|
||||||
|
@ -24,7 +24,7 @@ class LineListOutputParser(BaseOutputParser[List[str]]):
|
|||||||
|
|
||||||
def parse(self, text: str) -> List[str]:
|
def parse(self, text: str) -> List[str]:
|
||||||
lines = text.strip().split("\n")
|
lines = text.strip().split("\n")
|
||||||
return lines
|
return list(filter(None, lines)) # Remove empty lines
|
||||||
|
|
||||||
|
|
||||||
# Default prompt
|
# Default prompt
|
||||||
|
@ -3,7 +3,7 @@ from typing import List
|
|||||||
import pytest as pytest
|
import pytest as pytest
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
from langchain.retrievers.multi_query import _unique_documents
|
from langchain.retrievers.multi_query import LineListOutputParser, _unique_documents
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@ -38,3 +38,16 @@ from langchain.retrievers.multi_query import _unique_documents
|
|||||||
)
|
)
|
||||||
def test__unique_documents(documents: List[Document], expected: List[Document]) -> None:
|
def test__unique_documents(documents: List[Document], expected: List[Document]) -> None:
|
||||||
assert _unique_documents(documents) == expected
|
assert _unique_documents(documents) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"text,expected",
|
||||||
|
[
|
||||||
|
("foo\nbar\nbaz", ["foo", "bar", "baz"]),
|
||||||
|
("foo\nbar\nbaz\n", ["foo", "bar", "baz"]),
|
||||||
|
("foo\n\nbar", ["foo", "bar"]),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_line_list_output_parser(text: str, expected: List[str]) -> None:
|
||||||
|
parser = LineListOutputParser()
|
||||||
|
assert parser.parse(text) == expected
|
||||||
|
Loading…
Reference in New Issue
Block a user