mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 07:35:18 +00:00
Fix MultiQueryRetriever breaking Embeddings with empty lines (#21093)
Fix MultiQueryRetriever breaking Embeddings with empty lines

```
[chain/end] [1:chain:ConversationalRetrievalChain > 2:retriever:Retriever > 3:retriever:Retriever > 4:chain:LLMChain] [2.03s] Exiting Chain run with output:
[outputs]
> /workspaces/Sfeir/sncf/metabot-backend/.venv/lib/python3.11/site-packages/langchain/retrievers/multi_query.py(116)_aget_relevant_documents()
-> if self.include_original:
(Pdb) queries
['## Alternative questions for "Hello, tell me about phones?":', '', '1. **What are the latest trends in smartphone technology?** (Focuses on recent advancements)', '2. **How has the mobile phone industry evolved over the years?** (Historical perspective)', '3. **What are the different types of phones available in the market, and which one is best for me?** (Categorization and recommendation)']
```

Example of failure on VertexAIEmbeddings

```
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
    status = StatusCode.INVALID_ARGUMENT
    details = "The text content is empty."
    debug_error_string = "UNKNOWN:Error received from peer ipv4:142.250.184.234:443 {created_time:"2024-04-30T09:57:45.625698408+00:00", grpc_status:3, grpc_message:"The text content is empty."}"
```

Fixes: #15959

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
5affbada61
commit
9c3da11910
@ -153,7 +153,7 @@
|
||||
"\n",
|
||||
" def parse(self, text: str) -> List[str]:\n",
|
||||
" lines = text.strip().split(\"\\n\")\n",
|
||||
" return lines\n",
|
||||
" return list(filter(None, lines)) # Remove empty lines\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"output_parser = LineListOutputParser()\n",
|
||||
|
@ -24,7 +24,7 @@ class LineListOutputParser(BaseOutputParser[List[str]]):
|
||||
|
||||
def parse(self, text: str) -> List[str]:
    """Split raw LLM output into a list of non-blank lines.

    Args:
        text: Raw model output, expected to hold one generated query per line.

    Returns:
        The lines of ``text`` in their original order, omitting every line
        that is empty or contains only whitespace. Surrounding whitespace of
        the whole text is stripped before splitting.
    """
    # splitlines() also handles "\r\n", so a blank CRLF line does not survive
    # as a lone "\r" entry the way it would with split("\n").
    lines = text.strip().splitlines()
    # Filter on line.strip() rather than plain truthiness: a whitespace-only
    # line is just as useless as "" and some embedding backends reject it
    # with "The text content is empty.".
    return [line for line in lines if line.strip()]
|
||||
|
||||
|
||||
# Default prompt
|
||||
|
@ -3,7 +3,7 @@ from typing import List
|
||||
import pytest as pytest
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain.retrievers.multi_query import _unique_documents
|
||||
from langchain.retrievers.multi_query import LineListOutputParser, _unique_documents
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -38,3 +38,16 @@ from langchain.retrievers.multi_query import _unique_documents
|
||||
)
|
||||
def test__unique_documents(documents: List[Document], expected: List[Document]) -> None:
|
||||
assert _unique_documents(documents) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "text,expected",
    [
        ("foo\nbar\nbaz", ["foo", "bar", "baz"]),
        ("foo\nbar\nbaz\n", ["foo", "bar", "baz"]),
        ("foo\n\nbar", ["foo", "bar"]),
        # Edge cases: nothing to parse, blank lines at both ends, and a run
        # of several consecutive blank lines.
        ("", []),
        ("\n\nfoo\nbar\n\n", ["foo", "bar"]),
        ("foo\n\n\nbar", ["foo", "bar"]),
    ],
)
def test_line_list_output_parser(text: str, expected: List[str]) -> None:
    """LineListOutputParser.parse returns the input's lines without empty ones."""
    parser = LineListOutputParser()
    assert parser.parse(text) == expected
|
||||
|
Loading…
Reference in New Issue
Block a user