Fix MultiQueryRetriever breaking Embeddings with empty lines (#21093)

Fix MultiQueryRetriever breaking Embeddings with empty lines

```
[chain/end] [1:chain:ConversationalRetrievalChain > 2:retriever:Retriever > 3:retriever:Retriever > 4:chain:LLMChain] [2.03s] Exiting Chain run with output:
[outputs]
> /workspaces/Sfeir/sncf/metabot-backend/.venv/lib/python3.11/site-packages/langchain/retrievers/multi_query.py(116)_aget_relevant_documents()
-> if self.include_original:
(Pdb) queries
['## Alternative questions for "Hello, tell me about phones?":', '', '1. **What are the latest trends in smartphone technology?** (Focuses on recent advancements)', '2. **How has the mobile phone industry evolved over the years?** (Historical perspective)', '3. **What are the different types of phones available in the market, and which one is best for me?** (Categorization and recommendation)']
```

Example of failure on VertexAIEmbeddings

```
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
    status = StatusCode.INVALID_ARGUMENT
    details = "The text content is empty."
    debug_error_string = "UNKNOWN:Error received from peer ipv4:142.250.184.234:443 {created_time:"2024-04-30T09:57:45.625698408+00:00", grpc_status:3, grpc_message:"The text content is empty."}"
```

Fixes: #15959

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-09-26 13:59:49 +00:00 · 2024-07-19 19:13:12 +02:00
parent 5affbada61
commit 9c3da11910
3 changed files with 16 additions and 3 deletions
--- a/docs/docs/how_to/MultiQueryRetriever.ipynb
+++ b/docs/docs/how_to/MultiQueryRetriever.ipynb
@@ -153,7 +153,7 @@
    "\n",
    "    def parse(self, text: str) -> List[str]:\n",
    "        lines = text.strip().split(\"\\n\")\n",
-    "        return lines\n",
+    "        return list(filter(None, lines))  # Remove empty lines\n",
    "\n",
    "\n",
    "output_parser = LineListOutputParser()\n",
--- a/libs/langchain/langchain/retrievers/multi_query.py
+++ b/libs/langchain/langchain/retrievers/multi_query.py
@@ -24,7 +24,7 @@ class LineListOutputParser(BaseOutputParser[List[str]]):

    def parse(self, text: str) -> List[str]:
        lines = text.strip().split("\n")
-        return lines
+        return list(filter(None, lines))  # Remove empty lines


 # Default prompt
--- a/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py
+++ b/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py
@@ -3,7 +3,7 @@ from typing import List
 import pytest as pytest
 from langchain_core.documents import Document

-from langchain.retrievers.multi_query import _unique_documents
+from langchain.retrievers.multi_query import LineListOutputParser, _unique_documents


@pytest.mark.parametrize(
@@ -38,3 +38,16 @@ from langchain.retrievers.multi_query import _unique_documents
 )
 def test__unique_documents(documents: List[Document], expected: List[Document]) -> None:
    assert _unique_documents(documents) == expected
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        ("foo\nbar\nbaz", ["foo", "bar", "baz"]),
+        ("foo\nbar\nbaz\n", ["foo", "bar", "baz"]),
+        ("foo\n\nbar", ["foo", "bar"]),
+    ],
+)
+def test_line_list_output_parser(text: str, expected: List[str]) -> None:
+    parser = LineListOutputParser()
+    assert parser.parse(text) == expected