From 9c3da1191072b7e28f73fb4319c9b37ced264368 Mon Sep 17 00:00:00 2001
From: Shikanime Deva <shikalegend@gmail.com>
Date: Fri, 19 Jul 2024 19:13:12 +0200
Subject: [PATCH] Fix MultiQueryRetriever breaking Embeddings with empty lines
 (#21093)

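Fix MultiQueryRetriever breaking Embeddings when the LLM output contains
empty lines. LineListOutputParser split the output on newlines and returned
every line, including blank ones, so the generated query list could contain
empty strings (note the '' entry in the debugger output below):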

```
[chain/end] [1:chain:ConversationalRetrievalChain > 2:retriever:Retriever > 3:retriever:Retriever > 4:chain:LLMChain] [2.03s] Exiting Chain run with output:
[outputs]
> /workspaces/Sfeir/sncf/metabot-backend/.venv/lib/python3.11/site-packages/langchain/retrievers/multi_query.py(116)_aget_relevant_documents()
-> if self.include_original:
(Pdb) queries
['## Alternative questions for "Hello, tell me about phones?":', '', '1. **What are the latest trends in smartphone technology?** (Focuses on recent advancements)', '2. **How has the mobile phone industry evolved over the years?** (Historical perspective)', '3. **What are the different types of phones available in the market, and which one is best for me?** (Categorization and recommendation)']
```

Example of the resulting failure with VertexAIEmbeddings, which rejects the
empty query text:

```
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
	status = StatusCode.INVALID_ARGUMENT
	details = "The text content is empty."
	debug_error_string = "UNKNOWN:Error received from peer ipv4:142.250.184.234:443 {created_time:"2024-04-30T09:57:45.625698408+00:00", grpc_status:3, grpc_message:"The text content is empty."}"
```

Fixes: #15959

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
---
 docs/docs/how_to/MultiQueryRetriever.ipynb        |  2 +-
 .../langchain/langchain/retrievers/multi_query.py |  2 +-
 .../unit_tests/retrievers/test_multi_query.py     | 15 ++++++++++++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/docs/docs/how_to/MultiQueryRetriever.ipynb b/docs/docs/how_to/MultiQueryRetriever.ipynb
index f1377124aa9..1574eb1dd63 100644
--- a/docs/docs/how_to/MultiQueryRetriever.ipynb
+++ b/docs/docs/how_to/MultiQueryRetriever.ipynb
@@ -153,7 +153,7 @@
     "\n",
     "    def parse(self, text: str) -> List[str]:\n",
     "        lines = text.strip().split(\"\\n\")\n",
-    "        return lines\n",
+    "        return list(filter(None, lines))  # Remove empty lines\n",
     "\n",
     "\n",
     "output_parser = LineListOutputParser()\n",
diff --git a/libs/langchain/langchain/retrievers/multi_query.py b/libs/langchain/langchain/retrievers/multi_query.py
index 3d1a36be476..23ba88e2a53 100644
--- a/libs/langchain/langchain/retrievers/multi_query.py
+++ b/libs/langchain/langchain/retrievers/multi_query.py
@@ -24,7 +24,7 @@ class LineListOutputParser(BaseOutputParser[List[str]]):
 
     def parse(self, text: str) -> List[str]:
         lines = text.strip().split("\n")
-        return lines
+        return list(filter(None, lines))  # Remove empty lines
 
 
 # Default prompt
diff --git a/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py b/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py
index 8f80e77e79b..d3529e8d97c 100644
--- a/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py
+++ b/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py
@@ -3,7 +3,7 @@ from typing import List
 import pytest as pytest
 from langchain_core.documents import Document
 
-from langchain.retrievers.multi_query import _unique_documents
+from langchain.retrievers.multi_query import LineListOutputParser, _unique_documents
 
 
 @pytest.mark.parametrize(
@@ -38,3 +38,16 @@ from langchain.retrievers.multi_query import _unique_documents
 )
 def test__unique_documents(documents: List[Document], expected: List[Document]) -> None:
     assert _unique_documents(documents) == expected
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        ("foo\nbar\nbaz", ["foo", "bar", "baz"]),
+        ("foo\nbar\nbaz\n", ["foo", "bar", "baz"]),
+        ("foo\n\nbar", ["foo", "bar"]),
+    ],
+)
+def test_line_list_output_parser(text: str, expected: List[str]) -> None:
+    parser = LineListOutputParser()
+    assert parser.parse(text) == expected