diff --git a/libs/community/langchain_community/vectorstores/lancedb.py b/libs/community/langchain_community/vectorstores/lancedb.py
index 7253d885b11..f08e4380481 100644
--- a/libs/community/langchain_community/vectorstores/lancedb.py
+++ b/libs/community/langchain_community/vectorstores/lancedb.py
@@ -151,12 +151,14 @@ class LanceDB(VectorStore):
             score_col = "_relevance_score"
         else:
             score_col = None
+        # A table created outside LangChain may have no 'metadata' column
+        has_metadata = "metadata" in columns
 
         if score_col is None or not score:
             return [
                 Document(
                     page_content=results[self._text_key][idx].as_py(),
-                    metadata=results["metadata"][idx].as_py(),
+                    metadata=results["metadata"][idx].as_py() if has_metadata else {},
                 )
                 for idx in range(len(results))
             ]
@@ -165,7 +167,9 @@ class LanceDB(VectorStore):
                 (
                     Document(
                         page_content=results[self._text_key][idx].as_py(),
-                        metadata=results["metadata"][idx].as_py(),
+                        metadata=results["metadata"][idx].as_py()
+                        if has_metadata
+                        else {},
                     ),
                     results[score_col][idx].as_py(),
                 )
diff --git a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py
index 7ba3a004663..615b310629d 100644
--- a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py
@@ -114,3 +114,49 @@ def test_lancedb_all_searches() -> None:
     )
     assert len(result_3[0]) == 2  # type: ignore
     assert "text 1" in result_3[0][0].page_content  # type: ignore
+
+
+@pytest.mark.requires("lancedb")
+def test_lancedb_no_metadata() -> None:
+    """Search a table that has no 'metadata' column and expect empty metadata."""
+    lancedb = import_lancedb()
+    embeddings = FakeEmbeddings()
+    table_name = "vectorstore_no_metadata"
+    # Connect to a temporary LanceDB instance
+    db = lancedb.connect("/tmp/lancedb_no_metadata_test")
+    # Create data without the 'metadata' field
+    texts = ["text 1", "text 2", "item 3"]
+    data = []
+    for idx, text in enumerate(texts):
+        embedding = embeddings.embed_documents([text])[0]
+        data.append(
+            {
+                "vector": embedding,
+                "id": str(idx),
+                "text": text,
+                # Note: We're deliberately not including 'metadata' here
+            }
+        )
+    # Create the table without 'metadata' column; overwrite any table left
+    # behind by an earlier interrupted run so the fixed path stays reusable
+    db.create_table(table_name, data=data, mode="overwrite")
+    try:
+        # Initialize LanceDB with the existing connection and table name
+        store = LanceDB(
+            connection=db,
+            embedding=embeddings,
+            table_name=table_name,
+        )
+        # Perform a similarity search
+        result = store.similarity_search("text 1")
+        # Guard against a vacuous pass: the loop below checks nothing if the
+        # search returns no documents
+        assert result, "Expected at least one search result"
+        # Verify that the metadata in the Document objects is an empty dictionary
+        for doc in result:
+            assert (
+                doc.metadata == {}
+            ), "Expected empty metadata when 'metadata' column is missing"
+    finally:
+        # Always drop the table, even when an assertion above fails
+        db.drop_table(table_name)