mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-18 21:09:00 +00:00
Fixes: community: fix LanceDB return no metadata (#27024)
- [x] Fix the case where LanceDB returns a table without a metadata column - **Description:** Check the table schema; if it has no metadata column, initialize each Document with its metadata argument set to an empty dict - **Issue:** https://github.com/langchain-ai/langchain/issues/27005 - [x] **Add tests and docs** --------- Co-authored-by: ccurme <chester.curme@gmail.com>
This commit is contained in:
parent
91d28ef453
commit
f723a8456e
@ -151,12 +151,14 @@ class LanceDB(VectorStore):
|
||||
score_col = "_relevance_score"
|
||||
else:
|
||||
score_col = None
|
||||
# Check if 'metadata' is in the columns
|
||||
has_metadata = "metadata" in columns
|
||||
|
||||
if score_col is None or not score:
|
||||
return [
|
||||
Document(
|
||||
page_content=results[self._text_key][idx].as_py(),
|
||||
metadata=results["metadata"][idx].as_py(),
|
||||
metadata=results["metadata"][idx].as_py() if has_metadata else {},
|
||||
)
|
||||
for idx in range(len(results))
|
||||
]
|
||||
@ -165,7 +167,9 @@ class LanceDB(VectorStore):
|
||||
(
|
||||
Document(
|
||||
page_content=results[self._text_key][idx].as_py(),
|
||||
metadata=results["metadata"][idx].as_py(),
|
||||
metadata=results["metadata"][idx].as_py()
|
||||
if has_metadata
|
||||
else {},
|
||||
),
|
||||
results[score_col][idx].as_py(),
|
||||
)
|
||||
|
@ -114,3 +114,41 @@ def test_lancedb_all_searches() -> None:
|
||||
)
|
||||
assert len(result_3[0]) == 2 # type: ignore
|
||||
assert "text 1" in result_3[0][0].page_content # type: ignore
|
||||
|
||||
|
||||
@pytest.mark.requires("lancedb")
def test_lancedb_no_metadata() -> None:
    """Search a table that has no ``metadata`` column and expect empty metadata.

    The table is created directly through the LanceDB connection with only
    ``vector``, ``id`` and ``text`` fields, then wrapped in a ``LanceDB``
    vector store. Every ``Document`` returned by ``similarity_search`` must
    carry an empty metadata dict.
    """
    import tempfile

    lancedb = import_lancedb()
    embeddings = FakeEmbeddings()
    table_name = "vectorstore_no_metadata"
    texts = ["text 1", "text 2", "item 3"]

    # Use a fresh directory per run: a fixed /tmp path is not portable and a
    # table left over from an earlier failed run would make create_table fail.
    with tempfile.TemporaryDirectory() as db_dir:
        db = lancedb.connect(db_dir)

        # Build rows without a 'metadata' field (deliberately omitted).
        data = [
            {
                "vector": embeddings.embed_documents([text])[0],
                "id": str(idx),
                "text": text,
            }
            for idx, text in enumerate(texts)
        ]

        # Create the table without a 'metadata' column.
        db.create_table(table_name, data=data)
        try:
            # Initialize LanceDB with the existing connection and table name.
            store = LanceDB(
                connection=db,
                embedding=embeddings,
                table_name=table_name,
            )

            result = store.similarity_search("text 1")

            # Guard against a vacuous pass: the loop below checks nothing
            # when the search returns no documents.
            assert result, "Expected at least one document from similarity search"

            for doc in result:
                assert (
                    doc.metadata == {}
                ), "Expected empty metadata when 'metadata' column is missing"
        finally:
            # Drop the table even when an assertion above fails.
            db.drop_table(table_name)
|
||||
|
Loading…
Reference in New Issue
Block a user