mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-19 05:13:46 +00:00
Fixes: community: fix LanceDB return no metadata (#27024)
- [x] Fix the case where LanceDB returns a table without a metadata column - **Description:** Check the table schema; if it has no metadata column, initialize the Document with the metadata argument set to an empty dict - **Issue:** https://github.com/langchain-ai/langchain/issues/27005 - [x] **Add tests and docs** --------- Co-authored-by: ccurme <chester.curme@gmail.com>
This commit is contained in:
parent
91d28ef453
commit
f723a8456e
@ -151,12 +151,14 @@ class LanceDB(VectorStore):
|
|||||||
score_col = "_relevance_score"
|
score_col = "_relevance_score"
|
||||||
else:
|
else:
|
||||||
score_col = None
|
score_col = None
|
||||||
|
# Check if 'metadata' is in the columns
|
||||||
|
has_metadata = "metadata" in columns
|
||||||
|
|
||||||
if score_col is None or not score:
|
if score_col is None or not score:
|
||||||
return [
|
return [
|
||||||
Document(
|
Document(
|
||||||
page_content=results[self._text_key][idx].as_py(),
|
page_content=results[self._text_key][idx].as_py(),
|
||||||
metadata=results["metadata"][idx].as_py(),
|
metadata=results["metadata"][idx].as_py() if has_metadata else {},
|
||||||
)
|
)
|
||||||
for idx in range(len(results))
|
for idx in range(len(results))
|
||||||
]
|
]
|
||||||
@ -165,7 +167,9 @@ class LanceDB(VectorStore):
|
|||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=results[self._text_key][idx].as_py(),
|
page_content=results[self._text_key][idx].as_py(),
|
||||||
metadata=results["metadata"][idx].as_py(),
|
metadata=results["metadata"][idx].as_py()
|
||||||
|
if has_metadata
|
||||||
|
else {},
|
||||||
),
|
),
|
||||||
results[score_col][idx].as_py(),
|
results[score_col][idx].as_py(),
|
||||||
)
|
)
|
||||||
|
@ -114,3 +114,41 @@ def test_lancedb_all_searches() -> None:
|
|||||||
)
|
)
|
||||||
assert len(result_3[0]) == 2 # type: ignore
|
assert len(result_3[0]) == 2 # type: ignore
|
||||||
assert "text 1" in result_3[0][0].page_content # type: ignore
|
assert "text 1" in result_3[0][0].page_content # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("lancedb")
|
||||||
|
def test_lancedb_no_metadata() -> None:
|
||||||
|
lancedb = import_lancedb()
|
||||||
|
embeddings = FakeEmbeddings()
|
||||||
|
# Connect to a temporary LanceDB instance
|
||||||
|
db = lancedb.connect("/tmp/lancedb_no_metadata_test")
|
||||||
|
# Create data without the 'metadata' field
|
||||||
|
texts = ["text 1", "text 2", "item 3"]
|
||||||
|
data = []
|
||||||
|
for idx, text in enumerate(texts):
|
||||||
|
embedding = embeddings.embed_documents([text])[0]
|
||||||
|
data.append(
|
||||||
|
{
|
||||||
|
"vector": embedding,
|
||||||
|
"id": str(idx),
|
||||||
|
"text": text,
|
||||||
|
# Note: We're deliberately not including 'metadata' here
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# Create the table without 'metadata' column
|
||||||
|
db.create_table("vectorstore_no_metadata", data=data)
|
||||||
|
# Initialize LanceDB with the existing connection and table name
|
||||||
|
store = LanceDB(
|
||||||
|
connection=db,
|
||||||
|
embedding=embeddings,
|
||||||
|
table_name="vectorstore_no_metadata",
|
||||||
|
)
|
||||||
|
# Perform a similarity search
|
||||||
|
result = store.similarity_search("text 1")
|
||||||
|
# Verify that the metadata in the Document objects is an empty dictionary
|
||||||
|
for doc in result:
|
||||||
|
assert (
|
||||||
|
doc.metadata == {}
|
||||||
|
), "Expected empty metadata when 'metadata' column is missing"
|
||||||
|
# Clean up by deleting the table (optional)
|
||||||
|
db.drop_table("vectorstore_no_metadata")
|
||||||
|
Loading…
Reference in New Issue
Block a user