mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 06:14:37 +00:00
couchbase: Add document id to vector search results (#27622)
**Description:** Returns the document id along with the Vector Search results.
**Issue:** Fixes https://github.com/langchain-ai/langchain/issues/26860 for CouchbaseVectorStore.
- [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory.
- [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified.
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
455ab7d714
commit
0623c74560
@ -559,12 +559,13 @@ class CouchbaseVectorStore(VectorStore):
|
|||||||
# Parse the results
|
# Parse the results
|
||||||
for row in search_iter.rows():
|
for row in search_iter.rows():
|
||||||
text = row.fields.pop(self._text_key, "")
|
text = row.fields.pop(self._text_key, "")
|
||||||
|
id = row.id
|
||||||
|
|
||||||
# Format the metadata from Couchbase
|
# Format the metadata from Couchbase
|
||||||
metadata = self._format_metadata(row.fields)
|
metadata = self._format_metadata(row.fields)
|
||||||
|
|
||||||
score = row.score
|
score = row.score
|
||||||
doc = Document(page_content=text, metadata=metadata)
|
doc = Document(id=id, page_content=text, metadata=metadata)
|
||||||
docs_with_score.append((doc, score))
|
docs_with_score.append((doc, score))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|||||||
|
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "langchain-couchbase"
|
name = "langchain-couchbase"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
description = "An integration package connecting Couchbase and LangChain"
|
description = "An integration package connecting Couchbase and LangChain"
|
||||||
authors = []
|
authors = []
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
@ -193,6 +193,7 @@ class TestCouchbaseVectorStore:
|
|||||||
time.sleep(SLEEP_DURATION)
|
time.sleep(SLEEP_DURATION)
|
||||||
|
|
||||||
output = vectorstore.similarity_search("foo", k=1)
|
output = vectorstore.similarity_search("foo", k=1)
|
||||||
|
assert output[0].id == "a"
|
||||||
assert output[0].page_content == "foo"
|
assert output[0].page_content == "foo"
|
||||||
assert output[0].metadata["a"] == 1
|
assert output[0].metadata["a"] == 1
|
||||||
|
|
||||||
@ -364,3 +365,32 @@ class TestCouchbaseVectorStore:
|
|||||||
|
|
||||||
assert result == hybrid_result
|
assert result == hybrid_result
|
||||||
assert score <= hybrid_score
|
assert score <= hybrid_score
|
||||||
|
|
||||||
|
def test_id_in_results(self, cluster: Any) -> None:
|
||||||
|
"""Test that the id is returned in the result documents."""
|
||||||
|
|
||||||
|
texts = [
|
||||||
|
"foo",
|
||||||
|
"bar",
|
||||||
|
"baz",
|
||||||
|
]
|
||||||
|
|
||||||
|
metadatas = [{"a": 1}, {"b": 2}, {"c": 3}]
|
||||||
|
|
||||||
|
vectorstore = CouchbaseVectorStore(
|
||||||
|
cluster=cluster,
|
||||||
|
embedding=ConsistentFakeEmbeddings(),
|
||||||
|
index_name=INDEX_NAME,
|
||||||
|
bucket_name=BUCKET_NAME,
|
||||||
|
scope_name=SCOPE_NAME,
|
||||||
|
collection_name=COLLECTION_NAME,
|
||||||
|
)
|
||||||
|
|
||||||
|
ids = vectorstore.add_texts(texts, metadatas=metadatas)
|
||||||
|
assert len(ids) == len(texts)
|
||||||
|
|
||||||
|
# Wait for the documents to be indexed
|
||||||
|
time.sleep(SLEEP_DURATION)
|
||||||
|
|
||||||
|
output = vectorstore.similarity_search("foo", k=1)
|
||||||
|
assert output[0].id == ids[0]
|
||||||
|
Loading…
Reference in New Issue
Block a user