couchbase: Add document id to vector search results (#27622)

**Description:** Returns the document ID along with the vector search
results by setting the `id` field on each returned `Document`.

**Issue:** Fixes https://github.com/langchain-ai/langchain/issues/26860
for CouchbaseVectorStore


- [x] **Add tests and docs**: If you're adding a new integration, please
include
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.


- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified.

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Nithish Raghunandanan 2024-10-24 23:47:36 +02:00 committed by GitHub
parent 455ab7d714
commit 0623c74560
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 33 additions and 2 deletions

View File

@ -559,12 +559,13 @@ class CouchbaseVectorStore(VectorStore):
# Parse the results
for row in search_iter.rows():
text = row.fields.pop(self._text_key, "")
id = row.id
# Format the metadata from Couchbase
metadata = self._format_metadata(row.fields)
score = row.score
doc = Document(page_content=text, metadata=metadata)
doc = Document(id=id, page_content=text, metadata=metadata)
docs_with_score.append((doc, score))
except Exception as e:

View File

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "langchain-couchbase"
version = "0.2.0"
version = "0.2.1"
description = "An integration package connecting Couchbase and LangChain"
authors = []
readme = "README.md"

View File

@ -193,6 +193,7 @@ class TestCouchbaseVectorStore:
time.sleep(SLEEP_DURATION)
output = vectorstore.similarity_search("foo", k=1)
assert output[0].id == "a"
assert output[0].page_content == "foo"
assert output[0].metadata["a"] == 1
@ -364,3 +365,32 @@ class TestCouchbaseVectorStore:
assert result == hybrid_result
assert score <= hybrid_score
def test_id_in_results(self, cluster: Any) -> None:
    """Check that search hits carry the id of the stored document.

    Three texts with metadata are added to the store; after waiting for
    indexing, the top hit for "foo" must have the id that ``add_texts``
    returned for the first text.
    """
    store = CouchbaseVectorStore(
        cluster=cluster,
        embedding=ConsistentFakeEmbeddings(),
        index_name=INDEX_NAME,
        bucket_name=BUCKET_NAME,
        scope_name=SCOPE_NAME,
        collection_name=COLLECTION_NAME,
    )

    documents = ["foo", "bar", "baz"]
    inserted_ids = store.add_texts(
        documents,
        metadatas=[{"a": 1}, {"b": 2}, {"c": 3}],
    )
    assert len(inserted_ids) == len(documents)

    # Give the search index time to pick up the new documents.
    time.sleep(SLEEP_DURATION)

    hits = store.similarity_search("foo", k=1)
    top_hit = hits[0]
    assert top_hit.id == inserted_ids[0]