From 0623c74560d18570ecea55d39566fa7eb53823e7 Mon Sep 17 00:00:00 2001
From: Nithish Raghunandanan
Date: Thu, 24 Oct 2024 23:47:36 +0200
Subject: [PATCH] couchbase: Add document id to vector search results (#27622)

**Description:** Returns the document id along with the Vector Search results

**Issue:** Fixes https://github.com/langchain-ai/langchain/issues/26860 for CouchbaseVectorStore

- [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory.
- [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified.

Co-authored-by: Erick Friis
---
 .../langchain_couchbase/vectorstores.py       |  3 +-
 libs/partners/couchbase/pyproject.toml        |  2 +-
 .../integration_tests/test_vector_store.py    | 30 +++++++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/libs/partners/couchbase/langchain_couchbase/vectorstores.py b/libs/partners/couchbase/langchain_couchbase/vectorstores.py
index 069cc125506..e76a206bbd3 100644
--- a/libs/partners/couchbase/langchain_couchbase/vectorstores.py
+++ b/libs/partners/couchbase/langchain_couchbase/vectorstores.py
@@ -559,12 +559,13 @@ class CouchbaseVectorStore(VectorStore):
             # Parse the results
             for row in search_iter.rows():
                 text = row.fields.pop(self._text_key, "")
+                id = row.id
 
                 # Format the metadata from Couchbase
                 metadata = self._format_metadata(row.fields)
 
                 score = row.score
-                doc = Document(page_content=text, metadata=metadata)
+                doc = Document(id=id, page_content=text, metadata=metadata)
                 docs_with_score.append((doc, score))
 
         except Exception as e:
diff --git a/libs/partners/couchbase/pyproject.toml b/libs/partners/couchbase/pyproject.toml
index f051b4336d4..04c594754f9 100644
--- a/libs/partners/couchbase/pyproject.toml
+++ b/libs/partners/couchbase/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "langchain-couchbase"
-version = "0.2.0"
+version = "0.2.1"
 description = "An integration package connecting Couchbase and LangChain"
 authors = []
 readme = "README.md"
diff --git a/libs/partners/couchbase/tests/integration_tests/test_vector_store.py b/libs/partners/couchbase/tests/integration_tests/test_vector_store.py
index 4cad73481d0..ebc6c93f6c8 100644
--- a/libs/partners/couchbase/tests/integration_tests/test_vector_store.py
+++ b/libs/partners/couchbase/tests/integration_tests/test_vector_store.py
@@ -193,6 +193,7 @@ class TestCouchbaseVectorStore:
         time.sleep(SLEEP_DURATION)
 
         output = vectorstore.similarity_search("foo", k=1)
+        assert output[0].id == "a"
         assert output[0].page_content == "foo"
         assert output[0].metadata["a"] == 1
 
@@ -364,3 +365,32 @@ class TestCouchbaseVectorStore:
 
         assert result == hybrid_result
         assert score <= hybrid_score
+
+    def test_id_in_results(self, cluster: Any) -> None:
+        """Test that the id is returned in the result documents."""
+
+        texts = [
+            "foo",
+            "bar",
+            "baz",
+        ]
+
+        metadatas = [{"a": 1}, {"b": 2}, {"c": 3}]
+
+        vectorstore = CouchbaseVectorStore(
+            cluster=cluster,
+            embedding=ConsistentFakeEmbeddings(),
+            index_name=INDEX_NAME,
+            bucket_name=BUCKET_NAME,
+            scope_name=SCOPE_NAME,
+            collection_name=COLLECTION_NAME,
+        )
+
+        ids = vectorstore.add_texts(texts, metadatas=metadatas)
+        assert len(ids) == len(texts)
+
+        # Wait for the documents to be indexed
+        time.sleep(SLEEP_DURATION)
+
+        output = vectorstore.similarity_search("foo", k=1)
+        assert output[0].id == ids[0]