From 8c085fc697205787e4d1db40ed1541a5e0b2bd44 Mon Sep 17 00:00:00 2001 From: Mayank Solanki <83648453+spike-spiegel-21@users.noreply.github.com> Date: Sat, 27 Apr 2024 04:04:09 +0530 Subject: [PATCH] community[patch]: Added a function `from_existing_collection` in `Qdrant` vector database. (#20779) Issue: #20514 The current implementation of `construct_instance` expects a `texts: List[str]` argument, which it passes to the embedding function. This might not be needed when we already have a client with a collection and a `path`, and you don't want to add any text. This PR adds a class method that returns a Qdrant instance with an existing client. Every time `construct_instance` is called, the following line sends some text for embedding generation: https://github.com/langchain-ai/langchain/blob/cb6e5e56c29477c6da5824c17f1b70af11352685/libs/community/langchain_community/vectorstores/qdrant.py#L1592 --------- Co-authored-by: Anush --- .../vectorstores/qdrant.py | 45 +++++++++++++++++++ .../qdrant/test_from_existing_collection.py | 39 ++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 libs/community/tests/integration_tests/vectorstores/qdrant/test_from_existing_collection.py diff --git a/libs/community/langchain_community/vectorstores/qdrant.py b/libs/community/langchain_community/vectorstores/qdrant.py index d42c968689d..7937bcf487c 100644 --- a/libs/community/langchain_community/vectorstores/qdrant.py +++ b/libs/community/langchain_community/vectorstores/qdrant.py @@ -1367,6 +1367,51 @@ class Qdrant(VectorStore): qdrant.add_texts(texts, metadatas, ids, batch_size) return qdrant + @classmethod + def from_existing_collection( + cls: Type[Qdrant], + embedding: Embeddings, + path: str, + collection_name: str, + location: Optional[str] = None, + url: Optional[str] = None, + port: Optional[int] = 6333, + grpc_port: int = 6334, + prefer_grpc: bool = False, + https: Optional[bool] = None, + api_key: Optional[str] = None, + prefix: Optional[str] = None, + timeout: Optional[float] = 
None, + host: Optional[str] = None, + **kwargs: Any, + ) -> Qdrant: + """ + Get instance of an existing Qdrant collection. + This method will return the instance of the store without inserting any new + embeddings + """ + client, async_client = cls._generate_clients( + location=location, + url=url, + port=port, + grpc_port=grpc_port, + prefer_grpc=prefer_grpc, + https=https, + api_key=api_key, + prefix=prefix, + timeout=timeout, + host=host, + path=path, + **kwargs, + ) + return cls( + client=client, + async_client=async_client, + collection_name=collection_name, + embeddings=embedding, + **kwargs, + ) + @classmethod @sync_call_fallback async def afrom_texts( diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/test_from_existing_collection.py b/libs/community/tests/integration_tests/vectorstores/qdrant/test_from_existing_collection.py new file mode 100644 index 00000000000..04a09c69fa3 --- /dev/null +++ b/libs/community/tests/integration_tests/vectorstores/qdrant/test_from_existing_collection.py @@ -0,0 +1,39 @@ +import tempfile +import uuid + +import pytest + +from langchain_community.vectorstores import Qdrant +from tests.integration_tests.vectorstores.fake_embeddings import ( + ConsistentFakeEmbeddings, +) + + +@pytest.mark.parametrize("vector_name", ["custom-vector"]) +def test_qdrant_from_existing_collection_uses_same_collection(vector_name: str) -> None: + """Test if the Qdrant.from_existing_collection reuses the same collection.""" + from qdrant_client import QdrantClient + + collection_name = uuid.uuid4().hex + with tempfile.TemporaryDirectory() as tmpdir: + docs = ["foo"] + qdrant = Qdrant.from_texts( + docs, + embedding=ConsistentFakeEmbeddings(), + path=str(tmpdir), + collection_name=collection_name, + vector_name=vector_name, + ) + del qdrant + + qdrant = Qdrant.from_existing_collection( + embedding=ConsistentFakeEmbeddings(), + path=str(tmpdir), + collection_name=collection_name, + vector_name=vector_name, + ) + 
qdrant.add_texts(["baz", "bar"]) + del qdrant + + client = QdrantClient(path=str(tmpdir)) + assert 3 == client.count(collection_name).count