From 9241c239de7a1cd6d56f944ef04fc8b3a6db771b Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Wed, 23 Jul 2025 04:59:37 +0500 Subject: [PATCH 01/13] wip: Async Client Added --- .../qdrant/langchain_qdrant/qdrant.py | 754 +++++++++++++++++- 1 file changed, 739 insertions(+), 15 deletions(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index b5cfb7075ee..4c749dba8c9 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -16,7 +16,7 @@ import numpy as np from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore -from qdrant_client import QdrantClient, models +from qdrant_client import AsyncQdrantClient, QdrantClient, models from langchain_qdrant._utils import maximal_marginal_relevance from langchain_qdrant.sparse_embeddings import SparseEmbeddings @@ -51,8 +51,8 @@ class QdrantVectorStore(VectorStore): Optional sparse embedding function to use. Key init args — client params: - client: QdrantClient - Qdrant client to use. + client: Union[QdrantClient, AsyncQdrantClient] + Qdrant client to use. Can be either sync or async client. retrieval_mode: RetrievalMode Retrieval mode to use. @@ -175,7 +175,7 @@ class QdrantVectorStore(VectorStore): def __init__( self, - client: QdrantClient, + client: Union[QdrantClient, AsyncQdrantClient], collection_name: str, embedding: Optional[Embeddings] = None, retrieval_mode: RetrievalMode = RetrievalMode.DENSE, @@ -204,15 +204,46 @@ class QdrantVectorStore(VectorStore): self._validate_embeddings(retrieval_mode, embedding, sparse_embedding) if validate_collection_config: - self._validate_collection_config( - client, - collection_name, - retrieval_mode, - vector_name, - sparse_vector_name, - distance, - embedding, - ) + if isinstance(client, AsyncQdrantClient): + # For async clients, run async validation synchronously + import asyncio + + async def _validate(): + await self._avalidate_collection_config( + client, + collection_name, + retrieval_mode, + vector_name, + sparse_vector_name, + distance, + embedding, + ) + + try: + asyncio.run(_validate()) + except RuntimeError as e: + # Only handle the specific case of existing event loop + if ( + "asyncio.run() cannot be called from a running event loop" + in str(e) + ): + # If we're in an existing event loop, use it directly + loop = asyncio.get_event_loop() + loop.run_until_complete(_validate()) + else: + # Re-raise other RuntimeErrors (could be from validation) + raise + else: + # For sync clients, use sync validation + self._validate_collection_config( + client, + collection_name, + retrieval_mode, + vector_name, + sparse_vector_name, + distance, + embedding, + ) self._client = client self.collection_name = collection_name @@ -226,14 +257,16 @@ class QdrantVectorStore(VectorStore): self.sparse_vector_name = sparse_vector_name @property - def client(self) -> QdrantClient: + def client(self) -> Union[QdrantClient, AsyncQdrantClient]: """Get the Qdrant client instance that is being used. Returns: - QdrantClient: An instance of ``QdrantClient``. + Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` or ``AsyncQdrantClient``. """ return self._client + + @property def embeddings(self) -> Embeddings: """Get the dense embeddings instance that is being used. @@ -355,6 +388,119 @@ class QdrantVectorStore(VectorStore): qdrant.add_texts(texts, metadatas, ids, batch_size) return qdrant + @classmethod + async def aconstruct_instance( + cls: type[QdrantVectorStore], + embedding: Optional[Embeddings] = None, + retrieval_mode: RetrievalMode = RetrievalMode.DENSE, + sparse_embedding: Optional[SparseEmbeddings] = None, + client_options: dict[str, Any] = {}, + collection_name: Optional[str] = None, + distance: models.Distance = models.Distance.COSINE, + content_payload_key: str = CONTENT_KEY, + metadata_payload_key: str = METADATA_KEY, + vector_name: str = VECTOR_NAME, + sparse_vector_name: str = SPARSE_VECTOR_NAME, + force_recreate: bool = False, + collection_create_options: dict[str, Any] = {}, + vector_params: dict[str, Any] = {}, + sparse_vector_params: dict[str, Any] = {}, + validate_embeddings: bool = True, + validate_collection_config: bool = True, + ) -> QdrantVectorStore: + """Async construct an instance of QdrantVectorStore. + + Returns: + QdrantVectorStore: A new instance with async client. + """ + if validate_embeddings: + cls._validate_embeddings(retrieval_mode, embedding, sparse_embedding) + collection_name = collection_name or uuid.uuid4().hex + + client = AsyncQdrantClient(**client_options) + collection_exists = await client.collection_exists(collection_name) + + if collection_exists and force_recreate: + await client.delete_collection(collection_name) + collection_exists = False + + if collection_exists: + if validate_collection_config: + await cls._avalidate_collection_config( + client, + collection_name, + retrieval_mode, + vector_name, + sparse_vector_name, + distance, + embedding, + ) + + if not collection_exists: + vectors_config, sparse_vectors_config = {}, {} + if retrieval_mode == RetrievalMode.DENSE: + partial_embeddings = await embedding.aembed_documents( + ["dummy_text"] + ) # type: ignore + + vector_params["size"] = len(partial_embeddings[0]) + vector_params["distance"] = distance + + vectors_config = { + vector_name: models.VectorParams( + **vector_params, + ) + } + + elif retrieval_mode == RetrievalMode.SPARSE: + sparse_vectors_config = { + sparse_vector_name: models.SparseVectorParams( + **sparse_vector_params + ) + } + + elif retrieval_mode == RetrievalMode.HYBRID: + partial_embeddings = await embedding.aembed_documents( + ["dummy_text"] + ) # type: ignore + + vector_params["size"] = len(partial_embeddings[0]) + vector_params["distance"] = distance + + vectors_config = { + vector_name: models.VectorParams( + **vector_params, + ) + } + + sparse_vectors_config = { + sparse_vector_name: models.SparseVectorParams( + **sparse_vector_params + ) + } + + collection_create_options["collection_name"] = collection_name + collection_create_options["vectors_config"] = vectors_config + collection_create_options["sparse_vectors_config"] = sparse_vectors_config + + await client.create_collection(**collection_create_options) + + qdrant = cls( + client=client, + collection_name=collection_name, + embedding=embedding, + retrieval_mode=retrieval_mode, + content_payload_key=content_payload_key, + metadata_payload_key=metadata_payload_key, + distance=distance, + vector_name=vector_name, + sparse_embedding=sparse_embedding, + sparse_vector_name=sparse_vector_name, + validate_embeddings=False, + validate_collection_config=False, + ) + return qdrant + @classmethod def from_existing_collection( cls: type[QdrantVectorStore], @@ -442,6 +588,36 @@ class QdrantVectorStore(VectorStore): return added_ids + async def aadd_texts( # type: ignore + self, + texts: Iterable[str], + metadatas: Optional[list[dict]] = None, + ids: Optional[Sequence[str | int]] = None, + batch_size: int = 64, + **kwargs: Any, + ) -> list[str | int]: + """Async add texts with embeddings to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + ids: Optional list of IDs associated with the texts. + batch_size: Batch size for adding texts. + **kwargs: vectorstore specific parameters. + + Returns: + List of ids from adding the texts into the vectorstore. + """ + added_ids = [] + async for batch_ids, points in self._agenerate_batches( + texts, metadatas, ids, batch_size + ): + await self.client.upsert( + collection_name=self.collection_name, points=points, **kwargs + ) + added_ids.extend(batch_ids) + return added_ids + def similarity_search( self, query: str, @@ -472,6 +648,36 @@ class QdrantVectorStore(VectorStore): ) return list(map(itemgetter(0), results)) + async def asimilarity_search( + self, + query: str, + k: int = 4, + filter: Optional[models.Filter] = None, + search_params: Optional[models.SearchParams] = None, + offset: int = 0, + score_threshold: Optional[float] = None, + consistency: Optional[models.ReadConsistency] = None, + hybrid_fusion: Optional[models.FusionQuery] = None, + **kwargs: Any, + ) -> list[Document]: + """Async return docs most similar to query. + + Returns: + List of Documents most similar to the query. + """ + results = await self.asimilarity_search_with_score( + query, + k, + filter=filter, + search_params=search_params, + offset=offset, + score_threshold=score_threshold, + consistency=consistency, + hybrid_fusion=hybrid_fusion, + **kwargs, + ) + return list(map(itemgetter(0), results)) + def similarity_search_with_score( self, query: str, @@ -562,6 +768,104 @@ class QdrantVectorStore(VectorStore): for result in results ] + async def asimilarity_search_with_score( + self, + query: str, + k: int = 4, + filter: Optional[models.Filter] = None, + search_params: Optional[models.SearchParams] = None, + offset: int = 0, + score_threshold: Optional[float] = None, + consistency: Optional[models.ReadConsistency] = None, + hybrid_fusion: Optional[models.FusionQuery] = None, + **kwargs: Any, + ) -> list[tuple[Document, float]]: + """Async return docs most similar to query. + + Returns: + List of documents most similar to the query text and distance for each. + """ + + + query_options = { + "collection_name": self.collection_name, + "query_filter": filter, + "search_params": search_params, + "limit": k, + "offset": offset, + "with_payload": True, + "with_vectors": False, + "score_threshold": score_threshold, + "consistency": consistency, + **kwargs, + } + if self.retrieval_mode == RetrievalMode.DENSE: + query_dense_embedding = await self.embeddings.aembed_query(query) + results = ( + await self.client.query_points( + query=query_dense_embedding, + using=self.vector_name, + **query_options, + ) + ).points + + elif self.retrieval_mode == RetrievalMode.SPARSE: + query_sparse_embedding = await self.sparse_embeddings.aembed_query(query) + results = ( + await self.client.query_points( + query=models.SparseVector( + indices=query_sparse_embedding.indices, + values=query_sparse_embedding.values, + ), + using=self.sparse_vector_name, + **query_options, + ) + ).points + + elif self.retrieval_mode == RetrievalMode.HYBRID: + query_dense_embedding = await self.embeddings.aembed_query(query) + query_sparse_embedding = await self.sparse_embeddings.aembed_query(query) + results = ( + await self.client.query_points( + prefetch=[ + models.Prefetch( + using=self.vector_name, + query=query_dense_embedding, + filter=filter, + limit=k, + params=search_params, + ), + models.Prefetch( + using=self.sparse_vector_name, + query=models.SparseVector( + indices=query_sparse_embedding.indices, + values=query_sparse_embedding.values, + ), + filter=filter, + limit=k, + params=search_params, + ), + ], + query=hybrid_fusion or models.FusionQuery(fusion=models.Fusion.RRF), + **query_options, + ) + ).points + + else: + raise ValueError(f"Invalid retrieval mode. {self.retrieval_mode}.") + return [ + ( + self._document_from_point( + result, + self.collection_name, + self.content_payload_key, + self.metadata_payload_key, + ), + result.score, + ) + for result in results + ] + def similarity_search_with_score_by_vector( self, embedding: list[float], @@ -684,6 +988,49 @@ class QdrantVectorStore(VectorStore): **kwargs, ) + async def amax_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[models.Filter] = None, + search_params: Optional[models.SearchParams] = None, + score_threshold: Optional[float] = None, + consistency: Optional[models.ReadConsistency] = None, + **kwargs: Any, + ) -> list[Document]: + """Async return docs selected using the maximal marginal relevance with dense vectors. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Returns: + List of Documents selected by maximal marginal relevance. + """ + + + self._validate_collection_for_dense( + self.client, + self.collection_name, + self.vector_name, + self.distance, + self.embeddings, + ) + + query_embedding = await self.embeddings.aembed_query(query) + return await self.amax_marginal_relevance_search_by_vector( + query_embedding, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + filter=filter, + search_params=search_params, + score_threshold=score_threshold, + consistency=consistency, + **kwargs, + ) + def max_marginal_relevance_search_by_vector( self, embedding: list[float], @@ -717,6 +1064,39 @@ class QdrantVectorStore(VectorStore): ) return list(map(itemgetter(0), results)) + async def amax_marginal_relevance_search_by_vector( + self, + embedding: list[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[models.Filter] = None, + search_params: Optional[models.SearchParams] = None, + score_threshold: Optional[float] = None, + consistency: Optional[models.ReadConsistency] = None, + **kwargs: Any, + ) -> list[Document]: + """Async return docs selected using the maximal marginal relevance with dense vectors. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Returns: + List of Documents selected by maximal marginal relevance. + """ + results = await self.amax_marginal_relevance_search_with_score_by_vector( + embedding, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + filter=filter, + search_params=search_params, + score_threshold=score_threshold, + consistency=consistency, + **kwargs, + ) + return list(map(itemgetter(0), results)) + def max_marginal_relevance_search_with_score_by_vector( self, embedding: list[float], @@ -773,6 +1153,66 @@ class QdrantVectorStore(VectorStore): for i in mmr_selected ] + async def amax_marginal_relevance_search_with_score_by_vector( + self, + embedding: list[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[models.Filter] = None, + search_params: Optional[models.SearchParams] = None, + score_threshold: Optional[float] = None, + consistency: Optional[models.ReadConsistency] = None, + **kwargs: Any, + ) -> list[tuple[Document, float]]: + """Async return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Returns: + List of Documents selected by maximal marginal relevance and distance for + each. + """ + + + results = ( + await self.client.query_points( + collection_name=self.collection_name, + query=embedding, + query_filter=filter, + search_params=search_params, + limit=fetch_k, + with_payload=True, + with_vectors=True, + score_threshold=score_threshold, + consistency=consistency, + using=self.vector_name, + **kwargs, + ) + ).points + + embeddings = [ + result.vector + if isinstance(result.vector, list) + else result.vector.get(self.vector_name) # type: ignore + for result in results + ] + mmr_selected = maximal_marginal_relevance( + np.array(embedding), embeddings, k=k, lambda_mult=lambda_mult + ) + return [ + ( + self._document_from_point( + results[i], + self.collection_name, + self.content_payload_key, + self.metadata_payload_key, + ), + results[i].score, + ) + for i in mmr_selected + ] + def delete( # type: ignore self, ids: Optional[list[str | int]] = None, @@ -793,6 +1233,26 @@ class QdrantVectorStore(VectorStore): ) return result.status == models.UpdateStatus.COMPLETED + async def adelete( # type: ignore + self, + ids: Optional[list[str | int]] = None, + **kwargs: Any, + ) -> Optional[bool]: + """Async delete documents by their ids. + + Args: + ids: List of ids to delete. + **kwargs: Other keyword arguments that subclasses might use. + + Returns: + True if deletion is successful, False otherwise. + """ + result = await self.client.delete( + collection_name=self.collection_name, + points_selector=ids, + ) + return result.status == models.UpdateStatus.COMPLETED + def get_by_ids(self, ids: Sequence[str | int], /) -> list[Document]: results = self.client.retrieve(self.collection_name, ids, with_payload=True) @@ -806,6 +1266,54 @@ class QdrantVectorStore(VectorStore): for result in results ] + async def aget_by_ids(self, ids: Sequence[str | int], /) -> list[Document]: + """Async get documents by their IDs. + + Args: + ids: List of ids to retrieve. + + Returns: + List of Documents. + """ + results = await self.client.retrieve( + self.collection_name, ids, with_payload=True + ) + + return [ + self._document_from_point( + result, + self.collection_name, + self.content_payload_key, + self.metadata_payload_key, + ) + for result in results + ] + + async def aadd_documents( + self, documents: list[Document], **kwargs: Any + ) -> list[str]: + """Async add documents with embeddings to the vectorstore. + + Args: + documents: List of documents to add to the vectorstore. + **kwargs: Additional keyword arguments. + + Returns: + List of ids from adding the documents into the vectorstore. + """ + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + + # Extract ids from documents if present + ids = None + if "ids" not in kwargs: + doc_ids = [doc.id for doc in documents] + if any(doc_ids): + ids = doc_ids + kwargs["ids"] = ids + + return await self.aadd_texts(texts, metadatas, ids=ids, **kwargs) + @classmethod def construct_instance( cls: type[QdrantVectorStore], @@ -984,6 +1492,41 @@ class QdrantVectorStore(VectorStore): yield batch_ids, points + async def _agenerate_batches( + self, + texts: Iterable[str], + metadatas: Optional[list[dict]] = None, + ids: Optional[Sequence[str | int]] = None, + batch_size: int = 64, + ): + """Async version of _generate_batches.""" + texts_iterator = iter(texts) + metadatas_iterator = iter(metadatas or []) + ids_iterator = iter(ids or [uuid.uuid4().hex for _ in iter(texts)]) + + while batch_texts := list(islice(texts_iterator, batch_size)): + batch_metadatas = list(islice(metadatas_iterator, batch_size)) or None + batch_ids = list(islice(ids_iterator, batch_size)) + points = [ + models.PointStruct( + id=point_id, + vector=vector, + payload=payload, + ) + for point_id, vector, payload in zip( + batch_ids, + await self._abuild_vectors(batch_texts), + self._build_payloads( + batch_texts, + batch_metadatas, + self.content_payload_key, + self.metadata_payload_key, + ), + ) + ] + + yield batch_ids, points + @staticmethod def _build_payloads( texts: Iterable[str], @@ -1060,6 +1603,60 @@ class QdrantVectorStore(VectorStore): f"Unknown retrieval mode. {self.retrieval_mode} to build vectors." ) + async def _abuild_vectors( + self, + texts: Iterable[str], + ) -> list[models.VectorStruct]: + if self.retrieval_mode == RetrievalMode.DENSE: + batch_embeddings = await self.embeddings.aembed_documents(list(texts)) + return [ + { + self.vector_name: vector, + } + for vector in batch_embeddings + ] + + elif self.retrieval_mode == RetrievalMode.SPARSE: + batch_sparse_embeddings = await self.sparse_embeddings.aembed_documents( + list(texts) + ) + return [ + { + self.sparse_vector_name: models.SparseVector( + values=vector.values, indices=vector.indices + ) + } + for vector in batch_sparse_embeddings + ] + + elif self.retrieval_mode == RetrievalMode.HYBRID: + dense_embeddings = await self.embeddings.aembed_documents(list(texts)) + sparse_embeddings = await self.sparse_embeddings.aembed_documents( + list(texts) + ) + + if len(dense_embeddings) != len(sparse_embeddings): + raise ValueError( + "Mismatched length between dense and sparse embeddings." + ) + + return [ + { + self.vector_name: dense_vector, + self.sparse_vector_name: models.SparseVector( + values=sparse_vector.values, indices=sparse_vector.indices + ), + } + for dense_vector, sparse_vector in zip( + dense_embeddings, sparse_embeddings + ) + ] + + else: + raise ValueError( + f"Unknown retrieval mode. {self.retrieval_mode} to build vectors." + ) + @classmethod def _validate_collection_config( cls: type[QdrantVectorStore], @@ -1089,6 +1686,36 @@ class QdrantVectorStore(VectorStore): client, collection_name, sparse_vector_name ) + @classmethod + async def _avalidate_collection_config( + cls: type[QdrantVectorStore], + client: AsyncQdrantClient, + collection_name: str, + retrieval_mode: RetrievalMode, + vector_name: str, + sparse_vector_name: str, + distance: models.Distance, + embedding: Optional[Embeddings], + ) -> None: + """Async version of _validate_collection_config.""" + if retrieval_mode == RetrievalMode.DENSE: + await cls._avalidate_collection_for_dense( + client, collection_name, vector_name, distance, embedding + ) + + elif retrieval_mode == RetrievalMode.SPARSE: + await cls._avalidate_collection_for_sparse( + client, collection_name, sparse_vector_name + ) + + elif retrieval_mode == RetrievalMode.HYBRID: + await cls._avalidate_collection_for_dense( + client, collection_name, vector_name, distance, embedding + ) + await cls._avalidate_collection_for_sparse( + client, collection_name, sparse_vector_name + ) + @classmethod def _validate_collection_for_dense( cls: type[QdrantVectorStore], @@ -1157,6 +1784,79 @@ class QdrantVectorStore(VectorStore): f"parameter to `True`." ) + @classmethod + async def _avalidate_collection_for_dense( + cls: type[QdrantVectorStore], + client: AsyncQdrantClient, + collection_name: str, + vector_name: str, + distance: models.Distance, + dense_embeddings: Union[Embeddings, list[float], None], + ) -> None: + """Async version of _validate_collection_for_dense.""" + collection_info = await client.get_collection( + collection_name=collection_name + ) + vector_config = collection_info.config.params.vectors + + if isinstance(vector_config, dict): + # vector_config is a Dict[str, VectorParams] + if vector_name not in vector_config: + raise QdrantVectorStoreError( + f"Existing Qdrant collection {collection_name} does not " + f"contain dense vector named {vector_name}. " + "Did you mean one of the " + f"existing vectors: {', '.join(vector_config.keys())}? " # type: ignore + f"If you want to recreate the collection, set `force_recreate` " + f"parameter to `True`." + ) + + # Get the VectorParams object for the specified vector_name + vector_config = vector_config[vector_name] # type: ignore + + else: + # vector_config is an instance of VectorParams + # Case of a collection with single/unnamed vector. + if vector_name != "": + raise QdrantVectorStoreError( + f"Existing Qdrant collection {collection_name} is built " + "with unnamed dense vector. " + f"If you want to reuse it, set `vector_name` to ''(empty string)." + f"If you want to recreate the collection, " + "set `force_recreate` to `True`." + ) + + if vector_config is None: + raise ValueError("VectorParams is None") + + if isinstance(dense_embeddings, Embeddings): + vector_size = len( + (await dense_embeddings.aembed_documents(["dummy_text"]))[0] + ) + elif isinstance(dense_embeddings, list): + vector_size = len(dense_embeddings) + else: + raise ValueError("Invalid `embeddings` type.") + + if vector_config.size != vector_size: + raise QdrantVectorStoreError( + f"Existing Qdrant collection is configured for dense vectors with " + f"{vector_config.size} dimensions. " + f"Selected embeddings are {vector_size}-dimensional. " + f"If you want to recreate the collection, set `force_recreate` " + f"parameter to `True`." + ) + + if vector_config.distance != distance: + raise QdrantVectorStoreError( + f"Existing Qdrant collection is configured for " + f"{vector_config.distance.name} similarity, but requested " + f"{distance.upper()}. Please set `distance` parameter to " + f"`{vector_config.distance.name}` if you want to reuse it. " + f"If you want to recreate the collection, set `force_recreate` " + f"parameter to `True`." + ) + @classmethod def _validate_collection_for_sparse( cls: type[QdrantVectorStore], @@ -1178,6 +1878,30 @@ class QdrantVectorStore(VectorStore): f"parameter to `True`." ) + @classmethod + async def _avalidate_collection_for_sparse( + cls: type[QdrantVectorStore], + client: AsyncQdrantClient, + collection_name: str, + sparse_vector_name: str, + ) -> None: + """Async version of _validate_collection_for_sparse.""" + collection_info = await client.get_collection( + collection_name=collection_name + ) + sparse_vector_config = collection_info.config.params.sparse_vectors + + if ( + sparse_vector_config is None + or sparse_vector_name not in sparse_vector_config + ): + raise QdrantVectorStoreError( + f"Existing Qdrant collection {collection_name} does not " + f"contain sparse vectors named {sparse_vector_name}. " + f"If you want to recreate the collection, set `force_recreate` " + f"parameter to `True`." + ) + @classmethod def _validate_embeddings( cls: type[QdrantVectorStore], From 036904fcb19c4ccac3ccf8a3a3e670f7a6d3a117 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 03:01:47 +0500 Subject: [PATCH 02/13] fix: Validation Config --- .../qdrant/langchain_qdrant/qdrant.py | 96 +++++++++---------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index 4c749dba8c9..47fcfa15c90 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -1,5 +1,6 @@ from __future__ import annotations +import asyncio import uuid from collections.abc import Generator, Iterable, Sequence from enum import Enum @@ -203,13 +204,13 @@ class QdrantVectorStore(VectorStore): if validate_embeddings: self._validate_embeddings(retrieval_mode, embedding, sparse_embedding) + # Store validation task for async clients + self._validation_task = None + self.validate_collection_config = validate_collection_config if validate_collection_config: if isinstance(client, AsyncQdrantClient): - # For async clients, run async validation synchronously - import asyncio - - async def _validate(): - await self._avalidate_collection_config( + self._validation_task = asyncio.create_task( + self._avalidate_collection_config( client, collection_name, retrieval_mode, @@ -218,21 +219,7 @@ class QdrantVectorStore(VectorStore): distance, embedding, ) - - try: - asyncio.run(_validate()) - except RuntimeError as e: - # Only handle the specific case of existing event loop - if ( - "asyncio.run() cannot be called from a running event loop" - in str(e) - ): - # If we're in an existing event loop, use it directly - loop = asyncio.get_event_loop() - loop.run_until_complete(_validate()) - else: - # Re-raise other RuntimeErrors (could be from validation) - raise + ) else: # For sync clients, use sync validation self._validate_collection_config( @@ -265,7 +252,11 @@ class QdrantVectorStore(VectorStore): """ return self._client - + async def _ensure_validation_complete(self) -> None: + """Ensure any pending validation task is completed.""" + if self.validate_collection_config and self._validation_task is not None: + await self._validation_task + self._validation_task = None @property def embeddings(self) -> Embeddings: @@ -409,39 +400,36 @@ class QdrantVectorStore(VectorStore): validate_collection_config: bool = True, ) -> QdrantVectorStore: """Async construct an instance of QdrantVectorStore. - + Returns: QdrantVectorStore: A new instance with async client. """ if validate_embeddings: cls._validate_embeddings(retrieval_mode, embedding, sparse_embedding) collection_name = collection_name or uuid.uuid4().hex - + client = AsyncQdrantClient(**client_options) collection_exists = await client.collection_exists(collection_name) if collection_exists and force_recreate: await client.delete_collection(collection_name) collection_exists = False - - if collection_exists: - if validate_collection_config: - await cls._avalidate_collection_config( - client, - collection_name, - retrieval_mode, - vector_name, - sparse_vector_name, - distance, - embedding, - ) - + + if collection_exists and validate_collection_config: + await cls._avalidate_collection_config( + client, + collection_name, + retrieval_mode, + vector_name, + sparse_vector_name, + distance, + embedding, + ) + if not collection_exists: vectors_config, sparse_vectors_config = {}, {} if retrieval_mode == RetrievalMode.DENSE: - partial_embeddings = await embedding.aembed_documents( - ["dummy_text"] - ) # type: ignore + partial_embeddings = await embedding.aembed_documents(["dummy_text"]) # type: ignore vector_params["size"] = len(partial_embeddings[0]) vector_params["distance"] = distance @@ -460,9 +448,7 @@ class QdrantVectorStore(VectorStore): } elif retrieval_mode == RetrievalMode.HYBRID: - partial_embeddings = await embedding.aembed_documents( - ["dummy_text"] - ) # type: ignore + partial_embeddings = await embedding.aembed_documents(["dummy_text"]) # type: ignore vector_params["size"] = len(partial_embeddings[0]) vector_params["distance"] = distance @@ -608,6 +594,8 @@ class QdrantVectorStore(VectorStore): Returns: List of ids from adding the texts into the vectorstore. """ + await self._ensure_validation_complete() + added_ids = [] async for batch_ids, points in self._agenerate_batches( texts, metadatas, ids, batch_size @@ -665,6 +653,8 @@ class QdrantVectorStore(VectorStore): Returns: List of Documents most similar to the query. """ + await self._ensure_validation_complete() + results = await self.asimilarity_search_with_score( query, k, @@ -785,7 +775,7 @@ class QdrantVectorStore(VectorStore): Returns: List of documents most similar to the query text and distance for each. """ - + await self._ensure_validation_complete() query_options = { "collection_name": self.collection_name, @@ -1008,7 +998,7 @@ class QdrantVectorStore(VectorStore): Returns: List of Documents selected by maximal marginal relevance. """ - + await self._ensure_validation_complete() self._validate_collection_for_dense( self.client, @@ -1084,6 +1074,8 @@ class QdrantVectorStore(VectorStore): Returns: List of Documents selected by maximal marginal relevance. """ + await self._ensure_validation_complete() + results = await self.amax_marginal_relevance_search_with_score_by_vector( embedding, k=k, @@ -1173,7 +1165,7 @@ class QdrantVectorStore(VectorStore): List of Documents selected by maximal marginal relevance and distance for each. """ - + await self._ensure_validation_complete() results = ( await self.client.query_points( @@ -1247,6 +1239,8 @@ class QdrantVectorStore(VectorStore): Returns: True if deletion is successful, False otherwise. """ + await self._ensure_validation_complete() + result = await self.client.delete( collection_name=self.collection_name, points_selector=ids, @@ -1275,6 +1269,8 @@ class QdrantVectorStore(VectorStore): Returns: List of Documents. """ + await self._ensure_validation_complete() + results = await self.client.retrieve( self.collection_name, ids, with_payload=True ) @@ -1301,6 +1297,8 @@ class QdrantVectorStore(VectorStore): Returns: List of ids from adding the documents into the vectorstore. """ + await self._ensure_validation_complete() + texts = [doc.page_content for doc in documents] metadatas = [doc.metadata for doc in documents] @@ -1794,9 +1792,7 @@ class QdrantVectorStore(VectorStore): dense_embeddings: Union[Embeddings, list[float], None], ) -> None: """Async version of _validate_collection_for_dense.""" - collection_info = await client.get_collection( - collection_name=collection_name - ) + collection_info = await client.get_collection(collection_name=collection_name) vector_config = collection_info.config.params.vectors if isinstance(vector_config, dict): @@ -1886,9 +1882,7 @@ class QdrantVectorStore(VectorStore): sparse_vector_name: str, ) -> None: """Async version of _validate_collection_for_sparse.""" - collection_info = await client.get_collection( - collection_name=collection_name - ) + collection_info = await client.get_collection(collection_name=collection_name) sparse_vector_config = collection_info.config.params.sparse_vectors if ( From 5610f37aea489b6aa1a27e0aca4977f11a403be8 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 03:08:42 +0500 Subject: [PATCH 03/13] chore: error lines fixed --- libs/partners/qdrant/langchain_qdrant/qdrant.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index 47fcfa15c90..0e342e04a98 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -248,7 +248,8 @@ class QdrantVectorStore(VectorStore): """Get the Qdrant client instance that is being used. Returns: - Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` or ``AsyncQdrantClient``. + Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` + or ``AsyncQdrantClient``. """ return self._client @@ -990,7 +991,8 @@ class QdrantVectorStore(VectorStore): consistency: Optional[models.ReadConsistency] = None, **kwargs: Any, ) -> list[Document]: - """Async return docs selected using the maximal marginal relevance with dense vectors. + """Async return docs selected using the maximal marginal relevance + with dense vectors. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. @@ -1066,7 +1068,8 @@ class QdrantVectorStore(VectorStore): consistency: Optional[models.ReadConsistency] = None, **kwargs: Any, ) -> list[Document]: - """Async return docs selected using the maximal marginal relevance with dense vectors. + """Async return docs selected using the maximal marginal relevance + with dense vectors. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. From 523f3e46cbc7f95f56b046c9cb0d60388ba36744 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 03:10:21 +0500 Subject: [PATCH 04/13] chore: error lines fixed --- libs/partners/qdrant/langchain_qdrant/qdrant.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index 0e342e04a98..b91c6668f0c 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -248,7 +248,7 @@ class QdrantVectorStore(VectorStore): """Get the Qdrant client instance that is being used. Returns: - Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` + Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` or ``AsyncQdrantClient``. """ return self._client @@ -991,7 +991,7 @@ class QdrantVectorStore(VectorStore): consistency: Optional[models.ReadConsistency] = None, **kwargs: Any, ) -> list[Document]: - """Async return docs selected using the maximal marginal relevance + """Async return docs selected using the maximal marginal relevance with dense vectors. Maximal marginal relevance optimizes for similarity to query AND diversity @@ -1068,7 +1068,7 @@ class QdrantVectorStore(VectorStore): consistency: Optional[models.ReadConsistency] = None, **kwargs: Any, ) -> list[Document]: - """Async return docs selected using the maximal marginal relevance + """Async return docs selected using the maximal marginal relevance with dense vectors. Maximal marginal relevance optimizes for similarity to query AND diversity From 7f237b64c0af05d7b2f52dec3d4c0ea3e97bf340 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 03:21:01 +0500 Subject: [PATCH 05/13] chore: error lines fixed --- .../qdrant/langchain_qdrant/qdrant.py | 66 +++++++++++++------ 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index b91c6668f0c..8c3b2874235 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -2,7 +2,7 @@ from __future__ import annotations import asyncio import uuid -from collections.abc import Generator, Iterable, Sequence +from collections.abc import AsyncGenerator, Generator, Iterable, Sequence from enum import Enum from itertools import islice from operator import itemgetter @@ -259,6 +259,15 @@ class QdrantVectorStore(VectorStore): await self._validation_task self._validation_task = None + def _ensure_sync_client(self) -> QdrantClient: + """Ensure we have a sync client for sync operations.""" + if isinstance(self.client, AsyncQdrantClient): + raise ValueError( + "Cannot perform synchronous operations with AsyncQdrantClient. " + "Use async methods instead." + ) + return self.client + @property def embeddings(self) -> Embeddings: """Get the dense embeddings instance that is being used. @@ -565,10 +574,11 @@ class QdrantVectorStore(VectorStore): List of ids from adding the texts into the vectorstore. """ added_ids = [] + sync_client = self._ensure_sync_client() for batch_ids, points in self._generate_batches( texts, metadatas, ids, batch_size ): - self.client.upsert( + sync_client.upsert( collection_name=self.collection_name, points=points, **kwargs ) added_ids.extend(batch_ids) @@ -700,7 +710,8 @@ class QdrantVectorStore(VectorStore): } if self.retrieval_mode == RetrievalMode.DENSE: query_dense_embedding = self.embeddings.embed_query(query) - results = self.client.query_points( + sync_client = self._ensure_sync_client() + results = sync_client.query_points( query=query_dense_embedding, using=self.vector_name, **query_options, @@ -708,7 +719,8 @@ class QdrantVectorStore(VectorStore): elif self.retrieval_mode == RetrievalMode.SPARSE: query_sparse_embedding = self.sparse_embeddings.embed_query(query) - results = self.client.query_points( + sync_client = self._ensure_sync_client() + results = sync_client.query_points( query=models.SparseVector( indices=query_sparse_embedding.indices, values=query_sparse_embedding.values, @@ -720,7 +732,8 @@ class QdrantVectorStore(VectorStore): elif self.retrieval_mode == RetrievalMode.HYBRID: query_dense_embedding = self.embeddings.embed_query(query) query_sparse_embedding = self.sparse_embeddings.embed_query(query) - results = self.client.query_points( + sync_client = self._ensure_sync_client() + results = sync_client.query_points( prefetch=[ models.Prefetch( using=self.vector_name, @@ -882,7 +895,8 @@ class QdrantVectorStore(VectorStore): distance=self.distance, dense_embeddings=embedding, ) - results = self.client.query_points( + sync_client = self._ensure_sync_client() + results = sync_client.query_points( collection_name=self.collection_name, query=embedding, using=self.vector_name, @@ -1002,13 +1016,22 @@ class QdrantVectorStore(VectorStore): """ await self._ensure_validation_complete() - self._validate_collection_for_dense( - self.client, - self.collection_name, - self.vector_name, - self.distance, - self.embeddings, - ) + if isinstance(self.client, AsyncQdrantClient): + await self._avalidate_collection_for_dense( + self.client, + self.collection_name, + self.vector_name, + self.distance, + self.embeddings, + ) + else: + self._validate_collection_for_dense( + self.client, + self.collection_name, + self.vector_name, + self.distance, + self.embeddings, + ) query_embedding = await self.embeddings.aembed_query(query) return await self.amax_marginal_relevance_search_by_vector( @@ -1112,7 +1135,8 @@ class QdrantVectorStore(VectorStore): List of Documents selected by maximal marginal relevance and distance for each. """ - results = self.client.query_points( + sync_client = self._ensure_sync_client() + results = sync_client.query_points( collection_name=self.collection_name, query=embedding, query_filter=filter, @@ -1222,7 +1246,8 @@ class QdrantVectorStore(VectorStore): Returns: True if deletion is successful, False otherwise. """ - result = self.client.delete( + sync_client = self._ensure_sync_client() + result = sync_client.delete( collection_name=self.collection_name, points_selector=ids, ) @@ -1251,7 +1276,8 @@ class QdrantVectorStore(VectorStore): return result.status == models.UpdateStatus.COMPLETED def get_by_ids(self, ids: Sequence[str | int], /) -> list[Document]: - results = self.client.retrieve(self.collection_name, ids, with_payload=True) + sync_client = self._ensure_sync_client() + results = sync_client.retrieve(self.collection_name, ids, with_payload=True) return [ self._document_from_point( @@ -1290,7 +1316,7 @@ class QdrantVectorStore(VectorStore): async def aadd_documents( self, documents: list[Document], **kwargs: Any - ) -> list[str]: + ) -> list[str | int]: """Async add documents with embeddings to the vectorstore. Args: @@ -1308,8 +1334,8 @@ class QdrantVectorStore(VectorStore): # Extract ids from documents if present ids = None if "ids" not in kwargs: - doc_ids = [doc.id for doc in documents] - if any(doc_ids): + doc_ids = [doc.id for doc in documents if doc.id is not None] + if doc_ids: ids = doc_ids kwargs["ids"] = ids @@ -1499,7 +1525,7 @@ class QdrantVectorStore(VectorStore): metadatas: Optional[list[dict]] = None, ids: Optional[Sequence[str | int]] = None, batch_size: int = 64, - ): + ) -> AsyncGenerator[tuple[list[str | int], list[models.PointStruct]], None]: """Async version of _generate_batches.""" texts_iterator = iter(texts) metadatas_iterator = iter(metadatas or []) From e50e876629e42eed74dddeee2230654472bdb258 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 03:28:16 +0500 Subject: [PATCH 06/13] chore: error lines fixed --- .../qdrant/langchain_qdrant/qdrant.py | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index 8c3b2874235..e702d4075e0 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -268,6 +268,15 @@ class QdrantVectorStore(VectorStore): ) return self.client + def _ensure_async_client(self) -> AsyncQdrantClient: + """Ensure we have an async client for async operations.""" + if not isinstance(self.client, AsyncQdrantClient): + raise ValueError( + "Cannot perform asynchronous operations with QdrantClient. " + "Use sync methods instead or pass AsyncQdrantClient." + ) + return self.client + @property def embeddings(self) -> Embeddings: """Get the dense embeddings instance that is being used. @@ -608,10 +617,11 @@ class QdrantVectorStore(VectorStore): await self._ensure_validation_complete() added_ids = [] + async_client = self._ensure_async_client() async for batch_ids, points in self._agenerate_batches( texts, metadatas, ids, batch_size ): - await self.client.upsert( + await async_client.upsert( collection_name=self.collection_name, points=points, **kwargs ) added_ids.extend(batch_ids) @@ -790,6 +800,7 @@ class QdrantVectorStore(VectorStore): List of documents most similar to the query text and distance for each. """ await self._ensure_validation_complete() + async_client = self._ensure_async_client() query_options = { "collection_name": self.collection_name, @@ -806,7 +817,7 @@ class QdrantVectorStore(VectorStore): if self.retrieval_mode == RetrievalMode.DENSE: query_dense_embedding = await self.embeddings.aembed_query(query) results = ( - await self.client.query_points( + await async_client.query_points( query=query_dense_embedding, using=self.vector_name, **query_options, @@ -816,7 +827,7 @@ class QdrantVectorStore(VectorStore): elif self.retrieval_mode == RetrievalMode.SPARSE: query_sparse_embedding = await self.sparse_embeddings.aembed_query(query) results = ( - await self.client.query_points( + await async_client.query_points( query=models.SparseVector( indices=query_sparse_embedding.indices, values=query_sparse_embedding.values, @@ -830,7 +841,7 @@ class QdrantVectorStore(VectorStore): query_dense_embedding = await self.embeddings.aembed_query(query) query_sparse_embedding = await self.sparse_embeddings.aembed_query(query) results = ( - await self.client.query_points( + await async_client.query_points( prefetch=[ models.Prefetch( using=self.vector_name, @@ -887,9 +898,10 @@ class QdrantVectorStore(VectorStore): List of Documents most similar to the query and distance for each. """ qdrant_filter = filter + sync_client = self._ensure_sync_client() self._validate_collection_for_dense( - client=self.client, + client=sync_client, collection_name=self.collection_name, vector_name=self.vector_name, distance=self.distance, @@ -972,8 +984,9 @@ class QdrantVectorStore(VectorStore): Returns: List of Documents selected by maximal marginal relevance. """ + sync_client = self._ensure_sync_client() self._validate_collection_for_dense( - self.client, + sync_client, self.collection_name, self.vector_name, self.distance, @@ -1193,9 +1206,10 @@ class QdrantVectorStore(VectorStore): each. """ await self._ensure_validation_complete() + async_client = self._ensure_async_client() results = ( - await self.client.query_points( + await async_client.query_points( collection_name=self.collection_name, query=embedding, query_filter=filter, @@ -1268,8 +1282,9 @@ class QdrantVectorStore(VectorStore): True if deletion is successful, False otherwise. """ await self._ensure_validation_complete() + async_client = self._ensure_async_client() - result = await self.client.delete( + result = await async_client.delete( collection_name=self.collection_name, points_selector=ids, ) @@ -1299,8 +1314,9 @@ class QdrantVectorStore(VectorStore): List of Documents. """ await self._ensure_validation_complete() + async_client = self._ensure_async_client() - results = await self.client.retrieve( + results = await async_client.retrieve( self.collection_name, ids, with_payload=True ) @@ -1316,7 +1332,7 @@ class QdrantVectorStore(VectorStore): async def aadd_documents( self, documents: list[Document], **kwargs: Any - ) -> list[str | int]: + ) -> list[str]: """Async add documents with embeddings to the vectorstore. Args: @@ -1339,7 +1355,8 @@ class QdrantVectorStore(VectorStore): ids = doc_ids kwargs["ids"] = ids - return await self.aadd_texts(texts, metadatas, ids=ids, **kwargs) + result = await self.aadd_texts(texts, metadatas, ids=ids, **kwargs) + return [str(id_) for id_ in result] @classmethod def construct_instance( From 0e210a35ad301a95a7bdad7adb2611a2bab4725f Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 04:13:07 +0500 Subject: [PATCH 07/13] tests: updated tests --- .../test_qdrant_vector_store_add_texts.py | 118 +++++++++++++++++ .../test_qdrant_vector_store_from_texts.py | 124 ++++++++++++++++++ .../async_api/test_qdrant_vector_store_mmr.py | 113 ++++++++++++++++ .../test_qdrant_vector_store_search.py | 120 +++++++++++++++++ .../qdrant_vector_store/test_add_texts.py | 4 +- .../qdrant_vector_store/test_from_existing.py | 2 +- .../qdrant_vector_store/test_from_texts.py | 18 +-- 7 files changed, 487 insertions(+), 12 deletions(-) create mode 100644 libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py create mode 100644 libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py create mode 100644 libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py create mode 100644 libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py new file mode 100644 index 00000000000..cad26ccfc99 --- /dev/null +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py @@ -0,0 +1,118 @@ +import uuid + +import pytest +from langchain_core.documents import Document +from qdrant_client import AsyncQdrantClient, models + +from langchain_qdrant import QdrantVectorStore, RetrievalMode +from tests.integration_tests.common import ( + ConsistentFakeEmbeddings, + ConsistentFakeSparseEmbeddings, +) +from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes + + +@pytest.mark.parametrize("location", qdrant_locations()) +@pytest.mark.parametrize("retrieval_mode", retrieval_modes()) +async def test_async_add_texts_basic(location: str, retrieval_mode: RetrievalMode) -> None: + """Test async basic add_texts functionality.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=retrieval_mode, + sparse_embedding=ConsistentFakeSparseEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + # Add initial texts + texts1 = ["foo", "bar"] + ids1 = await vec_store.aadd_texts(texts1) + assert len(ids1) == 2 + + # Add more texts + texts2 = ["baz", "qux"] + ids2 = await vec_store.aadd_texts(texts2) + assert len(ids2) == 2 + + # Verify all texts are in the collection + async_client = vec_store.client + assert isinstance(async_client, AsyncQdrantClient) + count_result = await async_client.count(collection_name) + assert 4 == count_result.count + + # Test search functionality + results = await vec_store.asimilarity_search("foo", k=1) + assert len(results) == 1 + assert results[0].page_content == "foo" + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_add_texts_with_filters(location: str) -> None: + """Test async add_texts and search with filters.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["Red apple", "Blue apple", "Green apple"] + metadatas = [ + {"color": "red", "type": "fruit"}, + {"color": "blue", "type": "fruit"}, + {"color": "green", "type": "fruit"}, + ] + + await vec_store.aadd_texts(texts, metadatas=metadatas) + + # Test search with filter + filter_condition = models.Filter( + must=[ + models.FieldCondition( + key="metadata.color", + match=models.MatchValue(value="red") + ) + ] + ) + + results = await vec_store.asimilarity_search( + "apple", k=3, filter=filter_condition + ) + + assert len(results) == 1 + assert results[0].page_content == "Red apple" + assert results[0].metadata["color"] == "red" + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_add_texts_with_custom_ids(location: str) -> None: + """Test async add_texts with custom IDs.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["First document", "Second document"] + custom_ids = [ + "fa38d572-4c31-4579-aedc-1960d79df6df", + "cdc1aa36-d6ab-4fb2-8a94-56674fd27484" + ] + + returned_ids = await vec_store.aadd_texts(texts, ids=custom_ids) + + # Should return the same IDs we provided + assert returned_ids == custom_ids + + # Verify documents can be retrieved by custom IDs + docs = await vec_store.aget_by_ids(custom_ids) + assert len(docs) == 2 + + contents = [doc.page_content for doc in docs] + assert "First document" in contents + assert "Second document" in contents diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py new file mode 100644 index 00000000000..8e4936ebe15 --- /dev/null +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py @@ -0,0 +1,124 @@ +import uuid +from typing import Union + +import pytest +from langchain_core.documents import Document +from qdrant_client import AsyncQdrantClient, models + +from langchain_qdrant import QdrantVectorStore, RetrievalMode +from langchain_qdrant.qdrant import QdrantVectorStoreError +from tests.integration_tests.common import ( + ConsistentFakeEmbeddings, + ConsistentFakeSparseEmbeddings, + assert_documents_equals, +) +from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes + + +@pytest.mark.parametrize("location", qdrant_locations()) +@pytest.mark.parametrize("retrieval_mode", retrieval_modes()) +async def test_async_vectorstore_from_texts(location: str, retrieval_mode: RetrievalMode) -> None: + """Test end to end QdrantVectorStore async construction from texts.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=retrieval_mode, + sparse_embedding=ConsistentFakeSparseEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + # Add texts using async method + await vec_store.aadd_texts(["Lorem ipsum dolor sit amet", "Ipsum dolor sit amet"]) + + # Verify count using AsyncQdrantClient + async_client = vec_store.client + assert isinstance(async_client, AsyncQdrantClient) + count_result = await async_client.count(collection_name) + assert 2 == count_result.count + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_qdrant_similarity_search(location: str) -> None: + """Test QdrantVectorStore async similarity search.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + await vec_store.aadd_texts(["foo", "bar", "baz"]) + + # Test async similarity search + output = await vec_store.asimilarity_search("foo", k=1) + assert len(output) == 1 + # Use assert_documents_equals which doesn't assume ordering + assert_documents_equals(actual=output, expected=[Document(page_content="foo")]) + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_qdrant_delete(location: str) -> None: + """Test QdrantVectorStore async delete functionality.""" + collection_name = uuid.uuid4().hex + texts = ["foo", "bar", "baz"] + ids = [ + "fa38d572-4c31-4579-aedc-1960d79df6df", + "cdc1aa36-d6ab-4fb2-8a94-56674fd27484", + "b4c1aa36-d6ab-4fb2-8a94-56674fd27485" + ] + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + await vec_store.aadd_texts(texts, ids=ids) + + async_client = vec_store.client + assert isinstance(async_client, AsyncQdrantClient) + + # Verify all texts are added + count_result = await async_client.count(collection_name) + assert 3 == count_result.count + + # Delete one document + result = await vec_store.adelete([ids[1]]) # Delete the second document + assert result is True + + # Verify deletion + count_result = await async_client.count(collection_name) + assert 2 == count_result.count + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_qdrant_add_documents(location: str) -> None: + """Test QdrantVectorStore async add documents functionality.""" + collection_name = uuid.uuid4().hex + + documents = [ + Document(page_content="foo", metadata={"page": 1}), + Document(page_content="bar", metadata={"page": 2}), + Document(page_content="baz", metadata={"page": 3}), + ] + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + # Test async add documents + ids = await vec_store.aadd_documents(documents) + assert len(ids) == 3 + assert all(isinstance(id_, str) for id_ in ids) + + async_client = vec_store.client + assert isinstance(async_client, AsyncQdrantClient) + + # Verify documents are added + count_result = await async_client.count(collection_name) + assert 3 == count_result.count diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py new file mode 100644 index 00000000000..bda1b302916 --- /dev/null +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py @@ -0,0 +1,113 @@ +import uuid + +import pytest +from langchain_core.documents import Document +from qdrant_client import AsyncQdrantClient, models + +from langchain_qdrant import QdrantVectorStore, RetrievalMode +from tests.integration_tests.common import ConsistentFakeEmbeddings +from tests.integration_tests.fixtures import qdrant_locations + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_max_marginal_relevance_search_basic(location: str) -> None: + """Test basic async max marginal relevance search functionality.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=RetrievalMode.DENSE, # MMR only works with dense + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["apple", "banana", "cherry", "apple pie", "apple juice"] + await vec_store.aadd_texts(texts) + + # Test basic MMR search + results = await vec_store.amax_marginal_relevance_search( + "apple", k=3, fetch_k=5 + ) + + assert len(results) <= 3 + assert all(isinstance(doc, Document) for doc in results) + + # First result should be most similar + assert "apple" in results[0].page_content.lower() + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_max_marginal_relevance_search_by_vector(location: str) -> None: + """Test async MMR search by vector.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=RetrievalMode.DENSE, + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["apple", "banana", "cherry", "apple pie"] + await vec_store.aadd_texts(texts) + + # Get embedding for search + embedding = ConsistentFakeEmbeddings().embed_query("apple") + + # Test MMR by vector + results = await vec_store.amax_marginal_relevance_search_by_vector( + embedding, k=2, fetch_k=4 + ) + + assert len(results) <= 2 + assert all(isinstance(doc, Document) for doc in results) + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_max_marginal_relevance_search_with_score_by_vector(location: str) -> None: + """Test async MMR search with score by vector.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=RetrievalMode.DENSE, + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["apple", "banana", "cherry", "apple pie", "apple juice"] + await vec_store.aadd_texts(texts) + + # Get embedding for search + embedding = ConsistentFakeEmbeddings().embed_query("apple") + + # Test MMR with scores by vector + results = await vec_store.amax_marginal_relevance_search_with_score_by_vector( + embedding, k=3, fetch_k=5 + ) + + assert len(results) <= 3 + for doc, score in results: + assert isinstance(doc, Document) + assert isinstance(score, float) + assert score >= 0.0 + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_max_marginal_relevance_search_empty_collection(location: str) -> None: + """Test async MMR search on empty collection.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=RetrievalMode.DENSE, + collection_name=collection_name, + client_options={"location": location}, + ) + + # Search in empty collection + results = await vec_store.amax_marginal_relevance_search( + "anything", k=5, fetch_k=10 + ) + + assert len(results) == 0 diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py new file mode 100644 index 00000000000..145ad180a65 --- /dev/null +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py @@ -0,0 +1,120 @@ +import uuid + +import pytest +from langchain_core.documents import Document +from qdrant_client import AsyncQdrantClient, models + +from langchain_qdrant import QdrantVectorStore, RetrievalMode +from tests.integration_tests.common import ( + ConsistentFakeEmbeddings, + ConsistentFakeSparseEmbeddings, +) +from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes + + +@pytest.mark.parametrize("location", qdrant_locations()) +@pytest.mark.parametrize("retrieval_mode", retrieval_modes()) +async def test_async_similarity_search_basic( + location: str, retrieval_mode: RetrievalMode +) -> None: + """Test basic async similarity search functionality.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=retrieval_mode, + sparse_embedding=ConsistentFakeSparseEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["apple", "banana", "cherry", "date"] + await vec_store.aadd_texts(texts) + + # Test basic similarity search + results = await vec_store.asimilarity_search("apple", k=2) + + assert len(results) <= 2 + assert all(isinstance(doc, Document) for doc in results) + assert results[0].page_content == "apple" # Should be most similar to itself + + +@pytest.mark.parametrize("location", qdrant_locations()) +@pytest.mark.parametrize("retrieval_mode", retrieval_modes()) +async def test_async_similarity_search_with_score( + location: str, retrieval_mode: RetrievalMode +) -> None: + """Test async similarity search with scores.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + retrieval_mode=retrieval_mode, + sparse_embedding=ConsistentFakeSparseEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["red apple", "green apple", "blue car", "yellow banana"] + await vec_store.aadd_texts(texts) + + # Test similarity search with scores + results = await vec_store.asimilarity_search_with_score("apple", k=3) + + assert len(results) <= 3 + for doc, score in results: + assert isinstance(doc, Document) + assert isinstance(score, float) + assert score >= 0.0 # Scores should be non-negative + + # First result should be most relevant + all_contents = [doc.page_content for doc, _ in results] + assert any("apple" in content for content in all_contents) + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_similarity_search_empty_collection(location: str) -> None: + """Test async similarity search on empty collection.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + # Search in empty collection + results = await vec_store.asimilarity_search("anything", k=5) + + assert len(results) == 0 + + +@pytest.mark.parametrize("location", qdrant_locations()) +async def test_async_similarity_search_with_consistency(location: str) -> None: + """Test async similarity search with read consistency parameter.""" + collection_name = uuid.uuid4().hex + + vec_store = await QdrantVectorStore.aconstruct_instance( + embedding=ConsistentFakeEmbeddings(), + collection_name=collection_name, + client_options={"location": location}, + ) + + texts = ["test document"] + await vec_store.aadd_texts(texts) + + # Test with different consistency levels + consistency_levels = [ + 1, + models.ReadConsistencyType.MAJORITY, + models.ReadConsistencyType.ALL, + ] + + for consistency in consistency_levels: + results = await vec_store.asimilarity_search( + "test", k=1, consistency=consistency + ) + + assert len(results) <= 1 + if results: + assert results[0].page_content == "test document" diff --git a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py index 5908ba4b7e2..226a7c448c0 100644 --- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py +++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py @@ -137,7 +137,7 @@ def test_qdrant_add_texts_stores_ids( batch_size=batch_size, ) - assert 3 == vec_store.client.count(collection_name).count - stored_ids = [point.id for point in vec_store.client.scroll(collection_name)[0]] + assert 3 == vec_store.sync_client.count(collection_name).count + stored_ids = [point.id for point in vec_store.sync_client.scroll(collection_name)[0]] assert set(ids) == set(stored_ids) assert 3 == len(vec_store.get_by_ids(ids)) diff --git a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_existing.py b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_existing.py index 4e4a78d517c..66e24c674e7 100644 --- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_existing.py +++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_existing.py @@ -48,4 +48,4 @@ def test_qdrant_from_existing_collection_uses_same_collection( ) qdrant.add_texts(["baz", "bar"]) - assert 3 == qdrant.client.count(collection_name).count + assert 3 == qdrant.sync_client.count(collection_name).count diff --git a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py index 437d9525e4e..688f0891a78 100644 --- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py +++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py @@ -30,7 +30,7 @@ def test_vectorstore_from_texts(location: str, retrieval_mode: RetrievalMode) -> sparse_embedding=ConsistentFakeSparseEmbeddings(), ) - assert 2 == vec_store.client.count(collection_name).count + assert 2 == vec_store.sync_client.count(collection_name).count @pytest.mark.parametrize("batch_size", [1, 64]) @@ -66,8 +66,8 @@ def test_qdrant_from_texts_stores_ids( sparse_vector_name=sparse_vector_name, ) - assert 2 == vec_store.client.count(collection_name).count - stored_ids = [point.id for point in vec_store.client.retrieve(collection_name, ids)] + assert 2 == vec_store.sync_client.count(collection_name).count + stored_ids = [point.id for point in vec_store.sync_client.retrieve(collection_name, ids)] assert set(ids) == set(stored_ids) @@ -97,16 +97,16 @@ def test_qdrant_from_texts_stores_embeddings_as_named_vectors( sparse_embedding=ConsistentFakeSparseEmbeddings(), ) - assert 5 == vec_store.client.count(collection_name).count + assert 5 == vec_store.sync_client.count(collection_name).count if retrieval_mode in retrieval_modes(sparse=False): assert all( (vector_name in point.vector or isinstance(point.vector, list)) # type: ignore - for point in vec_store.client.scroll(collection_name, with_vectors=True)[0] + for point in vec_store.sync_client.scroll(collection_name, with_vectors=True)[0] ) if retrieval_mode in retrieval_modes(dense=False): assert all( sparse_vector_name in point.vector # type: ignore - for point in vec_store.client.scroll(collection_name, with_vectors=True)[0] + for point in vec_store.sync_client.scroll(collection_name, with_vectors=True)[0] ) @@ -149,7 +149,7 @@ def test_qdrant_from_texts_reuses_same_collection( sparse_embedding=sparse_embeddings, ) - assert 7 == vec_store.client.count(collection_name).count + assert 7 == vec_store.sync_client.count(collection_name).count @pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False)) @@ -302,7 +302,7 @@ def test_qdrant_from_texts_recreates_collection_on_force_recreate( force_recreate=True, ) - assert 2 == vec_store.client.count(collection_name).count + assert 2 == vec_store.sync_client.count(collection_name).count @pytest.mark.parametrize("location", qdrant_locations()) @@ -379,7 +379,7 @@ def test_from_texts_passed_optimizers_config_and_on_disk_payload( sparse_embedding=ConsistentFakeSparseEmbeddings(), ) - collection_info = vec_store.client.get_collection(collection_name) + collection_info = vec_store.sync_client.get_collection(collection_name) assert collection_info.config.params.vectors[vector_name].on_disk is True # type: ignore assert collection_info.config.optimizer_config.memmap_threshold == 1000 assert collection_info.config.params.on_disk_payload is True From 233d90bef553b5c436eec063c724fcd21ceeef3e Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 04:13:29 +0500 Subject: [PATCH 08/13] chore: sync client property --- libs/partners/qdrant/langchain_qdrant/qdrant.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index e702d4075e0..4e4fc15968b 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -248,11 +248,26 @@ class QdrantVectorStore(VectorStore): """Get the Qdrant client instance that is being used. Returns: - Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` + Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` or ``AsyncQdrantClient``. """ return self._client + @property + def sync_client(self) -> QdrantClient: + """Get the sync Qdrant client instance. + + This property provides type-safe access to the sync client for cases where + you know the client is sync (e.g., created via from_texts() or with QdrantClient). + + Returns: + QdrantClient: The sync client instance. + + Raises: + ValueError: If the client is an AsyncQdrantClient. + """ + return self._ensure_sync_client() + async def _ensure_validation_complete(self) -> None: """Ensure any pending validation task is completed.""" if self.validate_collection_config and self._validation_task is not None: From 2c53b8067c98d7045dd249effc164805ce533b90 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 04:18:17 +0500 Subject: [PATCH 09/13] chore: lint fix --- libs/partners/qdrant/langchain_qdrant/qdrant.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index 4e4fc15968b..da6e4e15fc7 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -258,7 +258,8 @@ class QdrantVectorStore(VectorStore): """Get the sync Qdrant client instance. This property provides type-safe access to the sync client for cases where - you know the client is sync (e.g., created via from_texts() or with QdrantClient). + you know the client is sync (e.g., created via from_texts() or with + QdrantClient). Returns: QdrantClient: The sync client instance. From a57626536ac2252fce37dca4f0fc577edd488f87 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 04:19:19 +0500 Subject: [PATCH 10/13] chore: lint fix --- libs/partners/qdrant/langchain_qdrant/qdrant.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py index da6e4e15fc7..209e3ba1e3c 100644 --- a/libs/partners/qdrant/langchain_qdrant/qdrant.py +++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py @@ -248,7 +248,7 @@ class QdrantVectorStore(VectorStore): """Get the Qdrant client instance that is being used. Returns: - Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` + Union[QdrantClient, AsyncQdrantClient]: An instance of ``QdrantClient`` or ``AsyncQdrantClient``. """ return self._client @@ -256,14 +256,14 @@ class QdrantVectorStore(VectorStore): @property def sync_client(self) -> QdrantClient: """Get the sync Qdrant client instance. - + This property provides type-safe access to the sync client for cases where you know the client is sync (e.g., created via from_texts() or with QdrantClient). - + Returns: QdrantClient: The sync client instance. - + Raises: ValueError: If the client is an AsyncQdrantClient. """ From eb9556e7b9f1aef06dd3f6ccd6fcd2525d5350c4 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 04:22:26 +0500 Subject: [PATCH 11/13] tests: updated --- .../test_qdrant_vector_store_add_texts.py | 32 ++++++++--------- .../test_qdrant_vector_store_from_texts.py | 22 ++++++------ .../async_api/test_qdrant_vector_store_mmr.py | 35 ++++++++++--------- .../test_qdrant_vector_store_search.py | 20 +++++------ .../qdrant_vector_store/test_add_texts.py | 4 ++- .../qdrant_vector_store/test_from_texts.py | 12 +++++-- 6 files changed, 66 insertions(+), 59 deletions(-) diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py index cad26ccfc99..bfd98a51434 100644 --- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py @@ -1,7 +1,6 @@ import uuid import pytest -from langchain_core.documents import Document from qdrant_client import AsyncQdrantClient, models from langchain_qdrant import QdrantVectorStore, RetrievalMode @@ -14,10 +13,12 @@ from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes @pytest.mark.parametrize("location", qdrant_locations()) @pytest.mark.parametrize("retrieval_mode", retrieval_modes()) -async def test_async_add_texts_basic(location: str, retrieval_mode: RetrievalMode) -> None: +async def test_async_add_texts_basic( + location: str, retrieval_mode: RetrievalMode +) -> None: """Test async basic add_texts functionality.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), retrieval_mode=retrieval_mode, @@ -52,7 +53,7 @@ async def test_async_add_texts_basic(location: str, retrieval_mode: RetrievalMod async def test_async_add_texts_with_filters(location: str) -> None: """Test async add_texts and search with filters.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), collection_name=collection_name, @@ -65,23 +66,20 @@ async def test_async_add_texts_with_filters(location: str) -> None: {"color": "blue", "type": "fruit"}, {"color": "green", "type": "fruit"}, ] - + await vec_store.aadd_texts(texts, metadatas=metadatas) # Test search with filter filter_condition = models.Filter( must=[ models.FieldCondition( - key="metadata.color", - match=models.MatchValue(value="red") + key="metadata.color", match=models.MatchValue(value="red") ) ] ) - - results = await vec_store.asimilarity_search( - "apple", k=3, filter=filter_condition - ) - + + results = await vec_store.asimilarity_search("apple", k=3, filter=filter_condition) + assert len(results) == 1 assert results[0].page_content == "Red apple" assert results[0].metadata["color"] == "red" @@ -91,7 +89,7 @@ async def test_async_add_texts_with_filters(location: str) -> None: async def test_async_add_texts_with_custom_ids(location: str) -> None: """Test async add_texts with custom IDs.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), collection_name=collection_name, @@ -101,18 +99,18 @@ async def test_async_add_texts_with_custom_ids(location: str) -> None: texts = ["First document", "Second document"] custom_ids = [ "fa38d572-4c31-4579-aedc-1960d79df6df", - "cdc1aa36-d6ab-4fb2-8a94-56674fd27484" + "cdc1aa36-d6ab-4fb2-8a94-56674fd27484", ] - + returned_ids = await vec_store.aadd_texts(texts, ids=custom_ids) - + # Should return the same IDs we provided assert returned_ids == custom_ids # Verify documents can be retrieved by custom IDs docs = await vec_store.aget_by_ids(custom_ids) assert len(docs) == 2 - + contents = [doc.page_content for doc in docs] assert "First document" in contents assert "Second document" in contents diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py index 8e4936ebe15..e5df00381f9 100644 --- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py @@ -1,12 +1,10 @@ import uuid -from typing import Union import pytest from langchain_core.documents import Document -from qdrant_client import AsyncQdrantClient, models +from qdrant_client import AsyncQdrantClient from langchain_qdrant import QdrantVectorStore, RetrievalMode -from langchain_qdrant.qdrant import QdrantVectorStoreError from tests.integration_tests.common import ( ConsistentFakeEmbeddings, ConsistentFakeSparseEmbeddings, @@ -17,7 +15,9 @@ from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes @pytest.mark.parametrize("location", qdrant_locations()) @pytest.mark.parametrize("retrieval_mode", retrieval_modes()) -async def test_async_vectorstore_from_texts(location: str, retrieval_mode: RetrievalMode) -> None: +async def test_async_vectorstore_from_texts( + location: str, retrieval_mode: RetrievalMode +) -> None: """Test end to end QdrantVectorStore async construction from texts.""" collection_name = uuid.uuid4().hex @@ -28,7 +28,7 @@ async def test_async_vectorstore_from_texts(location: str, retrieval_mode: Retri collection_name=collection_name, client_options={"location": location}, ) - + # Add texts using async method await vec_store.aadd_texts(["Lorem ipsum dolor sit amet", "Ipsum dolor sit amet"]) @@ -43,7 +43,7 @@ async def test_async_vectorstore_from_texts(location: str, retrieval_mode: Retri async def test_async_qdrant_similarity_search(location: str) -> None: """Test QdrantVectorStore async similarity search.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), collection_name=collection_name, @@ -66,8 +66,8 @@ async def test_async_qdrant_delete(location: str) -> None: texts = ["foo", "bar", "baz"] ids = [ "fa38d572-4c31-4579-aedc-1960d79df6df", - "cdc1aa36-d6ab-4fb2-8a94-56674fd27484", - "b4c1aa36-d6ab-4fb2-8a94-56674fd27485" + "cdc1aa36-d6ab-4fb2-8a94-56674fd27484", + "b4c1aa36-d6ab-4fb2-8a94-56674fd27485", ] vec_store = await QdrantVectorStore.aconstruct_instance( @@ -80,7 +80,7 @@ async def test_async_qdrant_delete(location: str) -> None: async_client = vec_store.client assert isinstance(async_client, AsyncQdrantClient) - + # Verify all texts are added count_result = await async_client.count(collection_name) assert 3 == count_result.count @@ -98,7 +98,7 @@ async def test_async_qdrant_delete(location: str) -> None: async def test_async_qdrant_add_documents(location: str) -> None: """Test QdrantVectorStore async add documents functionality.""" collection_name = uuid.uuid4().hex - + documents = [ Document(page_content="foo", metadata={"page": 1}), Document(page_content="bar", metadata={"page": 2}), @@ -118,7 +118,7 @@ async def test_async_qdrant_add_documents(location: str) -> None: async_client = vec_store.client assert isinstance(async_client, AsyncQdrantClient) - + # Verify documents are added count_result = await async_client.count(collection_name) assert 3 == count_result.count diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py index bda1b302916..1cb1b8b766f 100644 --- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py @@ -2,7 +2,6 @@ import uuid import pytest from langchain_core.documents import Document -from qdrant_client import AsyncQdrantClient, models from langchain_qdrant import QdrantVectorStore, RetrievalMode from tests.integration_tests.common import ConsistentFakeEmbeddings @@ -13,7 +12,7 @@ from tests.integration_tests.fixtures import qdrant_locations async def test_async_max_marginal_relevance_search_basic(location: str) -> None: """Test basic async max marginal relevance search functionality.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), retrieval_mode=RetrievalMode.DENSE, # MMR only works with dense @@ -25,13 +24,11 @@ async def test_async_max_marginal_relevance_search_basic(location: str) -> None: await vec_store.aadd_texts(texts) # Test basic MMR search - results = await vec_store.amax_marginal_relevance_search( - "apple", k=3, fetch_k=5 - ) - + results = await vec_store.amax_marginal_relevance_search("apple", k=3, fetch_k=5) + assert len(results) <= 3 assert all(isinstance(doc, Document) for doc in results) - + # First result should be most similar assert "apple" in results[0].page_content.lower() @@ -40,7 +37,7 @@ async def test_async_max_marginal_relevance_search_basic(location: str) -> None: async def test_async_max_marginal_relevance_search_by_vector(location: str) -> None: """Test async MMR search by vector.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), retrieval_mode=RetrievalMode.DENSE, @@ -53,21 +50,23 @@ async def test_async_max_marginal_relevance_search_by_vector(location: str) -> N # Get embedding for search embedding = ConsistentFakeEmbeddings().embed_query("apple") - + # Test MMR by vector results = await vec_store.amax_marginal_relevance_search_by_vector( embedding, k=2, fetch_k=4 ) - + assert len(results) <= 2 assert all(isinstance(doc, Document) for doc in results) @pytest.mark.parametrize("location", qdrant_locations()) -async def test_async_max_marginal_relevance_search_with_score_by_vector(location: str) -> None: +async def test_async_max_marginal_relevance_search_with_score_by_vector( + location: str, +) -> None: """Test async MMR search with score by vector.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), retrieval_mode=RetrievalMode.DENSE, @@ -80,12 +79,12 @@ async def test_async_max_marginal_relevance_search_with_score_by_vector(location # Get embedding for search embedding = ConsistentFakeEmbeddings().embed_query("apple") - + # Test MMR with scores by vector results = await vec_store.amax_marginal_relevance_search_with_score_by_vector( embedding, k=3, fetch_k=5 ) - + assert len(results) <= 3 for doc, score in results: assert isinstance(doc, Document) @@ -94,10 +93,12 @@ async def test_async_max_marginal_relevance_search_with_score_by_vector(location @pytest.mark.parametrize("location", qdrant_locations()) -async def test_async_max_marginal_relevance_search_empty_collection(location: str) -> None: +async def test_async_max_marginal_relevance_search_empty_collection( + location: str, +) -> None: """Test async MMR search on empty collection.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), retrieval_mode=RetrievalMode.DENSE, @@ -109,5 +110,5 @@ async def test_async_max_marginal_relevance_search_empty_collection(location: st results = await vec_store.amax_marginal_relevance_search( "anything", k=5, fetch_k=10 ) - + assert len(results) == 0 diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py index 145ad180a65..ee9200ac37d 100644 --- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py @@ -2,7 +2,7 @@ import uuid import pytest from langchain_core.documents import Document -from qdrant_client import AsyncQdrantClient, models +from qdrant_client import models from langchain_qdrant import QdrantVectorStore, RetrievalMode from tests.integration_tests.common import ( @@ -19,7 +19,7 @@ async def test_async_similarity_search_basic( ) -> None: """Test basic async similarity search functionality.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), retrieval_mode=retrieval_mode, @@ -33,7 +33,7 @@ async def test_async_similarity_search_basic( # Test basic similarity search results = await vec_store.asimilarity_search("apple", k=2) - + assert len(results) <= 2 assert all(isinstance(doc, Document) for doc in results) assert results[0].page_content == "apple" # Should be most similar to itself @@ -46,7 +46,7 @@ async def test_async_similarity_search_with_score( ) -> None: """Test async similarity search with scores.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), retrieval_mode=retrieval_mode, @@ -60,13 +60,13 @@ async def test_async_similarity_search_with_score( # Test similarity search with scores results = await vec_store.asimilarity_search_with_score("apple", k=3) - + assert len(results) <= 3 for doc, score in results: assert isinstance(doc, Document) assert isinstance(score, float) assert score >= 0.0 # Scores should be non-negative - + # First result should be most relevant all_contents = [doc.page_content for doc, _ in results] assert any("apple" in content for content in all_contents) @@ -76,7 +76,7 @@ async def test_async_similarity_search_with_score( async def test_async_similarity_search_empty_collection(location: str) -> None: """Test async similarity search on empty collection.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), collection_name=collection_name, @@ -85,7 +85,7 @@ async def test_async_similarity_search_empty_collection(location: str) -> None: # Search in empty collection results = await vec_store.asimilarity_search("anything", k=5) - + assert len(results) == 0 @@ -93,7 +93,7 @@ async def test_async_similarity_search_empty_collection(location: str) -> None: async def test_async_similarity_search_with_consistency(location: str) -> None: """Test async similarity search with read consistency parameter.""" collection_name = uuid.uuid4().hex - + vec_store = await QdrantVectorStore.aconstruct_instance( embedding=ConsistentFakeEmbeddings(), collection_name=collection_name, @@ -114,7 +114,7 @@ async def test_async_similarity_search_with_consistency(location: str) -> None: results = await vec_store.asimilarity_search( "test", k=1, consistency=consistency ) - + assert len(results) <= 1 if results: assert results[0].page_content == "test document" diff --git a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py index 226a7c448c0..bf97812b8bf 100644 --- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py +++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py @@ -138,6 +138,8 @@ def test_qdrant_add_texts_stores_ids( ) assert 3 == vec_store.sync_client.count(collection_name).count - stored_ids = [point.id for point in vec_store.sync_client.scroll(collection_name)[0]] + stored_ids = [ + point.id for point in vec_store.sync_client.scroll(collection_name)[0] + ] assert set(ids) == set(stored_ids) assert 3 == len(vec_store.get_by_ids(ids)) diff --git a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py index 688f0891a78..07f8ee52473 100644 --- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py +++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py @@ -67,7 +67,9 @@ def test_qdrant_from_texts_stores_ids( ) assert 2 == vec_store.sync_client.count(collection_name).count - stored_ids = [point.id for point in vec_store.sync_client.retrieve(collection_name, ids)] + stored_ids = [ + point.id for point in vec_store.sync_client.retrieve(collection_name, ids) + ] assert set(ids) == set(stored_ids) @@ -101,12 +103,16 @@ def test_qdrant_from_texts_stores_embeddings_as_named_vectors( if retrieval_mode in retrieval_modes(sparse=False): assert all( (vector_name in point.vector or isinstance(point.vector, list)) # type: ignore - for point in vec_store.sync_client.scroll(collection_name, with_vectors=True)[0] + for point in vec_store.sync_client.scroll( + collection_name, with_vectors=True + )[0] ) if retrieval_mode in retrieval_modes(dense=False): assert all( sparse_vector_name in point.vector # type: ignore - for point in vec_store.sync_client.scroll(collection_name, with_vectors=True)[0] + for point in vec_store.sync_client.scroll( + collection_name, with_vectors=True + )[0] ) From 6965d838449a27a1560d0c49bb83a39329f5da17 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 04:33:18 +0500 Subject: [PATCH 12/13] tests: updated --- .../test_qdrant_vector_store_search.py | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py index ee9200ac37d..eddac0480ad 100644 --- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py @@ -104,17 +104,27 @@ async def test_async_similarity_search_with_consistency(location: str) -> None: await vec_store.aadd_texts(texts) # Test with different consistency levels - consistency_levels = [ - 1, - models.ReadConsistencyType.MAJORITY, - models.ReadConsistencyType.ALL, - ] - - for consistency in consistency_levels: - results = await vec_store.asimilarity_search( - "test", k=1, consistency=consistency - ) - - assert len(results) <= 1 - if results: - assert results[0].page_content == "test document" + + # Test with factor consistency (int) + results = await vec_store.asimilarity_search( + "test", k=1, consistency=1 + ) + assert len(results) <= 1 + if results: + assert results[0].page_content == "test document" + + # Test with majority consistency + results = await vec_store.asimilarity_search( + "test", k=1, consistency=models.ReadConsistencyType.MAJORITY + ) + assert len(results) <= 1 + if results: + assert results[0].page_content == "test document" + + # Test with all consistency + results = await vec_store.asimilarity_search( + "test", k=1, consistency=models.ReadConsistencyType.ALL + ) + assert len(results) <= 1 + if results: + assert results[0].page_content == "test document" \ No newline at end of file From fb4ca7ecc6c82a4e44389269b7203d7d1b34987d Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 27 Jul 2025 04:34:53 +0500 Subject: [PATCH 13/13] chore: formating fixed --- .../async_api/test_qdrant_vector_store_search.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py index eddac0480ad..0eb6f715a4a 100644 --- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py +++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py @@ -104,15 +104,13 @@ async def test_async_similarity_search_with_consistency(location: str) -> None: await vec_store.aadd_texts(texts) # Test with different consistency levels - + # Test with factor consistency (int) - results = await vec_store.asimilarity_search( - "test", k=1, consistency=1 - ) + results = await vec_store.asimilarity_search("test", k=1, consistency=1) assert len(results) <= 1 if results: assert results[0].page_content == "test document" - + # Test with majority consistency results = await vec_store.asimilarity_search( "test", k=1, consistency=models.ReadConsistencyType.MAJORITY @@ -120,11 +118,11 @@ async def test_async_similarity_search_with_consistency(location: str) -> None: assert len(results) <= 1 if results: assert results[0].page_content == "test document" - + # Test with all consistency results = await vec_store.asimilarity_search( "test", k=1, consistency=models.ReadConsistencyType.ALL ) assert len(results) <= 1 if results: - assert results[0].page_content == "test document" \ No newline at end of file + assert results[0].page_content == "test document"