From f0a7581b50c922d52eb78e294ac8e749e7f2f93a Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Fri, 12 Jul 2024 16:25:31 -0700 Subject: [PATCH] milvus: docstring (#23151) Added missing docstrings. Formatted docstrings to the consistent format (used in the API Reference) --------- Co-authored-by: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Co-authored-by: isaac hershenson Co-authored-by: Erick Friis --- .../retrievers/milvus_hybrid_search.py | 3 +- .../zilliz_cloud_pipeline_retriever.py | 28 +++++++++---------- .../milvus/langchain_milvus/utils/sparse.py | 5 ++-- .../langchain_milvus/vectorstores/milvus.py | 14 ++++++++-- 4 files changed, 31 insertions(+), 19 deletions(-) diff --git a/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py b/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py index 6b6e692dd2c..b6e309c32ee 100644 --- a/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py +++ b/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py @@ -11,8 +11,9 @@ from langchain_milvus.utils.sparse import BaseSparseEmbedding class MilvusCollectionHybridSearchRetriever(BaseRetriever): - """This is a hybrid search retriever + """Hybrid search retriever that uses Milvus Collection to retrieve documents based on multiple fields. 
+ For more information, please refer to: https://milvus.io/docs/release_notes.md#Multi-Embedding---Hybrid-Search """ diff --git a/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py b/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py index 6fbccfa47fa..88c6a55cac9 100644 --- a/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py +++ b/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py @@ -7,13 +7,13 @@ from langchain_core.retrievers import BaseRetriever class ZillizCloudPipelineRetriever(BaseRetriever): - """`Zilliz Cloud Pipeline` retriever + """`Zilliz Cloud Pipeline` retriever. - Args: - pipeline_ids (dict): A dictionary of pipeline ids. + Parameters: + pipeline_ids: A dictionary of pipeline ids. Valid keys: "ingestion", "search", "deletion". - token (str): Zilliz Cloud's token. Defaults to "". - cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster. + token: Zilliz Cloud's token. Defaults to "". + cloud_region: The region of Zilliz Cloud's cluster. Defaults to 'gcp-us-west1'. """ @@ -35,14 +35,14 @@ class ZillizCloudPipelineRetriever(BaseRetriever): Get documents relevant to a query. Args: - query (str): String to find relevant documents for - top_k (int=10): The number of results. Defaults to 10. - offset (int=0): The number of records to skip in the search result. + query: String to find relevant documents for + top_k: The number of results. Defaults to 10. + offset: The number of records to skip in the search result. Defaults to 0. - output_fields (list=[]): The extra fields to present in output. - filter (str=""): The Milvus expression to filter search results. + output_fields: The extra fields to present in output. + filter: The Milvus expression to filter search results. Defaults to "". - run_manager (CallBackManagerForRetrieverRun): The callbacks handler to use. + run_manager: The callbacks handler to use. 
Returns: List of relevant documents @@ -100,8 +100,8 @@ class ZillizCloudPipelineRetriever(BaseRetriever): Only supported by a text ingestion pipeline in Zilliz Cloud. Args: - texts (List[str]): A list of text strings. - metadata (Dict[str, Any]): A key-value dictionary of metadata will + texts: A list of text strings. + metadata: A key-value dictionary of metadata will be inserted as preserved fields required by ingestion pipeline. Defaults to None. """ @@ -144,7 +144,7 @@ class ZillizCloudPipelineRetriever(BaseRetriever): Args: doc_url: A document url. - metadata (Dict[str, Any]): A key-value dictionary of metadata will + metadata: A key-value dictionary of metadata will be inserted as preserved fields required by ingestion pipeline. Defaults to None. """ diff --git a/libs/partners/milvus/langchain_milvus/utils/sparse.py b/libs/partners/milvus/langchain_milvus/utils/sparse.py index 027c978c65d..47c19567cea 100644 --- a/libs/partners/milvus/langchain_milvus/utils/sparse.py +++ b/libs/partners/milvus/langchain_milvus/utils/sparse.py @@ -6,6 +6,7 @@ from scipy.sparse import csr_array # type: ignore class BaseSparseEmbedding(ABC): """Interface for Sparse embedding models. + You can inherit from it and implement your custom sparse embedding model. """ @@ -19,8 +20,8 @@ class BaseSparseEmbedding(ABC): class BM25SparseEmbedding(BaseSparseEmbedding): - """This is a class that inherits BaseSparseEmbedding - and implements a sparse vector embedding model based on BM25. + """Sparse embedding model based on BM25. + This class uses the BM25 model in Milvus model to implement sparse vector embedding. This model requires pymilvus[model] to be installed. 
`pip install pymilvus[model]` diff --git a/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py index 5a5acda13f6..343a92fa68c 100644 --- a/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py +++ b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py @@ -57,7 +57,17 @@ def maximal_marginal_relevance( lambda_mult: float = 0.5, k: int = 4, ) -> List[int]: - """Calculate maximal marginal relevance.""" + """Calculate maximal marginal relevance. + + Args: + query_embedding: The query embedding. + embedding_list: The list of embeddings. + lambda_mult: The lambda multiplier. Defaults to 0.5. + k: The number of results to return. Defaults to 4. + + Returns: + List[int]: The list of indices. + """ if min(k, len(embedding_list)) <= 0: return [] if query_embedding.ndim == 1: @@ -99,7 +109,7 @@ class Milvus(VectorStore): IF USING L2/IP metric, IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA. - Args: + Parameters: embedding_function (Embeddings): Function used to embed the text. collection_name (str): Which Milvus collection to use. Defaults to "LangChainCollection".