milvus: docstring (#23151)

Added missing docstrings. Formatted docstrings to the consistent format (used in the API Reference). --------- Co-authored-by: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Co-authored-by: isaac hershenson <ihershenson@hmc.edu> Co-authored-by: Erick Friis <erick@langchain.dev>
2025-08-09 04:50:37 +00:00 · 2024-07-12 16:25:31 -07:00 · 2024-07-12 16:25:31 -07:00 · f0a7581b50
commit f0a7581b50
parent 474b88326f
4 changed files with 31 additions and 19 deletions
--- a/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py
+++ b/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py
@ -11,8 +11,9 @@ from langchain_milvus.utils.sparse import BaseSparseEmbedding


 class MilvusCollectionHybridSearchRetriever(BaseRetriever):
-    """This is a hybrid search retriever
+    """Hybrid search retriever
    that uses Milvus Collection to retrieve documents based on multiple fields.
+
    For more information, please refer to:
    https://milvus.io/docs/release_notes.md#Multi-Embedding---Hybrid-Search
    """
--- a/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py
+++ b/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py
@ -7,13 +7,13 @@ from langchain_core.retrievers import BaseRetriever


 class ZillizCloudPipelineRetriever(BaseRetriever):
-    """`Zilliz Cloud Pipeline` retriever
+    """`Zilliz Cloud Pipeline` retriever.

-    Args:
-        pipeline_ids (dict): A dictionary of pipeline ids.
+    Parameters:
+        pipeline_ids: A dictionary of pipeline ids.
            Valid keys: "ingestion", "search", "deletion".
-        token (str): Zilliz Cloud's token. Defaults to "".
-        cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster.
+        token: Zilliz Cloud's token. Defaults to "".
+        cloud_region: The region of Zilliz Cloud's cluster.
            Defaults to 'gcp-us-west1'.
    """

@ -35,14 +35,14 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
        Get documents relevant to a query.

        Args:
-            query (str): String to find relevant documents for
-            top_k (int=10): The number of results. Defaults to 10.
-            offset (int=0): The number of records to skip in the search result.
+            query: String to find relevant documents for.
+            top_k: The number of results. Defaults to 10.
+            offset: The number of records to skip in the search result.
                Defaults to 0.
-            output_fields (list=[]): The extra fields to present in output.
-            filter (str=""): The Milvus expression to filter search results.
+            output_fields: The extra fields to present in output.
+            filter: The Milvus expression to filter search results.
                Defaults to "".
-            run_manager (CallBackManagerForRetrieverRun): The callbacks handler to use.
+            run_manager: The callbacks handler to use.

        Returns:
            List of relevant documents
@ -100,8 +100,8 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
        Only supported by a text ingestion pipeline in Zilliz Cloud.

        Args:
-            texts (List[str]): A list of text strings.
-            metadata (Dict[str, Any]): A key-value dictionary of metadata will
+            texts: A list of text strings.
+            metadata: A key-value dictionary of metadata that will
                be inserted as preserved fields required by ingestion pipeline.
                Defaults to None.
        """
@ -144,7 +144,7 @@ class ZillizCloudPipelineRetriever(BaseRetriever):

        Args:
            doc_url: A document url.
-            metadata (Dict[str, Any]): A key-value dictionary of metadata will
+            metadata: A key-value dictionary of metadata that will
                be inserted as preserved fields required by ingestion pipeline.
                Defaults to None.
        """
--- a/libs/partners/milvus/langchain_milvus/utils/sparse.py
+++ b/libs/partners/milvus/langchain_milvus/utils/sparse.py
@ -6,6 +6,7 @@ from scipy.sparse import csr_array  # type: ignore

 class BaseSparseEmbedding(ABC):
    """Interface for Sparse embedding models.
+
    You can inherit from it and implement your custom sparse embedding model.
    """

@ -19,8 +20,8 @@ class BaseSparseEmbedding(ABC):


 class BM25SparseEmbedding(BaseSparseEmbedding):
-    """This is a class that inherits BaseSparseEmbedding
-    and implements a sparse vector embedding model based on BM25.
+    """Sparse embedding model based on BM25.
+
    This class uses the BM25 model in Milvus model to implement sparse vector embedding.
    This model requires pymilvus[model] to be installed.
    `pip install pymilvus[model]`
--- a/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py
+++ b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py
@ -57,7 +57,17 @@ def maximal_marginal_relevance(
    lambda_mult: float = 0.5,
    k: int = 4,
 ) -> List[int]:
-    """Calculate maximal marginal relevance."""
+    """Calculate maximal marginal relevance.
+
+    Args:
+        query_embedding: The query embedding.
+        embedding_list: The list of embeddings.
+        lambda_mult: The lambda multiplier. Defaults to 0.5.
+        k: The number of results to return. Defaults to 4.
+
+    Returns:
+        List[int]: The list of indices.
+    """
    if min(k, len(embedding_list)) <= 0:
        return []
    if query_embedding.ndim == 1:
@ -99,7 +109,7 @@ class Milvus(VectorStore):

    IF USING L2/IP metric, IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA.

-    Args:
+    Parameters:
        embedding_function (Embeddings): Function used to embed the text.
        collection_name (str): Which Milvus collection to use. Defaults to
            "LangChainCollection".