milvus: docstring (#23151)

Added missing docstrings. Formatted docstrings to a consistent format
(the one used in the API Reference).

---------

Co-authored-by: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Co-authored-by: isaac hershenson <ihershenson@hmc.edu>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Leonid Ganeline 2024-07-12 16:25:31 -07:00 committed by GitHub
parent 474b88326f
commit f0a7581b50
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 31 additions and 19 deletions

View File

@ -11,8 +11,9 @@ from langchain_milvus.utils.sparse import BaseSparseEmbedding
class MilvusCollectionHybridSearchRetriever(BaseRetriever): class MilvusCollectionHybridSearchRetriever(BaseRetriever):
"""This is a hybrid search retriever """Hybrid search retriever
that uses Milvus Collection to retrieve documents based on multiple fields. that uses Milvus Collection to retrieve documents based on multiple fields.
For more information, please refer to: For more information, please refer to:
https://milvus.io/docs/release_notes.md#Multi-Embedding---Hybrid-Search https://milvus.io/docs/release_notes.md#Multi-Embedding---Hybrid-Search
""" """

View File

@ -7,13 +7,13 @@ from langchain_core.retrievers import BaseRetriever
class ZillizCloudPipelineRetriever(BaseRetriever): class ZillizCloudPipelineRetriever(BaseRetriever):
"""`Zilliz Cloud Pipeline` retriever """`Zilliz Cloud Pipeline` retriever.
Args: Parameters:
pipeline_ids (dict): A dictionary of pipeline ids. pipeline_ids: A dictionary of pipeline ids.
Valid keys: "ingestion", "search", "deletion". Valid keys: "ingestion", "search", "deletion".
token (str): Zilliz Cloud's token. Defaults to "". token: Zilliz Cloud's token. Defaults to "".
cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster. cloud_region: The region of Zilliz Cloud's cluster.
Defaults to 'gcp-us-west1'. Defaults to 'gcp-us-west1'.
""" """
@ -35,14 +35,14 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
Get documents relevant to a query. Get documents relevant to a query.
Args: Args:
query (str): String to find relevant documents for query: String to find relevant documents for
top_k (int=10): The number of results. Defaults to 10. top_k: The number of results. Defaults to 10.
offset (int=0): The number of records to skip in the search result. offset: The number of records to skip in the search result.
Defaults to 0. Defaults to 0.
output_fields (list=[]): The extra fields to present in output. output_fields: The extra fields to present in output.
filter (str=""): The Milvus expression to filter search results. filter: The Milvus expression to filter search results.
Defaults to "". Defaults to "".
run_manager (CallBackManagerForRetrieverRun): The callbacks handler to use. run_manager: The callbacks handler to use.
Returns: Returns:
List of relevant documents List of relevant documents
@ -100,8 +100,8 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
Only supported by a text ingestion pipeline in Zilliz Cloud. Only supported by a text ingestion pipeline in Zilliz Cloud.
Args: Args:
texts (List[str]): A list of text strings. texts: A list of text strings.
metadata (Dict[str, Any]): A key-value dictionary of metadata that will metadata: A key-value dictionary of metadata that will
be inserted as preserved fields required by the ingestion pipeline. be inserted as preserved fields required by the ingestion pipeline.
Defaults to None. Defaults to None.
""" """
@ -144,7 +144,7 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
Args: Args:
doc_url: A document url. doc_url: A document url.
metadata (Dict[str, Any]): A key-value dictionary of metadata that will metadata: A key-value dictionary of metadata that will
be inserted as preserved fields required by the ingestion pipeline. be inserted as preserved fields required by the ingestion pipeline.
Defaults to None. Defaults to None.
""" """

View File

@ -6,6 +6,7 @@ from scipy.sparse import csr_array # type: ignore
class BaseSparseEmbedding(ABC): class BaseSparseEmbedding(ABC):
"""Interface for Sparse embedding models. """Interface for Sparse embedding models.
You can inherit from it and implement your custom sparse embedding model. You can inherit from it and implement your custom sparse embedding model.
""" """
@ -19,8 +20,8 @@ class BaseSparseEmbedding(ABC):
class BM25SparseEmbedding(BaseSparseEmbedding): class BM25SparseEmbedding(BaseSparseEmbedding):
"""This is a class that inherits BaseSparseEmbedding """Sparse embedding model based on BM25.
and implements a sparse vector embedding model based on BM25.
This class uses the BM25 model in Milvus model to implement sparse vector embedding. This class uses the BM25 model in Milvus model to implement sparse vector embedding.
This model requires pymilvus[model] to be installed. This model requires pymilvus[model] to be installed.
`pip install pymilvus[model]` `pip install pymilvus[model]`

View File

@ -57,7 +57,17 @@ def maximal_marginal_relevance(
lambda_mult: float = 0.5, lambda_mult: float = 0.5,
k: int = 4, k: int = 4,
) -> List[int]: ) -> List[int]:
"""Calculate maximal marginal relevance.""" """Calculate maximal marginal relevance.
Args:
query_embedding: The query embedding.
embedding_list: The list of embeddings.
lambda_mult: The lambda multiplier. Defaults to 0.5.
k: The number of results to return. Defaults to 4.
Returns:
List[int]: The list of indices.
"""
if min(k, len(embedding_list)) <= 0: if min(k, len(embedding_list)) <= 0:
return [] return []
if query_embedding.ndim == 1: if query_embedding.ndim == 1:
@ -99,7 +109,7 @@ class Milvus(VectorStore):
IF USING L2/IP metric, IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA. IF USING L2/IP metric, IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA.
Args: Parameters:
embedding_function (Embeddings): Function used to embed the text. embedding_function (Embeddings): Function used to embed the text.
collection_name (str): Which Milvus collection to use. Defaults to collection_name (str): Which Milvus collection to use. Defaults to
"LangChainCollection". "LangChainCollection".