milvus: docstring (#23151)

Added missing docstrings. Formatted docstrings to the consistent format
(used in the API Reference).

---------

Co-authored-by: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Co-authored-by: isaac hershenson <ihershenson@hmc.edu>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Leonid Ganeline 2024-07-12 16:25:31 -07:00 committed by GitHub
parent 474b88326f
commit f0a7581b50
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 31 additions and 19 deletions

View File

@ -11,8 +11,9 @@ from langchain_milvus.utils.sparse import BaseSparseEmbedding
class MilvusCollectionHybridSearchRetriever(BaseRetriever):
"""This is a hybrid search retriever
"""Hybrid search retriever
that uses Milvus Collection to retrieve documents based on multiple fields.
For more information, please refer to:
https://milvus.io/docs/release_notes.md#Multi-Embedding---Hybrid-Search
"""

View File

@ -7,13 +7,13 @@ from langchain_core.retrievers import BaseRetriever
class ZillizCloudPipelineRetriever(BaseRetriever):
"""`Zilliz Cloud Pipeline` retriever
"""`Zilliz Cloud Pipeline` retriever.
Args:
pipeline_ids (dict): A dictionary of pipeline ids.
Parameters:
pipeline_ids: A dictionary of pipeline ids.
Valid keys: "ingestion", "search", "deletion".
token (str): Zilliz Cloud's token. Defaults to "".
cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster.
token: Zilliz Cloud's token. Defaults to "".
cloud_region: The region of Zilliz Cloud's cluster.
Defaults to 'gcp-us-west1'.
"""
@ -35,14 +35,14 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
Get documents relevant to a query.
Args:
query (str): String to find relevant documents for
top_k (int=10): The number of results. Defaults to 10.
offset (int=0): The number of records to skip in the search result.
query: String to find relevant documents for
top_k: The number of results. Defaults to 10.
offset: The number of records to skip in the search result.
Defaults to 0.
output_fields (list=[]): The extra fields to present in output.
filter (str=""): The Milvus expression to filter search results.
output_fields: The extra fields to present in output.
filter: The Milvus expression to filter search results.
Defaults to "".
run_manager (CallBackManagerForRetrieverRun): The callbacks handler to use.
run_manager: The callbacks handler to use.
Returns:
List of relevant documents
@ -100,8 +100,8 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
Only supported by a text ingestion pipeline in Zilliz Cloud.
Args:
texts (List[str]): A list of text strings.
metadata (Dict[str, Any]): A key-value dictionary of metadata will
texts: A list of text strings.
metadata: A key-value dictionary of metadata that will
be inserted as preserved fields required by the ingestion pipeline.
Defaults to None.
"""
@ -144,7 +144,7 @@ class ZillizCloudPipelineRetriever(BaseRetriever):
Args:
doc_url: A document url.
metadata (Dict[str, Any]): A key-value dictionary of metadata will
metadata: A key-value dictionary of metadata that will
be inserted as preserved fields required by the ingestion pipeline.
Defaults to None.
"""

View File

@ -6,6 +6,7 @@ from scipy.sparse import csr_array # type: ignore
class BaseSparseEmbedding(ABC):
"""Interface for Sparse embedding models.
You can inherit from it and implement your custom sparse embedding model.
"""
@ -19,8 +20,8 @@ class BaseSparseEmbedding(ABC):
class BM25SparseEmbedding(BaseSparseEmbedding):
"""This is a class that inherits BaseSparseEmbedding
and implements a sparse vector embedding model based on BM25.
"""Sparse embedding model based on BM25.
This class uses the BM25 model from the Milvus model library to implement sparse vector embedding.
This model requires pymilvus[model] to be installed.
`pip install pymilvus[model]`

View File

@ -57,7 +57,17 @@ def maximal_marginal_relevance(
lambda_mult: float = 0.5,
k: int = 4,
) -> List[int]:
"""Calculate maximal marginal relevance."""
"""Calculate maximal marginal relevance.
Args:
query_embedding: The query embedding.
embedding_list: The list of embeddings.
lambda_mult: The lambda multiplier. Defaults to 0.5.
k: The number of results to return. Defaults to 4.
Returns:
List[int]: The list of indices.
"""
if min(k, len(embedding_list)) <= 0:
return []
if query_embedding.ndim == 1:
@ -99,7 +109,7 @@ class Milvus(VectorStore):
IF USING L2/IP metric, IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA.
Args:
Parameters:
embedding_function (Embeddings): Function used to embed the text.
collection_name (str): Which Milvus collection to use. Defaults to
"LangChainCollection".