From f12b5c1222c43cf198292be156eba51a969c6ef1 Mon Sep 17 00:00:00 2001
From: chyroc
Date: Fri, 5 Jan 2024 12:07:23 +0800
Subject: [PATCH] Feat: support Milvus more params (#15447)

fix https://github.com/langchain-ai/langchain/issues/15442
---
 .../vectorstores/milvus.py | 50 ++++++++++++++++---
 1 file changed, 43 insertions(+), 7 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/milvus.py b/libs/community/langchain_community/vectorstores/milvus.py
index 67ac8a7c687..476f165121d 100644
--- a/libs/community/langchain_community/vectorstores/milvus.py
+++ b/libs/community/langchain_community/vectorstores/milvus.py
@@ -119,6 +119,9 @@ class Milvus(VectorStore):
         text_field: str = "text",
         vector_field: str = "vector",
         metadata_field: Optional[str] = None,
+        partition_names: Optional[list] = None,
+        replica_number: int = 1,
+        timeout: Optional[float] = None,
     ):
         """Initialize the Milvus vector store."""
         try:
@@ -158,6 +161,10 @@ class Milvus(VectorStore):
         self._vector_field = vector_field
         self._metadata_field = metadata_field
         self.fields: list[str] = []
+        self.partition_names = partition_names
+        self.replica_number = replica_number
+        self.timeout = timeout
+
         # Create the connection to the server
         if connection_args is None:
             connection_args = DEFAULT_MILVUS_CONNECTION
@@ -176,7 +183,11 @@ class Milvus(VectorStore):
             self.col = None
 
         # Initialize the vector store
-        self._init()
+        self._init(
+            partition_names=partition_names,
+            replica_number=replica_number,
+            timeout=timeout,
+        )
 
     @property
     def embeddings(self) -> Embeddings:
@@ -235,14 +246,23 @@ class Milvus(VectorStore):
             raise e
 
     def _init(
-        self, embeddings: Optional[list] = None, metadatas: Optional[list[dict]] = None
+        self,
+        embeddings: Optional[list] = None,
+        metadatas: Optional[list[dict]] = None,
+        partition_names: Optional[list] = None,
+        replica_number: int = 1,
+        timeout: Optional[float] = None,
     ) -> None:
         if embeddings is not None:
             self._create_collection(embeddings, metadatas)
         self._extract_fields()
         self._create_index()
         self._create_search_params()
-        self._load()
+        self._load(
+            partition_names=partition_names,
+            replica_number=replica_number,
+            timeout=timeout,
+        )
 
     def _create_collection(
         self, embeddings: list, metadatas: Optional[list[dict]] = None
@@ -396,12 +416,21 @@ class Milvus(VectorStore):
                 self.search_params = self.default_search_params[index_type]
                 self.search_params["metric_type"] = metric_type
 
-    def _load(self) -> None:
+    def _load(
+        self,
+        partition_names: Optional[list] = None,
+        replica_number: int = 1,
+        timeout: Optional[float] = None,
+    ) -> None:
         """Load the collection if available."""
         from pymilvus import Collection
 
         if isinstance(self.col, Collection) and self._get_index() is not None:
-            self.col.load()
+            self.col.load(
+                partition_names=partition_names,
+                replica_number=replica_number,
+                timeout=timeout,
+            )
 
     def add_texts(
         self,
@@ -417,7 +446,7 @@ class Milvus(VectorStore):
         in creating a new Collection. The data of the first entity decides
         the schema of the new collection, the dim is extracted from the first
         embedding and the columns are decided by the first metadata dict.
-        Metada keys will need to be present for all inserted values. At
+        Metadata keys will need to be present for all inserted values. At
         the moment there is no None equivalent in Milvus.
 
         Args:
@@ -451,7 +480,14 @@ class Milvus(VectorStore):
 
         # If the collection hasn't been initialized yet, perform all steps to do so
         if not isinstance(self.col, Collection):
-            self._init(embeddings, metadatas)
+            kwargs = {"embeddings": embeddings, "metadatas": metadatas}
+            if self.partition_names:
+                kwargs["partition_names"] = self.partition_names
+            if self.replica_number:
+                kwargs["replica_number"] = self.replica_number
+            if self.timeout:
+                kwargs["timeout"] = self.timeout
+            self._init(**kwargs)
 
         # Dict to hold all insert columns
         insert_dict: dict[str, list] = {