From 3ced563a2cfe58e10b6fd5858bd86dbdf761bbf0 Mon Sep 17 00:00:00 2001
From: aries_ckt <916701291@qq.com>
Date: Wed, 6 Sep 2023 00:34:13 +0800
Subject: [PATCH] feat:Milvus support collection delete and delete by ids

Close #534
---
Review notes (text between the three-dash marker and the first "diff --git"
line is commentary and is not part of the commit):

  Line breaks were restored from a whitespace-collapsed copy of this patch,
  so the indentation of context lines is inferred; check it against the tree
  before applying. The "index" blob ids are those of the original patch and
  do not reflect the review changes listed here.

  Changes relative to the original patch:
  * delete_by_ids: the description string is now the first statement of the
    body, so it is a real docstring instead of a no-op expression.
  * delete_by_ids: blank items in `ids` are skipped, so "" or a trailing
    comma no longer raises ValueError from int("").
  * delete_by_ids: local `delet_expr` renamed to `delete_expr`; the
    placeholder-free f-string is now a plain string (same text).
  * first milvus_store.py hunk: dropped the commented-out
    `# return self.collection_name` line.
  * docstrings on the new methods reworded.

 pilot/vector_store/base.py         | 10 ++++++++++
 pilot/vector_store/milvus_store.py | 35 ++++++++++++++++++++++++++++-------
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/pilot/vector_store/base.py b/pilot/vector_store/base.py
index 0108e06b1..74cd2f98c 100644
--- a/pilot/vector_store/base.py
+++ b/pilot/vector_store/base.py
@@ -18,3 +18,13 @@ class VectorStoreBase(ABC):
     def vector_name_exists(self, text, topk) -> None:
         """is vector store name exist."""
         pass
+
+    @abstractmethod
+    def delete_by_ids(self, ids):
+        """Delete vectors by their ids."""
+        pass
+
+    @abstractmethod
+    def delete_vector_name(self, vector_name):
+        """Delete the vector store identified by vector_name."""
+        pass
diff --git a/pilot/vector_store/milvus_store.py b/pilot/vector_store/milvus_store.py
index 0f1ac7e26..1eb08b1e4 100644
--- a/pilot/vector_store/milvus_store.py
+++ b/pilot/vector_store/milvus_store.py
@@ -115,8 +115,7 @@ class MilvusStore(VectorStoreBase):
                     or x.dtype == DataType.BINARY_VECTOR
                 ):
                     self.vector_field = x.name
-            self._add_documents(texts, metadatas)
-            return self.collection_name
+            return self._add_documents(texts, metadatas)
 
         dim = len(embeddings)
         # Generate unique names
@@ -154,9 +153,9 @@ class MilvusStore(VectorStoreBase):
                 self.primary_field = x.name
             if x.dtype == DataType.FLOAT_VECTOR or x.dtype == DataType.BINARY_VECTOR:
                 self.vector_field = x.name
-        self._add_documents(texts, metadatas)
+        ids = self._add_documents(texts, metadatas)
 
-        return self.collection_name
+        return ids
 
     # def init_schema(self) -> None:
     #     """Initialize collection in milvus database."""
@@ -251,9 +250,11 @@ class MilvusStore(VectorStoreBase):
         batched_list = [
             documents[i : i + batch_size] for i in range(0, len(documents), batch_size)
         ]
-        # docs = []
+        doc_ids = []
         for doc_batch in batched_list:
-            self.init_schema_and_load(self.collection_name, doc_batch)
+            doc_ids.extend(self.init_schema_and_load(self.collection_name, doc_batch))
+        doc_ids = [str(doc_id) for doc_id in doc_ids]
+        return doc_ids
 
     def similar_search(self, text, topk) -> None:
         """similar_search in vector database."""
@@ -324,8 +325,28 @@ class MilvusStore(VectorStoreBase):
         return utility.has_collection(self.collection_name)
 
     def delete_vector_name(self, vector_name):
+        """Drop the Milvus collection named vector_name."""
         logger.info(f"milvus vector_name:{vector_name} begin delete...")
-        self.vector_store_client.drop()
+        utility.drop_collection(vector_name)
+        return True
+
+    def delete_by_ids(self, ids):
+        """Delete vectors from the current collection by primary key.
+
+        Args:
+            ids: comma-separated primary-key values, e.g. "1,2,3".
+
+        Returns:
+            True once the delete request has been issued (or nothing to do).
+        """
+        logger.info("begin delete milvus ids...")
+        # Skip blank items so "" or a trailing comma never reaches int("").
+        doc_ids = [int(doc_id) for doc_id in ids.split(",") if doc_id.strip()]
+        if not doc_ids:
+            return True
+        self.col = Collection(self.collection_name)
+        delete_expr = f"{self.primary_field} in {doc_ids}"
+        self.col.delete(delete_expr)
         return True
 
     def close(self):