feat: Milvus support for collection delete and delete by ids

Close #534
This commit is contained in:
aries_ckt 2023-09-06 00:34:13 +08:00
parent c14ea35637
commit 3ced563a2c
2 changed files with 30 additions and 7 deletions

View File

@ -18,3 +18,13 @@ class VectorStoreBase(ABC):
def vector_name_exists(self, text, topk) -> None:
"""Check whether the vector store name exists.

The body is empty here; subclasses presumably supply the real check.
NOTE(review): the (text, topk) parameters and the None return
annotation look like they belong to a search method rather than an
existence check -- confirm the intended signature.
"""
pass
@abstractmethod
def delete_by_ids(self, ids):
"""Delete vectors by their ids.

Abstract method: it has no behavior of its own and must be overridden.
The format of ``ids`` is not fixed at this level; the MilvusStore
implementation in this change splits it as a comma-separated string.
"""
pass
@abstractmethod
def delete_vector_name(self, vector_name):
"""Delete the vector store entry identified by ``vector_name``.

Abstract method: it has no behavior of its own and must be overridden.
In the MilvusStore implementation in this change, the collection with
that name is dropped.
"""
pass

View File

@ -115,8 +115,8 @@ class MilvusStore(VectorStoreBase):
or x.dtype == DataType.BINARY_VECTOR
):
self.vector_field = x.name
self._add_documents(texts, metadatas)
return self.collection_name
return self._add_documents(texts, metadatas)
# return self.collection_name
dim = len(embeddings)
# Generate unique names
@ -154,9 +154,9 @@ class MilvusStore(VectorStoreBase):
self.primary_field = x.name
if x.dtype == DataType.FLOAT_VECTOR or x.dtype == DataType.BINARY_VECTOR:
self.vector_field = x.name
self._add_documents(texts, metadatas)
ids = self._add_documents(texts, metadatas)
return self.collection_name
return ids
# def init_schema(self) -> None:
# """Initialize collection in milvus database."""
@ -251,9 +251,11 @@ class MilvusStore(VectorStoreBase):
batched_list = [
documents[i : i + batch_size] for i in range(0, len(documents), batch_size)
]
# docs = []
doc_ids = []
for doc_batch in batched_list:
self.init_schema_and_load(self.collection_name, doc_batch)
doc_ids.extend(self.init_schema_and_load(self.collection_name, doc_batch))
doc_ids = [str(doc_id) for doc_id in doc_ids]
return doc_ids
def similar_search(self, text, topk) -> None:
"""similar_search in vector database."""
@ -324,8 +326,19 @@ class MilvusStore(VectorStoreBase):
return utility.has_collection(self.collection_name)
def delete_vector_name(self, vector_name):
"""Delete the Milvus collection named ``vector_name``.

Logs the request, drops the collection and returns True.
"""
logger.info(f"milvus vector_name:{vector_name} begin delete...")
# NOTE(review): this diff view lists both drop calls below without +/-
# markers; presumably one of them is the line this commit removed --
# confirm against the repository before relying on both being executed.
self.vector_store_client.drop()
utility.drop_collection(vector_name)
return True
def delete_by_ids(self, ids):
    """Delete vectors from the Milvus collection by primary-key ids.

    Args:
        ids: Either a comma-separated string of integer ids
            (e.g. ``"1,2,3"``) or an iterable of ids (ints or numeric
            strings). Blank entries such as those produced by a trailing
            comma are ignored. ``None`` is treated as "no ids".

    Returns:
        True once the delete request has been issued, or immediately when
        there is nothing to delete.

    Raises:
        ValueError: If a non-blank id cannot be converted to ``int``.
    """
    if ids is None:
        raw_ids = []
    elif isinstance(ids, str):
        raw_ids = ids.split(",")
    else:
        raw_ids = ids

    # Skip blank tokens so "" or "1,2," does not crash on int("").
    doc_ids = [int(raw_id) for raw_id in raw_ids if str(raw_id).strip()]
    if not doc_ids:
        # Nothing to delete; avoid sending an empty "in []" expression.
        return True

    logger.info("begin delete milvus ids...")
    self.col = Collection(self.collection_name)
    # The list repr "[1, 2, 3]" is the literal form used in the "in" filter.
    delete_expr = f"{self.primary_field} in {doc_ids}"
    self.col.delete(delete_expr)
    return True
def close(self):