From e41b382e1c4ed24e25eb07f196370cb2f201d740 Mon Sep 17 00:00:00 2001 From: 0xcha05 <103983696+0xcha05@users.noreply.github.com> Date: Mon, 3 Jul 2023 00:16:19 +0530 Subject: [PATCH] Added filter and delete all option to delete function in Pinecone integration, updated base VectorStore's delete function (#6876) ### Description: Updated the delete function in the Pinecone integration to allow for deletion of vectors by specifying a filter condition, and to delete all vectors in a namespace. Made the ids parameter optional in the delete function in the base VectorStore class and allowed for additional keyword arguments. Updated the delete function in several classes (Redis, Chroma, Supabase, Deeplake, Elastic, Weaviate, and Cassandra) to match the changes made in the base VectorStore class. This involved making the ids parameter optional and allowing for additional keyword arguments. --- langchain/vectorstores/base.py | 9 +++--- langchain/vectorstores/cassandra.py | 9 ++++-- langchain/vectorstores/chroma.py | 2 +- langchain/vectorstores/deeplake.py | 23 +++++--------- .../vectorstores/elastic_vector_search.py | 5 ++- langchain/vectorstores/pinecone.py | 31 ++++++++++++++----- langchain/vectorstores/redis.py | 2 +- langchain/vectorstores/supabase.py | 6 +++- langchain/vectorstores/weaviate.py | 5 ++- 9 files changed, 58 insertions(+), 34 deletions(-) diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index 1e574af5d32..0328a3bcbfb 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -53,20 +53,19 @@ class VectorStore(ABC): List of ids from adding the texts into the vectorstore. """ - def delete(self, ids: List[str]) -> Optional[bool]: - """Delete by vector ID. + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + """Delete by vector ID or other criteria. Args: ids: List of ids to delete. + **kwargs: Other keyword arguments that subclasses might use. Returns: Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """ - raise NotImplementedError( - "delete_by_id method must be implemented by subclass." - ) + raise NotImplementedError("delete method must be implemented by subclass.") async def aadd_texts( self, diff --git a/langchain/vectorstores/cassandra.py b/langchain/vectorstores/cassandra.py index 68bbf2850af..6b752c76aa4 100644 --- a/langchain/vectorstores/cassandra.py +++ b/langchain/vectorstores/cassandra.py @@ -91,8 +91,9 @@ class Cassandra(VectorStore): def delete_by_document_id(self, document_id: str) -> None: return self.table.delete(document_id) - def delete(self, ids: List[str]) -> Optional[bool]: - """Delete by vector ID. + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + """Delete by vector IDs. + Args: ids: List of ids to delete. @@ -101,6 +102,10 @@ class Cassandra(VectorStore): Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """ + + if ids is None: + raise ValueError("No ids provided to delete.") + for document_id in ids: self.delete_by_document_id(document_id) return True diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py index 394a6026fac..6ca60def7dc 100644 --- a/langchain/vectorstores/chroma.py +++ b/langchain/vectorstores/chroma.py @@ -470,7 +470,7 @@ class Chroma(VectorStore): client=client, ) - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py index 95210059253..5200898cb09 100644 --- a/langchain/vectorstores/deeplake.py +++ b/langchain/vectorstores/deeplake.py @@ -744,30 +744,23 @@ class DeepLake(VectorStore): ) return deeplake_dataset - def delete( - self, - ids: Any[List[str], None] = None, - filter: Any[Dict[str, str], None] = None, - delete_all: Any[bool, None] = None, - ) -> bool: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> bool: """Delete the entities in the dataset. Args: ids (Optional[List[str]], optional): The document_ids to delete. Defaults to None. - filter (Optional[Dict[str, str]], optional): The filter to delete by. - Defaults to None. - delete_all (Optional[bool], optional): Whether to drop the dataset. - Defaults to None. + **kwargs: Other keyword arguments that subclasses might use. + - filter (Optional[Dict[str, str]], optional): The filter to delete by. + - delete_all (Optional[bool], optional): Whether to drop the dataset. Returns: bool: Whether the delete operation was successful. """ - self.vectorstore.delete( - ids=ids, - filter=filter, - delete_all=delete_all, - ) + filter = kwargs.get("filter") + delete_all = kwargs.get("delete_all") + + self.vectorstore.delete(ids=ids, filter=filter, delete_all=delete_all) return True diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index 8d453538f8a..ac38d37c2e4 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -317,13 +317,16 @@ class ElasticVectorSearch(VectorStore, ABC): ) return response - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + if ids is None: + raise ValueError("No ids provided to delete.") + # TODO: Check if this can be done in bulk for id in ids: self.client.delete(index=self.index_name, id=id) diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py index 3d4e12c6b26..552fa2ef1ee 100644 --- a/langchain/vectorstores/pinecone.py +++ b/langchain/vectorstores/pinecone.py @@ -354,16 +354,33 @@ class Pinecone(VectorStore): pinecone.Index(index_name), embedding.embed_query, text_key, namespace ) - def delete(self, ids: List[str], namespace: Optional[str] = None) -> None: - """Delete by vector IDs. + def delete( + self, + ids: Optional[List[str]] = None, + delete_all: Optional[bool] = None, + namespace: Optional[str] = None, + filter: Optional[dict] = None, + **kwargs: Any, + ) -> None: + """Delete by vector IDs or filter. Args: ids: List of ids to delete. + filter: Dictionary of conditions to filter vectors to delete. """ - # This is the maximum number of IDs that can be deleted if namespace is None: namespace = self._namespace - chunk_size = 1000 - for i in range(0, len(ids), chunk_size): - chunk = ids[i : i + chunk_size] - self._index.delete(ids=chunk, namespace=namespace) + + if delete_all: + self._index.delete(delete_all=True, namespace=namespace, **kwargs) + elif ids is not None: + chunk_size = 1000 + for i in range(0, len(ids), chunk_size): + chunk = ids[i : i + chunk_size] + self._index.delete(ids=chunk, namespace=namespace, **kwargs) + elif filter is not None: + self._index.delete(filter=filter, namespace=namespace, **kwargs) + else: + raise ValueError("Either ids, delete_all, or filter must be provided.") + + return None diff --git a/langchain/vectorstores/redis.py b/langchain/vectorstores/redis.py index 31c95e1f98d..10bda5123b7 100644 --- a/langchain/vectorstores/redis.py +++ b/langchain/vectorstores/redis.py @@ -469,7 +469,7 @@ class Redis(VectorStore): @staticmethod def delete( - ids: List[str], + ids: Optional[List[str]] = None, **kwargs: Any, ) -> bool: """ diff --git a/langchain/vectorstores/supabase.py b/langchain/vectorstores/supabase.py index 94a2c199c9d..063bcd376c5 100644 --- a/langchain/vectorstores/supabase.py +++ b/langchain/vectorstores/supabase.py @@ -346,12 +346,16 @@ class SupabaseVectorStore(VectorStore): ) return docs - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + + if ids is None: + raise ValueError("No ids provided to delete.") + rows: List[dict[str, Any]] = [ { "id": id, diff --git a/langchain/vectorstores/weaviate.py b/langchain/vectorstores/weaviate.py index 10d14f3d085..f623082e415 100644 --- a/langchain/vectorstores/weaviate.py +++ b/langchain/vectorstores/weaviate.py @@ -470,13 +470,16 @@ class Weaviate(VectorStore): by_text=by_text, ) - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + if ids is None: + raise ValueError("No ids provided to delete.") + # TODO: Check if this can be done in bulk for id in ids: self._client.data_object.delete(uuid=id)