diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index 1e574af5d32..0328a3bcbfb 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -53,20 +53,19 @@ class VectorStore(ABC): List of ids from adding the texts into the vectorstore. """ - def delete(self, ids: List[str]) -> Optional[bool]: - """Delete by vector ID. + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + """Delete by vector ID or other criteria. Args: ids: List of ids to delete. + **kwargs: Other keyword arguments that subclasses might use. Returns: Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """ - raise NotImplementedError( - "delete_by_id method must be implemented by subclass." - ) + raise NotImplementedError("delete method must be implemented by subclass.") async def aadd_texts( self, diff --git a/langchain/vectorstores/cassandra.py b/langchain/vectorstores/cassandra.py index 68bbf2850af..6b752c76aa4 100644 --- a/langchain/vectorstores/cassandra.py +++ b/langchain/vectorstores/cassandra.py @@ -91,8 +91,9 @@ class Cassandra(VectorStore): def delete_by_document_id(self, document_id: str) -> None: return self.table.delete(document_id) - def delete(self, ids: List[str]) -> Optional[bool]: - """Delete by vector ID. + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + """Delete by vector IDs. + Args: ids: List of ids to delete. @@ -101,6 +102,10 @@ class Cassandra(VectorStore): Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """ + + if ids is None: + raise ValueError("No ids provided to delete.") + for document_id in ids: self.delete_by_document_id(document_id) return True diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py index 394a6026fac..6ca60def7dc 100644 --- a/langchain/vectorstores/chroma.py +++ b/langchain/vectorstores/chroma.py @@ -470,7 +470,7 @@ class Chroma(VectorStore): client=client, ) - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py index 95210059253..5200898cb09 100644 --- a/langchain/vectorstores/deeplake.py +++ b/langchain/vectorstores/deeplake.py @@ -744,30 +744,23 @@ class DeepLake(VectorStore): ) return deeplake_dataset - def delete( - self, - ids: Any[List[str], None] = None, - filter: Any[Dict[str, str], None] = None, - delete_all: Any[bool, None] = None, - ) -> bool: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> bool: """Delete the entities in the dataset. Args: ids (Optional[List[str]], optional): The document_ids to delete. Defaults to None. - filter (Optional[Dict[str, str]], optional): The filter to delete by. - Defaults to None. - delete_all (Optional[bool], optional): Whether to drop the dataset. - Defaults to None. + **kwargs: Other keyword arguments that subclasses might use. + - filter (Optional[Dict[str, str]], optional): The filter to delete by. + - delete_all (Optional[bool], optional): Whether to drop the dataset. Returns: bool: Whether the delete operation was successful. """ - self.vectorstore.delete( - ids=ids, - filter=filter, - delete_all=delete_all, - ) + filter = kwargs.get("filter") + delete_all = kwargs.get("delete_all") + + self.vectorstore.delete(ids=ids, filter=filter, delete_all=delete_all) return True diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index 8d453538f8a..ac38d37c2e4 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -317,13 +317,16 @@ class ElasticVectorSearch(VectorStore, ABC): ) return response - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + if ids is None: + raise ValueError("No ids provided to delete.") + # TODO: Check if this can be done in bulk for id in ids: self.client.delete(index=self.index_name, id=id) diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py index 3d4e12c6b26..552fa2ef1ee 100644 --- a/langchain/vectorstores/pinecone.py +++ b/langchain/vectorstores/pinecone.py @@ -354,16 +354,33 @@ class Pinecone(VectorStore): pinecone.Index(index_name), embedding.embed_query, text_key, namespace ) - def delete(self, ids: List[str], namespace: Optional[str] = None) -> None: - """Delete by vector IDs. + def delete( + self, + ids: Optional[List[str]] = None, + delete_all: Optional[bool] = None, + namespace: Optional[str] = None, + filter: Optional[dict] = None, + **kwargs: Any, + ) -> None: + """Delete by vector IDs or filter. Args: ids: List of ids to delete. + filter: Dictionary of conditions to filter vectors to delete. """ - # This is the maximum number of IDs that can be deleted if namespace is None: namespace = self._namespace - chunk_size = 1000 - for i in range(0, len(ids), chunk_size): - chunk = ids[i : i + chunk_size] - self._index.delete(ids=chunk, namespace=namespace) + + if delete_all: + self._index.delete(delete_all=True, namespace=namespace, **kwargs) + elif ids is not None: + chunk_size = 1000 + for i in range(0, len(ids), chunk_size): + chunk = ids[i : i + chunk_size] + self._index.delete(ids=chunk, namespace=namespace, **kwargs) + elif filter is not None: + self._index.delete(filter=filter, namespace=namespace, **kwargs) + else: + raise ValueError("Either ids, delete_all, or filter must be provided.") + + return None diff --git a/langchain/vectorstores/redis.py b/langchain/vectorstores/redis.py index 31c95e1f98d..10bda5123b7 100644 --- a/langchain/vectorstores/redis.py +++ b/langchain/vectorstores/redis.py @@ -469,7 +469,7 @@ class Redis(VectorStore): @staticmethod def delete( - ids: List[str], + ids: Optional[List[str]] = None, **kwargs: Any, ) -> bool: """ diff --git a/langchain/vectorstores/supabase.py b/langchain/vectorstores/supabase.py index 94a2c199c9d..063bcd376c5 100644 --- a/langchain/vectorstores/supabase.py +++ b/langchain/vectorstores/supabase.py @@ -346,12 +346,16 @@ class SupabaseVectorStore(VectorStore): ) return docs - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + + if ids is None: + raise ValueError("No ids provided to delete.") + rows: List[dict[str, Any]] = [ { "id": id, diff --git a/langchain/vectorstores/weaviate.py b/langchain/vectorstores/weaviate.py index 10d14f3d085..f623082e415 100644 --- a/langchain/vectorstores/weaviate.py +++ b/langchain/vectorstores/weaviate.py @@ -470,13 +470,16 @@ class Weaviate(VectorStore): by_text=by_text, ) - def delete(self, ids: List[str]) -> None: + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: """Delete by vector IDs. Args: ids: List of ids to delete. """ + if ids is None: + raise ValueError("No ids provided to delete.") + # TODO: Check if this can be done in bulk for id in ids: self._client.data_object.delete(uuid=id)