mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-24 20:09:01 +00:00
core[patch]: throw exception indexing code if deletion fails in vectorstore (#28103)
The delete methods in the VectorStore and DocumentIndex interfaces return a status indicating the result. Therefore, we can assume that their implementations don't throw exceptions but instead return a result indicating whether the delete operations have failed. The current implementation doesn't check the returned value, so I modified it to throw an exception when the operation fails. --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
@@ -22,6 +22,7 @@ from pydantic import model_validator
|
||||
|
||||
from langchain_core.document_loaders.base import BaseLoader
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.exceptions import LangChainException
|
||||
from langchain_core.indexing.base import DocumentIndex, RecordManager
|
||||
from langchain_core.vectorstores import VectorStore
|
||||
|
||||
@@ -175,6 +176,32 @@ def _deduplicate_in_order(
|
||||
yield hashed_doc
|
||||
|
||||
|
||||
class IndexingException(LangChainException):
|
||||
"""Raised when an indexing operation fails."""
|
||||
|
||||
|
||||
def _delete(
|
||||
vector_store: Union[VectorStore, DocumentIndex],
|
||||
ids: list[str],
|
||||
) -> None:
|
||||
if isinstance(vector_store, VectorStore):
|
||||
delete_ok = vector_store.delete(ids)
|
||||
if delete_ok is not None and delete_ok is False:
|
||||
msg = "The delete operation to VectorStore failed."
|
||||
raise IndexingException(msg)
|
||||
elif isinstance(vector_store, DocumentIndex):
|
||||
delete_response = vector_store.delete(ids)
|
||||
if "num_failed" in delete_response and delete_response["num_failed"] > 0:
|
||||
msg = "The delete operation to DocumentIndex failed."
|
||||
raise IndexingException(msg)
|
||||
else:
|
||||
msg = (
|
||||
f"Vectorstore should be either a VectorStore or a DocumentIndex. "
|
||||
f"Got {type(vector_store)}."
|
||||
)
|
||||
raise TypeError(msg)
|
||||
|
||||
|
||||
# PUBLIC API
|
||||
|
||||
|
||||
@@ -441,7 +468,7 @@ def index(
|
||||
)
|
||||
if uids_to_delete:
|
||||
# Then delete from vector store.
|
||||
destination.delete(uids_to_delete)
|
||||
_delete(destination, uids_to_delete)
|
||||
# First delete from record store.
|
||||
record_manager.delete_keys(uids_to_delete)
|
||||
num_deleted += len(uids_to_delete)
|
||||
@@ -454,7 +481,7 @@ def index(
|
||||
group_ids=delete_group_ids, before=index_start_dt, limit=cleanup_batch_size
|
||||
):
|
||||
# First delete from record store.
|
||||
destination.delete(uids_to_delete)
|
||||
_delete(destination, uids_to_delete)
|
||||
# Then delete from record manager.
|
||||
record_manager.delete_keys(uids_to_delete)
|
||||
num_deleted += len(uids_to_delete)
|
||||
@@ -474,6 +501,28 @@ async def _to_async_iterator(iterator: Iterable[T]) -> AsyncIterator[T]:
|
||||
yield item
|
||||
|
||||
|
||||
async def _adelete(
|
||||
vector_store: Union[VectorStore, DocumentIndex],
|
||||
ids: list[str],
|
||||
) -> None:
|
||||
if isinstance(vector_store, VectorStore):
|
||||
delete_ok = await vector_store.adelete(ids)
|
||||
if delete_ok is not None and delete_ok is False:
|
||||
msg = "The delete operation to VectorStore failed."
|
||||
raise IndexingException(msg)
|
||||
elif isinstance(vector_store, DocumentIndex):
|
||||
delete_response = await vector_store.adelete(ids)
|
||||
if "num_failed" in delete_response and delete_response["num_failed"] > 0:
|
||||
msg = "The delete operation to DocumentIndex failed."
|
||||
raise IndexingException(msg)
|
||||
else:
|
||||
msg = (
|
||||
f"Vectorstore should be either a VectorStore or a DocumentIndex. "
|
||||
f"Got {type(vector_store)}."
|
||||
)
|
||||
raise TypeError(msg)
|
||||
|
||||
|
||||
async def aindex(
|
||||
docs_source: Union[BaseLoader, Iterable[Document], AsyncIterator[Document]],
|
||||
record_manager: RecordManager,
|
||||
@@ -733,7 +782,7 @@ async def aindex(
|
||||
)
|
||||
if uids_to_delete:
|
||||
# Then delete from vector store.
|
||||
await destination.adelete(uids_to_delete)
|
||||
await _adelete(destination, uids_to_delete)
|
||||
# First delete from record store.
|
||||
await record_manager.adelete_keys(uids_to_delete)
|
||||
num_deleted += len(uids_to_delete)
|
||||
@@ -746,7 +795,7 @@ async def aindex(
|
||||
group_ids=delete_group_ids, before=index_start_dt, limit=cleanup_batch_size
|
||||
):
|
||||
# First delete from record store.
|
||||
await destination.adelete(uids_to_delete)
|
||||
await _adelete(destination, uids_to_delete)
|
||||
# Then delete from record manager.
|
||||
await record_manager.adelete_keys(uids_to_delete)
|
||||
num_deleted += len(uids_to_delete)
|
||||
|
Reference in New Issue
Block a user