mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-09 15:03:21 +00:00
I reported this bug two weeks ago (see https://github.com/langchain-ai/langchain/issues/28447). I believe it is a critical bug for the indexer, so I have submitted a PR that reverts the change and adds unit tests to prevent similar bugs from being introduced in the future. @eyurtsev, could you check this?
This commit is contained in:
@@ -408,18 +408,17 @@ def index(
|
||||
|
||||
# mypy isn't good enough to determine that source ids cannot be None
|
||||
# here due to a check that's happening above, so we check again.
|
||||
if any(source_id is None for source_id in source_ids):
|
||||
msg = "Source ids cannot be if cleanup=='incremental'."
|
||||
raise AssertionError(msg)
|
||||
for source_id in source_ids:
|
||||
if source_id is None:
|
||||
msg = "Source ids cannot be None here."
|
||||
raise AssertionError(msg)
|
||||
|
||||
indexed_source_ids = cast(
|
||||
Sequence[str], [source_id_assigner(doc) for doc in docs_to_index]
|
||||
)
|
||||
_source_ids = cast(Sequence[str], source_ids)
|
||||
|
||||
uids_to_delete = record_manager.list_keys(
|
||||
group_ids=indexed_source_ids, before=index_start_dt
|
||||
group_ids=_source_ids, before=index_start_dt
|
||||
)
|
||||
if indexed_source_ids and uids_to_delete:
|
||||
if uids_to_delete:
|
||||
# Then delete from vector store.
|
||||
destination.delete(uids_to_delete)
|
||||
# First delete from record store.
|
||||
@@ -669,18 +668,17 @@ async def aindex(
|
||||
|
||||
# mypy isn't good enough to determine that source ids cannot be None
|
||||
# here due to a check that's happening above, so we check again.
|
||||
if any(source_id is None for source_id in source_ids):
|
||||
msg = "Source ids cannot be if cleanup=='incremental'."
|
||||
raise AssertionError(msg)
|
||||
for source_id in source_ids:
|
||||
if source_id is None:
|
||||
msg = "Source ids cannot be None here."
|
||||
raise AssertionError(msg)
|
||||
|
||||
indexed_source_ids = cast(
|
||||
Sequence[str], [source_id_assigner(doc) for doc in docs_to_index]
|
||||
)
|
||||
_source_ids = cast(Sequence[str], source_ids)
|
||||
|
||||
uids_to_delete = await record_manager.alist_keys(
|
||||
group_ids=indexed_source_ids, before=index_start_dt
|
||||
group_ids=_source_ids, before=index_start_dt
|
||||
)
|
||||
if indexed_source_ids and uids_to_delete:
|
||||
if uids_to_delete:
|
||||
# Then delete from vector store.
|
||||
await destination.adelete(uids_to_delete)
|
||||
# First delete from record store.
|
||||
|
Reference in New Issue
Block a user