mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-07 20:15:40 +00:00
core[patch]: stop deleting records with "scoped_full" when doc is empty (#30520)
Fix a bug that causes `scoped_full` in index to delete records when there are no input docs.
This commit is contained in:
parent
b28a474e79
commit
956b09f468
@ -473,7 +473,9 @@ def index(
|
|||||||
record_manager.delete_keys(uids_to_delete)
|
record_manager.delete_keys(uids_to_delete)
|
||||||
num_deleted += len(uids_to_delete)
|
num_deleted += len(uids_to_delete)
|
||||||
|
|
||||||
if cleanup == "full" or cleanup == "scoped_full":
|
if cleanup == "full" or (
|
||||||
|
cleanup == "scoped_full" and scoped_full_cleanup_source_ids
|
||||||
|
):
|
||||||
delete_group_ids: Optional[Sequence[str]] = None
|
delete_group_ids: Optional[Sequence[str]] = None
|
||||||
if cleanup == "scoped_full":
|
if cleanup == "scoped_full":
|
||||||
delete_group_ids = list(scoped_full_cleanup_source_ids)
|
delete_group_ids = list(scoped_full_cleanup_source_ids)
|
||||||
@ -786,7 +788,9 @@ async def aindex(
|
|||||||
await record_manager.adelete_keys(uids_to_delete)
|
await record_manager.adelete_keys(uids_to_delete)
|
||||||
num_deleted += len(uids_to_delete)
|
num_deleted += len(uids_to_delete)
|
||||||
|
|
||||||
if cleanup == "full" or cleanup == "scoped_full":
|
if cleanup == "full" or (
|
||||||
|
cleanup == "scoped_full" and scoped_full_cleanup_source_ids
|
||||||
|
):
|
||||||
delete_group_ids: Optional[Sequence[str]] = None
|
delete_group_ids: Optional[Sequence[str]] = None
|
||||||
if cleanup == "scoped_full":
|
if cleanup == "scoped_full":
|
||||||
delete_group_ids = list(scoped_full_cleanup_source_ids)
|
delete_group_ids = list(scoped_full_cleanup_source_ids)
|
||||||
|
@ -822,6 +822,158 @@ async def test_ascoped_full_fails_with_bad_source_ids(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_index_empty_doc_scoped_full(
    record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
) -> None:
    """Check that ``scoped_full`` cleanup deletes nothing for an empty loader.

    Three indexing runs are made with ``cleanup="scoped_full"`` and
    ``source_id_key="source"``, each under a patched ``get_time``:

    1. Four documents (three from source "1", one from source "2") are
       indexed and reported as added.
    2. The same loader is indexed again and all four are reported as skipped.
    3. A loader with no documents is indexed; every counter, including
       ``num_deleted``, is expected to be zero.
    """
    source_one_texts = (
        "This is a test document.",
        "This is another document.",
        "This is yet another document.",
    )
    seeded_docs = [
        *(
            Document(page_content=text, metadata={"source": "1"})
            for text in source_one_texts
        ),
        Document(
            page_content="This is a test document from another source.",
            metadata={"source": "2"},
        ),
    ]
    populated_loader = ToyLoader(documents=seeded_docs)

    # Run 1: nothing is indexed yet, so all four documents are added.
    first_run_time = datetime(2021, 1, 1).timestamp()
    with patch.object(record_manager, "get_time", return_value=first_run_time):
        first_outcome = index(
            populated_loader,
            record_manager,
            vector_store,
            cleanup="scoped_full",
            source_id_key="source",
        )
        assert first_outcome == {
            "num_added": 4,
            "num_deleted": 0,
            "num_skipped": 0,
            "num_updated": 0,
        }

    # Run 2: identical input one day later, so every document is skipped.
    second_run_time = datetime(2021, 1, 2).timestamp()
    with patch.object(record_manager, "get_time", return_value=second_run_time):
        second_outcome = index(
            populated_loader,
            record_manager,
            vector_store,
            cleanup="scoped_full",
            source_id_key="source",
        )
        assert second_outcome == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 4,
            "num_updated": 0,
        }

    empty_loader = ToyLoader(documents=[])

    # Run 3: no input documents at all; nothing may be deleted.
    third_run_time = datetime(2021, 1, 3).timestamp()
    with patch.object(record_manager, "get_time", return_value=third_run_time):
        third_outcome = index(
            empty_loader,
            record_manager,
            vector_store,
            cleanup="scoped_full",
            source_id_key="source",
        )
        assert third_outcome == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 0,
            "num_updated": 0,
        }
|
||||||
|
|
||||||
|
|
||||||
|
async def test_aindex_empty_doc_scoped_full(
    arecord_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
) -> None:
    """Check that async ``scoped_full`` cleanup deletes nothing for an empty loader.

    Three ``aindex`` runs are made with ``cleanup="scoped_full"`` and
    ``source_id_key="source"``, each under a patched ``get_time``:

    1. Four documents (three from source "1", one from source "2") are
       indexed and reported as added.
    2. The same loader is indexed again and all four are reported as skipped.
    3. A loader with no documents is indexed; every counter, including
       ``num_deleted``, is expected to be zero.
    """
    source_one_texts = (
        "This is a test document.",
        "This is another document.",
        "This is yet another document.",
    )
    seeded_docs = [
        *(
            Document(page_content=text, metadata={"source": "1"})
            for text in source_one_texts
        ),
        Document(
            page_content="This is a test document from another source.",
            metadata={"source": "2"},
        ),
    ]
    populated_loader = ToyLoader(documents=seeded_docs)

    # Run 1: nothing is indexed yet, so all four documents are added.
    first_run_time = datetime(2021, 1, 1).timestamp()
    with patch.object(arecord_manager, "get_time", return_value=first_run_time):
        first_outcome = await aindex(
            populated_loader,
            arecord_manager,
            vector_store,
            cleanup="scoped_full",
            source_id_key="source",
        )
        assert first_outcome == {
            "num_added": 4,
            "num_deleted": 0,
            "num_skipped": 0,
            "num_updated": 0,
        }

    # Run 2: identical input one day later, so every document is skipped.
    second_run_time = datetime(2021, 1, 2).timestamp()
    with patch.object(arecord_manager, "get_time", return_value=second_run_time):
        second_outcome = await aindex(
            populated_loader,
            arecord_manager,
            vector_store,
            cleanup="scoped_full",
            source_id_key="source",
        )
        assert second_outcome == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 4,
            "num_updated": 0,
        }

    empty_loader = ToyLoader(documents=[])

    # Run 3: no input documents at all; nothing may be deleted.
    third_run_time = datetime(2021, 1, 3).timestamp()
    with patch.object(arecord_manager, "get_time", return_value=third_run_time):
        third_outcome = await aindex(
            empty_loader,
            arecord_manager,
            vector_store,
            cleanup="scoped_full",
            source_id_key="source",
        )
        assert third_outcome == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 0,
            "num_updated": 0,
        }
|
||||||
|
|
||||||
|
|
||||||
def test_no_delete(
|
def test_no_delete(
|
||||||
record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
|
record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
|
||||||
) -> None:
|
) -> None:
|
||||||
|
Loading…
Reference in New Issue
Block a user