test(core): use secure hash algorithm in indexing test to eliminate SHA-1 warning (#33107)

Finishes the work started in #33101
This commit is contained in:
Mason Daugherty
2025-09-25 00:49:11 -04:00
committed by GitHub
parent e97baeb9a6
commit c9eb3bdb2d

View File

@@ -86,7 +86,7 @@ def test_indexing_same_content(
]
)
assert index(loader, record_manager, vector_store) == {
assert index(loader, record_manager, vector_store, key_encoder="sha256") == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 0,
@@ -97,7 +97,7 @@ def test_indexing_same_content(
for _ in range(2):
# Run the indexing again
assert index(loader, record_manager, vector_store) == {
assert index(loader, record_manager, vector_store, key_encoder="sha256") == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 2,
@@ -120,7 +120,12 @@ async def test_aindexing_same_content(
]
)
assert await aindex(loader, arecord_manager, vector_store) == {
assert await aindex(
loader,
arecord_manager,
vector_store,
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 0,
@@ -131,7 +136,12 @@ async def test_aindexing_same_content(
for _ in range(2):
# Run the indexing again
assert await aindex(loader, arecord_manager, vector_store) == {
assert await aindex(
loader,
arecord_manager,
vector_store,
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 2,
@@ -159,7 +169,13 @@ def test_index_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 1, tzinfo=timezone.utc).timestamp(),
):
assert index(loader, record_manager, vector_store, cleanup="full") == {
assert index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 0,
@@ -171,7 +187,13 @@ def test_index_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 1, tzinfo=timezone.utc).timestamp(),
):
assert index(loader, record_manager, vector_store, cleanup="full") == {
assert index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 2,
@@ -194,7 +216,13 @@ def test_index_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 2, tzinfo=timezone.utc).timestamp(),
):
indexing_result = index(loader, record_manager, vector_store, cleanup="full")
indexing_result = index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
)
doc_texts = {
# Ignoring type since doc should be in the store and not a None
@@ -216,7 +244,13 @@ def test_index_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 2, tzinfo=timezone.utc).timestamp(),
):
assert index(loader, record_manager, vector_store, cleanup="full") == {
assert index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 2,
@@ -244,7 +278,13 @@ async def test_aindex_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 1, tzinfo=timezone.utc).timestamp(),
):
assert await aindex(loader, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 0,
@@ -256,7 +296,13 @@ async def test_aindex_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 1, tzinfo=timezone.utc).timestamp(),
):
assert await aindex(loader, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 2,
@@ -279,7 +325,13 @@ async def test_aindex_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 2, tzinfo=timezone.utc).timestamp(),
):
assert await aindex(loader, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 1,
"num_deleted": 1,
"num_skipped": 1,
@@ -299,7 +351,13 @@ async def test_aindex_simple_delete_full(
"get_time",
return_value=datetime(2021, 1, 2, tzinfo=timezone.utc).timestamp(),
):
assert await aindex(loader, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 2,
@@ -327,7 +385,13 @@ def test_index_delete_full_recovery_after_deletion_failure(
"get_time",
return_value=datetime(2021, 1, 1, tzinfo=timezone.utc).timestamp(),
):
assert index(loader, record_manager, vector_store, cleanup="full") == {
assert index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 0,
@@ -354,7 +418,13 @@ def test_index_delete_full_recovery_after_deletion_failure(
patch.object(vector_store, "delete", return_value=False),
pytest.raises(IndexingException),
):
indexing_result = index(loader, record_manager, vector_store, cleanup="full")
indexing_result = index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
)
# At this point, there should be 3 records in both the record manager
# and the vector store
@@ -374,7 +444,13 @@ def test_index_delete_full_recovery_after_deletion_failure(
"get_time",
return_value=datetime(2021, 1, 3, tzinfo=timezone.utc).timestamp(),
):
indexing_result = index(loader, record_manager, vector_store, cleanup="full")
indexing_result = index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
)
doc_texts = {
# Ignoring type since doc should be in the store and not a None
vector_store.get_by_ids([uid])[0].page_content
@@ -410,7 +486,13 @@ async def test_aindex_delete_full_recovery_after_deletion_failure(
"get_time",
return_value=datetime(2021, 1, 1, tzinfo=timezone.utc).timestamp(),
):
assert await aindex(loader, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 0,
@@ -438,7 +520,11 @@ async def test_aindex_delete_full_recovery_after_deletion_failure(
pytest.raises(IndexingException),
):
indexing_result = await aindex(
loader, arecord_manager, vector_store, cleanup="full"
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
)
# At this point, there should be 3 records in both the record manager
@@ -460,7 +546,11 @@ async def test_aindex_delete_full_recovery_after_deletion_failure(
return_value=datetime(2021, 1, 3, tzinfo=timezone.utc).timestamp(),
):
indexing_result = await aindex(
loader, arecord_manager, vector_store, cleanup="full"
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
)
doc_texts = {
# Ignoring type since doc should be in the store and not a None
@@ -504,7 +594,13 @@ def test_incremental_fails_with_bad_source_ids(
"incremental or scoped_full",
):
# Should raise an error because no source id function was specified
index(loader, record_manager, vector_store, cleanup="incremental")
index(
loader,
record_manager,
vector_store,
cleanup="incremental",
key_encoder="sha256",
)
with pytest.raises(
ValueError,
@@ -517,6 +613,7 @@ def test_incremental_fails_with_bad_source_ids(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
)
@@ -552,6 +649,7 @@ async def test_aincremental_fails_with_bad_source_ids(
arecord_manager,
vector_store,
cleanup="incremental",
key_encoder="sha256",
)
with pytest.raises(
@@ -565,6 +663,7 @@ async def test_aincremental_fails_with_bad_source_ids(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
)
@@ -604,6 +703,7 @@ def test_index_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 4,
"num_deleted": 0,
@@ -622,6 +722,7 @@ def test_index_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -653,6 +754,7 @@ def test_index_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 1,
"num_deleted": 2,
@@ -682,6 +784,7 @@ def test_index_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -726,6 +829,7 @@ async def test_aindex_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 4,
"num_deleted": 0,
@@ -744,6 +848,7 @@ async def test_aindex_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -775,6 +880,7 @@ async def test_aindex_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 1,
"num_deleted": 2,
@@ -804,6 +910,7 @@ async def test_aindex_simple_delete_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -839,7 +946,13 @@ def test_scoped_full_fails_with_bad_source_ids(
"is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
index(loader, record_manager, vector_store, cleanup="scoped_full")
index(
loader,
record_manager,
vector_store,
cleanup="scoped_full",
key_encoder="sha256",
)
with pytest.raises(
ValueError,
@@ -852,6 +965,7 @@ def test_scoped_full_fails_with_bad_source_ids(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
)
@@ -882,7 +996,13 @@ async def test_ascoped_full_fails_with_bad_source_ids(
"is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
await aindex(loader, arecord_manager, vector_store, cleanup="scoped_full")
await aindex(
loader,
arecord_manager,
vector_store,
cleanup="scoped_full",
key_encoder="sha256",
)
with pytest.raises(
ValueError,
@@ -895,6 +1015,7 @@ async def test_ascoped_full_fails_with_bad_source_ids(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
)
@@ -934,6 +1055,7 @@ def test_index_empty_doc_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 4,
"num_deleted": 0,
@@ -952,6 +1074,7 @@ def test_index_empty_doc_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -972,6 +1095,7 @@ def test_index_empty_doc_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1016,6 +1140,7 @@ async def test_aindex_empty_doc_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 4,
"num_deleted": 0,
@@ -1034,6 +1159,7 @@ async def test_aindex_empty_doc_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1054,6 +1180,7 @@ async def test_aindex_empty_doc_scoped_full(
vector_store,
cleanup="scoped_full",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1090,6 +1217,7 @@ def test_no_delete(
vector_store,
cleanup=None,
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -1109,6 +1237,7 @@ def test_no_delete(
vector_store,
cleanup=None,
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1141,6 +1270,7 @@ def test_no_delete(
vector_store,
cleanup=None,
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 1,
"num_deleted": 0,
@@ -1177,6 +1307,7 @@ async def test_ano_delete(
vector_store,
cleanup=None,
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -1196,6 +1327,7 @@ async def test_ano_delete(
vector_store,
cleanup=None,
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1228,6 +1360,7 @@ async def test_ano_delete(
vector_store,
cleanup=None,
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 1,
"num_deleted": 0,
@@ -1264,6 +1397,7 @@ def test_incremental_delete(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -1290,6 +1424,7 @@ def test_incremental_delete(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1327,6 +1462,7 @@ def test_incremental_delete(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 1,
@@ -1374,6 +1510,7 @@ def test_incremental_delete_with_same_source(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -1409,6 +1546,7 @@ def test_incremental_delete_with_same_source(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 1,
@@ -1463,6 +1601,7 @@ def test_incremental_indexing_with_batch_size(
cleanup="incremental",
source_id_key="source",
batch_size=2,
key_encoder="sha256",
) == {
"num_added": 4,
"num_deleted": 0,
@@ -1489,6 +1628,7 @@ def test_incremental_indexing_with_batch_size(
cleanup="incremental",
source_id_key="source",
batch_size=2,
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 2,
@@ -1541,6 +1681,7 @@ def test_incremental_delete_with_batch_size(
cleanup="incremental",
source_id_key="source",
batch_size=3,
key_encoder="sha256",
) == {
"num_added": 4,
"num_deleted": 0,
@@ -1568,6 +1709,7 @@ def test_incremental_delete_with_batch_size(
cleanup="incremental",
source_id_key="source",
batch_size=3,
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1606,6 +1748,7 @@ def test_incremental_delete_with_batch_size(
cleanup="incremental",
source_id_key="source",
batch_size=1,
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1644,6 +1787,7 @@ def test_incremental_delete_with_batch_size(
cleanup="incremental",
source_id_key="source",
batch_size=1,
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1681,6 +1825,7 @@ def test_incremental_delete_with_batch_size(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 2,
@@ -1724,6 +1869,7 @@ async def test_aincremental_delete(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -1750,6 +1896,7 @@ async def test_aincremental_delete(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -1787,6 +1934,7 @@ async def test_aincremental_delete(
vector_store,
cleanup="incremental",
source_id_key="source",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 1,
@@ -1812,7 +1960,13 @@ def test_indexing_with_no_docs(
"""Check edge case when loader returns no new docs."""
loader = ToyLoader(documents=[])
assert index(loader, record_manager, vector_store, cleanup="full") == {
assert index(
loader,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 0,
@@ -1826,7 +1980,13 @@ async def test_aindexing_with_no_docs(
"""Check edge case when loader returns no new docs."""
loader = ToyLoader(documents=[])
assert await aindex(loader, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
loader,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 0,
@@ -1850,7 +2010,13 @@ def test_deduplication(
]
# Should result in only a single document being added
assert index(docs, record_manager, vector_store, cleanup="full") == {
assert index(
docs,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 1,
"num_deleted": 0,
"num_skipped": 1,
@@ -1874,7 +2040,13 @@ async def test_adeduplication(
]
# Should result in only a single document being added
assert await aindex(docs, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
docs,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 1,
"num_deleted": 0,
"num_skipped": 1,
@@ -1917,6 +2089,7 @@ def test_within_batch_deduplication_counting(
vector_store,
batch_size=10, # All docs in one batch
cleanup="full",
key_encoder="sha256",
)
# Should have 3 unique documents added
@@ -1972,6 +2145,7 @@ async def test_awithin_batch_deduplication_counting(
vector_store,
batch_size=10, # All docs in one batch
cleanup="full",
key_encoder="sha256",
)
# Should have 3 unique documents added
@@ -2004,7 +2178,13 @@ def test_full_cleanup_with_different_batchsize(
for d in range(1000)
]
assert index(docs, record_manager, vector_store, cleanup="full") == {
assert index(
docs,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 1000,
"num_deleted": 0,
"num_skipped": 0,
@@ -2020,7 +2200,12 @@ def test_full_cleanup_with_different_batchsize(
]
assert index(
docs, record_manager, vector_store, cleanup="full", cleanup_batch_size=17
docs,
record_manager,
vector_store,
cleanup="full",
cleanup_batch_size=17,
key_encoder="sha256",
) == {
"num_added": 1001,
"num_deleted": 1000,
@@ -2047,6 +2232,7 @@ def test_incremental_cleanup_with_different_batchsize(
vector_store,
source_id_key="source",
cleanup="incremental",
key_encoder="sha256",
) == {
"num_added": 1000,
"num_deleted": 0,
@@ -2069,6 +2255,7 @@ def test_incremental_cleanup_with_different_batchsize(
source_id_key="source",
cleanup="incremental",
cleanup_batch_size=17,
key_encoder="sha256",
) == {
"num_added": 1001,
"num_deleted": 1000,
@@ -2089,7 +2276,13 @@ async def test_afull_cleanup_with_different_batchsize(
for d in range(1000)
]
assert await aindex(docs, arecord_manager, vector_store, cleanup="full") == {
assert await aindex(
docs,
arecord_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 1000,
"num_deleted": 0,
"num_skipped": 0,
@@ -2105,7 +2298,12 @@ async def test_afull_cleanup_with_different_batchsize(
]
assert await aindex(
docs, arecord_manager, vector_store, cleanup="full", cleanup_batch_size=17
docs,
arecord_manager,
vector_store,
cleanup="full",
cleanup_batch_size=17,
key_encoder="sha256",
) == {
"num_added": 1001,
"num_deleted": 1000,
@@ -2132,6 +2330,7 @@ async def test_aincremental_cleanup_with_different_batchsize(
vector_store,
source_id_key="source",
cleanup="incremental",
key_encoder="sha256",
) == {
"num_added": 1000,
"num_deleted": 0,
@@ -2154,6 +2353,7 @@ async def test_aincremental_cleanup_with_different_batchsize(
cleanup="incremental",
source_id_key="source",
cleanup_batch_size=17,
key_encoder="sha256",
) == {
"num_added": 1001,
"num_deleted": 1000,
@@ -2185,7 +2385,13 @@ def test_deduplication_v2(
),
]
assert index(docs, record_manager, vector_store, cleanup="full") == {
assert index(
docs,
record_manager,
vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 3,
"num_deleted": 0,
"num_skipped": 1,
@@ -2246,14 +2452,26 @@ def test_indexing_force_update(
),
]
assert index(docs, record_manager, upserting_vector_store, cleanup="full") == {
assert index(
docs,
record_manager,
upserting_vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 1,
"num_updated": 0,
}
assert index(docs, record_manager, upserting_vector_store, cleanup="full") == {
assert index(
docs,
record_manager,
upserting_vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 3,
@@ -2261,7 +2479,12 @@ def test_indexing_force_update(
}
assert index(
docs, record_manager, upserting_vector_store, cleanup="full", force_update=True
docs,
record_manager,
upserting_vector_store,
cleanup="full",
force_update=True,
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -2290,7 +2513,11 @@ async def test_aindexing_force_update(
]
assert await aindex(
docs, arecord_manager, upserting_vector_store, cleanup="full"
docs,
arecord_manager,
upserting_vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -2299,7 +2526,11 @@ async def test_aindexing_force_update(
}
assert await aindex(
docs, arecord_manager, upserting_vector_store, cleanup="full"
docs,
arecord_manager,
upserting_vector_store,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -2313,6 +2544,7 @@ async def test_aindexing_force_update(
upserting_vector_store,
cleanup="full",
force_update=True,
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -2377,7 +2609,11 @@ async def test_aindexing_custom_batch_size(
)
vector_store.aadd_documents = mock_add_documents # type: ignore[method-assign]
await aindex(
docs, arecord_manager, vector_store, batch_size=batch_size, key_encoder="sha256"
docs,
arecord_manager,
vector_store,
batch_size=batch_size,
key_encoder="sha256",
)
args, kwargs = mock_add_documents.call_args
assert args == ([doc_with_id],)
@@ -2398,14 +2634,26 @@ def test_index_into_document_index(record_manager: InMemoryRecordManager) -> Non
),
]
assert index(docs, record_manager, document_index, cleanup="full") == {
assert index(
docs,
record_manager,
document_index,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
"num_skipped": 0,
"num_updated": 0,
}
assert index(docs, record_manager, document_index, cleanup="full") == {
assert index(
docs,
record_manager,
document_index,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
"num_skipped": 2,
@@ -2413,7 +2661,12 @@ def test_index_into_document_index(record_manager: InMemoryRecordManager) -> Non
}
assert index(
docs, record_manager, document_index, cleanup="full", force_update=True
docs,
record_manager,
document_index,
cleanup="full",
force_update=True,
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 0,
@@ -2421,7 +2674,13 @@ def test_index_into_document_index(record_manager: InMemoryRecordManager) -> Non
"num_updated": 2,
}
assert index([], record_manager, document_index, cleanup="full") == {
assert index(
[],
record_manager,
document_index,
cleanup="full",
key_encoder="sha256",
) == {
"num_added": 0,
"num_deleted": 2,
"num_skipped": 0,
@@ -2519,7 +2778,13 @@ def test_index_with_upsert_kwargs(
upsert_kwargs = {"vector_field": "embedding"}
index(docs, record_manager, upserting_vector_store, upsert_kwargs=upsert_kwargs)
index(
docs,
record_manager,
upserting_vector_store,
upsert_kwargs=upsert_kwargs,
key_encoder="sha256",
)
# Assert that add_documents was called with the correct arguments
mock_add_documents.assert_called_once()
@@ -2572,6 +2837,7 @@ def test_index_with_upsert_kwargs_for_document_indexer(
document_index,
cleanup="full",
upsert_kwargs=upsert_kwargs,
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -2610,6 +2876,7 @@ async def test_aindex_with_upsert_kwargs_for_document_indexer(
document_index,
cleanup="full",
upsert_kwargs=upsert_kwargs,
key_encoder="sha256",
) == {
"num_added": 2,
"num_deleted": 0,
@@ -2647,6 +2914,7 @@ async def test_aindex_with_upsert_kwargs(
arecord_manager,
upserting_vector_store,
upsert_kwargs=upsert_kwargs,
key_encoder="sha256",
)
# Assert that aadd_documents was called with the correct arguments