mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-09 17:18:31 +00:00
langchain[patch]: Add tests for indexing (#19342)
This PR adds tests for the indexing API
This commit is contained in:
parent
68298cdc82
commit
aa9ccca775
@ -736,6 +736,160 @@ def test_incremental_delete(
|
||||
}
|
||||
|
||||
|
||||
def test_incremental_delete_with_batch_size(
    record_manager: SQLRecordManager, vector_store: InMemoryVectorStore
) -> None:
    """Check incremental cleanup when indexing with explicit batch sizes.

    The scenario runs in stages, each under a patched ``get_time``:

    1. Index four single-source documents with ``batch_size=3``.
    2. Re-index the same loader at the same timestamp; all four are skipped.
    3. Twice, at later timestamps, re-index only sources "1" and "2" with
       ``batch_size=1``; both are skipped and nothing is deleted.
    4. Index changed content for sources "1" and "2" using the default
       batch size; two documents are added and two are deleted.
    """
    # Four documents, each being the only document of its own source.
    loader = ToyLoader(
        documents=[
            Document(page_content=label, metadata={"source": label})
            for label in ("1", "2", "3", "4")
        ]
    )

    # Stage 1: first indexing pass, batch smaller than the document count.
    first_seen = datetime(2021, 1, 2).timestamp()
    with patch.object(record_manager, "get_time", return_value=first_seen):
        outcome = index(
            loader,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=3,
        )
        assert outcome == {
            "num_added": 4,
            "num_deleted": 0,
            "num_skipped": 0,
            "num_updated": 0,
        }

    # Every stored entry is expected to be a document (never None), hence
    # the type-ignore on the attribute access.
    stored_texts = {
        vector_store.store.get(uid).page_content  # type: ignore
        for uid in vector_store.store
    }
    assert stored_texts == {"1", "2", "3", "4"}

    # Stage 2: same loader, same timestamp -> everything is skipped.
    with patch.object(
        record_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp()
    ):
        outcome = index(
            loader,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
            batch_size=3,
        )
        assert outcome == {
            "num_added": 0,
            "num_deleted": 0,
            "num_skipped": 4,
            "num_updated": 0,
        }

    # Stage 3: two later passes over an unchanged subset (sources "1" and
    # "2"), one document per batch. Nothing is added or deleted.
    for later in (datetime(2022, 1, 3), datetime(2023, 1, 3)):
        with patch.object(
            record_manager, "get_time", return_value=later.timestamp()
        ):
            # Fresh Document objects with content identical to what is stored.
            unchanged = [
                Document(page_content=label, metadata={"source": label})
                for label in ("1", "2")
            ]
            outcome = index(
                unchanged,
                record_manager,
                vector_store,
                cleanup="incremental",
                source_id_key="source",
                batch_size=1,
            )
            assert outcome == {
                "num_added": 0,
                "num_deleted": 0,
                "num_skipped": 2,
                "num_updated": 0,
            }

    # Stage 4: new content for sources "1" and "2"; batch_size is left at
    # its default for this call.
    with patch.object(
        record_manager, "get_time", return_value=datetime(2024, 1, 3).timestamp()
    ):
        changed = [
            Document(page_content=f"changed {label}", metadata={"source": label})
            for label in ("1", "2")
        ]
        outcome = index(
            changed,
            record_manager,
            vector_store,
            cleanup="incremental",
            source_id_key="source",
        )
        # Presumably the two deletions are the superseded versions of
        # sources "1" and "2" -- verify against the index() implementation.
        assert outcome == {
            "num_added": 2,
            "num_deleted": 2,
            "num_skipped": 0,
            "num_updated": 0,
        }
|
||||
|
||||
|
||||
@pytest.mark.requires("aiosqlite")
|
||||
async def test_aincremental_delete(
|
||||
arecord_manager: SQLRecordManager, vector_store: InMemoryVectorStore
|
||||
|
Loading…
Reference in New Issue
Block a user