community[patch]: Implement Unique ID Enforcement in FAISS (#17244)

**Description:**
Implemented unique ID validation in the FAISS vector store to ensure all
document IDs are distinct. This resolves issues caused by non-unique IDs,
such as inconsistent behavior when deleting documents.
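
For reference, the enforcement boils down to a small guard of roughly this shape (a sketch, not the literal patch; its exact placement inside `faiss.py` is an assumption, though the error message matches the one asserted in the test below):

```python
# Sketch of the duplicate-ID guard this patch introduces.
# Placement within the FAISS add path is assumed; the message
# matches the assertion in the new test.
if ids and len(ids) != len(set(ids)):
    raise ValueError("Duplicate ids found in the ids list.")
```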
**Commit:** b88329e9a5 (parent 88609565a3)
**Author:** ByeongUk Choi
**Date:** 2024-02-09 05:03:33 +09:00
**Committed by:** GitHub
2 changed files with 15 additions and 0 deletions


```diff
@@ -774,3 +774,15 @@ async def test_async_delete() -> None:
     result = await docsearch.asimilarity_search("bar", k=2)
     assert sorted([d.page_content for d in result]) == ["baz", "foo"]
     assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]}
+
+
+@pytest.mark.requires("faiss")
+def test_faiss_with_duplicate_ids() -> None:
+    """Test whether FAISS raises an exception for duplicate ids."""
+    texts = ["foo", "bar", "baz"]
+    duplicate_ids = ["id1", "id1", "id2"]
+    with pytest.raises(ValueError) as exc_info:
+        FAISS.from_texts(texts, FakeEmbeddings(), ids=duplicate_ids)
+    assert "Duplicate ids found in the ids list." in str(exc_info.value)
```