diff --git a/libs/community/langchain_community/vectorstores/faiss.py b/libs/community/langchain_community/vectorstores/faiss.py
index 0341756fc5f..06f28d385c9 100644
--- a/libs/community/langchain_community/vectorstores/faiss.py
+++ b/libs/community/langchain_community/vectorstores/faiss.py
@@ -190,6 +190,9 @@ class FAISS(VectorStore):
         _len_check_if_sized(documents, embeddings, "documents", "embeddings")
         _len_check_if_sized(documents, ids, "documents", "ids")
 
+        if ids and len(ids) != len(set(ids)):
+            raise ValueError("Duplicate ids found in the ids list.")
+
         # Add to the index.
         vector = np.array(embeddings, dtype=np.float32)
         if self._normalize_L2:
diff --git a/libs/community/tests/unit_tests/vectorstores/test_faiss.py b/libs/community/tests/unit_tests/vectorstores/test_faiss.py
index 350e0a1a641..375fa00eb6d 100644
--- a/libs/community/tests/unit_tests/vectorstores/test_faiss.py
+++ b/libs/community/tests/unit_tests/vectorstores/test_faiss.py
@@ -774,3 +774,15 @@ async def test_async_delete() -> None:
    result = await docsearch.asimilarity_search("bar", k=2)
    assert sorted([d.page_content for d in result]) == ["baz", "foo"]
    assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]}
+
+
+@pytest.mark.requires("faiss")
+def test_faiss_with_duplicate_ids() -> None:
+    """Test whether FAISS raises an exception for duplicate ids."""
+    texts = ["foo", "bar", "baz"]
+    duplicate_ids = ["id1", "id1", "id2"]
+
+    with pytest.raises(ValueError) as exc_info:
+        FAISS.from_texts(texts, FakeEmbeddings(), ids=duplicate_ids)
+
+    assert "Duplicate ids found in the ids list." in str(exc_info.value)
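
For context, a minimal sketch of how the new check surfaces to callers (not part of the patch). It uses the public `FakeEmbeddings` from `langchain_community.embeddings` rather than the test suite's local fake, and assumes `faiss` is installed. Without this check, duplicate ids in a single batch could collapse in the docstore mapping while every vector still landed in the index; with it, the call fails fast:

```python
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = FakeEmbeddings(size=8)

# Unique ids: indexing works as before.
store = FAISS.from_texts(["foo", "bar"], embeddings, ids=["a", "b"])

# Duplicate ids: the new validation raises before anything is indexed.
try:
    FAISS.from_texts(["foo", "bar"], embeddings, ids=["a", "a"])
except ValueError as err:
    print(err)  # Duplicate ids found in the ids list.
```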