Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-20 13:54:48 +00:00)
community[patch]: Implement Unique ID Enforcement in FAISS (#17244)
**Description:** Implemented unique ID validation in the FAISS vector store to ensure all document IDs are distinct. This update resolves issues caused by non-unique IDs, such as inconsistent behavior when deleting documents.
parent 88609565a3
commit b88329e9a5
```diff
@@ -190,6 +190,9 @@ class FAISS(VectorStore):
         _len_check_if_sized(documents, embeddings, "documents", "embeddings")
         _len_check_if_sized(documents, ids, "documents", "ids")
 
+        if ids and len(ids) != len(set(ids)):
+            raise ValueError("Duplicate ids found in the ids list.")
+
         # Add to the index.
         vector = np.array(embeddings, dtype=np.float32)
         if self._normalize_L2:
```
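For context, a minimal usage sketch (not part of the diff) of how the new check surfaces to callers. It assumes a local `faiss` install and uses `langchain_community`'s `FakeEmbeddings` as a stand-in embedding model:

```python
# Sketch only (not part of this commit): passing duplicate ids now fails fast
# instead of silently colliding in the docstore.
from langchain_community.embeddings import FakeEmbeddings  # stand-in embeddings
from langchain_community.vectorstores import FAISS

texts = ["foo", "bar", "baz"]
ids = ["id1", "id1", "id2"]  # "id1" appears twice

try:
    FAISS.from_texts(texts, FakeEmbeddings(size=4), ids=ids)
except ValueError as err:
    print(err)  # -> Duplicate ids found in the ids list.
```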
```diff
@@ -774,3 +774,15 @@ async def test_async_delete() -> None:
     result = await docsearch.asimilarity_search("bar", k=2)
     assert sorted([d.page_content for d in result]) == ["baz", "foo"]
     assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]}
+
+
+@pytest.mark.requires("faiss")
+def test_faiss_with_duplicate_ids() -> None:
+    """Test whether FAISS raises an exception for duplicate ids."""
+    texts = ["foo", "bar", "baz"]
+    duplicate_ids = ["id1", "id1", "id2"]
+
+    with pytest.raises(ValueError) as exc_info:
+        FAISS.from_texts(texts, FakeEmbeddings(), ids=duplicate_ids)
+
+    assert "Duplicate ids found in the ids list." in str(exc_info.value)
```
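If a caller cannot guarantee unique IDs up front, one option is to keep only the first occurrence of each id (and its text) before calling `from_texts`. The helper below is a hypothetical illustration, not part of this change:

```python
# Hypothetical pre-processing helper (not part of this commit): keep the first
# (text, id) pair for each id so lengths still match and all ids are unique.
def dedupe_by_id(texts: list[str], ids: list[str]) -> tuple[list[str], list[str]]:
    seen: set[str] = set()
    kept_texts: list[str] = []
    kept_ids: list[str] = []
    for text, id_ in zip(texts, ids):
        if id_ in seen:
            continue  # drop later duplicates
        seen.add(id_)
        kept_texts.append(text)
        kept_ids.append(id_)
    return kept_texts, kept_ids


# dedupe_by_id(["foo", "bar", "baz"], ["id1", "id1", "id2"])
# -> (["foo", "baz"], ["id1", "id2"])
```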