mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-02 01:23:07 +00:00
community: FAISS vectorstore - consistent Document id field (#28728)
Make sure the `id` field of each Document stored in the `FAISS` docstore matches the corresponding value in `index_to_docstore_id`, and implement the `get_by_ids` method.
This commit is contained in:
parent
a0534ae62a
commit
a3851cb3bc
@ -14,6 +14,7 @@ from typing import (
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Sized,
|
||||
Tuple,
|
||||
Union,
|
||||
@ -284,7 +285,6 @@ class FAISS(VectorStore):
|
||||
ids: Optional[List[str]] = None,
|
||||
) -> List[str]:
|
||||
faiss = dependable_faiss_import()
|
||||
|
||||
if not isinstance(self.docstore, AddableMixin):
|
||||
raise ValueError(
|
||||
"If trying to add texts, the underlying docstore should support "
|
||||
@ -292,17 +292,20 @@ class FAISS(VectorStore):
|
||||
)
|
||||
|
||||
_len_check_if_sized(texts, metadatas, "texts", "metadatas")
|
||||
|
||||
ids = ids or [str(uuid.uuid4()) for _ in texts]
|
||||
_len_check_if_sized(texts, ids, "texts", "ids")
|
||||
|
||||
_metadatas = metadatas or ({} for _ in texts)
|
||||
documents = [
|
||||
Document(page_content=t, metadata=m) for t, m in zip(texts, _metadatas)
|
||||
Document(id=id_, page_content=t, metadata=m)
|
||||
for id_, t, m in zip(ids, texts, _metadatas)
|
||||
]
|
||||
|
||||
_len_check_if_sized(documents, embeddings, "documents", "embeddings")
|
||||
_len_check_if_sized(documents, ids, "documents", "ids")
|
||||
|
||||
if ids and len(ids) != len(set(ids)):
|
||||
raise ValueError("Duplicate ids found in the ids list.")
|
||||
|
||||
# Add to the index.
|
||||
vector = np.array(embeddings, dtype=np.float32)
|
||||
if self._normalize_L2:
|
||||
@ -310,7 +313,6 @@ class FAISS(VectorStore):
|
||||
self.index.add(vector)
|
||||
|
||||
# Add information to docstore and index.
|
||||
ids = ids or [str(uuid.uuid4()) for _ in texts]
|
||||
self.docstore.add({id_: doc for id_, doc in zip(ids, documents)})
|
||||
starting_len = len(self.index_to_docstore_id)
|
||||
index_to_id = {starting_len + j: id_ for j, id_ in enumerate(ids)}
|
||||
@ -1475,3 +1477,7 @@ class FAISS(VectorStore):
|
||||
return lambda doc: all(condition(doc) for condition in conditions)
|
||||
|
||||
return filter_func(filter)
|
||||
|
||||
def get_by_ids(self, ids: Sequence[str], /) -> list[Document]:
    """Return the documents stored under the given ids.

    Each id is looked up with ``self.docstore.search``. Any lookup whose
    result is not a ``Document`` instance is dropped, so the returned list
    may be shorter than ``ids``. Results keep the order of ``ids``.

    Args:
        ids: Docstore ids to look up (positional-only).

    Returns:
        The ``Document`` objects found, in lookup order.
    """
    found: list[Document] = []
    for doc_id in ids:
        hit = self.docstore.search(doc_id)
        # Keep only real Document results; anything else is discarded.
        if isinstance(hit, Document):
            found.append(hit)
    return found
|
||||
|
@ -40,14 +40,14 @@ def test_faiss() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(id=output[0].id, page_content="foo")]
|
||||
|
||||
# Retriever standard params
|
||||
retriever = docsearch.as_retriever()
|
||||
@ -67,14 +67,14 @@ async def test_faiss_afrom_texts() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = await docsearch.asimilarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(id=output[0].id, page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -85,15 +85,15 @@ def test_faiss_vector_sim() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
query_vec = FakeEmbeddings().embed_query(text="foo")
|
||||
output = docsearch.similarity_search_by_vector(query_vec, k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(id=output[0].id, page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -104,15 +104,15 @@ async def test_faiss_async_vector_sim() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
query_vec = await FakeEmbeddings().aembed_query(text="foo")
|
||||
output = await docsearch.asimilarity_search_by_vector(query_vec, k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(id=output[0].id, page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -123,15 +123,15 @@ def test_faiss_vector_sim_with_score_threshold() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
query_vec = FakeEmbeddings().embed_query(text="foo")
|
||||
output = docsearch.similarity_search_by_vector(query_vec, k=2, score_threshold=0.2)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(id=output[0].id, page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -142,9 +142,9 @@ async def test_faiss_vector_async_sim_with_score_threshold() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
@ -152,7 +152,7 @@ async def test_faiss_vector_async_sim_with_score_threshold() -> None:
|
||||
output = await docsearch.asimilarity_search_by_vector(
|
||||
query_vec, k=2, score_threshold=0.2
|
||||
)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert output == [Document(id=output[0].id, page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -163,16 +163,16 @@ def test_similarity_search_with_score_by_vector() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
query_vec = FakeEmbeddings().embed_query(text="foo")
|
||||
output = docsearch.similarity_search_with_score_by_vector(query_vec, k=1)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo")
|
||||
assert output[0][0] == Document(id=output[0][0].id, page_content="foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -183,16 +183,16 @@ async def test_similarity_async_search_with_score_by_vector() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
query_vec = await FakeEmbeddings().aembed_query(text="foo")
|
||||
output = await docsearch.asimilarity_search_with_score_by_vector(query_vec, k=1)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo")
|
||||
assert output[0][0] == Document(id=output[0][0].id, page_content="foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -203,9 +203,9 @@ def test_similarity_search_with_score_by_vector_with_score_threshold() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
@ -216,7 +216,7 @@ def test_similarity_search_with_score_by_vector_with_score_threshold() -> None:
|
||||
score_threshold=0.2,
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo")
|
||||
assert output[0][0] == Document(id=output[0][0].id, page_content="foo")
|
||||
assert output[0][1] < 0.2
|
||||
|
||||
|
||||
@ -228,9 +228,9 @@ async def test_sim_asearch_with_score_by_vector_with_score_threshold() -> None:
|
||||
index_to_id = docsearch.index_to_docstore_id
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
index_to_id[0]: Document(page_content="foo"),
|
||||
index_to_id[1]: Document(page_content="bar"),
|
||||
index_to_id[2]: Document(page_content="baz"),
|
||||
index_to_id[0]: Document(id=index_to_id[0], page_content="foo"),
|
||||
index_to_id[1]: Document(id=index_to_id[1], page_content="bar"),
|
||||
index_to_id[2]: Document(id=index_to_id[2], page_content="baz"),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
@ -241,7 +241,7 @@ async def test_sim_asearch_with_score_by_vector_with_score_threshold() -> None:
|
||||
score_threshold=0.2,
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo")
|
||||
assert output[0][0] == Document(id=output[0][0].id, page_content="foo")
|
||||
assert output[0][1] < 0.2
|
||||
|
||||
|
||||
@ -255,9 +255,9 @@ def test_faiss_mmr() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1
|
||||
)
|
||||
assert len(output) == len(texts)
|
||||
assert output[0][0] == Document(page_content="foo")
|
||||
assert output[0][0] == Document(id=output[0][0].id, page_content="foo")
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo")
|
||||
assert output[1][0] != Document(id=output[1][0].id, page_content="foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -270,9 +270,9 @@ async def test_faiss_async_mmr() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1
|
||||
)
|
||||
assert len(output) == len(texts)
|
||||
assert output[0][0] == Document(page_content="foo")
|
||||
assert output[0][0] == Document(id=output[0][0].id, page_content="foo")
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo")
|
||||
assert output[1][0] != Document(id=output[1][0].id, page_content="foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -285,9 +285,13 @@ def test_faiss_mmr_with_metadatas() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1
|
||||
)
|
||||
assert len(output) == len(texts)
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -300,9 +304,13 @@ async def test_faiss_async_mmr_with_metadatas() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1
|
||||
)
|
||||
assert len(output) == len(texts)
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -315,7 +323,9 @@ def test_faiss_mmr_with_metadatas_and_filter() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": 1}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] == 1
|
||||
@ -332,7 +342,9 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_eq() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$eq": 1}}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] == 1
|
||||
@ -349,10 +361,16 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_neq() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$neq": 1}}
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[2][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[2][0] != Document(
|
||||
id=output[2][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] != 1
|
||||
)
|
||||
@ -368,10 +386,16 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_gt() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$gt": 0}}
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[2][0] != Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[2][0] != Document(
|
||||
id=output[2][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] > 0
|
||||
)
|
||||
@ -387,9 +411,13 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_lt() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$lt": 2}}
|
||||
)
|
||||
assert len(output) == 2
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[1][1] == 1.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] < 2
|
||||
@ -406,10 +434,16 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_gte() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$gte": 1}}
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[2][0] != Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[2][0] != Document(
|
||||
id=output[2][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] >= 1
|
||||
)
|
||||
@ -425,7 +459,9 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_lte() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$lte": 0}}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="fou", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="fou", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] <= 0
|
||||
@ -442,7 +478,9 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_in_1() -> None
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$in": [0]}}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] in [0]
|
||||
@ -459,9 +497,13 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_in_2() -> None
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$in": [1, 2]}}
|
||||
)
|
||||
assert len(output) == 2
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="fou", metadata={"page": 2})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="fou", metadata={"page": 2}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] in [1, 2]
|
||||
)
|
||||
@ -477,9 +519,13 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_nin_1() -> Non
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$nin": [0, 1]}}
|
||||
)
|
||||
assert len(output) == 2
|
||||
assert output[0][0] == Document(page_content="fou", metadata={"page": 2})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="fou", metadata={"page": 2}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="foy", metadata={"page": 3})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="foy", metadata={"page": 3}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] not in [0, 1]
|
||||
)
|
||||
@ -495,7 +541,9 @@ def test_faiss_mmr_with_metadatas_and_comparison_operators_filter_nin_2() -> Non
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": {"$nin": [0, 1, 2]}}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foy", metadata={"page": 3})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foy", metadata={"page": 3}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] not in [0, 1, 2]
|
||||
@ -512,10 +560,16 @@ def test_faiss_mmr_with_metadatas_and_logical_operators_filter_not() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"$not": {"page": 1}}
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="foy", metadata={"page": 3})
|
||||
assert output[2][0] == Document(page_content="fou", metadata={"page": 2})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="foy", metadata={"page": 3}
|
||||
)
|
||||
assert output[2][0] == Document(
|
||||
id=output[2][0].id, page_content="fou", metadata={"page": 2}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: not di["page"] == 1
|
||||
)
|
||||
@ -531,7 +585,9 @@ def test_faiss_mmr_with_metadatas_and_logical_operators_filter_or_1() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"$or": [{"page": 0}]}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: (di["page"] == 0)
|
||||
@ -548,9 +604,13 @@ def test_faiss_mmr_with_metadatas_and_logical_operators_filter_or_2() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"$or": [{"page": 0}, {"page": 1}]}
|
||||
)
|
||||
assert len(output) == 2
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[1][1] == 1.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
@ -573,10 +633,16 @@ def test_faiss_mmr_with_metadatas_and_logical_operators_filter_or_3() -> None:
|
||||
filter={"$or": [{"page": 0}, {"page": 1}, {"page": 2}]},
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[2][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[2][0] != Document(
|
||||
id=output[2][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
k=10,
|
||||
@ -595,7 +661,9 @@ def test_faiss_mmr_with_metadatas_and_logical_operators_filter_and_1() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"$and": [{"page": 0}]}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: (di["page"] == 0)
|
||||
@ -654,7 +722,9 @@ def test_faiss_mmr_with_metadatas_and_logical_operators_filter_and_4() -> None:
|
||||
filter={"$and": [{"page": 0}, {"page": 0}, {"page": 0}]},
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
@ -677,7 +747,9 @@ def test_faiss_mmr_with_metadatas_and_nested_logical_operators_filter_1() -> Non
|
||||
filter={"$and": [{"$or": [{"page": 1}, {"page": 2}]}, {"$not": {"page": 1}}]},
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="fou", metadata={"page": 2})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="fou", metadata={"page": 2}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
k=10,
|
||||
@ -705,7 +777,9 @@ def test_faiss_mmr_with_metadatas_and_nested_logical_operators_filter_2() -> Non
|
||||
},
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="fou", metadata={"page": 2})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="fou", metadata={"page": 2}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
k=10,
|
||||
@ -755,9 +829,13 @@ def test_faiss_mmr_with_metadatas_and_logical_comparsion_operators_filter_1() ->
|
||||
filter={"$or": [{"page": {"$lt": 1}}, {"page": {"$gt": 2}}]},
|
||||
)
|
||||
assert len(output) == 2
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="foy", metadata={"page": 3})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="foy", metadata={"page": 3}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
k=10,
|
||||
@ -776,10 +854,16 @@ def test_faiss_mmr_with_metadatas_and_logical_comparsion_operators_filter_2() ->
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"$not": {"page": {"$lt": 1}}}
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="foy", metadata={"page": 3})
|
||||
assert output[2][0] == Document(page_content="fou", metadata={"page": 2})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="foy", metadata={"page": 3}
|
||||
)
|
||||
assert output[2][0] == Document(
|
||||
id=output[2][0].id, page_content="fou", metadata={"page": 2}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: not di["page"] < 1
|
||||
)
|
||||
@ -803,7 +887,9 @@ def test_faiss_mmr_with_metadatas_and_nested_logical_comparsion_ops_filter_1() -
|
||||
},
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
k=10,
|
||||
@ -861,9 +947,13 @@ def test_faiss_mmr_with_metadatas_and_nested_logical_comparsion_ops_filter_3() -
|
||||
},
|
||||
)
|
||||
assert len(output) == 2
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] == Document(page_content="foy", metadata={"page": 3})
|
||||
assert output[1][0] == Document(
|
||||
id=output[1][0].id, page_content="foy", metadata={"page": 3}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec,
|
||||
k=10,
|
||||
@ -1134,7 +1224,9 @@ async def test_faiss_async_mmr_with_metadatas_and_filter() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": 1}
|
||||
)
|
||||
assert len(output) == 1
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 1}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert (
|
||||
output
|
||||
@ -1154,9 +1246,13 @@ def test_faiss_mmr_with_metadatas_and_list_filter() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]}
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output == docsearch.max_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] in [0, 1, 2]
|
||||
)
|
||||
@ -1172,9 +1268,13 @@ async def test_faiss_async_mmr_with_metadatas_and_list_filter() -> None:
|
||||
query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]}
|
||||
)
|
||||
assert len(output) == 3
|
||||
assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[0][0] == Document(
|
||||
id=output[0][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output[0][1] == 0.0
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
assert output[1][0] != Document(
|
||||
id=output[1][0].id, page_content="foo", metadata={"page": 0}
|
||||
)
|
||||
assert output == (
|
||||
await docsearch.amax_marginal_relevance_search_with_score_by_vector(
|
||||
query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] in [0, 1, 2]
|
||||
@ -1191,19 +1291,27 @@ def test_faiss_with_metadatas() -> None:
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
docsearch.index_to_docstore_id[0]: Document(
|
||||
page_content="foo", metadata={"page": 0}
|
||||
id=docsearch.index_to_docstore_id[0],
|
||||
page_content="foo",
|
||||
metadata={"page": 0},
|
||||
),
|
||||
docsearch.index_to_docstore_id[1]: Document(
|
||||
page_content="bar", metadata={"page": 1}
|
||||
id=docsearch.index_to_docstore_id[1],
|
||||
page_content="bar",
|
||||
metadata={"page": 1},
|
||||
),
|
||||
docsearch.index_to_docstore_id[2]: Document(
|
||||
page_content="baz", metadata={"page": 2}
|
||||
id=docsearch.index_to_docstore_id[2],
|
||||
page_content="baz",
|
||||
metadata={"page": 2},
|
||||
),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="foo", metadata={"page": 0})
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -1215,19 +1323,27 @@ async def test_faiss_async_with_metadatas() -> None:
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
docsearch.index_to_docstore_id[0]: Document(
|
||||
page_content="foo", metadata={"page": 0}
|
||||
id=docsearch.index_to_docstore_id[0],
|
||||
page_content="foo",
|
||||
metadata={"page": 0},
|
||||
),
|
||||
docsearch.index_to_docstore_id[1]: Document(
|
||||
page_content="bar", metadata={"page": 1}
|
||||
id=docsearch.index_to_docstore_id[1],
|
||||
page_content="bar",
|
||||
metadata={"page": 1},
|
||||
),
|
||||
docsearch.index_to_docstore_id[2]: Document(
|
||||
page_content="baz", metadata={"page": 2}
|
||||
id=docsearch.index_to_docstore_id[2],
|
||||
page_content="baz",
|
||||
metadata={"page": 2},
|
||||
),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = await docsearch.asimilarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="foo", metadata={"page": 0})
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -1238,13 +1354,19 @@ def test_faiss_with_metadatas_and_filter() -> None:
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
docsearch.index_to_docstore_id[0]: Document(
|
||||
page_content="foo", metadata={"page": 0}
|
||||
id=docsearch.index_to_docstore_id[0],
|
||||
page_content="foo",
|
||||
metadata={"page": 0},
|
||||
),
|
||||
docsearch.index_to_docstore_id[1]: Document(
|
||||
page_content="bar", metadata={"page": 1}
|
||||
id=docsearch.index_to_docstore_id[1],
|
||||
page_content="bar",
|
||||
metadata={"page": 1},
|
||||
),
|
||||
docsearch.index_to_docstore_id[2]: Document(
|
||||
page_content="baz", metadata={"page": 2}
|
||||
id=docsearch.index_to_docstore_id[2],
|
||||
page_content="baz",
|
||||
metadata={"page": 2},
|
||||
),
|
||||
}
|
||||
)
|
||||
@ -1252,7 +1374,9 @@ def test_faiss_with_metadatas_and_filter() -> None:
|
||||
output = docsearch.similarity_search("foo", k=1, filter={"page": 1})
|
||||
# make sure it returns the result that matches the filter.
|
||||
# Not the one whose text matches better.
|
||||
assert output == [Document(page_content="bar", metadata={"page": 1})]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="bar", metadata={"page": 1})
|
||||
]
|
||||
assert output == docsearch.similarity_search(
|
||||
"foo", k=1, filter=lambda di: di["page"] == 1
|
||||
)
|
||||
@ -1266,13 +1390,19 @@ async def test_faiss_async_with_metadatas_and_filter() -> None:
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
docsearch.index_to_docstore_id[0]: Document(
|
||||
page_content="foo", metadata={"page": 0}
|
||||
id=docsearch.index_to_docstore_id[0],
|
||||
page_content="foo",
|
||||
metadata={"page": 0},
|
||||
),
|
||||
docsearch.index_to_docstore_id[1]: Document(
|
||||
page_content="bar", metadata={"page": 1}
|
||||
id=docsearch.index_to_docstore_id[1],
|
||||
page_content="bar",
|
||||
metadata={"page": 1},
|
||||
),
|
||||
docsearch.index_to_docstore_id[2]: Document(
|
||||
page_content="baz", metadata={"page": 2}
|
||||
id=docsearch.index_to_docstore_id[2],
|
||||
page_content="baz",
|
||||
metadata={"page": 2},
|
||||
),
|
||||
}
|
||||
)
|
||||
@ -1280,7 +1410,9 @@ async def test_faiss_async_with_metadatas_and_filter() -> None:
|
||||
output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1})
|
||||
# make sure it returns the result that matches the filter.
|
||||
# Not the one whose text matches better.
|
||||
assert output == [Document(page_content="bar", metadata={"page": 1})]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="bar", metadata={"page": 1})
|
||||
]
|
||||
assert output == await docsearch.asimilarity_search(
|
||||
"foo", k=1, filter=lambda di: di["page"] == 1
|
||||
)
|
||||
@ -1294,25 +1426,37 @@ def test_faiss_with_metadatas_and_list_filter() -> None:
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
docsearch.index_to_docstore_id[0]: Document(
|
||||
page_content="foo", metadata={"page": 0}
|
||||
id=docsearch.index_to_docstore_id[0],
|
||||
page_content="foo",
|
||||
metadata={"page": 0},
|
||||
),
|
||||
docsearch.index_to_docstore_id[1]: Document(
|
||||
page_content="bar", metadata={"page": 1}
|
||||
id=docsearch.index_to_docstore_id[1],
|
||||
page_content="bar",
|
||||
metadata={"page": 1},
|
||||
),
|
||||
docsearch.index_to_docstore_id[2]: Document(
|
||||
page_content="baz", metadata={"page": 2}
|
||||
id=docsearch.index_to_docstore_id[2],
|
||||
page_content="baz",
|
||||
metadata={"page": 2},
|
||||
),
|
||||
docsearch.index_to_docstore_id[3]: Document(
|
||||
page_content="foo", metadata={"page": 3}
|
||||
id=docsearch.index_to_docstore_id[3],
|
||||
page_content="foo",
|
||||
metadata={"page": 3},
|
||||
),
|
||||
docsearch.index_to_docstore_id[4]: Document(
|
||||
page_content="qux", metadata={"page": 3}
|
||||
id=docsearch.index_to_docstore_id[4],
|
||||
page_content="qux",
|
||||
metadata={"page": 3},
|
||||
),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = docsearch.similarity_search("foor", k=1, filter={"page": [0, 1, 2]})
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="foo", metadata={"page": 0})
|
||||
]
|
||||
assert output == docsearch.similarity_search(
|
||||
"foor", k=1, filter=lambda di: di["page"] in [0, 1, 2]
|
||||
)
|
||||
@ -1326,25 +1470,37 @@ async def test_faiss_async_with_metadatas_and_list_filter() -> None:
|
||||
expected_docstore = InMemoryDocstore(
|
||||
{
|
||||
docsearch.index_to_docstore_id[0]: Document(
|
||||
page_content="foo", metadata={"page": 0}
|
||||
id=docsearch.index_to_docstore_id[0],
|
||||
page_content="foo",
|
||||
metadata={"page": 0},
|
||||
),
|
||||
docsearch.index_to_docstore_id[1]: Document(
|
||||
page_content="bar", metadata={"page": 1}
|
||||
id=docsearch.index_to_docstore_id[1],
|
||||
page_content="bar",
|
||||
metadata={"page": 1},
|
||||
),
|
||||
docsearch.index_to_docstore_id[2]: Document(
|
||||
page_content="baz", metadata={"page": 2}
|
||||
id=docsearch.index_to_docstore_id[2],
|
||||
page_content="baz",
|
||||
metadata={"page": 2},
|
||||
),
|
||||
docsearch.index_to_docstore_id[3]: Document(
|
||||
page_content="foo", metadata={"page": 3}
|
||||
id=docsearch.index_to_docstore_id[3],
|
||||
page_content="foo",
|
||||
metadata={"page": 3},
|
||||
),
|
||||
docsearch.index_to_docstore_id[4]: Document(
|
||||
page_content="qux", metadata={"page": 3}
|
||||
id=docsearch.index_to_docstore_id[4],
|
||||
page_content="qux",
|
||||
metadata={"page": 3},
|
||||
),
|
||||
}
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = await docsearch.asimilarity_search("foor", k=1, filter={"page": [0, 1, 2]})
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="foo", metadata={"page": 0})
|
||||
]
|
||||
assert output == await docsearch.asimilarity_search(
|
||||
"foor", k=1, filter=lambda di: di["page"] in [0, 1, 2]
|
||||
)
|
||||
@ -1381,7 +1537,10 @@ def test_faiss_add_texts() -> None:
|
||||
# Test adding a similar document as before.
|
||||
docsearch.add_texts(["foo"])
|
||||
output = docsearch.similarity_search("foo", k=2)
|
||||
assert output == [Document(page_content="foo"), Document(page_content="foo")]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="foo"),
|
||||
Document(id=output[1].id, page_content="foo"),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -1393,7 +1552,10 @@ async def test_faiss_async_add_texts() -> None:
|
||||
# Test adding a similar document as before.
|
||||
await docsearch.aadd_texts(["foo"])
|
||||
output = await docsearch.asimilarity_search("foo", k=2)
|
||||
assert output == [Document(page_content="foo"), Document(page_content="foo")]
|
||||
assert output == [
|
||||
Document(id=output[0].id, page_content="foo"),
|
||||
Document(id=output[1].id, page_content="foo"),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
@ -1451,7 +1613,7 @@ def test_faiss_similarity_search_with_relevance_scores() -> None:
|
||||
)
|
||||
outputs = docsearch.similarity_search_with_relevance_scores("foo", k=1)
|
||||
output, score = outputs[0]
|
||||
assert output == Document(page_content="foo")
|
||||
assert output == Document(id=output.id, page_content="foo")
|
||||
assert score == 1.0
|
||||
|
||||
|
||||
@ -1466,7 +1628,7 @@ async def test_faiss_async_similarity_search_with_relevance_scores() -> None:
|
||||
)
|
||||
outputs = await docsearch.asimilarity_search_with_relevance_scores("foo", k=1)
|
||||
output, score = outputs[0]
|
||||
assert output == Document(page_content="foo")
|
||||
assert output == Document(id=output.id, page_content="foo")
|
||||
assert score == 1.0
|
||||
|
||||
|
||||
@ -1484,7 +1646,7 @@ def test_faiss_similarity_search_with_relevance_scores_with_threshold() -> None:
|
||||
)
|
||||
assert len(outputs) == 1
|
||||
output, score = outputs[0]
|
||||
assert output == Document(page_content="foo")
|
||||
assert output == Document(id=output.id, page_content="foo")
|
||||
assert score == 1.0
|
||||
|
||||
|
||||
@ -1502,7 +1664,7 @@ async def test_faiss_asimilarity_search_with_relevance_scores_with_threshold() -
|
||||
)
|
||||
assert len(outputs) == 1
|
||||
output, score = outputs[0]
|
||||
assert output == Document(page_content="foo")
|
||||
assert output == Document(id=output.id, page_content="foo")
|
||||
assert score == 1.0
|
||||
|
||||
|
||||
@ -1604,3 +1766,34 @@ def test_faiss_with_duplicate_ids() -> None:
|
||||
FAISS.from_texts(texts, FakeEmbeddings(), ids=duplicate_ids)
|
||||
|
||||
assert "Duplicate ids found in the ids list." in str(exc_info.value)
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
def test_faiss_document_ids() -> None:
    """Check that stored Documents carry the ids they were added under.

    A store is built from three texts with caller-supplied ids; each id is
    then looked up in the docstore and the returned Document's ``id`` and
    ``page_content`` are compared with what was supplied.
    """
    expected = {"id1": "foo", "id2": "bar", "id3": "baz"}
    store = FAISS.from_texts(
        list(expected.values()), FakeEmbeddings(), ids=list(expected)
    )

    for doc_id, content in expected.items():
        found = store.docstore.search(doc_id)
        # search() may hand back something other than a Document, so check
        # the type before touching its attributes.
        assert isinstance(found, Document)
        assert found.id == doc_id
        assert found.page_content == content
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
def test_faiss_get_by_ids() -> None:
    """Exercise FAISS ``get_by_ids`` for bulk and single-id lookups."""
    doc_ids = ["id1", "id2", "id3"]
    store = FAISS.from_texts(["foo", "bar", "baz"], FakeEmbeddings(), ids=doc_ids)

    # Bulk lookup: ids are compared as a set, so no ordering is assumed.
    fetched = store.get_by_ids(doc_ids)
    assert len(fetched) == 3
    assert {found.id for found in fetched} == set(doc_ids)

    # Each id queried on its own must yield exactly that one document.
    for wanted in doc_ids:
        hits = store.get_by_ids([wanted])
        assert len(hits) == 1
        assert hits[0].id == wanted
|
||||
|
Loading…
Reference in New Issue
Block a user