mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 21:47:12 +00:00
Async support for FAISS (#11333)
Following this tutorial about using OpenAI Embeddings with FAISS https://python.langchain.com/docs/integrations/vectorstores/faiss ```python from langchain.embeddings.openai import OpenAIEmbeddings from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores import FAISS from langchain.document_loaders import TextLoader from langchain.document_loaders import TextLoader loader = TextLoader("../../../extras/modules/state_of_the_union.txt") documents = loader.load() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) docs = text_splitter.split_documents(documents) embeddings = OpenAIEmbeddings() ``` This works fine: ```python db = FAISS.from_documents(docs, embeddings) query = "What did the president say about Ketanji Brown Jackson" docs = db.similarity_search(query) ``` But the async version does not: ```python db = await FAISS.afrom_documents(docs, embeddings) # NotImplementedError query = "What did the president say about Ketanji Brown Jackson" docs = await db.asimilarity_search(query) # this will use await asyncio.get_event_loop().run_in_executor under the hood and will not call OpenAIEmbeddings.aembed_query but call OpenAIEmbeddings.embed_query ``` So this PR adds async/await support for FAISS. --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
committed by
GitHub
parent
26f0ca222d
commit
33e77a1007
@@ -216,6 +216,17 @@ class VectorStore(ABC):
|
||||
"""Run similarity search with distance."""
|
||||
raise NotImplementedError
|
||||
|
||||
async def asimilarity_search_with_score(
    self, *args: Any, **kwargs: Any
) -> List[Tuple[Document, float]]:
    """Run similarity search with distance asynchronously.

    Default implementation: every positional and keyword argument is
    forwarded unchanged to the synchronous ``similarity_search_with_score``,
    which is executed on the running loop's default executor.

    Args:
        *args: Positional arguments for ``similarity_search_with_score``.
        **kwargs: Keyword arguments for ``similarity_search_with_score``.

    Returns:
        Whatever ``similarity_search_with_score`` returns for the same
        arguments.
    """
    # This is a temporary workaround to make the similarity search
    # asynchronous. The proper solution is to make the similarity search
    # asynchronous in the vector store implementations.
    func = partial(self.similarity_search_with_score, *args, **kwargs)
    # Inside a coroutine, get_running_loop() is the recommended way to
    # obtain the loop; it never creates or looks up a loop via the policy.
    return await asyncio.get_running_loop().run_in_executor(None, func)
|
||||
|
||||
def _similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
@@ -243,6 +254,33 @@ class VectorStore(ABC):
|
||||
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
|
||||
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
|
||||
|
||||
async def _asimilarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""
|
||||
Default async similarity search with relevance scores. Modify if necessary
|
||||
in subclass.
|
||||
Return docs and relevance scores in the range [0, 1].
|
||||
|
||||
0 is dissimilar, 1 is most similar.
|
||||
|
||||
Args:
|
||||
query: input text
|
||||
k: Number of Documents to return. Defaults to 4.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include:
|
||||
score_threshold: Optional, a floating point value between 0 to 1 to
|
||||
filter the resulting set of retrieved docs
|
||||
|
||||
Returns:
|
||||
List of Tuples of (doc, similarity_score)
|
||||
"""
|
||||
relevance_score_fn = self._select_relevance_score_fn()
|
||||
docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
|
||||
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
|
||||
|
||||
def similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
@@ -291,17 +329,51 @@ class VectorStore(ABC):
|
||||
return docs_and_similarities
|
||||
|
||||
async def asimilarity_search_with_relevance_scores(
    self,
    query: str,
    k: int = 4,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Return docs and relevance scores in the range [0, 1], asynchronously.

    0 is dissimilar, 1 is most similar.

    Args:
        query: input text
        k: Number of Documents to return. Defaults to 4.
        **kwargs: kwargs to be passed to similarity search. Should include:
            score_threshold: Optional, a floating point value between 0 to 1 to
                filter the resulting set of retrieved docs

    Returns:
        List of Tuples of (doc, similarity_score)
    """
    # Remove the threshold before delegating so it is applied here, to the
    # relevance scores, and is not forwarded to the underlying search.
    score_threshold = kwargs.pop("score_threshold", None)

    docs_and_similarities = await self._asimilarity_search_with_relevance_scores(
        query, k=k, **kwargs
    )
    # Out-of-range scores are reported but still returned to the caller.
    if any(
        similarity < 0.0 or similarity > 1.0
        for _, similarity in docs_and_similarities
    ):
        warnings.warn(
            "Relevance scores must be between"
            f" 0 and 1, got {docs_and_similarities}"
        )

    if score_threshold is not None:
        docs_and_similarities = [
            (doc, similarity)
            for doc, similarity in docs_and_similarities
            if similarity >= score_threshold
        ]
        if not docs_and_similarities:
            warnings.warn(
                "No relevant docs were retrieved using the relevance score"
                f" threshold {score_threshold}"
            )
    return docs_and_similarities
|
||||
|
||||
async def asimilarity_search(
|
||||
self, query: str, k: int = 4, **kwargs: Any
|
||||
|
@@ -1,11 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import operator
|
||||
import os
|
||||
import pickle
|
||||
import uuid
|
||||
import warnings
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
@@ -86,7 +88,10 @@ class FAISS(VectorStore):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedding_function: Union[Callable, Embeddings],
|
||||
embedding_function: Union[
|
||||
Callable[[str], List[float]],
|
||||
Embeddings,
|
||||
],
|
||||
index: Any,
|
||||
docstore: Docstore,
|
||||
index_to_docstore_id: Dict[int, str],
|
||||
@@ -131,12 +136,34 @@ class FAISS(VectorStore):
|
||||
else:
|
||||
return [self.embedding_function(text) for text in texts]
|
||||
|
||||
async def _aembed_documents(self, texts: List[str]) -> List[List[float]]:
    """Embed ``texts`` asynchronously via ``embedding_function``.

    Args:
        texts: The strings to embed.

    Returns:
        The result of awaiting ``embedding_function.aembed_documents(texts)``.

    Raises:
        Exception: If ``embedding_function`` is not an ``Embeddings``
            instance; plain callables are not supported on the async path.
    """
    embedder = self.embedding_function
    if not isinstance(embedder, Embeddings):
        raise Exception(
            "`embedding_function` is expected to be an Embeddings object, support "
            "for passing in a function will soon be removed."
        )
    return await embedder.aembed_documents(texts)
|
||||
|
||||
def _embed_query(self, text: str) -> List[float]:
    """Embed a single query string with ``embedding_function``.

    Args:
        text: The query text to embed.

    Returns:
        ``embedding_function.embed_query(text)`` when ``embedding_function``
        is an ``Embeddings`` instance; otherwise the result of calling
        ``embedding_function(text)`` directly.
    """
    embedder = self.embedding_function
    if not isinstance(embedder, Embeddings):
        # Legacy path: a bare callable was supplied instead of Embeddings.
        return embedder(text)
    return embedder.embed_query(text)
|
||||
|
||||
async def _aembed_query(self, text: str) -> List[float]:
    """Embed a single query string asynchronously via ``embedding_function``.

    Args:
        text: The query text to embed.

    Returns:
        The result of awaiting ``embedding_function.aembed_query(text)``.

    Raises:
        Exception: If ``embedding_function`` is not an ``Embeddings``
            instance; plain callables are not supported on the async path.
    """
    embedder = self.embedding_function
    if not isinstance(embedder, Embeddings):
        raise Exception(
            "`embedding_function` is expected to be an Embeddings object, support "
            "for passing in a function will soon be removed."
        )
    return await embedder.aembed_query(text)
|
||||
|
||||
def __add(
|
||||
self,
|
||||
texts: Iterable[str],
|
||||
@@ -196,6 +223,28 @@ class FAISS(VectorStore):
|
||||
embeddings = self._embed_documents(texts)
|
||||
return self.__add(texts, embeddings, metadatas=metadatas, ids=ids)
|
||||
|
||||
async def aadd_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Embed more texts asynchronously and add them to the vectorstore.

    Args:
        texts: Iterable of strings to add to the vectorstore.
        metadatas: Optional list of metadatas associated with the texts.
        ids: Optional list of unique IDs.
        **kwargs: Accepted but not used by this method.

    Returns:
        List of ids from adding the texts into the vectorstore.
    """
    # Materialise once: the iterable is consumed by both the embedding call
    # and the private add step.
    text_list = list(texts)
    vectors = await self._aembed_documents(text_list)
    return self.__add(text_list, vectors, metadatas=metadatas, ids=ids)
|
||||
|
||||
def add_embeddings(
|
||||
self,
|
||||
text_embeddings: Iterable[Tuple[str, List[float]]],
|
||||
@@ -281,6 +330,42 @@ class FAISS(VectorStore):
|
||||
]
|
||||
return docs[:k]
|
||||
|
||||
async def asimilarity_search_with_score_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, Any]] = None,
    fetch_k: int = 20,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Return docs most similar to query asynchronously.

    Delegates to the synchronous ``similarity_search_with_score_by_vector``
    with the same arguments, run on the running loop's default executor.

    Args:
        embedding: Embedding vector to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
        fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                  Defaults to 20.
        **kwargs: kwargs to be passed to similarity search. Can include:
            score_threshold: Optional, a floating point value between 0 to 1 to
                filter the resulting set of retrieved docs

    Returns:
        List of documents most similar to the query text and L2 distance
        in float for each. Lower score represents more similarity.
    """
    # This is a temporary workaround to make the similarity search asynchronous.
    func = partial(
        self.similarity_search_with_score_by_vector,
        embedding,
        k=k,
        filter=filter,
        fetch_k=fetch_k,
        **kwargs,
    )
    # get_running_loop() is the recommended loop accessor inside a coroutine.
    return await asyncio.get_running_loop().run_in_executor(None, func)
|
||||
|
||||
def similarity_search_with_score(
|
||||
self,
|
||||
query: str,
|
||||
@@ -312,6 +397,37 @@ class FAISS(VectorStore):
|
||||
)
|
||||
return docs
|
||||
|
||||
async def asimilarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, Any]] = None,
    fetch_k: int = 20,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Return docs most similar to query, with scores, asynchronously.

    The query is embedded with ``_aembed_query`` and the vector is handed to
    ``asimilarity_search_with_score_by_vector``.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
        fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                  Defaults to 20.
        **kwargs: Forwarded to ``asimilarity_search_with_score_by_vector``.

    Returns:
        List of documents most similar to the query text with
        L2 distance in float. Lower score represents more similarity.
    """
    query_vector = await self._aembed_query(query)
    return await self.asimilarity_search_with_score_by_vector(
        query_vector,
        k,
        filter=filter,
        fetch_k=fetch_k,
        **kwargs,
    )
|
||||
|
||||
def similarity_search_by_vector(
|
||||
self,
|
||||
embedding: List[float],
|
||||
@@ -341,6 +457,35 @@ class FAISS(VectorStore):
|
||||
)
|
||||
return [doc for doc, _ in docs_and_scores]
|
||||
|
||||
async def asimilarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, Any]] = None,
    fetch_k: int = 20,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to embedding vector asynchronously.

    Awaits ``asimilarity_search_with_score_by_vector`` and discards the
    score from each (document, score) pair.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
        fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                  Defaults to 20.
        **kwargs: Forwarded to ``asimilarity_search_with_score_by_vector``.

    Returns:
        List of Documents most similar to the embedding.
    """
    scored = await self.asimilarity_search_with_score_by_vector(
        embedding,
        k,
        filter=filter,
        fetch_k=fetch_k,
        **kwargs,
    )
    return [document for document, _score in scored]
|
||||
|
||||
def similarity_search(
|
||||
self,
|
||||
query: str,
|
||||
@@ -366,6 +511,31 @@ class FAISS(VectorStore):
|
||||
)
|
||||
return [doc for doc, _ in docs_and_scores]
|
||||
|
||||
async def asimilarity_search(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, Any]] = None,
    fetch_k: int = 20,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to query asynchronously.

    Awaits ``asimilarity_search_with_score`` and discards the score from
    each (document, score) pair.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
        fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                  Defaults to 20.
        **kwargs: Forwarded to ``asimilarity_search_with_score``.

    Returns:
        List of Documents most similar to the query.
    """
    scored = await self.asimilarity_search_with_score(
        query, k, filter=filter, fetch_k=fetch_k, **kwargs
    )
    return [document for document, _score in scored]
|
||||
|
||||
def max_marginal_relevance_search_with_score_by_vector(
|
||||
self,
|
||||
embedding: List[float],
|
||||
@@ -438,6 +608,45 @@ class FAISS(VectorStore):
|
||||
docs_and_scores.append((doc, score))
|
||||
return docs_and_scores
|
||||
|
||||
async def amax_marginal_relevance_search_with_score_by_vector(
    self,
    embedding: List[float],
    *,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[Dict[str, Any]] = None,
) -> List[Tuple[Document, float]]:
    """Return docs and their similarity scores selected using the maximal marginal
        relevance asynchronously.

    Maximal marginal relevance optimizes for similarity to query AND diversity
    among selected documents.

    Delegates to the synchronous
    ``max_marginal_relevance_search_with_score_by_vector`` with the same
    arguments, run on the running loop's default executor.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch before filtering to
                 pass to MMR algorithm.
        lambda_mult: Number between 0 and 1 that determines the degree
                    of diversity among the results with 0 corresponding
                    to maximum diversity and 1 to minimum diversity.
                    Defaults to 0.5.
        filter: Optional metadata filter. Defaults to None.

    Returns:
        List of Documents and similarity scores selected by maximal marginal
            relevance and score for each.
    """
    # This is a temporary workaround to make the similarity search asynchronous.
    func = partial(
        self.max_marginal_relevance_search_with_score_by_vector,
        embedding,
        k=k,
        fetch_k=fetch_k,
        lambda_mult=lambda_mult,
        filter=filter,
    )
    # get_running_loop() is the recommended loop accessor inside a coroutine.
    return await asyncio.get_running_loop().run_in_executor(None, func)
|
||||
|
||||
def max_marginal_relevance_search_by_vector(
|
||||
self,
|
||||
embedding: List[float],
|
||||
@@ -469,6 +678,39 @@ class FAISS(VectorStore):
|
||||
)
|
||||
return [doc for doc, _ in docs_and_scores]
|
||||
|
||||
async def amax_marginal_relevance_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance asynchronously.

    Maximal marginal relevance optimizes for similarity to query AND diversity
    among selected documents.

    Awaits ``amax_marginal_relevance_search_with_score_by_vector`` and
    discards the score from each (document, score) pair.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch before filtering to
                 pass to MMR algorithm.
        lambda_mult: Number between 0 and 1 that determines the degree
                    of diversity among the results with 0 corresponding
                    to maximum diversity and 1 to minimum diversity.
                    Defaults to 0.5.
        filter: Optional metadata filter. Defaults to None.
        **kwargs: Accepted but not forwarded to the scored search.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    scored = await self.amax_marginal_relevance_search_with_score_by_vector(
        embedding,
        k=k,
        fetch_k=fetch_k,
        lambda_mult=lambda_mult,
        filter=filter,
    )
    return [document for document, _score in scored]
|
||||
|
||||
def max_marginal_relevance_search(
|
||||
self,
|
||||
query: str,
|
||||
@@ -506,6 +748,43 @@ class FAISS(VectorStore):
|
||||
)
|
||||
return docs
|
||||
|
||||
async def amax_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance asynchronously.

    Maximal marginal relevance optimizes for similarity to query AND diversity
    among selected documents.

    The query is embedded with ``_aembed_query`` and the vector is handed to
    ``amax_marginal_relevance_search_by_vector``.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch before filtering (if needed) to
                 pass to MMR algorithm.
        lambda_mult: Number between 0 and 1 that determines the degree
                    of diversity among the results with 0 corresponding
                    to maximum diversity and 1 to minimum diversity.
                    Defaults to 0.5.
        filter: Optional metadata filter. Defaults to None.
        **kwargs: Forwarded to ``amax_marginal_relevance_search_by_vector``.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    query_vector = await self._aembed_query(query)
    return await self.amax_marginal_relevance_search_by_vector(
        query_vector,
        k=k,
        fetch_k=fetch_k,
        lambda_mult=lambda_mult,
        filter=filter,
        **kwargs,
    )
|
||||
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
||||
"""Delete by ID. These are the IDs in the vectorstore.
|
||||
|
||||
@@ -639,6 +918,43 @@ class FAISS(VectorStore):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@classmethod
async def afrom_texts(
    cls,
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> FAISS:
    """Construct FAISS wrapper from raw documents asynchronously.

    This is a user friendly interface that:
        1. Embeds documents.
        2. Creates an in memory docstore
        3. Initializes the FAISS database

    This is intended to be a quick way to get started.

    Args:
        texts: The raw texts to embed and store.
        embedding: Embeddings object; its ``aembed_documents`` is awaited
            to embed ``texts``.
        metadatas: Optional list of metadatas associated with the texts.
        ids: Optional list of unique IDs.
        **kwargs: Forwarded to the private ``__from`` constructor helper.

    Returns:
        The value returned by the private ``__from`` constructor helper.

    Example:
        .. code-block:: python

            from langchain.vectorstores import FAISS
            from langchain.embeddings import OpenAIEmbeddings

            embeddings = OpenAIEmbeddings()
            faiss = await FAISS.afrom_texts(texts, embeddings)
    """
    # Annotations use typing.List / typing.Optional like the rest of the
    # file, so they stay evaluable on interpreters without PEP 585/604.
    embeddings = await embedding.aembed_documents(texts)
    return cls.__from(
        texts,
        embeddings,
        embedding,
        metadatas=metadatas,
        ids=ids,
        **kwargs,
    )
|
||||
|
||||
@classmethod
|
||||
def from_embeddings(
|
||||
cls,
|
||||
@@ -679,6 +995,24 @@ class FAISS(VectorStore):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@classmethod
async def afrom_embeddings(
    cls,
    text_embeddings: Iterable[Tuple[str, List[float]]],
    embedding: Embeddings,
    metadatas: Optional[Iterable[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> FAISS:
    """Construct FAISS wrapper from (text, embedding) pairs asynchronously.

    This coroutine awaits nothing itself: it calls the synchronous
    ``from_embeddings`` with the same arguments and returns its result.

    Args:
        text_embeddings: Iterable of (text, embedding vector) pairs.
        embedding: Embeddings object passed through to ``from_embeddings``.
        metadatas: Optional metadatas passed through to ``from_embeddings``.
        ids: Optional list of unique IDs.
        **kwargs: Forwarded to ``from_embeddings``.

    Returns:
        The value returned by ``from_embeddings``.
    """
    store = cls.from_embeddings(
        text_embeddings,
        embedding,
        metadatas=metadatas,
        ids=ids,
        **kwargs,
    )
    return store
|
||||
|
||||
def save_local(self, folder_path: str, index_name: str = "index") -> None:
|
||||
"""Save FAISS index, docstore, and index_to_docstore_id to disk.
|
||||
|
||||
@@ -715,6 +1049,7 @@ class FAISS(VectorStore):
|
||||
and index_to_docstore_id from.
|
||||
embeddings: Embeddings to use when generating queries
|
||||
index_name: for saving with a specific index file name
|
||||
asynchronous: whether to use async version or not
|
||||
"""
|
||||
path = Path(folder_path)
|
||||
# load index separately since it is not picklable
|
||||
@@ -798,3 +1133,32 @@ class FAISS(VectorStore):
|
||||
(doc, relevance_score_fn(score)) for doc, score in docs_and_scores
|
||||
]
|
||||
return docs_and_rel_scores
|
||||
|
||||
async def _asimilarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
k: int = 4,
|
||||
filter: Optional[Dict[str, Any]] = None,
|
||||
fetch_k: int = 20,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Return docs and their similarity scores on a scale from 0 to 1."""
|
||||
# Pop score threshold so that only relevancy scores, not raw scores, are
|
||||
# filtered.
|
||||
relevance_score_fn = self._select_relevance_score_fn()
|
||||
if relevance_score_fn is None:
|
||||
raise ValueError(
|
||||
"normalize_score_fn must be provided to"
|
||||
" FAISS constructor to normalize scores"
|
||||
)
|
||||
docs_and_scores = await self.asimilarity_search_with_score(
|
||||
query,
|
||||
k=k,
|
||||
filter=filter,
|
||||
fetch_k=fetch_k,
|
||||
**kwargs,
|
||||
)
|
||||
docs_and_rel_scores = [
|
||||
(doc, relevance_score_fn(score)) for doc, score in docs_and_scores
|
||||
]
|
||||
return docs_and_rel_scores
|
||||
|
@@ -30,6 +30,25 @@ def test_faiss() -> None:
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_afrom_texts() -> None:
    """Build a store with ``afrom_texts`` and query it with ``asimilarity_search``."""
    texts = ["foo", "bar", "baz"]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings())
    id_for_index = docsearch.index_to_docstore_id
    # One document per input text, keyed by the id assigned to its index.
    expected_docstore = InMemoryDocstore(
        {
            id_for_index[position]: Document(page_content=text)
            for position, text in enumerate(texts)
        }
    )
    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
    hits = await docsearch.asimilarity_search("foo", k=1)
    assert hits == [Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_vector_sim() -> None:
|
||||
"""Test vector similarity."""
|
||||
@@ -49,6 +68,26 @@ def test_faiss_vector_sim() -> None:
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_vector_sim() -> None:
    """Search by a precomputed query vector with ``asimilarity_search_by_vector``."""
    texts = ["foo", "bar", "baz"]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings())
    id_for_index = docsearch.index_to_docstore_id
    # One document per input text, keyed by the id assigned to its index.
    expected_docstore = InMemoryDocstore(
        {
            id_for_index[position]: Document(page_content=text)
            for position, text in enumerate(texts)
        }
    )
    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    hits = await docsearch.asimilarity_search_by_vector(query_vec, k=1)
    assert hits == [Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_vector_sim_with_score_threshold() -> None:
|
||||
"""Test vector similarity."""
|
||||
@@ -68,6 +107,28 @@ def test_faiss_vector_sim_with_score_threshold() -> None:
|
||||
assert output == [Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_vector_async_sim_with_score_threshold() -> None:
    """Async vector search with ``score_threshold=0.2`` keeps only "foo" of k=2."""
    texts = ["foo", "bar", "baz"]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings())
    id_for_index = docsearch.index_to_docstore_id
    # One document per input text, keyed by the id assigned to its index.
    expected_docstore = InMemoryDocstore(
        {
            id_for_index[position]: Document(page_content=text)
            for position, text in enumerate(texts)
        }
    )
    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    hits = await docsearch.asimilarity_search_by_vector(
        query_vec, k=2, score_threshold=0.2
    )
    assert hits == [Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_similarity_search_with_score_by_vector() -> None:
|
||||
"""Test vector similarity with score by vector."""
|
||||
@@ -88,6 +149,27 @@ def test_similarity_search_with_score_by_vector() -> None:
|
||||
assert output[0][0] == Document(page_content="foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_similarity_async_search_with_score_by_vector() -> None:
    """Async scored vector search with k=1 returns a single hit for "foo"."""
    texts = ["foo", "bar", "baz"]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings())
    id_for_index = docsearch.index_to_docstore_id
    # One document per input text, keyed by the id assigned to its index.
    expected_docstore = InMemoryDocstore(
        {
            id_for_index[position]: Document(page_content=text)
            for position, text in enumerate(texts)
        }
    )
    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    scored_hits = await docsearch.asimilarity_search_with_score_by_vector(
        query_vec, k=1
    )
    assert len(scored_hits) == 1
    assert scored_hits[0][0] == Document(page_content="foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_similarity_search_with_score_by_vector_with_score_threshold() -> None:
|
||||
"""Test vector similarity with score by vector."""
|
||||
@@ -113,6 +195,32 @@ def test_similarity_search_with_score_by_vector_with_score_threshold() -> None:
|
||||
assert output[0][1] < 0.2
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_sim_asearch_with_score_by_vector_with_score_threshold() -> None:
    """Async scored vector search with a 0.2 threshold keeps one hit under 0.2."""
    texts = ["foo", "bar", "baz"]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings())
    id_for_index = docsearch.index_to_docstore_id
    # One document per input text, keyed by the id assigned to its index.
    expected_docstore = InMemoryDocstore(
        {
            id_for_index[position]: Document(page_content=text)
            for position, text in enumerate(texts)
        }
    )
    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    scored_hits = await docsearch.asimilarity_search_with_score_by_vector(
        query_vec,
        k=2,
        score_threshold=0.2,
    )
    assert len(scored_hits) == 1
    top_doc, top_score = scored_hits[0][0], scored_hits[0][1]
    assert top_doc == Document(page_content="foo")
    assert top_score < 0.2
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_mmr() -> None:
|
||||
texts = ["foo", "foo", "fou", "foy"]
|
||||
@@ -128,6 +236,22 @@ def test_faiss_mmr() -> None:
|
||||
assert output[1][0] != Document(page_content="foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_mmr() -> None:
    """Async MMR search by vector returns every doc when k exceeds the store size."""
    texts = ["foo", "foo", "fou", "foy"]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings())
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    # make sure we can have k > docstore size
    scored_hits = await docsearch.amax_marginal_relevance_search_with_score_by_vector(
        query_vec, k=10, lambda_mult=0.1
    )
    foo_doc = Document(page_content="foo")
    assert len(scored_hits) == len(texts)
    assert scored_hits[0][0] == foo_doc
    assert scored_hits[0][1] == 0.0
    assert scored_hits[1][0] != foo_doc
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_mmr_with_metadatas() -> None:
|
||||
texts = ["foo", "foo", "fou", "foy"]
|
||||
@@ -143,6 +267,22 @@ def test_faiss_mmr_with_metadatas() -> None:
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_mmr_with_metadatas() -> None:
    """Async MMR search by vector on a store built with per-text metadata."""
    texts = ["foo", "foo", "fou", "foy"]
    metadatas = [{"page": page} for page, _ in enumerate(texts)]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas)
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    scored_hits = await docsearch.amax_marginal_relevance_search_with_score_by_vector(
        query_vec, k=10, lambda_mult=0.1
    )
    first_foo = Document(page_content="foo", metadata={"page": 0})
    assert len(scored_hits) == len(texts)
    assert scored_hits[0][0] == first_foo
    assert scored_hits[0][1] == 0.0
    assert scored_hits[1][0] != first_foo
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_mmr_with_metadatas_and_filter() -> None:
|
||||
texts = ["foo", "foo", "fou", "foy"]
|
||||
@@ -157,6 +297,21 @@ def test_faiss_mmr_with_metadatas_and_filter() -> None:
|
||||
assert output[0][1] == 0.0
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_mmr_with_metadatas_and_filter() -> None:
    """Async MMR search by vector with a ``{"page": 1}`` filter yields one doc."""
    texts = ["foo", "foo", "fou", "foy"]
    metadatas = [{"page": page} for page, _ in enumerate(texts)]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas)
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    scored_hits = await docsearch.amax_marginal_relevance_search_with_score_by_vector(
        query_vec, k=10, lambda_mult=0.1, filter={"page": 1}
    )
    assert len(scored_hits) == 1
    only_doc, only_score = scored_hits[0][0], scored_hits[0][1]
    assert only_doc == Document(page_content="foo", metadata={"page": 1})
    assert only_score == 0.0
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_mmr_with_metadatas_and_list_filter() -> None:
|
||||
texts = ["foo", "foo", "fou", "foy"]
|
||||
@@ -172,6 +327,22 @@ def test_faiss_mmr_with_metadatas_and_list_filter() -> None:
|
||||
assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_mmr_with_metadatas_and_list_filter() -> None:
    """Async MMR search by vector with a list-valued ``page`` filter."""
    texts = ["foo", "foo", "fou", "foy"]
    # The previous `if i <= 3 else {"page": 3}` guard could never take its
    # else branch for four texts (indices 0..3), so each text simply gets
    # its own page number.
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas)
    query_vec = await FakeEmbeddings().aembed_query(text="foo")
    output = await docsearch.amax_marginal_relevance_search_with_score_by_vector(
        query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]}
    )
    # Pages 0, 1 and 2 match the filter; page 3 is excluded.
    assert len(output) == 3
    assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
    assert output[0][1] == 0.0
    assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_with_metadatas() -> None:
|
||||
"""Test end to end construction and search."""
|
||||
@@ -196,6 +367,31 @@ def test_faiss_with_metadatas() -> None:
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_with_metadatas() -> None:
    """Build a store asynchronously with metadata, then search it.

    Verifies that the docstore holds one document per input text (keyed by
    the ids recorded in ``index_to_docstore_id``) and that a top-1 search
    for "foo" returns the page-0 document.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]
    store = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas)

    # Expected docstore: position i in the index maps to the i-th text/metadata.
    expected_docstore = InMemoryDocstore(
        {
            store.index_to_docstore_id[pos]: Document(
                page_content=text, metadata={"page": pos}
            )
            for pos, text in enumerate(texts)
        }
    )
    assert store.docstore.__dict__ == expected_docstore.__dict__

    found = await store.asimilarity_search("foo", k=1)
    assert found == [Document(page_content="foo", metadata={"page": 0})]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_with_metadatas_and_filter() -> None:
|
||||
texts = ["foo", "bar", "baz"]
|
||||
@@ -219,6 +415,30 @@ def test_faiss_with_metadatas_and_filter() -> None:
|
||||
assert output == [Document(page_content="bar", metadata={"page": 1})]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_with_metadatas_and_filter() -> None:
    """Async similarity search restricted by a scalar metadata filter.

    After checking the docstore contents, a query for "foo" filtered to
    ``page == 1`` must return the "bar" document.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": idx} for idx, _ in enumerate(texts)]
    store = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas)

    # Expected docstore: position i in the index maps to the i-th text/metadata.
    expected_entries = {}
    for pos, text in enumerate(texts):
        doc_id = store.index_to_docstore_id[pos]
        expected_entries[doc_id] = Document(page_content=text, metadata={"page": pos})
    expected_docstore = InMemoryDocstore(expected_entries)
    assert store.docstore.__dict__ == expected_docstore.__dict__

    found = await store.asimilarity_search("foo", k=1, filter={"page": 1})
    assert found == [Document(page_content="bar", metadata={"page": 1})]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_with_metadatas_and_list_filter() -> None:
|
||||
texts = ["foo", "bar", "baz", "foo", "qux"]
|
||||
@@ -248,6 +468,36 @@ def test_faiss_with_metadatas_and_list_filter() -> None:
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_with_metadatas_and_list_filter() -> None:
    """Async similarity search restricted by a list-valued metadata filter.

    Five texts are indexed with pages 0, 1, 2, 3, 3. After checking the
    docstore contents, a query for "foor" filtered to pages 0-2 must return
    the page-0 "foo" document.
    """
    texts = ["foo", "bar", "baz", "foo", "qux"]
    # Page numbers are capped at 3, so the last two documents share page 3.
    metadatas = [{"page": min(idx, 3)} for idx in range(len(texts))]
    store = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas)

    expected_pairs = [
        ("foo", 0),
        ("bar", 1),
        ("baz", 2),
        ("foo", 3),
        ("qux", 3),
    ]
    expected_docstore = InMemoryDocstore(
        {
            store.index_to_docstore_id[pos]: Document(
                page_content=text, metadata={"page": page}
            )
            for pos, (text, page) in enumerate(expected_pairs)
        }
    )
    assert store.docstore.__dict__ == expected_docstore.__dict__

    found = await store.asimilarity_search("foor", k=1, filter={"page": [0, 1, 2]})
    assert found == [Document(page_content="foo", metadata={"page": 0})]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_search_not_found() -> None:
|
||||
"""Test what happens when document is not found."""
|
||||
@@ -259,6 +509,18 @@ def test_faiss_search_not_found() -> None:
|
||||
docsearch.similarity_search("foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_search_not_found() -> None:
    """Async search raises ``ValueError`` when the docstore lacks the document."""
    store = await FAISS.afrom_texts(["foo", "bar", "baz"], FakeEmbeddings())

    # Swap in an empty docstore so the index points at ids that no longer exist.
    store.docstore = InMemoryDocstore({})

    with pytest.raises(ValueError):
        await store.asimilarity_search("foo")
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_add_texts() -> None:
|
||||
"""Test end to end adding of texts."""
|
||||
@@ -271,14 +533,36 @@ def test_faiss_add_texts() -> None:
|
||||
assert output == [Document(page_content="foo"), Document(page_content="foo")]
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_add_texts() -> None:
    """Add a text asynchronously to an existing store and search for it.

    After adding a second "foo", a top-2 search for "foo" must return two
    "foo" documents.
    """
    # Initial store with three distinct texts.
    store = await FAISS.afrom_texts(["foo", "bar", "baz"], FakeEmbeddings())

    # Add a duplicate of an already-indexed text.
    await store.aadd_texts(["foo"])

    found = await store.asimilarity_search("foo", k=2)
    expected = [Document(page_content="foo"), Document(page_content="foo")]
    assert found == expected
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
def test_faiss_add_texts_not_supported() -> None:
    """Test adding of texts to a docstore that doesn't support it.

    The store is built directly around a ``FakeDocstore``; calling
    ``add_texts`` on it is expected to raise ``ValueError``.
    """
    # Only one store is constructed: the earlier variant that passed
    # ``FakeEmbeddings().embed_query`` was immediately overwritten and unused.
    docsearch = FAISS(FakeEmbeddings(), None, FakeDocstore(), {})
    with pytest.raises(ValueError):
        docsearch.add_texts(["foo"])
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_add_texts_not_supported() -> None:
    """Async ``aadd_texts`` raises ``ValueError`` on a ``FakeDocstore``-backed store."""
    unsupported_docstore = FakeDocstore()
    store = FAISS(FakeEmbeddings(), None, unsupported_docstore, {})

    with pytest.raises(ValueError):
        await store.aadd_texts(["foo"])
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_local_save_load() -> None:
|
||||
"""Test end to end serialization."""
|
||||
@@ -291,6 +575,19 @@ def test_faiss_local_save_load() -> None:
|
||||
assert new_docsearch.index is not None
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_local_save_load() -> None:
    """Test end to end serialization.

    A store built asynchronously is saved into a temporary directory and
    loaded back; the reloaded store must have a non-``None`` index.
    """
    texts = ["foo", "bar", "baz"]
    docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings())
    # ``datetime.utcnow()`` is deprecated since Python 3.12; the timezone-aware
    # call below formats to the same UTC timestamp string.
    temp_timestamp = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y%m%d-%H%M%S"
    )
    with tempfile.TemporaryDirectory(suffix="_" + temp_timestamp + "/") as temp_folder:
        docsearch.save_local(temp_folder)
        new_docsearch = FAISS.load_local(temp_folder, FakeEmbeddings())
    assert new_docsearch.index is not None
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_similarity_search_with_relevance_scores() -> None:
|
||||
"""Test the similarity search with normalized similarities."""
|
||||
@@ -306,6 +603,22 @@ def test_faiss_similarity_search_with_relevance_scores() -> None:
|
||||
assert score == 1.0
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_similarity_search_with_relevance_scores() -> None:
    """Async relevance-score search using a custom score normaliser.

    The store is built with ``relevance_score_fn = 1 - score / sqrt(2)``;
    the top hit for "foo" must be the "foo" document with relevance 1.0.
    """
    store = await FAISS.afrom_texts(
        ["foo", "bar", "baz"],
        FakeEmbeddings(),
        relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2),
    )

    scored_hits = await store.asimilarity_search_with_relevance_scores("foo", k=1)

    best_doc, best_relevance = scored_hits[0]
    assert best_doc == Document(page_content="foo")
    assert best_relevance == 1.0
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_similarity_search_with_relevance_scores_with_threshold() -> None:
|
||||
"""Test the similarity search with normalized similarities with score threshold."""
|
||||
@@ -324,6 +637,25 @@ def test_faiss_similarity_search_with_relevance_scores_with_threshold() -> None:
|
||||
assert score == 1.0
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_asimilarity_search_with_relevance_scores_with_threshold() -> None:
    """Async relevance-score search with a ``score_threshold`` cut-off.

    Two results are requested, but with a threshold of 0.5 only one is
    expected: the "foo" document with relevance 1.0.
    """
    store = await FAISS.afrom_texts(
        ["foo", "bar", "baz"],
        FakeEmbeddings(),
        relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2),
    )

    scored_hits = await store.asimilarity_search_with_relevance_scores(
        "foo", k=2, score_threshold=0.5
    )

    assert len(scored_hits) == 1
    best_doc, best_relevance = scored_hits[0]
    assert best_doc == Document(page_content="foo")
    assert best_relevance == 1.0
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_faiss_invalid_normalize_fn() -> None:
|
||||
"""Test the similarity search with normalized similarities."""
|
||||
@@ -335,6 +667,18 @@ def test_faiss_invalid_normalize_fn() -> None:
|
||||
docsearch.similarity_search_with_relevance_scores("foo", k=1)
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_faiss_async_invalid_normalize_fn() -> None:
    """Async relevance-score search warns when the normaliser returns 2.0.

    The store is built with a ``relevance_score_fn`` that always returns 2.0;
    the search must emit a warning matching "scores must be between".
    """
    store = await FAISS.afrom_texts(
        ["foo", "bar", "baz"],
        FakeEmbeddings(),
        relevance_score_fn=lambda _: 2.0,
    )

    with pytest.warns(Warning, match="scores must be between"):
        await store.asimilarity_search_with_relevance_scores("foo", k=1)
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_missing_normalize_score_fn() -> None:
|
||||
"""Test doesn't perform similarity search without a valid distance strategy."""
|
||||
@@ -344,6 +688,18 @@ def test_missing_normalize_score_fn() -> None:
|
||||
faiss_instance.similarity_search_with_relevance_scores("foo", k=2)
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_async_missing_normalize_score_fn() -> None:
    """Async relevance-score search raises ``ValueError`` for an unknown strategy.

    The store is created with ``distance_strategy="fake"``, so the
    relevance-score search is expected to fail.
    """
    store = await FAISS.afrom_texts(
        ["foo", "bar", "baz"],
        FakeEmbeddings(),
        distance_strategy="fake",
    )

    with pytest.raises(ValueError):
        await store.asimilarity_search_with_relevance_scores("foo", k=2)
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
|
||||
def test_delete() -> None:
|
||||
"""Test the similarity search with normalized similarities."""
|
||||
@@ -354,3 +710,18 @@ def test_delete() -> None:
|
||||
result = docsearch.similarity_search("bar", k=2)
|
||||
assert sorted([d.page_content for d in result]) == ["baz", "foo"]
|
||||
assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]}
|
||||
|
||||
|
||||
@pytest.mark.requires("faiss")
@pytest.mark.asyncio
async def test_async_delete() -> None:
    """Delete one document from an asynchronously built store.

    After removing id "b", a top-2 search must return only "foo" and "baz",
    and ``index_to_docstore_id`` must map positions 0 and 1 to the two
    remaining ids.
    """
    ids = ["a", "b", "c"]
    store = await FAISS.afrom_texts(["foo", "bar", "baz"], FakeEmbeddings(), ids=ids)

    # Remove only the middle document ("bar").
    store.delete([ids[1]])

    remaining = await store.asimilarity_search("bar", k=2)
    remaining_texts = sorted(doc.page_content for doc in remaining)
    assert remaining_texts == ["baz", "foo"]
    assert store.index_to_docstore_id == {0: ids[0], 1: ids[2]}
|
||||
|
Reference in New Issue
Block a user