diff --git a/docs/modules/indexes/vectorstores/examples/chroma.ipynb b/docs/modules/indexes/vectorstores/examples/chroma.ipynb index 70416c02207..d0e09157400 100644 --- a/docs/modules/indexes/vectorstores/examples/chroma.ipynb +++ b/docs/modules/indexes/vectorstores/examples/chroma.ipynb @@ -151,6 +151,15 @@ "## Similarity search with score" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "346347d7", + "metadata": {}, + "source": [ + "The returned distance score is cosine distance. Therefore, a lower score is better." + ] + }, { "cell_type": "code", "execution_count": 10, diff --git a/docs/modules/indexes/vectorstores/examples/docarray_hnsw.ipynb b/docs/modules/indexes/vectorstores/examples/docarray_hnsw.ipynb index 94f6b952b46..8221d133a1c 100644 --- a/docs/modules/indexes/vectorstores/examples/docarray_hnsw.ipynb +++ b/docs/modules/indexes/vectorstores/examples/docarray_hnsw.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "2ce41f46-5711-4311-b04d-2fe233ac5b1b", "metadata": {}, @@ -13,6 +14,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7ee37d28", "metadata": {}, @@ -55,6 +57,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8dbb6de2", "metadata": { @@ -98,6 +101,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ed6f905b-4853-4a44-9730-614aa8e22b78", "metadata": {}, @@ -145,6 +149,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3febb987-e903-416f-af26-6897d84c8d61", "metadata": {}, @@ -152,6 +157,15 @@ "### Similarity search with score" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bb1df11a", + "metadata": {}, + "source": [ + "The returned distance score is cosine distance. Therefore, a lower score is better." 
+ ] + }, { "cell_type": "code", "execution_count": 7, diff --git a/docs/modules/indexes/vectorstores/examples/docarray_in_memory.ipynb b/docs/modules/indexes/vectorstores/examples/docarray_in_memory.ipynb index 306439eae6f..12919c3b5d4 100644 --- a/docs/modules/indexes/vectorstores/examples/docarray_in_memory.ipynb +++ b/docs/modules/indexes/vectorstores/examples/docarray_in_memory.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "a3afefb0-7e99-4912-a222-c6b186da11af", "metadata": {}, @@ -13,6 +14,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "5031a3ec", "metadata": {}, @@ -54,6 +56,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6e57a389-f637-4b8f-9ab2-759ae7485f78", "metadata": {}, @@ -95,6 +98,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "efbb6684-3846-4332-a624-ddd4d75844c1", "metadata": {}, @@ -142,6 +146,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "43896697-f99e-47b6-9117-47a25e9afa9c", "metadata": {}, @@ -149,6 +154,15 @@ "### Similarity search with score" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "414a9bc9", + "metadata": {}, + "source": [ + "The returned distance score is cosine distance. Therefore, a lower score is better." 
+ ] + }, { "cell_type": "code", "execution_count": 7, diff --git a/docs/modules/indexes/vectorstores/examples/faiss.ipynb b/docs/modules/indexes/vectorstores/examples/faiss.ipynb index 78000c2c0c5..d967068e13c 100644 --- a/docs/modules/indexes/vectorstores/examples/faiss.ipynb +++ b/docs/modules/indexes/vectorstores/examples/faiss.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "683953b3", "metadata": {}, @@ -29,6 +30,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "38237514-b3fa-44a4-9cff-30cd6bf50073", "metadata": {}, @@ -45,7 +47,7 @@ }, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ "OpenAI API Key: ········\n" @@ -137,12 +139,13 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f13473b5", "metadata": {}, "source": [ "## Similarity Search with score\n", - "There are some FAISS specific methods. One of them is `similarity_search_with_score`, which allows you to return not only the documents but also the similarity score of the query to them." + "There are some FAISS specific methods. One of them is `similarity_search_with_score`, which allows you to return not only the documents but also the distance score of the query to them. The returned distance score is L2 distance. Therefore, a lower score is better." 
] }, { @@ -178,6 +181,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f34420cf", "metadata": {}, @@ -197,6 +201,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "31bda7fd", "metadata": {}, @@ -257,6 +262,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "57da60d4", "metadata": {}, diff --git a/docs/modules/indexes/vectorstores/examples/myscale.ipynb b/docs/modules/indexes/vectorstores/examples/myscale.ipynb index 12505987575..15ae20621b2 100644 --- a/docs/modules/indexes/vectorstores/examples/myscale.ipynb +++ b/docs/modules/indexes/vectorstores/examples/myscale.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "683953b3", "metadata": {}, @@ -13,6 +14,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "43ead5d5-2c1f-4dce-a69a-cb00e4f9d6f0", "metadata": {}, @@ -33,6 +35,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "15a1d477-9cdb-4d82-b019-96951ecb2b72", "metadata": {}, @@ -54,6 +57,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a9d16fa3", "metadata": {}, @@ -169,6 +173,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "e3a8b105", "metadata": {}, @@ -187,6 +192,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f59360c0", "metadata": {}, @@ -231,6 +237,24 @@ "docsearch = MyScale.from_documents(docs, embeddings)" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "8d867b05", + "metadata": {}, + "source": [ + "### Similarity search with score" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9ec25cc5", + "metadata": {}, + "source": [ + "The returned distance score is cosine distance. Therefore, a lower score is better." 
+ ] + }, { "cell_type": "code", "execution_count": 16, @@ -257,6 +281,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a359ed74", "metadata": {}, diff --git a/docs/modules/indexes/vectorstores/examples/qdrant.ipynb b/docs/modules/indexes/vectorstores/examples/qdrant.ipynb index 526d08a9e9e..c53c46e9c8a 100644 --- a/docs/modules/indexes/vectorstores/examples/qdrant.ipynb +++ b/docs/modules/indexes/vectorstores/examples/qdrant.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "683953b3", "metadata": {}, @@ -33,6 +34,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7b2f111b-357a-4f42-9730-ef0603bdc1b5", "metadata": {}, @@ -49,7 +51,7 @@ }, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ "OpenAI API Key: ········\n" @@ -104,6 +106,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "eeead681", "metadata": {}, @@ -140,6 +143,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "59f0b954", "metadata": {}, @@ -170,6 +174,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "749658ce", "metadata": {}, @@ -200,6 +205,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "c9e21ce9", "metadata": {}, @@ -231,6 +237,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "93540013", "metadata": {}, @@ -279,6 +286,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1f9215c8", "metadata": { @@ -341,13 +349,15 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1bda9bf5", "metadata": {}, "source": [ "## Similarity search with score\n", "\n", - "Sometimes we might want to perform the search, but also obtain a relevancy score to know how good is a particular result." + "Sometimes we might want to perform the search, but also obtain a relevancy score to know how good is a particular result. \n", + "The returned distance score is cosine distance. Therefore, a lower score is better." 
] }, { @@ -400,6 +410,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "525e3582", "metadata": {}, @@ -410,6 +421,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1c2c58dc", "metadata": {}, @@ -423,6 +435,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "c58c30bf", "metadata": { @@ -503,6 +516,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "691a82d6", "metadata": {}, @@ -540,6 +554,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0c851b4f", "metadata": {}, @@ -602,6 +617,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0358ecde", "metadata": {}, diff --git a/docs/modules/indexes/vectorstores/examples/supabase.ipynb b/docs/modules/indexes/vectorstores/examples/supabase.ipynb index 5cb264ff3cf..b653df69b2a 100644 --- a/docs/modules/indexes/vectorstores/examples/supabase.ipynb +++ b/docs/modules/indexes/vectorstores/examples/supabase.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "683953b3", "metadata": {}, @@ -9,6 +10,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cc80fa84-1f2f-48b4-bd39-3e6412f012f1", "metadata": {}, @@ -85,6 +87,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "69bff365-3039-4ff8-a641-aa190166179d", "metadata": {}, @@ -236,6 +239,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "18152965", "metadata": {}, @@ -243,6 +247,15 @@ "## Similarity search with score\n" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ea13e80a", + "metadata": {}, + "source": [ + "The returned distance score is cosine distance. Therefore, a lower score is better." 
+ ] + }, { "cell_type": "code", "execution_count": 9, @@ -276,6 +289,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "794a7552", "metadata": {}, diff --git a/docs/modules/indexes/vectorstores/examples/vectara.ipynb b/docs/modules/indexes/vectorstores/examples/vectara.ipynb index 5b2dad0814b..6551f55cbc0 100644 --- a/docs/modules/indexes/vectorstores/examples/vectara.ipynb +++ b/docs/modules/indexes/vectorstores/examples/vectara.ipynb @@ -1,21 +1,23 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "683953b3", "metadata": {}, "source": [ "# Vectara\n", "\n", - ">[Vectara](https://Vectara.com/docs/) is a API platform for building LLM-powered applications. It provides a simple to use API for document indexing and query that is managed by Vectara and is optimized for performance and accuracy. \n", + ">[Vectara](https://vectara.com/) is an API platform for building LLM-powered applications. It provides a simple-to-use API for document indexing and query that is managed by Vectara and is optimized for performance and accuracy. \n", "\n", "\n", "This notebook shows how to use functionality related to the `Vectara` vector database. \n", "\n", - "See the [Vectara API documentation ](https://Vectara.com/docs/) for more information on how to use the API." + "See the [Vectara API documentation](https://docs.vectara.com/docs/) for more information on how to use the API." 
] }, { + "attachments": {}, "cell_type": "markdown", "id": "7b2f111b-357a-4f42-9730-ef0603bdc1b5", "metadata": {}, @@ -87,6 +89,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "eeead681", "metadata": {}, @@ -113,6 +116,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1f9215c8", "metadata": { @@ -169,6 +173,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1bda9bf5", "metadata": {}, @@ -222,6 +227,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "691a82d6", "metadata": {}, diff --git a/docs/modules/indexes/vectorstores/examples/weaviate.ipynb b/docs/modules/indexes/vectorstores/examples/weaviate.ipynb index e2494e38d9f..2b151716be1 100644 --- a/docs/modules/indexes/vectorstores/examples/weaviate.ipynb +++ b/docs/modules/indexes/vectorstores/examples/weaviate.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "683953b3", "metadata": {}, @@ -47,6 +48,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6b34828d-e627-4d85-aabd-eeb15d9f4b00", "metadata": {}, @@ -165,6 +167,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a15863ee", "metadata": {}, @@ -172,6 +175,16 @@ "## Similarity search with score" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "64e03db8", + "metadata": {}, + "source": [ + "Sometimes we might want to perform the search, but also obtain a relevancy score to know how good a particular result is. \n", + "The returned distance score is cosine distance. Therefore, a lower score is better." 
+ ] + }, { "cell_type": "code", "execution_count": 10, @@ -214,6 +227,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "05fd146c", "metadata": {}, diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py index 3c8e8239d8f..6b9be25eb85 100644 --- a/langchain/vectorstores/chroma.py +++ b/langchain/vectorstores/chroma.py @@ -217,8 +217,9 @@ class Chroma(VectorStore): filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. Returns: - List[Tuple[Document, float]]: List of documents most similar to the query - text with distance in float. + List[Tuple[Document, float]]: List of documents most similar to + the query text and cosine distance in float for each. + Lower score represents more similarity. """ if self._embedding_function is None: results = self.__query_collection( diff --git a/langchain/vectorstores/docarray/base.py b/langchain/vectorstores/docarray/base.py index d8b0f4b570a..18b4a01a35a 100644 --- a/langchain/vectorstores/docarray/base.py +++ b/langchain/vectorstores/docarray/base.py @@ -96,7 +96,9 @@ class DocArrayIndex(VectorStore, ABC): k: Number of Documents to return. Defaults to 4. Returns: - List of Documents most similar to the query and score for each. + List of documents most similar to the query text and + cosine distance in float for each. + Lower score represents more similarity. """ query_embedding = self.embedding.embed_query(query) query_doc = self.doc_cls(embedding=query_embedding) # type: ignore diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py index 26266e04c44..42f9cf1bcd0 100644 --- a/langchain/vectorstores/faiss.py +++ b/langchain/vectorstores/faiss.py @@ -189,7 +189,8 @@ class FAISS(VectorStore): k: Number of Documents to return. Defaults to 4. Returns: - List of Documents most similar to the query and score for each + List of documents most similar to the query text and L2 distance + in float for each. Lower score represents more similarity. 
""" faiss = dependable_faiss_import() vector = np.array([embedding], dtype=np.float32) @@ -218,7 +219,8 @@ class FAISS(VectorStore): k: Number of Documents to return. Defaults to 4. Returns: - List of Documents most similar to the query and score for each + List of documents most similar to the query text with + L2 distance in float. Lower score represents more similarity. """ embedding = self.embedding_function(query) docs = self.similarity_search_with_score_by_vector(embedding, k) diff --git a/langchain/vectorstores/myscale.py b/langchain/vectorstores/myscale.py index cc43aa7ee2e..fbea41ae267 100644 --- a/langchain/vectorstores/myscale.py +++ b/langchain/vectorstores/myscale.py @@ -404,7 +404,9 @@ class MyScale(VectorStore): alone. The default name for it is `metadata`. Returns: - List[Document]: List of documents + List[Tuple[Document, float]]: List of documents most similar to + the query text and cosine distance in float for each. + Lower score represents more similarity. """ q_str = self._build_qstr(self.embedding_function(query), k, where_str) try: diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py index 9c5f2f7a985..f1f16137d23 100644 --- a/langchain/vectorstores/qdrant.py +++ b/langchain/vectorstores/qdrant.py @@ -192,7 +192,9 @@ class Qdrant(VectorStore): filter: Filter by metadata. Defaults to None. Returns: - List of Documents most similar to the query and score for each. + List of documents most similar to the query text and cosine + distance in float for each. + Lower score represents more similarity. 
""" if filter is not None and isinstance(filter, dict): diff --git a/langchain/vectorstores/weaviate.py b/langchain/vectorstores/weaviate.py index 1ff98f3b4be..43501452053 100644 --- a/langchain/vectorstores/weaviate.py +++ b/langchain/vectorstores/weaviate.py @@ -314,6 +314,11 @@ class Weaviate(VectorStore): def similarity_search_with_score( self, query: str, k: int = 4, **kwargs: Any ) -> List[Tuple[Document, float]]: + """ + Return list of documents most similar to the query + text and cosine distance in float for each. + Lower score represents more similarity. + """ if self._embedding is None: raise ValueError( "_embedding cannot be None for similarity_search_with_score"