From c34ad8c163ef7b0e7805d6c031f05646f0dba549 Mon Sep 17 00:00:00 2001
From: Christophe Bornet <cbornet@hotmail.com>
Date: Wed, 5 Jun 2024 16:23:44 +0200
Subject: [PATCH] core[patch]: Improve VectorStore API doc (#22547)

---
 libs/core/langchain_core/vectorstores.py | 158 +++++++++++++++++++----
 1 file changed, 132 insertions(+), 26 deletions(-)

diff --git a/libs/core/langchain_core/vectorstores.py b/libs/core/langchain_core/vectorstores.py
index aed6bdd6caf..04b281262f5 100644
--- a/libs/core/langchain_core/vectorstores.py
+++ b/libs/core/langchain_core/vectorstores.py
@@ -71,7 +71,7 @@ class VectorStore(ABC):
         Args:
             texts: Iterable of strings to add to the vectorstore.
             metadatas: Optional list of metadatas associated with the texts.
-            kwargs: vectorstore specific parameters
+            **kwargs: vectorstore specific parameters.
 
         Returns:
             List of ids from adding the texts into the vectorstore.
@@ -120,17 +120,26 @@ class VectorStore(ABC):
         metadatas: Optional[List[dict]] = None,
         **kwargs: Any,
     ) -> List[str]:
-        """Run more texts through the embeddings and add to the vectorstore."""
+        """Run more texts through the embeddings and add to the vectorstore.
+
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Optional list of metadatas associated with the texts.
+            **kwargs: vectorstore specific parameters.
+
+        Returns:
+            List of ids from adding the texts into the vectorstore.
+        """
         return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)
 
     def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
         """Run more documents through the embeddings and add to the vectorstore.
 
         Args:
-            documents (List[Document]: Documents to add to the vectorstore.
+            documents: Documents to add to the vectorstore.
 
         Returns:
-            List[str]: List of IDs of the added texts.
+            List of IDs of the added texts.
         """
         # TODO: Handle the case where the user doesn't provide ids on the Collection
         texts = [doc.page_content for doc in documents]
@@ -143,17 +152,24 @@ class VectorStore(ABC):
         """Run more documents through the embeddings and add to the vectorstore.
 
         Args:
-            documents (List[Document]: Documents to add to the vectorstore.
+            documents: Documents to add to the vectorstore.
 
         Returns:
-            List[str]: List of IDs of the added texts.
+            List of IDs of the added texts.
         """
         texts = [doc.page_content for doc in documents]
         metadatas = [doc.metadata for doc in documents]
         return await self.aadd_texts(texts, metadatas, **kwargs)
 
     def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
-        """Return docs most similar to query using specified search type."""
+        """Return docs most similar to query using specified search type.
+
+        Args:
+            query: Input text.
+            search_type: Type of search to perform. Can be "similarity",
+                "mmr", or "similarity_score_threshold".
+            **kwargs: Arguments to pass to the search method.
+        """
         if search_type == "similarity":
             return self.similarity_search(query, **kwargs)
         elif search_type == "similarity_score_threshold":
@@ -172,7 +188,14 @@ class VectorStore(ABC):
     async def asearch(
         self, query: str, search_type: str, **kwargs: Any
     ) -> List[Document]:
-        """Return docs most similar to query using specified search type."""
+        """Return docs most similar to query using specified search type.
+
+        Args:
+            query: Input text.
+            search_type: Type of search to perform. Can be "similarity",
+                "mmr", or "similarity_score_threshold".
+            **kwargs: Arguments to pass to the search method.
+        """
         if search_type == "similarity":
             return await self.asimilarity_search(query, **kwargs)
         elif search_type == "similarity_score_threshold":
@@ -192,7 +215,15 @@ class VectorStore(ABC):
     def similarity_search(
         self, query: str, k: int = 4, **kwargs: Any
     ) -> List[Document]:
-        """Return docs most similar to query."""
+        """Return docs most similar to query.
+
+        Args:
+            query: Input text.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query.
+        """
 
     @staticmethod
     def _euclidean_relevance_score_fn(distance: float) -> float:
@@ -239,13 +270,21 @@ class VectorStore(ABC):
     def similarity_search_with_score(
         self, *args: Any, **kwargs: Any
     ) -> List[Tuple[Document, float]]:
-        """Run similarity search with distance."""
+        """Run similarity search with distance.
+
+        Returns:
+            List of Tuples of (doc, similarity_score).
+        """
         raise NotImplementedError
 
     async def asimilarity_search_with_score(
         self, *args: Any, **kwargs: Any
     ) -> List[Tuple[Document, float]]:
-        """Run similarity search with distance asynchronously."""
+        """Run similarity search with distance.
+
+        Returns:
+            List of Tuples of (doc, similarity_score).
+        """
 
         # This is a temporary workaround to make the similarity search
         # asynchronous. The proper solution is to make the similarity search
@@ -268,7 +307,7 @@ class VectorStore(ABC):
         0 is dissimilar, 1 is most similar.
 
         Args:
-            query: input text
+            query: Input text.
             k: Number of Documents to return. Defaults to 4.
             **kwargs: kwargs to be passed to similarity search. Should include:
                 score_threshold: Optional, a floating point value between 0 to 1 to
@@ -288,14 +327,14 @@ class VectorStore(ABC):
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """
-        Default async similarity search with relevance scores. Modify if necessary
+        Default similarity search with relevance scores. Modify if necessary
         in subclass.
         Return docs and relevance scores in the range [0, 1].
 
         0 is dissimilar, 1 is most similar.
 
         Args:
-            query: input text
+            query: Input text.
             k: Number of Documents to return. Defaults to 4.
             **kwargs: kwargs to be passed to similarity search. Should include:
                 score_threshold: Optional, a floating point value between 0 to 1 to
@@ -319,7 +358,7 @@ class VectorStore(ABC):
         0 is dissimilar, 1 is most similar.
 
         Args:
-            query: input text
+            query: Input text.
             k: Number of Documents to return. Defaults to 4.
             **kwargs: kwargs to be passed to similarity search. Should include:
                 score_threshold: Optional, a floating point value between 0 to 1 to
@@ -361,12 +400,12 @@ class VectorStore(ABC):
         k: int = 4,
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
-        """Return docs and relevance scores in the range [0, 1], asynchronously.
+        """Return docs and relevance scores in the range [0, 1].
 
         0 is dissimilar, 1 is most similar.
 
         Args:
-            query: input text
+            query: Input text.
             k: Number of Documents to return. Defaults to 4.
             **kwargs: kwargs to be passed to similarity search. Should include:
                 score_threshold: Optional, a floating point value between 0 to 1 to
@@ -405,7 +444,15 @@ class VectorStore(ABC):
     async def asimilarity_search(
         self, query: str, k: int = 4, **kwargs: Any
     ) -> List[Document]:
-        """Return docs most similar to query."""
+        """Return docs most similar to query.
+
+        Args:
+            query: Input text.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query.
+        """
 
         # This is a temporary workaround to make the similarity search
         # asynchronous. The proper solution is to make the similarity search
@@ -429,7 +476,15 @@ class VectorStore(ABC):
     async def asimilarity_search_by_vector(
         self, embedding: List[float], k: int = 4, **kwargs: Any
     ) -> List[Document]:
-        """Return docs most similar to embedding vector."""
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+
+        Returns:
+            List of Documents most similar to the query vector.
+        """
 
         # This is a temporary workaround to make the similarity search
         # asynchronous. The proper solution is to make the similarity search
@@ -536,7 +591,22 @@ class VectorStore(ABC):
         lambda_mult: float = 0.5,
         **kwargs: Any,
     ) -> List[Document]:
-        """Return docs selected using the maximal marginal relevance."""
+        """Return docs selected using the maximal marginal relevance.
+
+        Maximal marginal relevance optimizes for similarity to query AND diversity
+        among selected documents.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+            lambda_mult: Number between 0 and 1 that determines the degree
+                        of diversity among the results with 0 corresponding
+                        to maximum diversity and 1 to minimum diversity.
+                        Defaults to 0.5.
+        Returns:
+            List of Documents selected by maximal marginal relevance.
+        """
         return await run_in_executor(
             None,
             self.max_marginal_relevance_search_by_vector,
@@ -554,7 +624,12 @@ class VectorStore(ABC):
         embedding: Embeddings,
         **kwargs: Any,
     ) -> VST:
-        """Return VectorStore initialized from documents and embeddings."""
+        """Return VectorStore initialized from documents and embeddings.
+
+        Args:
+            documents: List of Documents to add to the vectorstore.
+            embedding: Embedding function to use.
+        """
         texts = [d.page_content for d in documents]
         metadatas = [d.metadata for d in documents]
         return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
@@ -566,7 +641,12 @@ class VectorStore(ABC):
         embedding: Embeddings,
         **kwargs: Any,
     ) -> VST:
-        """Return VectorStore initialized from documents and embeddings."""
+        """Return VectorStore initialized from documents and embeddings.
+
+        Args:
+            documents: List of Documents to add to the vectorstore.
+            embedding: Embedding function to use.
+        """
         texts = [d.page_content for d in documents]
         metadatas = [d.metadata for d in documents]
         return await cls.afrom_texts(texts, embedding, metadatas=metadatas, **kwargs)
@@ -580,7 +660,13 @@ class VectorStore(ABC):
         metadatas: Optional[List[dict]] = None,
         **kwargs: Any,
     ) -> VST:
-        """Return VectorStore initialized from texts and embeddings."""
+        """Return VectorStore initialized from texts and embeddings.
+
+        Args:
+            texts: Texts to add to the vectorstore.
+            embedding: Embedding function to use.
+            metadatas: Optional list of metadatas associated with the texts.
+        """
 
     @classmethod
     async def afrom_texts(
@@ -590,7 +676,13 @@ class VectorStore(ABC):
         metadatas: Optional[List[dict]] = None,
         **kwargs: Any,
     ) -> VST:
-        """Return VectorStore initialized from texts and embeddings."""
+        """Return VectorStore initialized from texts and embeddings.
+
+        Args:
+            texts: Texts to add to the vectorstore.
+            embedding: Embedding function to use.
+            metadatas: Optional list of metadatas associated with the texts.
+        """
         return await run_in_executor(
             None, cls.from_texts, texts, embedding, metadatas, **kwargs
         )
@@ -741,11 +833,25 @@ class VectorStoreRetriever(BaseRetriever):
         return docs
 
     def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
-        """Add documents to vectorstore."""
+        """Add documents to the vectorstore.
+
+        Args:
+            documents: Documents to add to the vectorstore.
+
+        Returns:
+            List of IDs of the added texts.
+        """
         return self.vectorstore.add_documents(documents, **kwargs)
 
     async def aadd_documents(
         self, documents: List[Document], **kwargs: Any
     ) -> List[str]:
-        """Add documents to vectorstore."""
+        """Add documents to the vectorstore.
+
+        Args:
+            documents: Documents to add to the vectorstore.
+
+        Returns:
+            List of IDs of the added texts.
+        """
         return await self.vectorstore.aadd_documents(documents, **kwargs)