From 66a4da8ad046544a4118f803186864af4f4ef0d8 Mon Sep 17 00:00:00 2001
From: Christophe Bornet <cbornet@hotmail.com>
Date: Mon, 13 May 2024 12:24:26 -0700
Subject: [PATCH] community[patch]: Improve Cassandra VectorStore docstrings
 (#21620)

---
 .../vectorstores/cassandra.py                 | 166 ++++++++++++++----
 1 file changed, 133 insertions(+), 33 deletions(-)

diff --git a/libs/community/langchain_community/vectorstores/cassandra.py b/libs/community/langchain_community/vectorstores/cassandra.py
index 6f64000da4e..a20c1dc6561 100644
--- a/libs/community/langchain_community/vectorstores/cassandra.py
+++ b/libs/community/langchain_community/vectorstores/cassandra.py
@@ -33,35 +33,6 @@ CVST = TypeVar("CVST", bound="Cassandra")
 
 
 class Cassandra(VectorStore):
-    """Apache Cassandra(R) for vector-store workloads.
-
-    To use it, you need a recent installation of the `cassio` library
-    and a Cassandra cluster / Astra DB instance supporting vector capabilities.
-
-    Visit the cassio.org website for extensive quickstarts and code examples.
-
-    Example:
-        .. code-block:: python
-
-                from langchain_community.vectorstores import Cassandra
-                from langchain_community.embeddings.openai import OpenAIEmbeddings
-
-                embeddings = OpenAIEmbeddings()
-                session = ...             # create your Cassandra session object
-                keyspace = 'my_keyspace'  # the keyspace should exist already
-                table_name = 'my_vector_store'
-                vectorstore = Cassandra(embeddings, session, keyspace, table_name)
-
-    Args:
-        embedding: Embedding function to use.
-        session: Cassandra driver session. If not provided, it is resolved from cassio.
-        keyspace: Cassandra key space. If not provided, it is resolved from cassio.
-        table_name: Cassandra table (required).
-        ttl_seconds: Optional time-to-live for the added texts.
-        body_index_options: Optional options used to create the body index.
-            Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]
-    """
-
     _embedding_dimension: Union[int, None]
 
     def _get_embedding_dimension(self) -> int:
@@ -89,6 +60,37 @@ class Cassandra(VectorStore):
         body_index_options: Optional[List[Tuple[str, Any]]] = None,
         setup_mode: SetupMode = SetupMode.SYNC,
     ) -> None:
+        """Apache Cassandra(R) for vector-store workloads.
+
+        To use it, you need a recent installation of the `cassio` library
+        and a Cassandra cluster / Astra DB instance supporting vector capabilities.
+
+        Visit the cassio.org website for extensive quickstarts and code examples.
+
+        Example:
+            .. code-block:: python
+
+                    from langchain_community.vectorstores import Cassandra
+                    from langchain_openai import OpenAIEmbeddings
+
+                    embeddings = OpenAIEmbeddings()
+                    session = ...             # create your Cassandra session object
+                    keyspace = 'my_keyspace'  # the keyspace should exist already
+                    table_name = 'my_vector_store'
+                    vectorstore = Cassandra(embeddings, session, keyspace, table_name)
+
+        Args:
+            embedding: Embedding function to use.
+            session: Cassandra driver session. If not provided, it is resolved from
+                cassio.
+            keyspace: Cassandra keyspace. If not provided, it is resolved from cassio.
+            table_name: Cassandra table (required).
+            ttl_seconds: Optional time-to-live for the added texts.
+            body_index_options: Optional options used to create the body index.
+                E.g. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]
+            setup_mode: Mode used to create the Cassandra table (SYNC,
+                ASYNC or OFF).
+        """
         try:
             from cassio.table import MetadataVectorCassandraTable
         except (ImportError, ModuleNotFoundError):
@@ -164,9 +166,19 @@ class Cassandra(VectorStore):
         await self.table.aclear()
 
     def delete_by_document_id(self, document_id: str) -> None:
+        """Delete by document ID.
+
+        Args:
+            document_id: The document ID to delete.
+        """
         return self.table.delete(row_id=document_id)
 
     async def adelete_by_document_id(self, document_id: str) -> None:
+        """Delete by document ID.
+
+        Args:
+            document_id: The document ID to delete.
+        """
         return await self.table.adelete(row_id=document_id)
 
     def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
@@ -370,8 +382,8 @@ class Cassandra(VectorStore):
         """Return docs most similar to embedding vector.
 
         Args:
-            embedding (str): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
             filter: Filter on the metadata to apply.
             body_search: Document textual search terms to apply.
                 Only supported by Astra DB at the moment.
@@ -399,6 +411,17 @@ class Cassandra(VectorStore):
         filter: Optional[Dict[str, str]] = None,
         body_search: Optional[Union[str, List[str]]] = None,
     ) -> List[Tuple[Document, float, str]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of (Document, score, id), the most similar to the query vector.
+        """
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_with_score_id_by_vector(
             embedding=embedding_vector,
@@ -414,6 +437,17 @@ class Cassandra(VectorStore):
         filter: Optional[Dict[str, str]] = None,
         body_search: Optional[Union[str, List[str]]] = None,
     ) -> List[Tuple[Document, float, str]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of (Document, score, id), the most similar to the query vector.
+        """
         embedding_vector = await self.embedding.aembed_query(query)
         return await self.asimilarity_search_with_score_id_by_vector(
             embedding=embedding_vector,
@@ -461,8 +495,8 @@ class Cassandra(VectorStore):
         """Return docs most similar to embedding vector.
 
         Args:
-            embedding (str): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
             filter: Filter on the metadata to apply.
             body_search: Document textual search terms to apply.
                 Only supported by Astra DB at the moment.
@@ -491,6 +525,17 @@ class Cassandra(VectorStore):
         body_search: Optional[Union[str, List[str]]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of Document, the most similar to the query vector.
+        """
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_by_vector(
             embedding_vector,
@@ -507,6 +552,17 @@ class Cassandra(VectorStore):
         body_search: Optional[Union[str, List[str]]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of Document, the most similar to the query vector.
+        """
         embedding_vector = await self.embedding.aembed_query(query)
         return await self.asimilarity_search_by_vector(
             embedding_vector,
@@ -523,6 +579,17 @@ class Cassandra(VectorStore):
         body_search: Optional[Union[str, List[str]]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of Document, the most similar to the query vector.
+        """
         return [
             doc
             for doc, _ in self.similarity_search_with_score_by_vector(
@@ -541,6 +608,17 @@ class Cassandra(VectorStore):
         body_search: Optional[Union[str, List[str]]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of Document, the most similar to the query vector.
+        """
         return [
             doc
             for doc, _ in await self.asimilarity_search_with_score_by_vector(
@@ -558,6 +636,17 @@ class Cassandra(VectorStore):
         filter: Optional[Dict[str, str]] = None,
         body_search: Optional[Union[str, List[str]]] = None,
     ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of (Document, score), the most similar to the query vector.
+        """
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_with_score_by_vector(
             embedding_vector,
@@ -573,6 +662,17 @@ class Cassandra(VectorStore):
         filter: Optional[Dict[str, str]] = None,
         body_search: Optional[Union[str, List[str]]] = None,
     ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Text to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+            body_search: Document textual search terms to apply.
+                Only supported by Astra DB at the moment.
+        Returns:
+            List of (Document, score), the most similar to the query vector.
+        """
         embedding_vector = await self.embedding.aembed_query(query)
         return await self.asimilarity_search_with_score_by_vector(
             embedding_vector,