From 66a4da8ad046544a4118f803186864af4f4ef0d8 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Mon, 13 May 2024 12:24:26 -0700 Subject: [PATCH] community[patch]: Improve Cassandra VectorStore docstrings (#21620) --- .../vectorstores/cassandra.py | 166 ++++++++++++++---- 1 file changed, 133 insertions(+), 33 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/cassandra.py b/libs/community/langchain_community/vectorstores/cassandra.py index 6f64000da4e..a20c1dc6561 100644 --- a/libs/community/langchain_community/vectorstores/cassandra.py +++ b/libs/community/langchain_community/vectorstores/cassandra.py @@ -33,35 +33,6 @@ CVST = TypeVar("CVST", bound="Cassandra") class Cassandra(VectorStore): - """Apache Cassandra(R) for vector-store workloads. - - To use it, you need a recent installation of the `cassio` library - and a Cassandra cluster / Astra DB instance supporting vector capabilities. - - Visit the cassio.org website for extensive quickstarts and code examples. - - Example: - .. code-block:: python - - from langchain_community.vectorstores import Cassandra - from langchain_community.embeddings.openai import OpenAIEmbeddings - - embeddings = OpenAIEmbeddings() - session = ... # create your Cassandra session object - keyspace = 'my_keyspace' # the keyspace should exist already - table_name = 'my_vector_store' - vectorstore = Cassandra(embeddings, session, keyspace, table_name) - - Args: - embedding: Embedding function to use. - session: Cassandra driver session. If not provided, it is resolved from cassio. - keyspace: Cassandra key space. If not provided, it is resolved from cassio. - table_name: Cassandra table (required). - ttl_seconds: Optional time-to-live for the added texts. - body_index_options: Optional options used to create the body index. - Eg. 
body_index_options = [cassio.table.cql.STANDARD_ANALYZER] - """ - _embedding_dimension: Union[int, None] def _get_embedding_dimension(self) -> int: @@ -89,6 +60,37 @@ class Cassandra(VectorStore): body_index_options: Optional[List[Tuple[str, Any]]] = None, setup_mode: SetupMode = SetupMode.SYNC, ) -> None: + """Apache Cassandra(R) for vector-store workloads. + + To use it, you need a recent installation of the `cassio` library + and a Cassandra cluster / Astra DB instance supporting vector capabilities. + + Visit the cassio.org website for extensive quickstarts and code examples. + + Example: + .. code-block:: python + + from langchain_community.vectorstores import Cassandra + from langchain_openai import OpenAIEmbeddings + + embeddings = OpenAIEmbeddings() + session = ... # create your Cassandra session object + keyspace = 'my_keyspace' # the keyspace should exist already + table_name = 'my_vector_store' + vectorstore = Cassandra(embeddings, session, keyspace, table_name) + + Args: + embedding: Embedding function to use. + session: Cassandra driver session. If not provided, it is resolved from + cassio. + keyspace: Cassandra key space. If not provided, it is resolved from cassio. + table_name: Cassandra table (required). + ttl_seconds: Optional time-to-live for the added texts. + body_index_options: Optional options used to create the body index. + Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER] + setup_mode: mode used to create the Cassandra table (SYNC, + ASYNC or OFF). + """ try: from cassio.table import MetadataVectorCassandraTable except (ImportError, ModuleNotFoundError): @@ -164,9 +166,19 @@ class Cassandra(VectorStore): await self.table.aclear() def delete_by_document_id(self, document_id: str) -> None: + """Delete by document ID. + + Args: + document_id: the document ID to delete. + """ return self.table.delete(row_id=document_id) async def adelete_by_document_id(self, document_id: str) -> None: + """Delete by document ID. 
+ + Args: + document_id: the document ID to delete. + """ return await self.table.adelete(row_id=document_id) def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: @@ -370,8 +382,8 @@ class Cassandra(VectorStore): """Return docs most similar to embedding vector. Args: - embedding (str): Embedding to look up documents similar to. - k (int): Number of Documents to return. Defaults to 4. + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. filter: Filter on the metadata to apply. body_search: Document textual search terms to apply. Only supported by Astra DB at the moment. @@ -399,6 +411,17 @@ class Cassandra(VectorStore): filter: Optional[Dict[str, str]] = None, body_search: Optional[Union[str, List[str]]] = None, ) -> List[Tuple[Document, float, str]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of (Document, score, id), the most similar to the query vector. + """ embedding_vector = self.embedding.embed_query(query) return self.similarity_search_with_score_id_by_vector( embedding=embedding_vector, @@ -414,6 +437,17 @@ class Cassandra(VectorStore): filter: Optional[Dict[str, str]] = None, body_search: Optional[Union[str, List[str]]] = None, ) -> List[Tuple[Document, float, str]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of (Document, score, id), the most similar to the query vector. 
+ """ embedding_vector = await self.embedding.aembed_query(query) return await self.asimilarity_search_with_score_id_by_vector( embedding=embedding_vector, @@ -461,8 +495,8 @@ class Cassandra(VectorStore): """Return docs most similar to embedding vector. Args: - embedding (str): Embedding to look up documents similar to. - k (int): Number of Documents to return. Defaults to 4. + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. filter: Filter on the metadata to apply. body_search: Document textual search terms to apply. Only supported by Astra DB at the moment. @@ -491,6 +525,17 @@ class Cassandra(VectorStore): body_search: Optional[Union[str, List[str]]] = None, **kwargs: Any, ) -> List[Document]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of Document, the most similar to the query vector. + """ embedding_vector = self.embedding.embed_query(query) return self.similarity_search_by_vector( embedding_vector, @@ -507,6 +552,17 @@ class Cassandra(VectorStore): body_search: Optional[Union[str, List[str]]] = None, **kwargs: Any, ) -> List[Document]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of Document, the most similar to the query vector. 
+ """ embedding_vector = await self.embedding.aembed_query(query) return await self.asimilarity_search_by_vector( embedding_vector, @@ -523,6 +579,17 @@ class Cassandra(VectorStore): body_search: Optional[Union[str, List[str]]] = None, **kwargs: Any, ) -> List[Document]: + """Return docs most similar to embedding vector. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of Document, the most similar to the query vector. + """ return [ doc for doc, _ in self.similarity_search_with_score_by_vector( @@ -541,6 +608,17 @@ class Cassandra(VectorStore): body_search: Optional[Union[str, List[str]]] = None, **kwargs: Any, ) -> List[Document]: + """Return docs most similar to embedding vector. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of Document, the most similar to the query vector. + """ return [ doc for doc, _ in await self.asimilarity_search_with_score_by_vector( @@ -558,6 +636,17 @@ class Cassandra(VectorStore): filter: Optional[Dict[str, str]] = None, body_search: Optional[Union[str, List[str]]] = None, ) -> List[Tuple[Document, float]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of (Document, score), the most similar to the query vector. 
+ """ embedding_vector = self.embedding.embed_query(query) return self.similarity_search_with_score_by_vector( embedding_vector, @@ -573,6 +662,17 @@ class Cassandra(VectorStore): filter: Optional[Dict[str, str]] = None, body_search: Optional[Union[str, List[str]]] = None, ) -> List[Tuple[Document, float]]: + """Return docs most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + filter: Filter on the metadata to apply. + body_search: Document textual search terms to apply. + Only supported by Astra DB at the moment. + Returns: + List of (Document, score), the most similar to the query vector. + """ embedding_vector = await self.embedding.aembed_query(query) return await self.asimilarity_search_with_score_by_vector( embedding_vector,