community[minor]: Improve Cassandra VectorStore as_retriever (#22465)

The Vectorstore's API `as_retriever` doesn't expose explicitly the parameters `search_type` and `search_kwargs` and so these are not well documented. This PR improves `as_retriever` for the Cassandra VectorStore by making these parameters explicit. NB: An alternative would have been to modify `as_retriever` in `Vectorstore`. But there's probably a good reason these were not exposed in the first place ? Is it because implementations may decide to not support them and have fixed values when creating the VectorStoreRetriever ?
2025-08-02 01:23:07 +00:00 · 2024-06-04 15:51:17 +02:00 · 2024-06-04 15:51:17 +02:00 · 9a8fe58ebe
commit 9a8fe58ebe
parent 23bba18f92
1 changed files with 75 additions and 1 deletions
--- a/libs/community/langchain_community/vectorstores/cassandra.py
+++ b/libs/community/langchain_community/vectorstores/cassandra.py
@ -24,7 +24,7 @@ if typing.TYPE_CHECKING:

 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
-from langchain_core.vectorstores import VectorStore
+from langchain_core.vectorstores import VectorStore, VectorStoreRetriever

 from langchain_community.utilities.cassandra import SetupMode
 from langchain_community.vectorstores.utils import maximal_marginal_relevance
@ -1073,3 +1073,77 @@ class Cassandra(VectorStore):
            body_index_options=body_index_options,
            **kwargs,
        )
+
+    def as_retriever(
+        self,
+        search_type: str = "similarity",
+        search_kwargs: Optional[Dict[str, Any]] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> VectorStoreRetriever:
+        """Return VectorStoreRetriever initialized from this VectorStore.
+
+        Args:
+            search_type: Defines the type of search that
+                the Retriever should perform.
+                Can be "similarity" (default), "mmr", or
+                "similarity_score_threshold".
+            search_kwargs: Keyword arguments to pass to the
+                search function. Can include things like:
+                    k: Amount of documents to return (Default: 4)
+                    score_threshold: Minimum relevance threshold
+                        for similarity_score_threshold
+                    fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
+                    lambda_mult: Diversity of results returned by MMR;
+                        1 for minimum diversity and 0 for maximum. (Default: 0.5)
+                    filter: Filter by document metadata
+            tags: List of tags associated with the retriever.
+            metadata: Metadata associated with the retriever.
+            kwargs: Other arguments passed to the VectorStoreRetriever init.
+
+        Returns:
+            Retriever for VectorStore.
+
+        Examples:
+
+        .. code-block:: python
+
+            # Retrieve more documents with higher diversity
+            # Useful if your dataset has many similar documents
+            docsearch.as_retriever(
+                search_type="mmr",
+                search_kwargs={'k': 6, 'lambda_mult': 0.25}
+            )
+
+            # Fetch more documents for the MMR algorithm to consider
+            # But only return the top 5
+            docsearch.as_retriever(
+                search_type="mmr",
+                search_kwargs={'k': 5, 'fetch_k': 50}
+            )
+
+            # Only retrieve documents that have a relevance score
+            # Above a certain threshold
+            docsearch.as_retriever(
+                search_type="similarity_score_threshold",
+                search_kwargs={'score_threshold': 0.8}
+            )
+
+            # Only get the single most similar document from the dataset
+            docsearch.as_retriever(search_kwargs={'k': 1})
+
+            # Use a filter to only retrieve documents from a specific paper
+            docsearch.as_retriever(
+                search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
+            )
+        """
+        _tags = tags or [] + self._get_retriever_tags()
+        return VectorStoreRetriever(
+            vectorstore=self,
+            search_type=search_type,
+            search_kwargs=search_kwargs or {},
+            tags=_tags,
+            metadata=metadata,
+            **kwargs,
+        )