mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
community[minor]: Improve Cassandra VectorStore as_retriever (#22465)
The VectorStore API `as_retriever` does not explicitly expose the parameters `search_type` and `search_kwargs`, so they are not well documented. This PR improves `as_retriever` for the Cassandra VectorStore by making these parameters explicit. NB: An alternative would have been to modify `as_retriever` in `VectorStore` itself, but there is probably a good reason these parameters were not exposed in the first place. Is it because implementations may decide not to support them and instead use fixed values when creating the VectorStoreRetriever?
This commit is contained in:
parent
23bba18f92
commit
9a8fe58ebe
@ -24,7 +24,7 @@ if typing.TYPE_CHECKING:
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.vectorstores import VectorStore
|
||||
from langchain_core.vectorstores import VectorStore, VectorStoreRetriever
|
||||
|
||||
from langchain_community.utilities.cassandra import SetupMode
|
||||
from langchain_community.vectorstores.utils import maximal_marginal_relevance
|
||||
@ -1073,3 +1073,77 @@ class Cassandra(VectorStore):
|
||||
body_index_options=body_index_options,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def as_retriever(
    self,
    search_type: str = "similarity",
    search_kwargs: Optional[Dict[str, Any]] = None,
    tags: Optional[List[str]] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> VectorStoreRetriever:
    """Return VectorStoreRetriever initialized from this VectorStore.

    Args:
        search_type: Defines the type of search that
            the Retriever should perform.
            Can be "similarity" (default), "mmr", or
            "similarity_score_threshold".
        search_kwargs: Keyword arguments to pass to the
            search function. An empty dict is used when omitted.
            Can include things like:
                k: Amount of documents to return (Default: 4)
                score_threshold: Minimum relevance threshold
                    for similarity_score_threshold
                fetch_k: Amount of documents to pass to MMR algorithm
                    (Default: 20)
                lambda_mult: Diversity of results returned by MMR;
                    1 for minimum diversity and 0 for maximum.
                    (Default: 0.5)
                filter: Filter by document metadata
        tags: List of tags associated with the retriever. The tags
            returned by ``self._get_retriever_tags()`` are always
            appended after these; the caller's list is not modified.
        metadata: Metadata associated with the retriever.
        kwargs: Other arguments passed to the VectorStoreRetriever init.

    Returns:
        Retriever for VectorStore.

    Examples:

    .. code-block:: python

        # Retrieve more documents with higher diversity
        # Useful if your dataset has many similar documents
        docsearch.as_retriever(
            search_type="mmr",
            search_kwargs={'k': 6, 'lambda_mult': 0.25}
        )

        # Fetch more documents for the MMR algorithm to consider
        # But only return the top 5
        docsearch.as_retriever(
            search_type="mmr",
            search_kwargs={'k': 5, 'fetch_k': 50}
        )

        # Only retrieve documents that have a relevance score
        # Above a certain threshold
        docsearch.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={'score_threshold': 0.8}
        )

        # Only get the single most similar document from the dataset
        docsearch.as_retriever(search_kwargs={'k': 1})

        # Use a filter to only retrieve documents from a specific paper
        docsearch.as_retriever(
            search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
        )
    """
    # `+` binds tighter than `or`, so the previous expression
    # `tags or [] + self._get_retriever_tags()` discarded the store's own
    # tags whenever the caller supplied any. Build a fresh list that always
    # contains both, without aliasing the caller's list.
    _tags = [*(tags or []), *self._get_retriever_tags()]
    return VectorStoreRetriever(
        vectorstore=self,
        search_type=search_type,
        search_kwargs=search_kwargs or {},
        tags=_tags,
        metadata=metadata,
        **kwargs,
    )
|
||||
|
Loading…
Reference in New Issue
Block a user