mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 20:58:25 +00:00
core[patch]: Update documentation for base retriever (#20345)
Updating in code documentation for base retriever to direct folks toward the .invoke and .ainvoke methods + explain how to implement
This commit is contained in:
parent
d2f4153fe6
commit
2900720cd3
@ -51,12 +51,48 @@ RetrieverOutputLike = Runnable[Any, RetrieverOutput]
|
|||||||
class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||||
"""Abstract base class for a Document retrieval system.
|
"""Abstract base class for a Document retrieval system.
|
||||||
|
|
||||||
A retrieval system is defined as something that can take string queries and return
|
|
||||||
the most 'relevant' Documents from some source.
|
|
||||||
|
|
||||||
Example:
|
A retrieval system is defined as something that can take string queries and return
|
||||||
|
the most 'relevant' Documents from some source.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
A retriever follows the standard Runnable interface, and should be used
|
||||||
|
via the standard runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.
|
||||||
|
|
||||||
|
Implementation:
|
||||||
|
|
||||||
|
When implementing a custom retriever, the class should implement
|
||||||
|
the `_get_relevant_documents` method to define the logic for retrieving documents.
|
||||||
|
|
||||||
|
Optionally, an async native implementation can be provided by overriding the
|
||||||
|
`_aget_relevant_documents` method.
|
||||||
|
|
||||||
|
Example: A retriever that returns the first 5 documents from a list of documents
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
|
from langchain_core import Document, BaseRetriever
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
class SimpleRetriever(BaseRetriever):
|
||||||
|
docs: List[Document]
|
||||||
|
k: int = 5
|
||||||
|
|
||||||
|
def _get_relevant_documents(self, query: str) -> List[Document]:
|
||||||
|
\"\"\"Return the first k documents from the list of documents\"\"\"
|
||||||
|
return self.docs[:self.k]
|
||||||
|
|
||||||
|
async def _aget_relevant_documents(self, query: str) -> List[Document]:
|
||||||
|
\"\"\"(Optional) async native implementation.\"\"\"
|
||||||
|
return self.docs[:self.k]
|
||||||
|
|
||||||
|
Example: A simple retriever based on a scikit-learn vectorizer
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from sklearn.metrics.pairwise import cosine_similarity
|
||||||
|
|
||||||
class TFIDFRetriever(BaseRetriever, BaseModel):
|
class TFIDFRetriever(BaseRetriever, BaseModel):
|
||||||
vectorizer: Any
|
vectorizer: Any
|
||||||
docs: List[Document]
|
docs: List[Document]
|
||||||
@ -66,9 +102,7 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|||||||
class Config:
|
class Config:
|
||||||
arbitrary_types_allowed = True
|
arbitrary_types_allowed = True
|
||||||
|
|
||||||
def get_relevant_documents(self, query: str) -> List[Document]:
|
def _get_relevant_documents(self, query: str) -> List[Document]:
|
||||||
from sklearn.metrics.pairwise import cosine_similarity
|
|
||||||
|
|
||||||
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
|
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
|
||||||
query_vec = self.vectorizer.transform([query])
|
query_vec = self.vectorizer.transform([query])
|
||||||
# Op -- (n_docs,1) -- Cosine Sim with each doc
|
# Op -- (n_docs,1) -- Cosine Sim with each doc
|
||||||
@ -137,6 +171,24 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|||||||
def invoke(
|
def invoke(
|
||||||
self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
|
self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
|
"""Invoke the retriever to get relevant documents.
|
||||||
|
|
||||||
|
Main entry point for synchronous retriever invocations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input: The query string
|
||||||
|
config: Configuration for the retriever
|
||||||
|
**kwargs: Additional arguments to pass to the retriever
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of relevant documents
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
retriever.invoke("query")
|
||||||
|
"""
|
||||||
config = ensure_config(config)
|
config = ensure_config(config)
|
||||||
return self.get_relevant_documents(
|
return self.get_relevant_documents(
|
||||||
input,
|
input,
|
||||||
@ -153,6 +205,24 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|||||||
config: Optional[RunnableConfig] = None,
|
config: Optional[RunnableConfig] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
|
"""Asynchronously invoke the retriever to get relevant documents.
|
||||||
|
|
||||||
|
Main entry point for asynchronous retriever invocations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input: The query string
|
||||||
|
config: Configuration for the retriever
|
||||||
|
**kwargs: Additional arguments to pass to the retriever
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of relevant documents
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
await retriever.ainvoke("query")
|
||||||
|
"""
|
||||||
config = ensure_config(config)
|
config = ensure_config(config)
|
||||||
return await self.aget_relevant_documents(
|
return await self.aget_relevant_documents(
|
||||||
input,
|
input,
|
||||||
@ -203,6 +273,10 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Retrieve documents relevant to a query.
|
"""Retrieve documents relevant to a query.
|
||||||
|
|
||||||
|
Users should favor using `.invoke` or `.batch` rather than
|
||||||
|
`get_relevant_documents` directly.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: string to find relevant documents for
|
query: string to find relevant documents for
|
||||||
callbacks: Callback manager or list of callbacks
|
callbacks: Callback manager or list of callbacks
|
||||||
@ -212,6 +286,8 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|||||||
metadata: Optional metadata associated with the retriever. Defaults to None
|
metadata: Optional metadata associated with the retriever. Defaults to None
|
||||||
This metadata will be associated with each call to this retriever,
|
This metadata will be associated with each call to this retriever,
|
||||||
and passed as arguments to the handlers defined in `callbacks`.
|
and passed as arguments to the handlers defined in `callbacks`.
|
||||||
|
run_name: Optional name for the run.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of relevant documents
|
List of relevant documents
|
||||||
"""
|
"""
|
||||||
@ -260,6 +336,10 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Asynchronously get documents relevant to a query.
|
"""Asynchronously get documents relevant to a query.
|
||||||
|
|
||||||
|
Users should favor using `.ainvoke` or `.abatch` rather than
|
||||||
|
`aget_relevant_documents` directly.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: string to find relevant documents for
|
query: string to find relevant documents for
|
||||||
callbacks: Callback manager or list of callbacks
|
callbacks: Callback manager or list of callbacks
|
||||||
@ -269,6 +349,8 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
|||||||
metadata: Optional metadata associated with the retriever. Defaults to None
|
metadata: Optional metadata associated with the retriever. Defaults to None
|
||||||
This metadata will be associated with each call to this retriever,
|
This metadata will be associated with each call to this retriever,
|
||||||
and passed as arguments to the handlers defined in `callbacks`.
|
and passed as arguments to the handlers defined in `callbacks`.
|
||||||
|
run_name: Optional name for the run.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of relevant documents
|
List of relevant documents
|
||||||
"""
|
"""
|
||||||
|
Loading…
Reference in New Issue
Block a user