mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 07:09:31 +00:00
Parent Doc Retriever (#9214)
2 things: - Implement the private method rather than the public one so callbacks are handled properly - Add search_kwargs (Open to not adding this if we are trying to deprecate this UX but seems like as a user i'd assume similar args to the vector store retriever. In fact some may assume this implements the same interface but I'm not dealing with that here) -
This commit is contained in:
parent
17ae2998e7
commit
7124f2ebfa
@ -1,7 +1,9 @@
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain.callbacks.base import Callbacks
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||
from langchain.schema.document import Document
|
||||
from langchain.schema.retriever import BaseRetriever
|
||||
from langchain.schema.storage import BaseStore
|
||||
@ -71,17 +73,20 @@ class ParentDocumentRetriever(BaseRetriever):
|
||||
parent_splitter: Optional[TextSplitter] = None
|
||||
"""The text splitter to use to create parent documents.
|
||||
If none, then the parent documents will be the raw documents passed in."""
|
||||
search_kwargs: dict = Field(default_factory=dict)
|
||||
"""Keyword arguments to pass to the search function."""
|
||||
|
||||
def get_relevant_documents(
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
callbacks: Callbacks = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
sub_docs = self.vectorstore.similarity_search(query)
|
||||
"""Get documents relevant to a query.
|
||||
Args:
|
||||
query: String to find relevant documents for
|
||||
run_manager: The callbacks handler to use
|
||||
Returns:
|
||||
List of relevant documents
|
||||
"""
|
||||
sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
|
||||
# We do this to maintain the order of the ids that are returned
|
||||
ids = []
|
||||
for d in sub_docs:
|
||||
|
Loading…
Reference in New Issue
Block a user