mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 22:29:51 +00:00
community[patch]: support query filters for NotionDBLoader (#17217)
- **Description:** Support filtering databases for the use case where developers do not want to query ALL entries within a DB.
- **Issue:** N/A
- **Dependencies:** N/A
- **Twitter handle:** I don't have Twitter, but feel free to tag my GitHub!

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent
e36bc379f2
commit
8562a1e7d4
@ -20,6 +20,17 @@ class NotionDBLoader(BaseLoader):
|
|||||||
database_id (str): Notion database id.
|
database_id (str): Notion database id.
|
||||||
request_timeout_sec (int): Timeout for Notion requests in seconds.
|
request_timeout_sec (int): Timeout for Notion requests in seconds.
|
||||||
Defaults to 10.
|
Defaults to 10.
|
||||||
|
filter_object (Dict[str, Any]): Filter object used to limit returned
|
||||||
|
entries based on specified criteria.
|
||||||
|
E.g.: {
|
||||||
|
"timestamp": "last_edited_time",
|
||||||
|
"last_edited_time": {
|
||||||
|
"on_or_after": "2024-02-07"
|
||||||
|
}
|
||||||
|
} -> will only return entries that were last edited
|
||||||
|
on or after 2024-02-07
|
||||||
|
Notion docs: https://developers.notion.com/reference/post-database-query-filter
|
||||||
|
Defaults to None, which will return ALL entries.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -27,6 +38,8 @@ class NotionDBLoader(BaseLoader):
|
|||||||
integration_token: str,
|
integration_token: str,
|
||||||
database_id: str,
|
database_id: str,
|
||||||
request_timeout_sec: Optional[int] = 10,
|
request_timeout_sec: Optional[int] = 10,
|
||||||
|
*,
|
||||||
|
filter_object: Optional[Dict[str, Any]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize with parameters."""
|
"""Initialize with parameters."""
|
||||||
if not integration_token:
|
if not integration_token:
|
||||||
@ -42,6 +55,7 @@ class NotionDBLoader(BaseLoader):
|
|||||||
"Notion-Version": "2022-06-28",
|
"Notion-Version": "2022-06-28",
|
||||||
}
|
}
|
||||||
self.request_timeout_sec = request_timeout_sec
|
self.request_timeout_sec = request_timeout_sec
|
||||||
|
self.filter_object = filter_object or {}
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
"""Load documents from the Notion database.
|
"""Load documents from the Notion database.
|
||||||
@ -55,7 +69,10 @@ class NotionDBLoader(BaseLoader):
|
|||||||
def _retrieve_page_summaries(
|
def _retrieve_page_summaries(
|
||||||
self, query_dict: Dict[str, Any] = {"page_size": 100}
|
self, query_dict: Dict[str, Any] = {"page_size": 100}
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
"""Get all the pages from a Notion database."""
|
"""
|
||||||
|
Get all the pages from a Notion database
|
||||||
|
OR filter based on specified criteria.
|
||||||
|
"""
|
||||||
pages: List[Dict[str, Any]] = []
|
pages: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@ -63,6 +80,7 @@ class NotionDBLoader(BaseLoader):
|
|||||||
DATABASE_URL.format(database_id=self.database_id),
|
DATABASE_URL.format(database_id=self.database_id),
|
||||||
method="POST",
|
method="POST",
|
||||||
query_dict=query_dict,
|
query_dict=query_dict,
|
||||||
|
filter_object=self.filter_object,
|
||||||
)
|
)
|
||||||
|
|
||||||
pages.extend(data.get("results"))
|
pages.extend(data.get("results"))
|
||||||
@ -182,13 +200,18 @@ class NotionDBLoader(BaseLoader):
|
|||||||
return "\n".join(result_lines_arr)
|
return "\n".join(result_lines_arr)
|
||||||
|
|
||||||
def _request(
|
def _request(
|
||||||
self, url: str, method: str = "GET", query_dict: Dict[str, Any] = {}
|
self,
|
||||||
|
url: str,
|
||||||
|
method: str = "GET",
|
||||||
|
query_dict: Dict[str, Any] = {},
|
||||||
|
*,
|
||||||
|
filter_object: Optional[Dict[str, Any]] = None,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
res = requests.request(
|
res = requests.request(
|
||||||
method,
|
method,
|
||||||
url,
|
url,
|
||||||
headers=self.headers,
|
headers=self.headers,
|
||||||
json=query_dict,
|
json={**query_dict, "filter": filter_object or {}},
|
||||||
timeout=self.request_timeout_sec,
|
timeout=self.request_timeout_sec,
|
||||||
)
|
)
|
||||||
res.raise_for_status()
|
res.raise_for_status()
|
||||||
|
Loading…
Reference in New Issue
Block a user