Improve performance when retrieving Notion DB pages (#6710)

This commit is contained in:
James Croft 2023-06-26 14:46:09 +02:00 committed by GitHub
parent ec8247ec59
commit ba622764cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -48,13 +48,13 @@ class NotionDBLoader(BaseLoader):
Returns:
List[Document]: List of documents.
"""
page_ids = self._retrieve_page_ids()
page_summaries = self._retrieve_page_summaries()
return list(self.load_page(page_id) for page_id in page_ids)
return list(self.load_page(page_summary) for page_summary in page_summaries)
def _retrieve_page_ids(
def _retrieve_page_summaries(
self, query_dict: Dict[str, Any] = {"page_size": 100}
) -> List[str]:
) -> List[Dict[str, Any]]:
"""Get all the pages from a Notion database."""
pages: List[Dict[str, Any]] = []
@@ -72,18 +72,16 @@ class NotionDBLoader(BaseLoader):
query_dict["start_cursor"] = data.get("next_cursor")
page_ids = [page["id"] for page in pages]
return pages
return page_ids
def load_page(self, page_id: str) -> Document:
def load_page(self, page_summary: Dict[str, Any]) -> Document:
"""Read a page."""
data = self._request(PAGE_URL.format(page_id=page_id))
page_id = page_summary["id"]
# load properties as metadata
metadata: Dict[str, Any] = {}
for prop_name, prop_data in data["properties"].items():
for prop_name, prop_data in page_summary["properties"].items():
prop_type = prop_data["type"]
if prop_type == "rich_text":