Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-01 17:13:22 +00:00)
Improve performance when retrieving Notion DB pages (#6710)
This commit is contained in:
Parent: ec8247ec59
Commit: ba622764cb
@@ -48,13 +48,13 @@ class NotionDBLoader(BaseLoader):
         Returns:
             List[Document]: List of documents.
         """
-        page_ids = self._retrieve_page_ids()
+        page_summaries = self._retrieve_page_summaries()
 
-        return list(self.load_page(page_id) for page_id in page_ids)
+        return list(self.load_page(page_summary) for page_summary in page_summaries)
 
-    def _retrieve_page_ids(
+    def _retrieve_page_summaries(
         self, query_dict: Dict[str, Any] = {"page_size": 100}
-    ) -> List[str]:
+    ) -> List[Dict[str, Any]]:
         """Get all the pages from a Notion database."""
         pages: List[Dict[str, Any]] = []
 
@@ -72,18 +72,16 @@ class NotionDBLoader(BaseLoader):
 
             query_dict["start_cursor"] = data.get("next_cursor")
 
-        page_ids = [page["id"] for page in pages]
+        return pages
 
-        return page_ids
-
-    def load_page(self, page_id: str) -> Document:
+    def load_page(self, page_summary: Dict[str, Any]) -> Document:
         """Read a page."""
-        data = self._request(PAGE_URL.format(page_id=page_id))
+        page_id = page_summary["id"]
 
         # load properties as metadata
         metadata: Dict[str, Any] = {}
 
-        for prop_name, prop_data in data["properties"].items():
+        for prop_name, prop_data in page_summary["properties"].items():
             prop_type = prop_data["type"]
 
             if prop_type == "rich_text":
Loading…
Reference in New Issue
Block a user