From bfc59c1d261124b9d80edcb98b4cdb4dc77147dc Mon Sep 17 00:00:00 2001 From: BottlePumpkin <61003485+BottlePumpkin@users.noreply.github.com> Date: Thu, 1 Aug 2024 22:55:40 +0900 Subject: [PATCH] community: Fix KeyError in NotionDB loader when 'name' is missing (#24224) Thank you for contributing to LangChain! - [x] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [x] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. **Description:** This PR fixes a KeyError in NotionDBLoader when the "name" key is missing in the "people" property. **Issue:** Fixes #24223 **Dependencies:** None --------- Co-authored-by: Chester Curme --- .../document_loaders/notiondb.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/notiondb.py b/libs/community/langchain_community/document_loaders/notiondb.py index 76215727bd9..d5c03da16d3 100644 --- a/libs/community/langchain_community/document_loaders/notiondb.py +++ b/libs/community/langchain_community/document_loaders/notiondb.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Dict, List, Optional import requests @@ -10,6 +11,10 @@ DATABASE_URL = NOTION_BASE_URL + "/databases/{database_id}/query" PAGE_URL = NOTION_BASE_URL + "/pages/{page_id}" BLOCK_URL = NOTION_BASE_URL + "/blocks/{block_id}/children" +# Configure logging +logging.basicConfig(level=logging.WARNING) +logger = logging.getLogger(__name__) + class NotionDBLoader(BaseLoader): """Load from `Notion DB`. @@ -63,7 +68,6 @@ class NotionDBLoader(BaseLoader): List[Document]: List of documents. """ page_summaries = self._retrieve_page_summaries() - return list(self.load_page(page_summary) for page_summary in page_summaries) def _retrieve_page_summaries( @@ -133,11 +137,16 @@ class NotionDBLoader(BaseLoader): elif prop_type == "status": value = prop_data["status"]["name"] if prop_data["status"] else None elif prop_type == "people": - value = ( - [item["name"] for item in prop_data["people"]] - if prop_data["people"] - else [] - ) + value = [] + if prop_data["people"]: + for item in prop_data["people"]: + name = item.get("name") + if not name: + logger.warning( + "Missing 'name' in 'people' property " + f"for page {page_id}" + ) + value.append(name) elif prop_type == "date": value = prop_data["date"] if prop_data["date"] else None elif prop_type == "last_edited_time":