mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-15 22:44:36 +00:00
community: Correctly handle multi-element rich text (#25762)
**Description:**

- Add a `_concatenate_rich_text` method that combines all elements of a rich text array.
- Update the `load_page` method to use `_concatenate_rich_text` for `rich_text` and `title` properties.
- Ensure all text content is captured, including inline code and formatted text.
- Add unit tests to verify correct handling of multi-element rich text.

This fix prevents truncation of content after backticks or other formatting elements.

**Issue:** When using the Notion DB Loader, the text of `rich_text` and `title` properties was truncated after the first element, because the loader only read the first element of each rich text array.

**Dependencies:** None.

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
@@ -107,19 +107,15 @@ class NotionDBLoader(BaseLoader):
|
||||
# load properties as metadata
|
||||
metadata: Dict[str, Any] = {}
|
||||
|
||||
value: Any
|
||||
|
||||
for prop_name, prop_data in page_summary["properties"].items():
|
||||
prop_type = prop_data["type"]
|
||||
|
||||
if prop_type == "rich_text":
|
||||
value = (
|
||||
prop_data["rich_text"][0]["plain_text"]
|
||||
if prop_data["rich_text"]
|
||||
else None
|
||||
)
|
||||
value = self._concatenate_rich_text(prop_data["rich_text"])
|
||||
elif prop_type == "title":
|
||||
value = (
|
||||
prop_data["title"][0]["plain_text"] if prop_data["title"] else None
|
||||
)
|
||||
value = self._concatenate_rich_text(prop_data["title"])
|
||||
elif prop_type == "multi_select":
|
||||
value = (
|
||||
[item["name"] for item in prop_data["multi_select"]]
|
||||
@@ -228,3 +224,7 @@ class NotionDBLoader(BaseLoader):
|
||||
)
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
def _concatenate_rich_text(self, rich_text_array: List[Dict[str, Any]]) -> str:
|
||||
"""Concatenate all text content from a rich_text array."""
|
||||
return "".join(item["plain_text"] for item in rich_text_array)
|
||||
|
Reference in New Issue
Block a user