From 652c542b2f17753a84fe3c8097c3ddb2147c3bc0 Mon Sep 17 00:00:00 2001 From: Sathindu <11785398+SathinduGA@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:44:52 -0400 Subject: [PATCH] fix: Imports for the ConfluenceLoader:process_page (#9432) ### Description When loading documents with the `ConfluenceLoader.load` function and both `include_comments=True` and `keep_markdown_format=True` are set, we get the error `NameError: free variable 'BeautifulSoup' referenced before assignment in enclosing scope`. loader = ConfluenceLoader(url="URI", token="TOKEN") documents = loader.load( space_key="SPACE", include_comments=True, keep_markdown_format=True, ) This happens because the previous import logic only considered the `keep_markdown_format` parameter and imported `BeautifulSoup` only when it was false; however, including the comments also uses `BeautifulSoup`. The imports are now fixed to handle all four combinations of `include_comments` and `keep_markdown_format`. ### Twitter `@SathinduGA` --------- Co-authored-by: Bagatur --- libs/langchain/langchain/document_loaders/confluence.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/confluence.py b/libs/langchain/langchain/document_loaders/confluence.py index 59d4be02229..3d986a7153a 100644 --- a/libs/langchain/langchain/document_loaders/confluence.py +++ b/libs/langchain/langchain/document_loaders/confluence.py @@ -460,7 +460,7 @@ class ConfluenceLoader(BaseLoader): "`markdownify` package not found, please run " "`pip install markdownify`" ) - else: + if include_comments or not keep_markdown_format: try: from bs4 import BeautifulSoup # type: ignore except ImportError: @@ -468,7 +468,6 @@ class ConfluenceLoader(BaseLoader): "`beautifulsoup4` package not found, please run " "`pip install beautifulsoup4`" ) - if include_attachments: attachment_texts = self.process_attachment(page["id"], ocr_languages) else: