From 652c542b2f17753a84fe3c8097c3ddb2147c3bc0 Mon Sep 17 00:00:00 2001
From: Sathindu <11785398+SathinduGA@users.noreply.github.com>
Date: Mon, 21 Aug 2023 16:44:52 -0400
Subject: [PATCH] fix: Imports for the ConfluenceLoader:process_page (#9432)

### Description
When loading documents with the `ConfluenceLoader` `load` function while
both `include_comments=True` and `keep_markdown_format=True` are set,
the call fails with `NameError: free variable 'BeautifulSoup'
referenced before assignment in enclosing scope`.

    loader = ConfluenceLoader(url="URI", token="TOKEN")
    documents = loader.load(
        space_key="SPACE",
        include_comments=True,
        keep_markdown_format=True,
    )

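This happens because the previous import logic only considered the
`keep_markdown_format` parameter, so `BeautifulSoup` was not imported
when it was set. However, including comments also uses `BeautifulSoup`,
which left the name undefined when both options were enabled.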

The import condition now handles all four combinations of
`include_comments` and `keep_markdown_format`.

### Twitter
`@SathinduGA`

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
---
 libs/langchain/langchain/document_loaders/confluence.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/langchain/langchain/document_loaders/confluence.py b/libs/langchain/langchain/document_loaders/confluence.py
index 59d4be02229..3d986a7153a 100644
--- a/libs/langchain/langchain/document_loaders/confluence.py
+++ b/libs/langchain/langchain/document_loaders/confluence.py
@@ -460,7 +460,7 @@ class ConfluenceLoader(BaseLoader):
                     "`markdownify` package not found, please run "
                     "`pip install markdownify`"
                 )
-        else:
+        if include_comments or not keep_markdown_format:
             try:
                 from bs4 import BeautifulSoup  # type: ignore
             except ImportError:
@@ -468,7 +468,6 @@ class ConfluenceLoader(BaseLoader):
                     "`beautifulsoup4` package not found, please run "
                     "`pip install beautifulsoup4`"
                 )
-
         if include_attachments:
             attachment_texts = self.process_attachment(page["id"], ocr_languages)
         else: