community: support Confluence cookies (#28760)

**Description**: Some Confluence instances don't support personal access
tokens; in that case, cookies are a convenient way to authenticate. This PR
adds support for authenticating to Confluence with cookies.

**Twitter handle**: soulmachine
This commit is contained in:
Frank Dai 2024-12-17 09:16:36 -08:00 committed by GitHub
parent b745281eec
commit e81433497b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 25 additions and 5 deletions

View File

@ -11,7 +11,7 @@
"A loader for `Confluence` pages.\n", "A loader for `Confluence` pages.\n",
"\n", "\n",
"\n", "\n",
"This currently supports `username/api_key`, `Oauth2 login`. Additionally, on-prem installations also support `token` authentication. \n", "This currently supports `username/api_key`, `Oauth2 login`, `cookies`. Additionally, on-prem installations also support `token` authentication. \n",
"\n", "\n",
"\n", "\n",
"Specify a list `page_id`-s and/or `space_key` to load in the corresponding pages into Document objects, if both are specified the union of both sets will be returned.\n", "Specify a list `page_id`-s and/or `space_key` to load in the corresponding pages into Document objects, if both are specified the union of both sets will be returned.\n",

View File

@ -34,8 +34,8 @@ class ConfluenceLoader(BaseLoader):
"""Load `Confluence` pages. """Load `Confluence` pages.
Port of https://llamahub.ai/l/confluence Port of https://llamahub.ai/l/confluence
This currently supports username/api_key, Oauth2 login or personal access token This currently supports username/api_key, Oauth2 login, personal access token
authentication. or cookies authentication.
Specify a list page_ids and/or space_key to load in the corresponding pages into Specify a list page_ids and/or space_key to load in the corresponding pages into
Document objects, if both are specified the union of both sets will be returned. Document objects, if both are specified the union of both sets will be returned.
@ -103,6 +103,8 @@ class ConfluenceLoader(BaseLoader):
:type max_retry_seconds: Optional[int], optional :type max_retry_seconds: Optional[int], optional
:param confluence_kwargs: additional kwargs to initialize confluence with :param confluence_kwargs: additional kwargs to initialize confluence with
:type confluence_kwargs: dict, optional :type confluence_kwargs: dict, optional
:param cookies: Cookies used to authenticate with Confluence, defaults to None
:type cookies: dict, optional
:param space_key: Space key retrieved from a confluence URL, defaults to None :param space_key: Space key retrieved from a confluence URL, defaults to None
:type space_key: Optional[str], optional :type space_key: Optional[str], optional
:param page_ids: List of specific page IDs to load, defaults to None :param page_ids: List of specific page IDs to load, defaults to None
@ -158,6 +160,7 @@ class ConfluenceLoader(BaseLoader):
max_retry_seconds: Optional[int] = 10, max_retry_seconds: Optional[int] = 10,
confluence_kwargs: Optional[dict] = None, confluence_kwargs: Optional[dict] = None,
*, *,
cookies: Optional[dict] = None,
space_key: Optional[str] = None, space_key: Optional[str] = None,
page_ids: Optional[List[str]] = None, page_ids: Optional[List[str]] = None,
label: Optional[str] = None, label: Optional[str] = None,
@ -197,6 +200,7 @@ class ConfluenceLoader(BaseLoader):
username=username, username=username,
session=session, session=session,
oauth2=oauth2, oauth2=oauth2,
cookies=cookies,
token=token, token=token,
) )
if errors: if errors:
@ -224,6 +228,10 @@ class ConfluenceLoader(BaseLoader):
self.confluence = Confluence( self.confluence = Confluence(
url=url, token=token, cloud=cloud, **confluence_kwargs url=url, token=token, cloud=cloud, **confluence_kwargs
) )
elif cookies:
self.confluence = Confluence(
url=url, cookies=cookies, cloud=cloud, **confluence_kwargs
)
else: else:
self.confluence = Confluence( self.confluence = Confluence(
url=url, url=url,
@ -241,6 +249,7 @@ class ConfluenceLoader(BaseLoader):
session: Optional[requests.Session] = None, session: Optional[requests.Session] = None,
oauth2: Optional[dict] = None, oauth2: Optional[dict] = None,
token: Optional[str] = None, token: Optional[str] = None,
cookies: Optional[dict] = None,
) -> Union[List, None]: ) -> Union[List, None]:
"""Validates proper combinations of init arguments""" """Validates proper combinations of init arguments"""
@ -255,10 +264,11 @@ class ConfluenceLoader(BaseLoader):
) )
non_null_creds = list( non_null_creds = list(
x is not None for x in ((api_key or username), session, oauth2, token) x is not None
for x in ((api_key or username), session, oauth2, token, cookies)
) )
if sum(non_null_creds) > 1: if sum(non_null_creds) > 1:
all_names = ("(api_key, username)", "session", "oauth2", "token") all_names = ("(api_key, username)", "session", "oauth2", "token", "cookies")
provided = tuple(n for x, n in zip(non_null_creds, all_names) if x) provided = tuple(n for x, n in zip(non_null_creds, all_names) if x)
errors.append( errors.append(
f"Cannot provide a value for more than one of: {all_names}. Received " f"Cannot provide a value for more than one of: {all_names}. Received "

View File

@ -68,6 +68,16 @@ class TestConfluenceLoader:
session=requests.Session(), session=requests.Session(),
) )
with pytest.raises(ValueError):
ConfluenceLoader(
self.CONFLUENCE_URL,
username=self.MOCK_USERNAME,
api_key=self.MOCK_API_TOKEN,
cookies={
"key": "value",
},
)
def test_confluence_loader_initialization_from_env( def test_confluence_loader_initialization_from_env(
self, mock_confluence: MagicMock self, mock_confluence: MagicMock
) -> None: ) -> None: