mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-16 17:53:37 +00:00
Confluence: Add page status filter for spaces (#3732)
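Currently, all content in a Confluence space is retrieved by default, including archived content. This is often undesirable because archived content is usually no longer relevant. This change adds an `include_archived_content` option (default `False`) so that archived content is excluded unless explicitly requested. **Notes:** Fetching pages by label does not support excluding archived content, so archived pages may still be returned when loading by label. This may lead to unexpected results.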
This commit is contained in:
parent
7a129ac043
commit
8cf2ff0be0
@ -156,6 +156,7 @@ class ConfluenceLoader(BaseLoader):
|
||||
page_ids: Optional[List[str]] = None,
|
||||
label: Optional[str] = None,
|
||||
cql: Optional[str] = None,
|
||||
include_archived_content: bool = False,
|
||||
include_attachments: bool = False,
|
||||
include_comments: bool = False,
|
||||
limit: Optional[int] = 50,
|
||||
@ -170,6 +171,9 @@ class ConfluenceLoader(BaseLoader):
|
||||
:type label: Optional[str], optional
|
||||
:param cql: CQL Expression, defaults to None
|
||||
:type cql: Optional[str], optional
|
||||
:param include_archived_content: Whether to include archived content,
|
||||
defaults to False
|
||||
:type include_archived_content: bool, optional
|
||||
:param include_attachments: defaults to False
|
||||
:type include_attachments: bool, optional
|
||||
:param include_comments: defaults to False
|
||||
@ -197,6 +201,7 @@ class ConfluenceLoader(BaseLoader):
|
||||
space=space_key,
|
||||
limit=limit,
|
||||
max_pages=max_pages,
|
||||
status="any" if include_archived_content else "current",
|
||||
expand="body.storage.value",
|
||||
)
|
||||
for page in pages:
|
||||
@ -221,6 +226,7 @@ class ConfluenceLoader(BaseLoader):
|
||||
cql=cql,
|
||||
limit=limit,
|
||||
max_pages=max_pages,
|
||||
include_archived_spaces=include_archived_content,
|
||||
expand="body.storage.value",
|
||||
)
|
||||
for page in pages:
|
||||
|
Loading…
Reference in New Issue
Block a user