mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 12:48:12 +00:00
Add exclude to GenericLoader.from_filesystem (#9539)
Support the `exclude` parameter in GenericLoader.from_filesystem.
---------
Co-authored-by: Kyle Pancamo <50267605+KylePancamo@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
278ef0bdcf
commit
cd81e8a8f2
@ -44,6 +44,7 @@ class ConcurrentLoader(GenericLoader):
|
||||
path: _PathLike,
|
||||
*,
|
||||
glob: str = "**/[!.]*",
|
||||
exclude: Sequence[str] = (),
|
||||
suffixes: Optional[Sequence[str]] = None,
|
||||
show_progress: bool = False,
|
||||
parser: Union[DEFAULT, BaseBlobParser] = "default",
|
||||
@ -52,12 +53,28 @@ class ConcurrentLoader(GenericLoader):
|
||||
"""
|
||||
Create a concurrent generic document loader using a
|
||||
filesystem blob loader.
|
||||
|
||||
|
||||
Args:
|
||||
path: The path to the directory to load documents from.
|
||||
glob: The glob pattern to use to find documents.
|
||||
suffixes: The suffixes to use to filter documents. If None, all files
|
||||
matching the glob will be loaded.
|
||||
exclude: A list of patterns to exclude from the loader.
|
||||
show_progress: Whether to show a progress bar or not (requires tqdm).
|
||||
Proxies to the file system loader.
|
||||
parser: A blob parser which knows how to parse blobs into documents
|
||||
num_workers: Max number of concurrent workers to use.
|
||||
"""
|
||||
blob_loader = FileSystemBlobLoader(
|
||||
path, glob=glob, suffixes=suffixes, show_progress=show_progress
|
||||
path,
|
||||
glob=glob,
|
||||
exclude=exclude,
|
||||
suffixes=suffixes,
|
||||
show_progress=show_progress,
|
||||
)
|
||||
if isinstance(parser, str):
|
||||
blob_parser = get_parser(parser)
|
||||
else:
|
||||
blob_parser = parser
|
||||
return cls(blob_loader, blob_parser, num_workers)
|
||||
return cls(blob_loader, blob_parser, num_workers=num_workers)
|
||||
|
@ -105,6 +105,7 @@ class GenericLoader(BaseLoader):
|
||||
path: _PathLike,
|
||||
*,
|
||||
glob: str = "**/[!.]*",
|
||||
exclude: Sequence[str] = (),
|
||||
suffixes: Optional[Sequence[str]] = None,
|
||||
show_progress: bool = False,
|
||||
parser: Union[DEFAULT, BaseBlobParser] = "default",
|
||||
@ -116,6 +117,7 @@ class GenericLoader(BaseLoader):
|
||||
glob: The glob pattern to use to find documents.
|
||||
suffixes: The suffixes to use to filter documents. If None, all files
|
||||
matching the glob will be loaded.
|
||||
exclude: A list of patterns to exclude from the loader.
|
||||
show_progress: Whether to show a progress bar or not (requires tqdm).
|
||||
Proxies to the file system loader.
|
||||
parser: A blob parser which knows how to parse blobs into documents
|
||||
@ -124,7 +126,11 @@ class GenericLoader(BaseLoader):
|
||||
A generic document loader.
|
||||
"""
|
||||
blob_loader = FileSystemBlobLoader(
|
||||
path, glob=glob, suffixes=suffixes, show_progress=show_progress
|
||||
path,
|
||||
glob=glob,
|
||||
exclude=exclude,
|
||||
suffixes=suffixes,
|
||||
show_progress=show_progress,
|
||||
)
|
||||
if isinstance(parser, str):
|
||||
blob_parser = get_parser(parser)
|
||||
|
Loading…
Reference in New Issue
Block a user