mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 20:58:25 +00:00
Add exclude to GenericLoader.from_filesystem (#9539)
Support the exclude param in GenericLoader.from_filesystem.
Co-authored-by: Kyle Pancamo <50267605+KylePancamo@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
278ef0bdcf
commit
cd81e8a8f2
@ -44,6 +44,7 @@ class ConcurrentLoader(GenericLoader):
|
|||||||
path: _PathLike,
|
path: _PathLike,
|
||||||
*,
|
*,
|
||||||
glob: str = "**/[!.]*",
|
glob: str = "**/[!.]*",
|
||||||
|
exclude: Sequence[str] = (),
|
||||||
suffixes: Optional[Sequence[str]] = None,
|
suffixes: Optional[Sequence[str]] = None,
|
||||||
show_progress: bool = False,
|
show_progress: bool = False,
|
||||||
parser: Union[DEFAULT, BaseBlobParser] = "default",
|
parser: Union[DEFAULT, BaseBlobParser] = "default",
|
||||||
@ -52,12 +53,28 @@ class ConcurrentLoader(GenericLoader):
|
|||||||
"""
|
"""
|
||||||
Create a concurrent generic document loader using a
|
Create a concurrent generic document loader using a
|
||||||
filesystem blob loader.
|
filesystem blob loader.
|
||||||
|
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: The path to the directory to load documents from.
|
||||||
|
glob: The glob pattern to use to find documents.
|
||||||
|
suffixes: The suffixes to use to filter documents. If None, all files
|
||||||
|
matching the glob will be loaded.
|
||||||
|
exclude: A list of patterns to exclude from the loader.
|
||||||
|
show_progress: Whether to show a progress bar or not (requires tqdm).
|
||||||
|
Proxies to the file system loader.
|
||||||
|
parser: A blob parser which knows how to parse blobs into documents
|
||||||
|
num_workers: Max number of concurrent workers to use.
|
||||||
"""
|
"""
|
||||||
blob_loader = FileSystemBlobLoader(
|
blob_loader = FileSystemBlobLoader(
|
||||||
path, glob=glob, suffixes=suffixes, show_progress=show_progress
|
path,
|
||||||
|
glob=glob,
|
||||||
|
exclude=exclude,
|
||||||
|
suffixes=suffixes,
|
||||||
|
show_progress=show_progress,
|
||||||
)
|
)
|
||||||
if isinstance(parser, str):
|
if isinstance(parser, str):
|
||||||
blob_parser = get_parser(parser)
|
blob_parser = get_parser(parser)
|
||||||
else:
|
else:
|
||||||
blob_parser = parser
|
blob_parser = parser
|
||||||
return cls(blob_loader, blob_parser, num_workers)
|
return cls(blob_loader, blob_parser, num_workers=num_workers)
|
||||||
|
@ -105,6 +105,7 @@ class GenericLoader(BaseLoader):
|
|||||||
path: _PathLike,
|
path: _PathLike,
|
||||||
*,
|
*,
|
||||||
glob: str = "**/[!.]*",
|
glob: str = "**/[!.]*",
|
||||||
|
exclude: Sequence[str] = (),
|
||||||
suffixes: Optional[Sequence[str]] = None,
|
suffixes: Optional[Sequence[str]] = None,
|
||||||
show_progress: bool = False,
|
show_progress: bool = False,
|
||||||
parser: Union[DEFAULT, BaseBlobParser] = "default",
|
parser: Union[DEFAULT, BaseBlobParser] = "default",
|
||||||
@ -116,6 +117,7 @@ class GenericLoader(BaseLoader):
|
|||||||
glob: The glob pattern to use to find documents.
|
glob: The glob pattern to use to find documents.
|
||||||
suffixes: The suffixes to use to filter documents. If None, all files
|
suffixes: The suffixes to use to filter documents. If None, all files
|
||||||
matching the glob will be loaded.
|
matching the glob will be loaded.
|
||||||
|
exclude: A list of patterns to exclude from the loader.
|
||||||
show_progress: Whether to show a progress bar or not (requires tqdm).
|
show_progress: Whether to show a progress bar or not (requires tqdm).
|
||||||
Proxies to the file system loader.
|
Proxies to the file system loader.
|
||||||
parser: A blob parser which knows how to parse blobs into documents
|
parser: A blob parser which knows how to parse blobs into documents
|
||||||
@ -124,7 +126,11 @@ class GenericLoader(BaseLoader):
|
|||||||
A generic document loader.
|
A generic document loader.
|
||||||
"""
|
"""
|
||||||
blob_loader = FileSystemBlobLoader(
|
blob_loader = FileSystemBlobLoader(
|
||||||
path, glob=glob, suffixes=suffixes, show_progress=show_progress
|
path,
|
||||||
|
glob=glob,
|
||||||
|
exclude=exclude,
|
||||||
|
suffixes=suffixes,
|
||||||
|
show_progress=show_progress,
|
||||||
)
|
)
|
||||||
if isinstance(parser, str):
|
if isinstance(parser, str):
|
||||||
blob_parser = get_parser(parser)
|
blob_parser = get_parser(parser)
|
||||||
|
Loading…
Reference in New Issue
Block a user