From cd81e8a8f22d7f4c99db884632df769231538d2b Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev <eyurtsev@gmail.com>
Date: Wed, 23 Aug 2023 19:09:10 -0400
Subject: [PATCH] Add exclude to GenericLoader.from_filesystem (#9539)

Support the exclude param in GenericLoader.from_filesystem and in the matching ConcurrentLoader filesystem constructor.

---------

Co-authored-by: Kyle Pancamo <50267605+KylePancamo@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
---
 .../langchain/document_loaders/concurrent.py  | 21 +++++++++++++++++--
 .../langchain/document_loaders/generic.py     |  8 ++++++-
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/libs/langchain/langchain/document_loaders/concurrent.py b/libs/langchain/langchain/document_loaders/concurrent.py
index 2044dcc7fbb..bb55d670000 100644
--- a/libs/langchain/langchain/document_loaders/concurrent.py
+++ b/libs/langchain/langchain/document_loaders/concurrent.py
@@ -44,6 +44,7 @@ class ConcurrentLoader(GenericLoader):
         path: _PathLike,
         *,
         glob: str = "**/[!.]*",
+        exclude: Sequence[str] = (),
         suffixes: Optional[Sequence[str]] = None,
         show_progress: bool = False,
         parser: Union[DEFAULT, BaseBlobParser] = "default",
@@ -52,12 +53,28 @@ class ConcurrentLoader(GenericLoader):
         """
         Create a concurrent generic document loader using a
         filesystem blob loader.
+
+
+        Args:
+            path: The path to the directory to load documents from.
+            glob: The glob pattern to use to find documents.
+            suffixes: The suffixes to use to filter documents. If None, all files
+                      matching the glob will be loaded.
+            exclude: A sequence of patterns for files to exclude from loading.
+            show_progress: Whether to show a progress bar or not (requires tqdm).
+                           Proxies to the file system loader.
+            parser: A blob parser which knows how to parse blobs into documents
+            num_workers: Max number of concurrent workers to use.
         """
         blob_loader = FileSystemBlobLoader(
-            path, glob=glob, suffixes=suffixes, show_progress=show_progress
+            path,
+            glob=glob,
+            exclude=exclude,
+            suffixes=suffixes,
+            show_progress=show_progress,
         )
         if isinstance(parser, str):
             blob_parser = get_parser(parser)
         else:
             blob_parser = parser
-        return cls(blob_loader, blob_parser, num_workers)
+        return cls(blob_loader, blob_parser, num_workers=num_workers)
diff --git a/libs/langchain/langchain/document_loaders/generic.py b/libs/langchain/langchain/document_loaders/generic.py
index 2728e0ae9fe..26d7577a332 100644
--- a/libs/langchain/langchain/document_loaders/generic.py
+++ b/libs/langchain/langchain/document_loaders/generic.py
@@ -105,6 +105,7 @@ class GenericLoader(BaseLoader):
         path: _PathLike,
         *,
         glob: str = "**/[!.]*",
+        exclude: Sequence[str] = (),
         suffixes: Optional[Sequence[str]] = None,
         show_progress: bool = False,
         parser: Union[DEFAULT, BaseBlobParser] = "default",
@@ -116,6 +117,7 @@ class GenericLoader(BaseLoader):
             glob: The glob pattern to use to find documents.
             suffixes: The suffixes to use to filter documents. If None, all files
                       matching the glob will be loaded.
+            exclude: A sequence of patterns for files to exclude from loading.
             show_progress: Whether to show a progress bar or not (requires tqdm).
                            Proxies to the file system loader.
             parser: A blob parser which knows how to parse blobs into documents
@@ -124,7 +126,11 @@ class GenericLoader(BaseLoader):
             A generic document loader.
         """
         blob_loader = FileSystemBlobLoader(
-            path, glob=glob, suffixes=suffixes, show_progress=show_progress
+            path,
+            glob=glob,
+            exclude=exclude,
+            suffixes=suffixes,
+            show_progress=show_progress,
         )
         if isinstance(parser, str):
             blob_parser = get_parser(parser)