From 463160c3f6fd0f0f531c38e1f9f04ec8a49393ef Mon Sep 17 00:00:00 2001
From: Tomer Cagan
Date: Thu, 18 Apr 2024 00:12:16 +0300
Subject: [PATCH] community: fix `DirectoryLoader` progress bar (#19821)

**Description:** Currently, the `DirectoryLoader` progress-bar maximum value is based on an incorrect number of files to process.

In langchain_community/document_loaders/directory.py:127:

```python
paths = p.rglob(self.glob) if self.recursive else p.glob(self.glob)
items = [
    path
    for path in paths
    if not (self.exclude and any(path.match(glob) for glob in self.exclude))
]
```

`paths` returns both files and directories. `items` is later used to determine the maximum value of the progress-bar, which gives an incorrect progress indication.
---
 libs/community/langchain_community/document_loaders/directory.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libs/community/langchain_community/document_loaders/directory.py b/libs/community/langchain_community/document_loaders/directory.py
index f181ebe95b7..b20eff88759 100644
--- a/libs/community/langchain_community/document_loaders/directory.py
+++ b/libs/community/langchain_community/document_loaders/directory.py
@@ -129,6 +129,7 @@ class DirectoryLoader(BaseLoader):
             path
             for path in paths
             if not (self.exclude and any(path.match(glob) for glob in self.exclude))
+            and path.is_file()
         ]
 
         if self.sample_size > 0: