mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 04:07:54 +00:00
Adds progress bar using tqdm to directory_loader (#3349)
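Approach copied from `WebBaseLoader`. Assumes the user may not have `tqdm` installed: it is imported only when `show_progress` is enabled, and a warning with install instructions is logged if the import fails.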
This commit is contained in:
parent
344e3508b1
commit
980cc41709
@ -68,6 +68,51 @@
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "e633d62f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Show a progress bar"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "43911860",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, no progress bar is shown. To show one, install the `tqdm` library (e.g. `pip install tqdm`) and set the `show_progress` parameter to `True`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "bb93daac",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: tqdm in /Users/jon/.pyenv/versions/3.9.16/envs/microbiome-app/lib/python3.9/site-packages (4.65.0)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0it [00:00, ?it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install tqdm\n",
|
||||
"loader = DirectoryLoader('../', glob=\"**/*.md\", show_progress=True)\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c5652850",
|
||||
|
@ -35,6 +35,7 @@ class DirectoryLoader(BaseLoader):
|
||||
loader_cls: FILE_LOADER_TYPE = UnstructuredFileLoader,
|
||||
loader_kwargs: Union[dict, None] = None,
|
||||
recursive: bool = False,
|
||||
show_progress: bool = False,
|
||||
):
|
||||
"""Initialize with path to directory and how to glob over it."""
|
||||
if loader_kwargs is None:
|
||||
@ -46,12 +47,30 @@ class DirectoryLoader(BaseLoader):
|
||||
self.loader_kwargs = loader_kwargs
|
||||
self.silent_errors = silent_errors
|
||||
self.recursive = recursive
|
||||
self.show_progress = show_progress
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load documents."""
|
||||
p = Path(self.path)
|
||||
docs = []
|
||||
items = p.rglob(self.glob) if self.recursive else p.glob(self.glob)
|
||||
items = list(p.rglob(self.glob) if self.recursive else p.glob(self.glob))
|
||||
|
||||
pbar = None
|
||||
if self.show_progress:
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
|
||||
pbar = tqdm(total=len(items))
|
||||
except ImportError as e:
|
||||
logger.warning(
|
||||
"To log the progress of DirectoryLoader you need to install tqdm, "
|
||||
"`pip install tqdm`"
|
||||
)
|
||||
if self.silent_errors:
|
||||
logger.warning(e)
|
||||
else:
|
||||
raise e
|
||||
|
||||
for i in items:
|
||||
if i.is_file():
|
||||
if _is_visible(i.relative_to(p)) or self.load_hidden:
|
||||
@ -63,4 +82,11 @@ class DirectoryLoader(BaseLoader):
|
||||
logger.warning(e)
|
||||
else:
|
||||
raise e
|
||||
finally:
|
||||
if pbar:
|
||||
pbar.update(1)
|
||||
|
||||
if pbar:
|
||||
pbar.close()
|
||||
|
||||
return docs
|
||||
|
Loading…
Reference in New Issue
Block a user