Add progress bar to load_documents function

Enhanced the load_documents() function by adding a progress bar using the tqdm library. This improves the user experience by giving real-time feedback while documents are loading, which is especially helpful when loading a large number of documents. To advance the bar as each file finishes, the worker pool now uses pool.imap_unordered() instead of pool.map(), so the returned documents are no longer guaranteed to be in the same order as the input files.
This commit is contained in:
jiangzhuo 2023-05-19 03:18:41 +09:00 committed by Iván Martínez
parent e3b769d33a
commit cb7c96b31d
2 changed files with 11 additions and 4 deletions

View File

@ -4,6 +4,7 @@ import glob
from typing import List
from dotenv import load_dotenv
from multiprocessing import Pool
from tqdm import tqdm
from langchain.document_loaders import (
CSVLoader,
@ -80,7 +81,6 @@ def load_single_document(file_path: str) -> Document:
raise ValueError(f"Unsupported file extension '{ext}'")
def load_documents(source_dir: str) -> List[Document]:
# Loads all documents from source documents directory
all_files = []
@ -88,9 +88,15 @@ def load_documents(source_dir: str) -> List[Document]:
all_files.extend(
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
)
with Pool(processes=os.cpu_count()) as pool:
documents = pool.map(load_single_document, all_files)
return documents
results = []
with tqdm(total=len(all_files), desc='Loading documents', ncols=80) as pbar:
for i, doc in enumerate(pool.imap_unordered(load_single_document, all_files)):
results.append(doc)
pbar.update()
return results
def main():

View File

@ -10,3 +10,4 @@ extract-msg==0.41.1
tabulate==0.9.0
pandoc==2.3
pypandoc==1.11
tqdm==4.65.0