mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-06-25 06:52:57 +00:00
Add progress bar to load_documents function
Enhanced the load_documents() function by adding a progress bar using the tqdm library. This change improves user experience by providing real-time feedback on the progress of document loading. Now, users can easily track the progress of this operation, especially when loading a large number of documents.
This commit is contained in:
parent
e3b769d33a
commit
cb7c96b31d
12
ingest.py
12
ingest.py
@@ -4,6 +4,7 @@ import glob
|
|||||||
from typing import List
|
from typing import List
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
from langchain.document_loaders import (
|
from langchain.document_loaders import (
|
||||||
CSVLoader,
|
CSVLoader,
|
||||||
@@ -80,7 +81,6 @@ def load_single_document(file_path: str) -> Document:
|
|||||||
|
|
||||||
raise ValueError(f"Unsupported file extension '{ext}'")
|
raise ValueError(f"Unsupported file extension '{ext}'")
|
||||||
|
|
||||||
|
|
||||||
def load_documents(source_dir: str) -> List[Document]:
|
def load_documents(source_dir: str) -> List[Document]:
|
||||||
# Loads all documents from source documents directory
|
# Loads all documents from source documents directory
|
||||||
all_files = []
|
all_files = []
|
||||||
@@ -88,9 +88,15 @@ def load_documents(source_dir: str) -> List[Document]:
|
|||||||
all_files.extend(
|
all_files.extend(
|
||||||
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
|
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
|
||||||
)
|
)
|
||||||
|
|
||||||
with Pool(processes=os.cpu_count()) as pool:
|
with Pool(processes=os.cpu_count()) as pool:
|
||||||
documents = pool.map(load_single_document, all_files)
|
results = []
|
||||||
return documents
|
with tqdm(total=len(all_files), desc='Loading documents', ncols=80) as pbar:
|
||||||
|
for i, doc in enumerate(pool.imap_unordered(load_single_document, all_files)):
|
||||||
|
results.append(doc)
|
||||||
|
pbar.update()
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@@ -10,3 +10,4 @@ extract-msg==0.41.1
|
|||||||
tabulate==0.9.0
|
tabulate==0.9.0
|
||||||
pandoc==2.3
|
pandoc==2.3
|
||||||
pypandoc==1.11
|
pypandoc==1.11
|
||||||
|
tqdm==4.65.0
|
Loading…
Reference in New Issue
Block a user