Mirror of https://github.com/imartinez/privateGPT.git, synced 2025-06-21 13:10:56 +00:00
Optimize load_documents function with multiprocessing
This commit is contained in:
parent
ad64589c8f
commit
81b221bccb
@@ -2,6 +2,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 
 from langchain.document_loaders import (
     CSVLoader,
@@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents
 
 
 def main():
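For readers who want the change in context rather than as a patch, here is a minimal sketch of the resulting function. The LOADER_MAPPING dictionary and load_single_document helper below are simplified stand-ins for the ones defined elsewhere in ingest.py (the real mapping registers many more formats); the body of load_documents matches the diff above.

import glob
import os
from multiprocessing import Pool
from typing import List

from langchain.docstore.document import Document
from langchain.document_loaders import CSVLoader, TextLoader

# Simplified stand-in for the extension-to-loader mapping defined in ingest.py;
# the real file registers many more formats (PDF, Markdown, HTML, ...).
LOADER_MAPPING = {
    ".csv": (CSVLoader, {}),
    ".txt": (TextLoader, {"encoding": "utf8"}),
}


def load_single_document(file_path: str) -> Document:
    # Pick the loader registered for this file's extension and load the file.
    ext = os.path.splitext(file_path)[1]
    loader_class, loader_args = LOADER_MAPPING[ext]
    return loader_class(file_path, **loader_args).load()[0]


def load_documents(source_dir: str) -> List[Document]:
    # Collect every file under source_dir whose extension has a registered loader.
    all_files = []
    for ext in LOADER_MAPPING:
        all_files.extend(
            glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
        )
    # The change made by this commit: load files in parallel, one worker process
    # per CPU core, instead of the previous sequential list comprehension.
    with Pool(processes=os.cpu_count()) as pool:
        documents = pool.map(load_single_document, all_files)
    return documents

Pool.map preserves the order of all_files in the returned list, so the only behavioural difference from the original list comprehension is that load_single_document calls now run across os.cpu_count() worker processes instead of sequentially.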