Optimize load_documents function with multiprocessing

This commit is contained in:
jiangzhuo 2023-05-19 02:35:20 +09:00
parent ad64589c8f
commit 81b221bccb

View File

@@ -2,6 +2,7 @@ import os
import glob
from typing import List
from dotenv import load_dotenv
from multiprocessing import Pool
from langchain.document_loaders import (
CSVLoader,
@@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
all_files.extend(
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
)
return [load_single_document(file_path) for file_path in all_files]
with Pool(processes=os.cpu_count()) as pool:
documents = pool.map(load_single_document, all_files)
return documents
def main():