Optimize load_documents function with multiprocessing

jiangzhuo 2023-05-19 02:35:20 +09:00
parent ad64589c8f
commit 81b221bccb


@@ -2,6 +2,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 from langchain.document_loaders import (
     CSVLoader,
@@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents


 def main():
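
For context, here is a minimal self-contained sketch of the parallelized loader after this change. The load_single_document stub below is hypothetical, standing in for the project's real loader, which dispatches to a langchain loader by file extension and returns Document objects; the "*.txt"-only glob and the source_documents path are likewise simplifying assumptions for the sketch.

    import os
    import glob
    from multiprocessing import Pool
    from typing import List

    def load_single_document(file_path: str) -> str:
        # Hypothetical stand-in: the project's real function picks a
        # langchain loader by extension and returns a Document.
        with open(file_path, errors="ignore") as f:
            return f.read()

    def load_documents(source_dir: str) -> List[str]:
        # Gather every matching file, then fan the per-file loads out
        # across one worker process per CPU core, as in the diff above.
        all_files = glob.glob(
            os.path.join(source_dir, "**/*.txt"), recursive=True
        )
        with Pool(processes=os.cpu_count()) as pool:
            documents = pool.map(load_single_document, all_files)
        return documents

    if __name__ == "__main__":
        # The __main__ guard matters here: multiprocessing re-imports
        # this module in each worker on platforms that spawn processes.
        print(f"Loaded {len(load_documents('source_documents'))} documents")

pool.map blocks until every worker finishes and preserves the order of all_files, so the result is equivalent to the replaced list comprehension, just computed in parallel. The trade-off is process-startup and pickling overhead, so the speedup is largest when there are many files or per-file parsing is expensive.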