Optimize load_documents function with multiprocessing

This commit is contained in:
jiangzhuo 2023-05-19 02:35:20 +09:00
parent ad64589c8f
commit 81b221bccb

View File

@@ -2,6 +2,7 @@ import os
import glob
from typing import List
from dotenv import load_dotenv
from multiprocessing import Pool
from langchain.document_loaders import (
CSVLoader,
@@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
all_files.extend(
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
)
return [load_single_document(file_path) for file_path in all_files]
with Pool(processes=os.cpu_count()) as pool:
documents = pool.map(load_single_document, all_files)
return documents
def main():