mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-06-22 05:30:34 +00:00
Optimize load_documents function with multiprocessing
This commit is contained in:
parent
ad64589c8f
commit
81b221bccb
@ -2,6 +2,7 @@ import os
|
|||||||
import glob
|
import glob
|
||||||
from typing import List
|
from typing import List
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from multiprocessing import Pool
|
||||||
|
|
||||||
from langchain.document_loaders import (
|
from langchain.document_loaders import (
|
||||||
CSVLoader,
|
CSVLoader,
|
||||||
@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
|
|||||||
all_files.extend(
|
all_files.extend(
|
||||||
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
|
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
|
||||||
)
|
)
|
||||||
return [load_single_document(file_path) for file_path in all_files]
|
with Pool(processes=os.cpu_count()) as pool:
|
||||||
|
documents = pool.map(load_single_document, all_files)
|
||||||
|
return documents
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
Loading…
Reference in New Issue
Block a user