mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-08-10 20:02:04 +00:00
Merge pull request #660 from doskoi/master
Improving performance for PDF loader
This commit is contained in:
commit
51fa989679
@ -9,7 +9,7 @@ from tqdm import tqdm
|
|||||||
from langchain.document_loaders import (
|
from langchain.document_loaders import (
|
||||||
CSVLoader,
|
CSVLoader,
|
||||||
EverNoteLoader,
|
EverNoteLoader,
|
||||||
PDFMinerLoader,
|
PyMuPDFLoader,
|
||||||
TextLoader,
|
TextLoader,
|
||||||
UnstructuredEmailLoader,
|
UnstructuredEmailLoader,
|
||||||
UnstructuredEPubLoader,
|
UnstructuredEPubLoader,
|
||||||
@ -73,7 +73,7 @@ LOADER_MAPPING = {
|
|||||||
".html": (UnstructuredHTMLLoader, {}),
|
".html": (UnstructuredHTMLLoader, {}),
|
||||||
".md": (UnstructuredMarkdownLoader, {}),
|
".md": (UnstructuredMarkdownLoader, {}),
|
||||||
".odt": (UnstructuredODTLoader, {}),
|
".odt": (UnstructuredODTLoader, {}),
|
||||||
".pdf": (PDFMinerLoader, {}),
|
".pdf": (PyMuPDFLoader, {}),
|
||||||
".ppt": (UnstructuredPowerPointLoader, {}),
|
".ppt": (UnstructuredPowerPointLoader, {}),
|
||||||
".pptx": (UnstructuredPowerPointLoader, {}),
|
".pptx": (UnstructuredPowerPointLoader, {}),
|
||||||
".txt": (TextLoader, {"encoding": "utf8"}),
|
".txt": (TextLoader, {"encoding": "utf8"}),
|
||||||
|
@ -3,7 +3,7 @@ gpt4all==0.2.3
|
|||||||
chromadb==0.3.23
|
chromadb==0.3.23
|
||||||
llama-cpp-python==0.1.50
|
llama-cpp-python==0.1.50
|
||||||
urllib3==2.0.2
|
urllib3==2.0.2
|
||||||
pdfminer.six==20221105
|
PyMuPDF==1.22.3
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
unstructured==0.6.6
|
unstructured==0.6.6
|
||||||
extract-msg==0.41.1
|
extract-msg==0.41.1
|
||||||
|
Loading…
Reference in New Issue
Block a user