mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-06-26 07:22:42 +00:00
Merge pull request #254 from Fabio3rs/formatOffice97-2003
Add .doc .ppt (Word and PowerPoint 97/2003 formats)
This commit is contained in:
commit
b9f8dc312f
@ -37,6 +37,7 @@ The supported extensions are:
|
||||
|
||||
- `.csv`: CSV,
|
||||
- `.docx`: Word Document,
|
||||
- `.doc`: Word Document (97/2003 format),
|
||||
- `.enex`: EverNote,
|
||||
- `.eml`: Email,
|
||||
- `.epub`: EPub,
|
||||
@ -46,6 +47,7 @@ The supported extensions are:
|
||||
- `.odt`: Open Document Text,
|
||||
- `.pdf`: Portable Document Format (PDF),
|
||||
- `.pptx`: PowerPoint Document,
|
||||
- `.ppt`: PowerPoint Document (97/2003 format),
|
||||
- `.txt`: Text file (UTF-8),
|
||||
|
||||
Run the following command to ingest all the data.
|
||||
|
@ -28,6 +28,7 @@ from constants import CHROMA_SETTINGS
|
||||
LOADER_MAPPING = {
|
||||
".csv": (CSVLoader, {}),
|
||||
# ".docx": (Docx2txtLoader, {}),
|
||||
".doc": (UnstructuredWordDocumentLoader, {}),
|
||||
".docx": (UnstructuredWordDocumentLoader, {}),
|
||||
".enex": (EverNoteLoader, {}),
|
||||
".eml": (UnstructuredEmailLoader, {}),
|
||||
@ -36,6 +37,7 @@ LOADER_MAPPING = {
|
||||
".md": (UnstructuredMarkdownLoader, {}),
|
||||
".odt": (UnstructuredODTLoader, {}),
|
||||
".pdf": (PDFMinerLoader, {}),
|
||||
".ppt": (UnstructuredPowerPointLoader, {}),
|
||||
".pptx": (UnstructuredPowerPointLoader, {}),
|
||||
".txt": (TextLoader, {"encoding": "utf8"}),
|
||||
# Add more mappings for other file extensions and loaders as needed
|
||||
|
Loading…
Reference in New Issue
Block a user