diff --git a/docs/docs/modules/data_connection/document_loaders/pdf.mdx b/docs/docs/modules/data_connection/document_loaders/pdf.mdx index 936aafdd89c..ec264f61f3f 100644 --- a/docs/docs/modules/data_connection/document_loaders/pdf.mdx +++ b/docs/docs/modules/data_connection/document_loaders/pdf.mdx @@ -129,6 +129,11 @@ data = loader.load() ## Using Unstructured +The `unstructured[all-docs]` package currently supports loading text files, PowerPoint presentations, HTML, PDFs, images, and more. For PDFs alone, the `pdf` extra is sufficient. + +```bash +pip install "unstructured[pdf]" +``` ```python from langchain_community.document_loaders import UnstructuredPDFLoader @@ -225,6 +230,11 @@ data = loader.load() ## Using PDFMiner +PDFMiner is a tool for extracting information and analyzing data from PDF documents. + +```bash +pip install pdfminer.six +``` ```python from langchain_community.document_loaders import PDFMinerLoader