Harrison/pypdf loader (#3764)

Co-authored-by: Felipe Meres <felipe@felipemeres.com>
This commit is contained in:
Harrison Chase
2023-04-28 19:56:21 -07:00
committed by GitHub
parent 4eefea0fe8
commit 7a129ac043
3 changed files with 95 additions and 4 deletions

View File

@@ -566,10 +566,50 @@
"Additionally, you can pass along any of the options from the [PyMuPDF documentation](https://pymupdf.readthedocs.io/en/latest/app1.html#plain-text/) as keyword arguments in the `load` call, and it will be pass along to the `get_text()` call."
]
},
{
"cell_type": "markdown",
"id": "f0048206",
"metadata": {},
"source": [
"## PyPDF Directory\n",
"\n",
"Load PDFs from directory"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ecd0cb16",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import PyPDFDirectoryLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "96592167",
"metadata": {},
"outputs": [],
"source": [
"loader = PyPDFDirectoryLoader(\"example_data/\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c750454c",
"metadata": {},
"outputs": [],
"source": [
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1bf73c97",
"id": "ab7f8fdb",
"metadata": {},
"outputs": [],
"source": []
@@ -577,9 +617,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "langchain_dev",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "langchain_dev"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -591,7 +631,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
"version": "3.9.1"
}
},
"nbformat": 4,