Dev2049/pypdfium2 (#4209)

Thanks @jerrytigerxu for the addition!

---------

Co-authored-by: Jere Xu <jtxu2008@gmail.com>
Co-authored-by: jerrytigerxu <jere.tiger.xu@gmail.com>
This commit is contained in:
Davis Chase
2023-05-05 17:55:31 -07:00
committed by GitHub
parent 59204a5033
commit 5ca13cc1f0
4 changed files with 102 additions and 2 deletions

View File

@@ -372,6 +372,44 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "483720b5",
"metadata": {},
"outputs": [],
"source": [
"data = loader.load()"
]
},
{
"cell_type": "markdown",
"id": "96351714",
"metadata": {},
"source": [
"# Using PyPDFium2"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "003fcc1d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import PyPDFium2Loader"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "46766e29",
"metadata": {},
"outputs": [],
"source": [
"loader = PyPDFium2Loader(\"example_data/layout-parser-paper.pdf\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "010d5cdd",
"metadata": {},
"outputs": [],
@@ -662,7 +700,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.11.3"
}
},
"nbformat": 4,