mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 04:07:54 +00:00
docs: fix pdf docs hierarchy and formatting (#4593)
# Fix pdf loader docs page  Using h1's messes with hierarchy, this fixes that, and moves the PyPDFium2 loader out of the middle of PDFMiner docs
This commit is contained in:
parent
36f9e9a0ba
commit
ed0d557ede
@ -335,56 +335,12 @@
|
||||
"print(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "05187b33",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "21998d18",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using PDFMiner"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2f0cc9ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PDFMinerLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "42b531e8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PDFMinerLoader(\"example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "483720b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "96351714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using PyPDFium2"
|
||||
"## Using PyPDFium2"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -407,6 +363,48 @@
|
||||
"loader = PyPDFium2Loader(\"example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Using PDFMiner"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PDFMinerLoader"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PDFMinerLoader(\"example_data/layout-parser-paper.pdf\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
@ -422,7 +420,7 @@
|
||||
"id": "c90a5fe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using PDFMiner to generate HTML text"
|
||||
"### Using PDFMiner to generate HTML text"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user