From ed0d557ede8776921cc3c5ca1f3aef81d3d0c7b5 Mon Sep 17 00:00:00 2001 From: Tim Asp <707699+timothyasp@users.noreply.github.com> Date: Fri, 12 May 2023 12:03:01 -0700 Subject: [PATCH] docs: fix pdf docs hierarchy and formatting (#4593) # Fix pdf loader docs page ![image](https://github.com/hwchase17/langchain/assets/707699/4a11f379-00ed-4f7a-9870-71f74e0cadc6) Using h1's messes with hierarchy, this fixes that, and moves the PyPDFium2 loader out of the middle of PDFMiner docs --- .../document_loaders/examples/pdf.ipynb | 90 +++++++++---------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/docs/modules/indexes/document_loaders/examples/pdf.ipynb b/docs/modules/indexes/document_loaders/examples/pdf.ipynb index e1ec7035016..abccc80c973 100644 --- a/docs/modules/indexes/document_loaders/examples/pdf.ipynb +++ b/docs/modules/indexes/document_loaders/examples/pdf.ipynb @@ -335,56 +335,12 @@ "print(data)" ] }, - { - "cell_type": "markdown", - "id": "05187b33", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "id": "21998d18", - "metadata": {}, - "source": [ - "## Using PDFMiner" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "2f0cc9ff", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.document_loaders import PDFMinerLoader" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "42b531e8", - "metadata": {}, - "outputs": [], - "source": [ - "loader = PDFMinerLoader(\"example_data/layout-parser-paper.pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "483720b5", - "metadata": {}, - "outputs": [], - "source": [ - "data = loader.load()" - ] - }, { "cell_type": "markdown", "id": "96351714", "metadata": {}, "source": [ - "# Using PyPDFium2" + "## Using PyPDFium2" ] }, { @@ -407,6 +363,48 @@ "loader = PyPDFium2Loader(\"example_data/layout-parser-paper.pdf\")" ] }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "data = loader.load()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Using PDFMiner" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "from langchain.document_loaders import PDFMinerLoader" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "loader = PDFMinerLoader(\"example_data/layout-parser-paper.pdf\")" + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": 4, @@ -422,7 +420,7 @@ "id": "c90a5fe8", "metadata": {}, "source": [ - "## Using PDFMiner to generate HTML text" + "### Using PDFMiner to generate HTML text" ] }, {