Vwp/docs improved document loaders (#4006)

Huge thanks to @leo-gan for improving the document loaders notebooks

---------

Co-authored-by: Leonid Ganeline <leo.gan.57@gmail.com>
This commit is contained in:
Zander Chase
2023-05-02 15:24:53 -07:00
committed by GitHub
parent 1c68cbdb28
commit aa38355999
57 changed files with 1227 additions and 779 deletions

View File

@@ -18,11 +18,25 @@
"## Using Unstructured"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db8e56db-2e66-443b-8a0b-ef69fa5fae9a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install pdfminer"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0cc0cd42",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders.image import UnstructuredImageLoader"
@@ -32,7 +46,9 @@
"cell_type": "code",
"execution_count": 2,
"id": "082d557c",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = UnstructuredImageLoader(\"layout-parser-paper-fast.jpg\")"
@@ -40,9 +56,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "df11c953",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -137,7 +155,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.10.6"
}
},
"nbformat": 4,