Vwp/docs improved document loaders (#4006)

Huge thanks to @leo-gan for improving the document loaders notebooks

---------

Co-authored-by: Leonid Ganeline <leo.gan.57@gmail.com>
This commit is contained in:
Zander Chase
2023-05-02 15:24:53 -07:00
committed by GitHub
parent 1c68cbdb28
commit aa38355999
57 changed files with 1227 additions and 779 deletions

View File

@@ -7,7 +7,7 @@
"source": [
"# Email\n",
"\n",
"This notebook shows how to load email (`.eml`) and Microsoft Outlook (`.msg`) files."
"This notebook shows how to load email (`.eml`) or `Microsoft Outlook` (`.msg`) files."
]
},
{
@@ -20,9 +20,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "226e50aa-407d-43d9-a81d-f6706298b10c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install unstructured"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "40cd9806",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import UnstructuredEmailLoader"
@@ -30,9 +44,11 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"id": "2d20b852",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = UnstructuredEmailLoader('example_data/fake-email.eml')"
@@ -40,9 +56,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "579fa702",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -50,17 +68,19 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"id": "90c1d899",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='This is a test email to use for unit tests.\\n\\nImportant points:\\n\\nRoses are red\\n\\nViolets are blue', lookup_str='', metadata={'source': 'example_data/fake-email.eml'}, lookup_index=0)]"
"[Document(page_content='This is a test email to use for unit tests.\\n\\nImportant points:\\n\\nRoses are red\\n\\nViolets are blue', metadata={'source': 'example_data/fake-email.eml'})]"
]
},
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -128,6 +148,16 @@
"## Using OutlookMessageLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "058e670e-9964-44ee-b888-44f23ffb9310",
"metadata": {},
"outputs": [],
"source": [
"#!pip install extract_msg"
]
},
{
"cell_type": "code",
"execution_count": 8,
@@ -204,7 +234,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,