mirror of
https://github.com/hwchase17/langchain.git
synced 2025-10-05 20:28:56 +00:00
Vwp/docs improved document loaders (#4006)
Huge thanks to @leo-gan for improving the document loaders notebooks --------- Co-authored-by: Leonid Ganeline <leo.gan.57@gmail.com>
This commit is contained in:
@@ -2,20 +2,21 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CSV Loader\n",
|
||||
"# CSV Files\n",
|
||||
"\n",
|
||||
"Load csv files with a single row per document."
|
||||
"Load [csv](https://en.wikipedia.org/wiki/Comma-separated_values) data with a single row per document."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": true,
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -26,7 +27,10 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -39,7 +43,10 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -56,9 +63,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing the csv parsing and loading\n",
|
||||
"\n",
|
||||
@@ -69,7 +74,10 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -86,7 +94,10 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -102,13 +113,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specify a column to be used identify the document source\n",
|
||||
"## Specify a column to identify the document source\n",
|
||||
"\n",
|
||||
"Use the `source_column` argument to specify a column to be set as the source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the csv file.\n",
|
||||
"Use the `source_column` argument to specify a source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the CSV file.\n",
|
||||
"\n",
|
||||
"This is useful when using documents loaded from CSV files for chains that answer questions using sources."
|
||||
]
|
||||
@@ -144,7 +154,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -158,9 +168,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.9"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
Reference in New Issue
Block a user