mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-31 07:41:40 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			385 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			385 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "# CSV\n",
 | |
|     "\n",
 | |
|     ">A [comma-separated values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values) file is a delimited text file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas.\n",
 | |
|     "\n",
 | |
|     "Load [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) data, creating one document per row."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 25,
 | |
|    "metadata": {
 | |
|     "collapsed": true,
 | |
|     "jupyter": {
 | |
|      "outputs_hidden": true
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "from langchain.document_loaders.csv_loader import CSVLoader"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 26,
 | |
|    "metadata": {
 | |
|     "jupyter": {
 | |
|      "outputs_hidden": false
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "loader = CSVLoader(file_path=\"./example_data/mlb_teams_2012.csv\")\n",
 | |
|     "\n",
 | |
|     "data = loader.load()"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 27,
 | |
|    "metadata": {
 | |
|     "jupyter": {
 | |
|      "outputs_hidden": false
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "[Document(page_content='Team: Nationals\\n\"Payroll (millions)\": 81.34\\n\"Wins\": 98', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 0}, lookup_index=0), Document(page_content='Team: Reds\\n\"Payroll (millions)\": 82.20\\n\"Wins\": 97', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 1}, lookup_index=0), Document(page_content='Team: Yankees\\n\"Payroll (millions)\": 197.96\\n\"Wins\": 95', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 2}, lookup_index=0), Document(page_content='Team: Giants\\n\"Payroll (millions)\": 117.62\\n\"Wins\": 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 3}, lookup_index=0), Document(page_content='Team: Braves\\n\"Payroll (millions)\": 83.31\\n\"Wins\": 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 4}, lookup_index=0), Document(page_content='Team: Athletics\\n\"Payroll (millions)\": 55.37\\n\"Wins\": 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 5}, lookup_index=0), Document(page_content='Team: Rangers\\n\"Payroll (millions)\": 120.51\\n\"Wins\": 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 6}, lookup_index=0), Document(page_content='Team: Orioles\\n\"Payroll (millions)\": 81.43\\n\"Wins\": 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 7}, lookup_index=0), Document(page_content='Team: Rays\\n\"Payroll (millions)\": 64.17\\n\"Wins\": 90', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 8}, lookup_index=0), Document(page_content='Team: Angels\\n\"Payroll (millions)\": 154.49\\n\"Wins\": 89', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 9}, lookup_index=0), Document(page_content='Team: Tigers\\n\"Payroll (millions)\": 132.30\\n\"Wins\": 88', lookup_str='', metadata={'source': 
'./example_data/mlb_teams_2012.csv', 'row': 10}, lookup_index=0), Document(page_content='Team: Cardinals\\n\"Payroll (millions)\": 110.30\\n\"Wins\": 88', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 11}, lookup_index=0), Document(page_content='Team: Dodgers\\n\"Payroll (millions)\": 95.14\\n\"Wins\": 86', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 12}, lookup_index=0), Document(page_content='Team: White Sox\\n\"Payroll (millions)\": 96.92\\n\"Wins\": 85', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 13}, lookup_index=0), Document(page_content='Team: Brewers\\n\"Payroll (millions)\": 97.65\\n\"Wins\": 83', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 14}, lookup_index=0), Document(page_content='Team: Phillies\\n\"Payroll (millions)\": 174.54\\n\"Wins\": 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 15}, lookup_index=0), Document(page_content='Team: Diamondbacks\\n\"Payroll (millions)\": 74.28\\n\"Wins\": 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 16}, lookup_index=0), Document(page_content='Team: Pirates\\n\"Payroll (millions)\": 63.43\\n\"Wins\": 79', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 17}, lookup_index=0), Document(page_content='Team: Padres\\n\"Payroll (millions)\": 55.24\\n\"Wins\": 76', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 18}, lookup_index=0), Document(page_content='Team: Mariners\\n\"Payroll (millions)\": 81.97\\n\"Wins\": 75', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 19}, lookup_index=0), Document(page_content='Team: Mets\\n\"Payroll (millions)\": 93.35\\n\"Wins\": 74', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 20}, lookup_index=0), Document(page_content='Team: Blue Jays\\n\"Payroll 
(millions)\": 75.48\\n\"Wins\": 73', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 21}, lookup_index=0), Document(page_content='Team: Royals\\n\"Payroll (millions)\": 60.91\\n\"Wins\": 72', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 22}, lookup_index=0), Document(page_content='Team: Marlins\\n\"Payroll (millions)\": 118.07\\n\"Wins\": 69', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 23}, lookup_index=0), Document(page_content='Team: Red Sox\\n\"Payroll (millions)\": 173.18\\n\"Wins\": 69', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 24}, lookup_index=0), Document(page_content='Team: Indians\\n\"Payroll (millions)\": 78.43\\n\"Wins\": 68', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 25}, lookup_index=0), Document(page_content='Team: Twins\\n\"Payroll (millions)\": 94.08\\n\"Wins\": 66', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 26}, lookup_index=0), Document(page_content='Team: Rockies\\n\"Payroll (millions)\": 78.06\\n\"Wins\": 64', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 27}, lookup_index=0), Document(page_content='Team: Cubs\\n\"Payroll (millions)\": 88.19\\n\"Wins\": 61', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 28}, lookup_index=0), Document(page_content='Team: Astros\\n\"Payroll (millions)\": 60.65\\n\"Wins\": 55', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 29}, lookup_index=0)]\n"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "print(data)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "## Customizing the csv parsing and loading\n",
 | |
|     "\n",
 | |
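|     "See the [csv module](https://docs.python.org/3/library/csv.html) documentation for more information on which csv args are supported. Note that when `fieldnames` is passed, the header line of the file is read as an ordinary data row, as the first document in the output below shows."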
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 28,
 | |
|    "metadata": {
 | |
|     "jupyter": {
 | |
|      "outputs_hidden": false
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "loader = CSVLoader(\n",
 | |
|     "    file_path=\"./example_data/mlb_teams_2012.csv\",\n",
 | |
|     "    csv_args={\n",
 | |
|     "        \"delimiter\": \",\",\n",
 | |
|     "        \"quotechar\": '\"',\n",
 | |
|     "        \"fieldnames\": [\"MLB Team\", \"Payroll in millions\", \"Wins\"],\n",
 | |
|     "    },\n",
 | |
|     ")\n",
 | |
|     "\n",
 | |
|     "data = loader.load()"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 29,
 | |
|    "metadata": {
 | |
|     "jupyter": {
 | |
|      "outputs_hidden": false
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "[Document(page_content='MLB Team: Team\\nPayroll in millions: \"Payroll (millions)\"\\nWins: \"Wins\"', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 0}, lookup_index=0), Document(page_content='MLB Team: Nationals\\nPayroll in millions: 81.34\\nWins: 98', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 1}, lookup_index=0), Document(page_content='MLB Team: Reds\\nPayroll in millions: 82.20\\nWins: 97', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 2}, lookup_index=0), Document(page_content='MLB Team: Yankees\\nPayroll in millions: 197.96\\nWins: 95', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 3}, lookup_index=0), Document(page_content='MLB Team: Giants\\nPayroll in millions: 117.62\\nWins: 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 4}, lookup_index=0), Document(page_content='MLB Team: Braves\\nPayroll in millions: 83.31\\nWins: 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 5}, lookup_index=0), Document(page_content='MLB Team: Athletics\\nPayroll in millions: 55.37\\nWins: 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 6}, lookup_index=0), Document(page_content='MLB Team: Rangers\\nPayroll in millions: 120.51\\nWins: 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 7}, lookup_index=0), Document(page_content='MLB Team: Orioles\\nPayroll in millions: 81.43\\nWins: 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 8}, lookup_index=0), Document(page_content='MLB Team: Rays\\nPayroll in millions: 64.17\\nWins: 90', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 9}, lookup_index=0), Document(page_content='MLB Team: Angels\\nPayroll in millions: 154.49\\nWins: 89', lookup_str='', metadata={'source': 
'./example_data/mlb_teams_2012.csv', 'row': 10}, lookup_index=0), Document(page_content='MLB Team: Tigers\\nPayroll in millions: 132.30\\nWins: 88', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 11}, lookup_index=0), Document(page_content='MLB Team: Cardinals\\nPayroll in millions: 110.30\\nWins: 88', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 12}, lookup_index=0), Document(page_content='MLB Team: Dodgers\\nPayroll in millions: 95.14\\nWins: 86', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 13}, lookup_index=0), Document(page_content='MLB Team: White Sox\\nPayroll in millions: 96.92\\nWins: 85', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 14}, lookup_index=0), Document(page_content='MLB Team: Brewers\\nPayroll in millions: 97.65\\nWins: 83', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 15}, lookup_index=0), Document(page_content='MLB Team: Phillies\\nPayroll in millions: 174.54\\nWins: 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 16}, lookup_index=0), Document(page_content='MLB Team: Diamondbacks\\nPayroll in millions: 74.28\\nWins: 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 17}, lookup_index=0), Document(page_content='MLB Team: Pirates\\nPayroll in millions: 63.43\\nWins: 79', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 18}, lookup_index=0), Document(page_content='MLB Team: Padres\\nPayroll in millions: 55.24\\nWins: 76', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 19}, lookup_index=0), Document(page_content='MLB Team: Mariners\\nPayroll in millions: 81.97\\nWins: 75', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 20}, lookup_index=0), Document(page_content='MLB Team: Mets\\nPayroll in millions: 93.35\\nWins: 74', 
lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 21}, lookup_index=0), Document(page_content='MLB Team: Blue Jays\\nPayroll in millions: 75.48\\nWins: 73', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 22}, lookup_index=0), Document(page_content='MLB Team: Royals\\nPayroll in millions: 60.91\\nWins: 72', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 23}, lookup_index=0), Document(page_content='MLB Team: Marlins\\nPayroll in millions: 118.07\\nWins: 69', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 24}, lookup_index=0), Document(page_content='MLB Team: Red Sox\\nPayroll in millions: 173.18\\nWins: 69', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 25}, lookup_index=0), Document(page_content='MLB Team: Indians\\nPayroll in millions: 78.43\\nWins: 68', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 26}, lookup_index=0), Document(page_content='MLB Team: Twins\\nPayroll in millions: 94.08\\nWins: 66', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 27}, lookup_index=0), Document(page_content='MLB Team: Rockies\\nPayroll in millions: 78.06\\nWins: 64', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 28}, lookup_index=0), Document(page_content='MLB Team: Cubs\\nPayroll in millions: 88.19\\nWins: 61', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 29}, lookup_index=0), Document(page_content='MLB Team: Astros\\nPayroll in millions: 60.65\\nWins: 55', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 30}, lookup_index=0)]\n"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "print(data)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "## Specify a column to identify the document source\n",
 | |
|     "\n",
 | |
|     "Use the `source_column` argument to specify a source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the CSV file.\n",
 | |
|     "\n",
 | |
|     "This is useful when using documents loaded from CSV files for chains that answer questions using sources."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 30,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "loader = CSVLoader(file_path=\"./example_data/mlb_teams_2012.csv\", source_column=\"Team\")\n",
 | |
|     "\n",
 | |
|     "data = loader.load()"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 31,
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "[Document(page_content='Team: Nationals\\n\"Payroll (millions)\": 81.34\\n\"Wins\": 98', lookup_str='', metadata={'source': 'Nationals', 'row': 0}, lookup_index=0), Document(page_content='Team: Reds\\n\"Payroll (millions)\": 82.20\\n\"Wins\": 97', lookup_str='', metadata={'source': 'Reds', 'row': 1}, lookup_index=0), Document(page_content='Team: Yankees\\n\"Payroll (millions)\": 197.96\\n\"Wins\": 95', lookup_str='', metadata={'source': 'Yankees', 'row': 2}, lookup_index=0), Document(page_content='Team: Giants\\n\"Payroll (millions)\": 117.62\\n\"Wins\": 94', lookup_str='', metadata={'source': 'Giants', 'row': 3}, lookup_index=0), Document(page_content='Team: Braves\\n\"Payroll (millions)\": 83.31\\n\"Wins\": 94', lookup_str='', metadata={'source': 'Braves', 'row': 4}, lookup_index=0), Document(page_content='Team: Athletics\\n\"Payroll (millions)\": 55.37\\n\"Wins\": 94', lookup_str='', metadata={'source': 'Athletics', 'row': 5}, lookup_index=0), Document(page_content='Team: Rangers\\n\"Payroll (millions)\": 120.51\\n\"Wins\": 93', lookup_str='', metadata={'source': 'Rangers', 'row': 6}, lookup_index=0), Document(page_content='Team: Orioles\\n\"Payroll (millions)\": 81.43\\n\"Wins\": 93', lookup_str='', metadata={'source': 'Orioles', 'row': 7}, lookup_index=0), Document(page_content='Team: Rays\\n\"Payroll (millions)\": 64.17\\n\"Wins\": 90', lookup_str='', metadata={'source': 'Rays', 'row': 8}, lookup_index=0), Document(page_content='Team: Angels\\n\"Payroll (millions)\": 154.49\\n\"Wins\": 89', lookup_str='', metadata={'source': 'Angels', 'row': 9}, lookup_index=0), Document(page_content='Team: Tigers\\n\"Payroll (millions)\": 132.30\\n\"Wins\": 88', lookup_str='', metadata={'source': 'Tigers', 'row': 10}, lookup_index=0), Document(page_content='Team: Cardinals\\n\"Payroll (millions)\": 110.30\\n\"Wins\": 88', lookup_str='', metadata={'source': 'Cardinals', 'row': 11}, lookup_index=0), Document(page_content='Team: Dodgers\\n\"Payroll (millions)\": 
95.14\\n\"Wins\": 86', lookup_str='', metadata={'source': 'Dodgers', 'row': 12}, lookup_index=0), Document(page_content='Team: White Sox\\n\"Payroll (millions)\": 96.92\\n\"Wins\": 85', lookup_str='', metadata={'source': 'White Sox', 'row': 13}, lookup_index=0), Document(page_content='Team: Brewers\\n\"Payroll (millions)\": 97.65\\n\"Wins\": 83', lookup_str='', metadata={'source': 'Brewers', 'row': 14}, lookup_index=0), Document(page_content='Team: Phillies\\n\"Payroll (millions)\": 174.54\\n\"Wins\": 81', lookup_str='', metadata={'source': 'Phillies', 'row': 15}, lookup_index=0), Document(page_content='Team: Diamondbacks\\n\"Payroll (millions)\": 74.28\\n\"Wins\": 81', lookup_str='', metadata={'source': 'Diamondbacks', 'row': 16}, lookup_index=0), Document(page_content='Team: Pirates\\n\"Payroll (millions)\": 63.43\\n\"Wins\": 79', lookup_str='', metadata={'source': 'Pirates', 'row': 17}, lookup_index=0), Document(page_content='Team: Padres\\n\"Payroll (millions)\": 55.24\\n\"Wins\": 76', lookup_str='', metadata={'source': 'Padres', 'row': 18}, lookup_index=0), Document(page_content='Team: Mariners\\n\"Payroll (millions)\": 81.97\\n\"Wins\": 75', lookup_str='', metadata={'source': 'Mariners', 'row': 19}, lookup_index=0), Document(page_content='Team: Mets\\n\"Payroll (millions)\": 93.35\\n\"Wins\": 74', lookup_str='', metadata={'source': 'Mets', 'row': 20}, lookup_index=0), Document(page_content='Team: Blue Jays\\n\"Payroll (millions)\": 75.48\\n\"Wins\": 73', lookup_str='', metadata={'source': 'Blue Jays', 'row': 21}, lookup_index=0), Document(page_content='Team: Royals\\n\"Payroll (millions)\": 60.91\\n\"Wins\": 72', lookup_str='', metadata={'source': 'Royals', 'row': 22}, lookup_index=0), Document(page_content='Team: Marlins\\n\"Payroll (millions)\": 118.07\\n\"Wins\": 69', lookup_str='', metadata={'source': 'Marlins', 'row': 23}, lookup_index=0), Document(page_content='Team: Red Sox\\n\"Payroll (millions)\": 173.18\\n\"Wins\": 69', lookup_str='', 
metadata={'source': 'Red Sox', 'row': 24}, lookup_index=0), Document(page_content='Team: Indians\\n\"Payroll (millions)\": 78.43\\n\"Wins\": 68', lookup_str='', metadata={'source': 'Indians', 'row': 25}, lookup_index=0), Document(page_content='Team: Twins\\n\"Payroll (millions)\": 94.08\\n\"Wins\": 66', lookup_str='', metadata={'source': 'Twins', 'row': 26}, lookup_index=0), Document(page_content='Team: Rockies\\n\"Payroll (millions)\": 78.06\\n\"Wins\": 64', lookup_str='', metadata={'source': 'Rockies', 'row': 27}, lookup_index=0), Document(page_content='Team: Cubs\\n\"Payroll (millions)\": 88.19\\n\"Wins\": 61', lookup_str='', metadata={'source': 'Cubs', 'row': 28}, lookup_index=0), Document(page_content='Team: Astros\\n\"Payroll (millions)\": 60.65\\n\"Wins\": 55', lookup_str='', metadata={'source': 'Astros', 'row': 29}, lookup_index=0)]\n"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "print(data)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "## `UnstructuredCSVLoader`\n",
 | |
|     "\n",
 | |
|     "You can also load the table using the `UnstructuredCSVLoader`. One advantage of using `UnstructuredCSVLoader` is that if you use it in `\"elements\"` mode, an HTML representation of the table will be available in the metadata under the `text_as_html` key."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 1,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "from langchain.document_loaders.csv_loader import UnstructuredCSVLoader"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 2,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "loader = UnstructuredCSVLoader(\n",
 | |
|     "    file_path=\"example_data/mlb_teams_2012.csv\", mode=\"elements\"\n",
 | |
|     ")\n",
 | |
|     "docs = loader.load()"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 3,
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "<table border=\"1\" class=\"dataframe\">\n",
 | |
|       "  <tbody>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Nationals</td>\n",
 | |
|       "      <td>81.34</td>\n",
 | |
|       "      <td>98</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Reds</td>\n",
 | |
|       "      <td>82.20</td>\n",
 | |
|       "      <td>97</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Yankees</td>\n",
 | |
|       "      <td>197.96</td>\n",
 | |
|       "      <td>95</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Giants</td>\n",
 | |
|       "      <td>117.62</td>\n",
 | |
|       "      <td>94</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Braves</td>\n",
 | |
|       "      <td>83.31</td>\n",
 | |
|       "      <td>94</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Athletics</td>\n",
 | |
|       "      <td>55.37</td>\n",
 | |
|       "      <td>94</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Rangers</td>\n",
 | |
|       "      <td>120.51</td>\n",
 | |
|       "      <td>93</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Orioles</td>\n",
 | |
|       "      <td>81.43</td>\n",
 | |
|       "      <td>93</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Rays</td>\n",
 | |
|       "      <td>64.17</td>\n",
 | |
|       "      <td>90</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Angels</td>\n",
 | |
|       "      <td>154.49</td>\n",
 | |
|       "      <td>89</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Tigers</td>\n",
 | |
|       "      <td>132.30</td>\n",
 | |
|       "      <td>88</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Cardinals</td>\n",
 | |
|       "      <td>110.30</td>\n",
 | |
|       "      <td>88</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Dodgers</td>\n",
 | |
|       "      <td>95.14</td>\n",
 | |
|       "      <td>86</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>White Sox</td>\n",
 | |
|       "      <td>96.92</td>\n",
 | |
|       "      <td>85</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Brewers</td>\n",
 | |
|       "      <td>97.65</td>\n",
 | |
|       "      <td>83</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Phillies</td>\n",
 | |
|       "      <td>174.54</td>\n",
 | |
|       "      <td>81</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Diamondbacks</td>\n",
 | |
|       "      <td>74.28</td>\n",
 | |
|       "      <td>81</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Pirates</td>\n",
 | |
|       "      <td>63.43</td>\n",
 | |
|       "      <td>79</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Padres</td>\n",
 | |
|       "      <td>55.24</td>\n",
 | |
|       "      <td>76</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Mariners</td>\n",
 | |
|       "      <td>81.97</td>\n",
 | |
|       "      <td>75</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Mets</td>\n",
 | |
|       "      <td>93.35</td>\n",
 | |
|       "      <td>74</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Blue Jays</td>\n",
 | |
|       "      <td>75.48</td>\n",
 | |
|       "      <td>73</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Royals</td>\n",
 | |
|       "      <td>60.91</td>\n",
 | |
|       "      <td>72</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Marlins</td>\n",
 | |
|       "      <td>118.07</td>\n",
 | |
|       "      <td>69</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Red Sox</td>\n",
 | |
|       "      <td>173.18</td>\n",
 | |
|       "      <td>69</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Indians</td>\n",
 | |
|       "      <td>78.43</td>\n",
 | |
|       "      <td>68</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Twins</td>\n",
 | |
|       "      <td>94.08</td>\n",
 | |
|       "      <td>66</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Rockies</td>\n",
 | |
|       "      <td>78.06</td>\n",
 | |
|       "      <td>64</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Cubs</td>\n",
 | |
|       "      <td>88.19</td>\n",
 | |
|       "      <td>61</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "    <tr>\n",
 | |
|       "      <td>Astros</td>\n",
 | |
|       "      <td>60.65</td>\n",
 | |
|       "      <td>55</td>\n",
 | |
|       "    </tr>\n",
 | |
|       "  </tbody>\n",
 | |
|       "</table>\n"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "print(docs[0].metadata[\"text_as_html\"])"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": []
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3 (ipykernel)",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.8.13"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 4
 | |
| }
 |