Harrison/document cleanup (#2062)

Co-authored-by: Delip Rao <delip@users.noreply.github.com>
This commit is contained in:
Harrison Chase
2023-03-27 16:32:55 -07:00
committed by GitHub
parent a0cd6672aa
commit 30e3b31b04
10 changed files with 545 additions and 259 deletions

View File

@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 1,
"id": "4e272b47",
"metadata": {},
"outputs": [],
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "8078c8f1",
"metadata": {},
"outputs": [
@@ -61,7 +61,8 @@
"Observation 2: \u001b[36;1m\u001b[1;3mWilliam James Crowe Jr. (January 2, 1925 October 18, 2007) was a United States Navy admiral and diplomat who served as the 11th chairman of the Joint Chiefs of Staff under Presidents Ronald Reagan and George H. W. Bush, and as the ambassador to the United Kingdom and Chair of the Intelligence Oversight Board under President Bill Clinton.\u001b[0m\n",
"Thought 3:\u001b[32;1m\u001b[1;3m The President William J. Crowe served as the ambassador to the United Kingdom under is Bill Clinton.\n",
"Action 3: Finish[Bill Clinton]\u001b[0m\n",
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
@@ -70,7 +71,7 @@
"'Bill Clinton'"
]
},
"execution_count": 5,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -79,6 +80,14 @@
"question = \"Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to the United Kingdom under which President?\"\n",
"react.run(question)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc4a6efe",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@@ -0,0 +1,218 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "213a38a2",
"metadata": {},
"source": [
"# DataFrame Loader\n",
"\n",
"This notebook goes over how to load data from a pandas DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "79331964",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e487044c",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('example_data/mlb_teams_2012.csv')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ac273ca1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Team</th>\n",
" <th>\"Payroll (millions)\"</th>\n",
" <th>\"Wins\"</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Nationals</td>\n",
" <td>81.34</td>\n",
" <td>98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Reds</td>\n",
" <td>82.20</td>\n",
" <td>97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Yankees</td>\n",
" <td>197.96</td>\n",
" <td>95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Giants</td>\n",
" <td>117.62</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Braves</td>\n",
" <td>83.31</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Team \"Payroll (millions)\" \"Wins\"\n",
"0 Nationals 81.34 98\n",
"1 Reds 82.20 97\n",
"2 Yankees 197.96 95\n",
"3 Giants 117.62 94\n",
"4 Braves 83.31 94"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "66e47a13",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import DataFrameLoader"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2334caca",
"metadata": {},
"outputs": [],
"source": [
"loader = DataFrameLoader(df, page_content_column=\"Team\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d616c2b0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Nationals', metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}),\n",
" Document(page_content='Reds', metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}),\n",
" Document(page_content='Yankees', metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}),\n",
" Document(page_content='Giants', metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}),\n",
" Document(page_content='Braves', metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}),\n",
" Document(page_content='Athletics', metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}),\n",
" Document(page_content='Rangers', metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}),\n",
" Document(page_content='Orioles', metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}),\n",
" Document(page_content='Rays', metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}),\n",
" Document(page_content='Angels', metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}),\n",
" Document(page_content='Tigers', metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}),\n",
" Document(page_content='Cardinals', metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}),\n",
" Document(page_content='Dodgers', metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}),\n",
" Document(page_content='White Sox', metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}),\n",
" Document(page_content='Brewers', metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}),\n",
" Document(page_content='Phillies', metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}),\n",
" Document(page_content='Diamondbacks', metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}),\n",
" Document(page_content='Pirates', metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}),\n",
" Document(page_content='Padres', metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}),\n",
" Document(page_content='Mariners', metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}),\n",
" Document(page_content='Mets', metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}),\n",
" Document(page_content='Blue Jays', metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}),\n",
" Document(page_content='Royals', metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}),\n",
" Document(page_content='Marlins', metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}),\n",
" Document(page_content='Red Sox', metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}),\n",
" Document(page_content='Indians', metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}),\n",
" Document(page_content='Twins', metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}),\n",
" Document(page_content='Rockies', metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}),\n",
" Document(page_content='Cubs', metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}),\n",
" Document(page_content='Astros', metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55})]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loader.load()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "beb55c2f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}