docs: fix errors and table formatting in notebook (#21696)

There are 2 issues fixed here:

* In the notebook, pandas DataFrames are rendered as HTML in the cell outputs.
On the documentation site, the renderer that converts notebooks
incorrectly displays the raw HTML. I can't find any examples where
this works, so I am formatting the DataFrames as text instead.

* Some incorrect table names were referenced, resulting in errors.
This commit is contained in:
Chad Juliano 2024-05-16 18:00:14 -05:00 committed by GitHub
parent f3289b898c
commit 685c13e157
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -54,18 +54,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [ "source": [
"# Install Langchain community and core packages\n", "# Install Langchain community and core packages\n",
"%pip install --upgrade --quiet langchain-core langchain-community\n", "%pip install --upgrade --quiet langchain-core langchain-community\n",
@ -123,126 +114,37 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "name": "stdout",
"text/html": [ "output_type": "stream",
"<div>\n", "text": [
"<style scoped>\n", " username name sex \\\n",
" .dataframe tbody tr th:only-of-type {\n", "id \n",
" vertical-align: middle;\n", "0 eduardo69 Haley Beck F \n",
" }\n", "1 lbarrera Joshua Stephens M \n",
"\n", "2 bburton Paula Kaiser F \n",
" .dataframe tbody tr th {\n", "3 melissa49 Wendy Reese F \n",
" vertical-align: top;\n", "4 melissacarter Manuel Rios M \n",
" }\n", "\n",
"\n", " address mail \\\n",
" .dataframe thead th {\n", "id \n",
" text-align: right;\n", "0 59836 Carla Causeway Suite 939\\nPort Eugene, I... meltondenise@yahoo.com \n",
" }\n", "1 3108 Christina Forges\\nPort Timothychester, KY... erica80@hotmail.com \n",
"</style>\n", "2 Unit 7405 Box 3052\\nDPO AE 09858 timothypotts@gmail.com \n",
"<table border=\"1\" class=\"dataframe\">\n", "3 6408 Christopher Hill Apt. 459\\nNew Benjamin, ... dadams@gmail.com \n",
" <thead>\n", "4 2241 Bell Gardens Suite 723\\nScottside, CA 38463 williamayala@gmail.com \n",
" <tr style=\"text-align: right;\">\n", "\n",
" <th></th>\n", " birthdate \n",
" <th>username</th>\n", "id \n",
" <th>name</th>\n", "0 1997-12-01 \n",
" <th>sex</th>\n", "1 1924-07-27 \n",
" <th>address</th>\n", "2 1933-11-28 \n",
" <th>mail</th>\n", "3 1988-10-19 \n",
" <th>birthdate</th>\n", "4 1931-03-12 \n"
" </tr>\n", ]
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>eduardo69</td>\n",
" <td>Haley Beck</td>\n",
" <td>F</td>\n",
" <td>59836 Carla Causeway Suite 939\\nPort Eugene, I...</td>\n",
" <td>meltondenise@yahoo.com</td>\n",
" <td>1997-11-23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>lbarrera</td>\n",
" <td>Joshua Stephens</td>\n",
" <td>M</td>\n",
" <td>3108 Christina Forges\\nPort Timothychester, KY...</td>\n",
" <td>erica80@hotmail.com</td>\n",
" <td>1924-07-19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bburton</td>\n",
" <td>Paula Kaiser</td>\n",
" <td>F</td>\n",
" <td>Unit 7405 Box 3052\\nDPO AE 09858</td>\n",
" <td>timothypotts@gmail.com</td>\n",
" <td>1933-11-20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>melissa49</td>\n",
" <td>Wendy Reese</td>\n",
" <td>F</td>\n",
" <td>6408 Christopher Hill Apt. 459\\nNew Benjamin, ...</td>\n",
" <td>dadams@gmail.com</td>\n",
" <td>1988-10-11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>melissacarter</td>\n",
" <td>Manuel Rios</td>\n",
" <td>M</td>\n",
" <td>2241 Bell Gardens Suite 723\\nScottside, CA 38463</td>\n",
" <td>williamayala@gmail.com</td>\n",
" <td>1931-03-04</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" username name sex \\\n",
"id \n",
"0 eduardo69 Haley Beck F \n",
"1 lbarrera Joshua Stephens M \n",
"2 bburton Paula Kaiser F \n",
"3 melissa49 Wendy Reese F \n",
"4 melissacarter Manuel Rios M \n",
"\n",
" address mail \\\n",
"id \n",
"0 59836 Carla Causeway Suite 939\\nPort Eugene, I... meltondenise@yahoo.com \n",
"1 3108 Christina Forges\\nPort Timothychester, KY... erica80@hotmail.com \n",
"2 Unit 7405 Box 3052\\nDPO AE 09858 timothypotts@gmail.com \n",
"3 6408 Christopher Hill Apt. 459\\nNew Benjamin, ... dadams@gmail.com \n",
"4 2241 Bell Gardens Suite 723\\nScottside, CA 38463 williamayala@gmail.com \n",
"\n",
" birthdate \n",
"id \n",
"0 1997-11-23 \n",
"1 1924-07-19 \n",
"2 1933-11-20 \n",
"3 1988-10-11 \n",
"4 1931-03-04 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
@ -263,7 +165,7 @@
"\n", "\n",
"\n", "\n",
"load_df = pd.DataFrame.from_records(data=profile_gen(100), index=\"id\")\n", "load_df = pd.DataFrame.from_records(data=profile_gen(100), index=\"id\")\n",
"load_df.head()" "print(load_df.head())"
] ]
}, },
{ {
@ -279,85 +181,17 @@
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "name": "stdout",
"text/html": [ "output_type": "stream",
"<div>\n", "text": [
"<style scoped>\n", " name type properties\n",
" .dataframe tbody tr th:only-of-type {\n", "0 username string [char32]\n",
" vertical-align: middle;\n", "1 name string [char32]\n",
" }\n", "2 sex string [char2]\n",
"\n", "3 address string [char64]\n",
" .dataframe tbody tr th {\n", "4 mail string [char32]\n",
" vertical-align: top;\n", "5 birthdate long [timestamp]\n"
" }\n", ]
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>type</th>\n",
" <th>properties</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>username</td>\n",
" <td>string</td>\n",
" <td>[char32]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>name</td>\n",
" <td>string</td>\n",
" <td>[char32]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>sex</td>\n",
" <td>string</td>\n",
" <td>[char1]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>address</td>\n",
" <td>string</td>\n",
" <td>[char64]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>mail</td>\n",
" <td>string</td>\n",
" <td>[char32]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>birthdate</td>\n",
" <td>long</td>\n",
" <td>[timestamp]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name type properties\n",
"0 username string [char32]\n",
"1 name string [char32]\n",
"2 sex string [char1]\n",
"3 address string [char64]\n",
"4 mail string [char32]\n",
"5 birthdate long [timestamp]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
@ -372,7 +206,7 @@
")\n", ")\n",
"\n", "\n",
"# See the Kinetica column types\n", "# See the Kinetica column types\n",
"gpudb_table.type_as_df()" "print(gpudb_table.type_as_df())"
] ]
}, },
{ {
@ -394,10 +228,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"{'status': 'OK',\n", "1"
" 'message': '',\n",
" 'data_type': 'execute_sql_response',\n",
" 'response_time': 0.0148}"
] ]
}, },
"execution_count": 4, "execution_count": 4,
@ -408,34 +239,23 @@
"source": [ "source": [
"# create an LLM context for the table.\n", "# create an LLM context for the table.\n",
"\n", "\n",
"from gpudb import GPUdbException\n",
"\n",
"sql = f\"\"\"\n", "sql = f\"\"\"\n",
"CREATE OR REPLACE CONTEXT {kinetica_ctx}\n", "CREATE OR REPLACE CONTEXT {kinetica_ctx}\n",
"(\n", "(\n",
" TABLE = demo.test_profiles\n", " TABLE = {table_name}\n",
" COMMENT = 'Contains user profiles.'\n", " COMMENT = 'Contains user profiles.'\n",
"),\n", "),\n",
"(\n", "(\n",
" SAMPLES = (\n", " SAMPLES = (\n",
" 'How many male users are there?' = \n", " 'How many male users are there?' = \n",
" 'select count(1) as num_users\n", " 'select count(1) as num_users\n",
" from demo.test_profiles\n", " from {table_name}\n",
" where sex = ''M'';')\n", " where sex = ''M'';')\n",
")\n", ")\n",
"\"\"\"\n", "\"\"\"\n",
"\n", "\n",
"\n", "count_affected = kinetica_llm.kdbc.execute(sql)\n",
"def _check_error(response: dict) -> None:\n", "count_affected"
" status = response[\"status_info\"][\"status\"]\n",
" if status != \"OK\":\n",
" message = response[\"status_info\"][\"message\"]\n",
" raise GPUdbException(\"[%s]: %s\" % (status, message))\n",
"\n",
"\n",
"response = kinetica_llm.kdbc.execute_sql(sql)\n",
"_check_error(response)\n",
"response[\"status_info\"]"
] ]
}, },
{ {
@ -462,16 +282,16 @@
"text": [ "text": [
"================================\u001b[1m System Message \u001b[0m================================\n", "================================\u001b[1m System Message \u001b[0m================================\n",
"\n", "\n",
"CREATE TABLE demo.test_profiles AS\n", "CREATE TABLE demo.user_profiles AS\n",
"(\n", "(\n",
" username VARCHAR (32) NOT NULL,\n", " username VARCHAR (32) NOT NULL,\n",
" name VARCHAR (32) NOT NULL,\n", " name VARCHAR (32) NOT NULL,\n",
" sex VARCHAR (1) NOT NULL,\n", " sex VARCHAR (2) NOT NULL,\n",
" address VARCHAR (64) NOT NULL,\n", " address VARCHAR (64) NOT NULL,\n",
" mail VARCHAR (32) NOT NULL,\n", " mail VARCHAR (32) NOT NULL,\n",
" birthdate TIMESTAMP NOT NULL\n", " birthdate TIMESTAMP NOT NULL\n",
");\n", ");\n",
"COMMENT ON TABLE demo.test_profiles IS 'Contains user profiles.';\n", "COMMENT ON TABLE demo.user_profiles IS 'Contains user profiles.';\n",
"\n", "\n",
"================================\u001b[1m Human Message \u001b[0m=================================\n", "================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n", "\n",
@ -480,7 +300,7 @@
"==================================\u001b[1m Ai Message \u001b[0m==================================\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n", "\n",
"select count(1) as num_users\n", "select count(1) as num_users\n",
" from demo.test_profiles\n", " from demo.user_profiles\n",
" where sex = 'M';\n", " where sex = 'M';\n",
"\n", "\n",
"================================\u001b[1m Human Message \u001b[0m=================================\n", "================================\u001b[1m Human Message \u001b[0m=================================\n",
@ -545,78 +365,16 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"SQL: SELECT username, name\n", "SQL: SELECT username, name\n",
" FROM demo.test_profiles\n", " FROM demo.user_profiles\n",
" WHERE sex = 'F'\n", " WHERE sex = 'F'\n",
" ORDER BY username;\n" " ORDER BY username;\n",
" username name\n",
"0 alexander40 Tina Ramirez\n",
"1 bburton Paula Kaiser\n",
"2 brian12 Stefanie Williams\n",
"3 brownanna Jennifer Rowe\n",
"4 carl19 Amanda Potts\n"
] ]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>username</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>alexander40</td>\n",
" <td>Tina Ramirez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>bburton</td>\n",
" <td>Paula Kaiser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>brian12</td>\n",
" <td>Stefanie Williams</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>brownanna</td>\n",
" <td>Jennifer Rowe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>carl19</td>\n",
" <td>Amanda Potts</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" username name\n",
"0 alexander40 Tina Ramirez\n",
"1 bburton Paula Kaiser\n",
"2 brian12 Stefanie Williams\n",
"3 brownanna Jennifer Rowe\n",
"4 carl19 Amanda Potts"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
@ -626,7 +384,7 @@
")\n", ")\n",
"\n", "\n",
"print(f\"SQL: {response.sql}\")\n", "print(f\"SQL: {response.sql}\")\n",
"response.dataframe.head()" "print(response.dataframe.head())"
] ]
} }
], ],
@ -646,7 +404,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.4" "version": "3.8.19"
} }
}, },
"nbformat": 4, "nbformat": 4,