docs: fix errors and table formatting in notebook (#21696)

Two issues are fixed here:

* In the notebook, pandas DataFrames are rendered as HTML in the cell
outputs. On the documentation site, the renderer that converts notebooks
incorrectly displays the raw HTML. I can't find any examples where this
works, so I am formatting the DataFrames as plain text instead.

* Some incorrect table names were referenced, resulting in errors.
This commit is contained in:
Chad Juliano 2024-05-16 18:00:14 -05:00 committed by GitHub
parent f3289b898c
commit 685c13e157
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -54,18 +54,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [ "source": [
"# Install Langchain community and core packages\n", "# Install Langchain community and core packages\n",
"%pip install --upgrade --quiet langchain-core langchain-community\n", "%pip install --upgrade --quiet langchain-core langchain-community\n",
@ -123,98 +114,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "name": "stdout",
"text/html": [ "output_type": "stream",
"<div>\n", "text": [
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>username</th>\n",
" <th>name</th>\n",
" <th>sex</th>\n",
" <th>address</th>\n",
" <th>mail</th>\n",
" <th>birthdate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>eduardo69</td>\n",
" <td>Haley Beck</td>\n",
" <td>F</td>\n",
" <td>59836 Carla Causeway Suite 939\\nPort Eugene, I...</td>\n",
" <td>meltondenise@yahoo.com</td>\n",
" <td>1997-11-23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>lbarrera</td>\n",
" <td>Joshua Stephens</td>\n",
" <td>M</td>\n",
" <td>3108 Christina Forges\\nPort Timothychester, KY...</td>\n",
" <td>erica80@hotmail.com</td>\n",
" <td>1924-07-19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bburton</td>\n",
" <td>Paula Kaiser</td>\n",
" <td>F</td>\n",
" <td>Unit 7405 Box 3052\\nDPO AE 09858</td>\n",
" <td>timothypotts@gmail.com</td>\n",
" <td>1933-11-20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>melissa49</td>\n",
" <td>Wendy Reese</td>\n",
" <td>F</td>\n",
" <td>6408 Christopher Hill Apt. 459\\nNew Benjamin, ...</td>\n",
" <td>dadams@gmail.com</td>\n",
" <td>1988-10-11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>melissacarter</td>\n",
" <td>Manuel Rios</td>\n",
" <td>M</td>\n",
" <td>2241 Bell Gardens Suite 723\\nScottside, CA 38463</td>\n",
" <td>williamayala@gmail.com</td>\n",
" <td>1931-03-04</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" username name sex \\\n", " username name sex \\\n",
"id \n", "id \n",
"0 eduardo69 Haley Beck F \n", "0 eduardo69 Haley Beck F \n",
@ -233,16 +139,12 @@
"\n", "\n",
" birthdate \n", " birthdate \n",
"id \n", "id \n",
"0 1997-11-23 \n", "0 1997-12-01 \n",
"1 1924-07-19 \n", "1 1924-07-27 \n",
"2 1933-11-20 \n", "2 1933-11-28 \n",
"3 1988-10-11 \n", "3 1988-10-19 \n",
"4 1931-03-04 " "4 1931-03-12 \n"
] ]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
@ -263,7 +165,7 @@
"\n", "\n",
"\n", "\n",
"load_df = pd.DataFrame.from_records(data=profile_gen(100), index=\"id\")\n", "load_df = pd.DataFrame.from_records(data=profile_gen(100), index=\"id\")\n",
"load_df.head()" "print(load_df.head())"
] ]
}, },
{ {
@ -279,85 +181,17 @@
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "name": "stdout",
"text/html": [ "output_type": "stream",
"<div>\n", "text": [
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>type</th>\n",
" <th>properties</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>username</td>\n",
" <td>string</td>\n",
" <td>[char32]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>name</td>\n",
" <td>string</td>\n",
" <td>[char32]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>sex</td>\n",
" <td>string</td>\n",
" <td>[char1]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>address</td>\n",
" <td>string</td>\n",
" <td>[char64]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>mail</td>\n",
" <td>string</td>\n",
" <td>[char32]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>birthdate</td>\n",
" <td>long</td>\n",
" <td>[timestamp]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name type properties\n", " name type properties\n",
"0 username string [char32]\n", "0 username string [char32]\n",
"1 name string [char32]\n", "1 name string [char32]\n",
"2 sex string [char1]\n", "2 sex string [char2]\n",
"3 address string [char64]\n", "3 address string [char64]\n",
"4 mail string [char32]\n", "4 mail string [char32]\n",
"5 birthdate long [timestamp]" "5 birthdate long [timestamp]\n"
] ]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
@ -372,7 +206,7 @@
")\n", ")\n",
"\n", "\n",
"# See the Kinetica column types\n", "# See the Kinetica column types\n",
"gpudb_table.type_as_df()" "print(gpudb_table.type_as_df())"
] ]
}, },
{ {
@ -394,10 +228,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"{'status': 'OK',\n", "1"
" 'message': '',\n",
" 'data_type': 'execute_sql_response',\n",
" 'response_time': 0.0148}"
] ]
}, },
"execution_count": 4, "execution_count": 4,
@ -408,34 +239,23 @@
"source": [ "source": [
"# create an LLM context for the table.\n", "# create an LLM context for the table.\n",
"\n", "\n",
"from gpudb import GPUdbException\n",
"\n",
"sql = f\"\"\"\n", "sql = f\"\"\"\n",
"CREATE OR REPLACE CONTEXT {kinetica_ctx}\n", "CREATE OR REPLACE CONTEXT {kinetica_ctx}\n",
"(\n", "(\n",
" TABLE = demo.test_profiles\n", " TABLE = {table_name}\n",
" COMMENT = 'Contains user profiles.'\n", " COMMENT = 'Contains user profiles.'\n",
"),\n", "),\n",
"(\n", "(\n",
" SAMPLES = (\n", " SAMPLES = (\n",
" 'How many male users are there?' = \n", " 'How many male users are there?' = \n",
" 'select count(1) as num_users\n", " 'select count(1) as num_users\n",
" from demo.test_profiles\n", " from {table_name}\n",
" where sex = ''M'';')\n", " where sex = ''M'';')\n",
")\n", ")\n",
"\"\"\"\n", "\"\"\"\n",
"\n", "\n",
"\n", "count_affected = kinetica_llm.kdbc.execute(sql)\n",
"def _check_error(response: dict) -> None:\n", "count_affected"
" status = response[\"status_info\"][\"status\"]\n",
" if status != \"OK\":\n",
" message = response[\"status_info\"][\"message\"]\n",
" raise GPUdbException(\"[%s]: %s\" % (status, message))\n",
"\n",
"\n",
"response = kinetica_llm.kdbc.execute_sql(sql)\n",
"_check_error(response)\n",
"response[\"status_info\"]"
] ]
}, },
{ {
@ -462,16 +282,16 @@
"text": [ "text": [
"================================\u001b[1m System Message \u001b[0m================================\n", "================================\u001b[1m System Message \u001b[0m================================\n",
"\n", "\n",
"CREATE TABLE demo.test_profiles AS\n", "CREATE TABLE demo.user_profiles AS\n",
"(\n", "(\n",
" username VARCHAR (32) NOT NULL,\n", " username VARCHAR (32) NOT NULL,\n",
" name VARCHAR (32) NOT NULL,\n", " name VARCHAR (32) NOT NULL,\n",
" sex VARCHAR (1) NOT NULL,\n", " sex VARCHAR (2) NOT NULL,\n",
" address VARCHAR (64) NOT NULL,\n", " address VARCHAR (64) NOT NULL,\n",
" mail VARCHAR (32) NOT NULL,\n", " mail VARCHAR (32) NOT NULL,\n",
" birthdate TIMESTAMP NOT NULL\n", " birthdate TIMESTAMP NOT NULL\n",
");\n", ");\n",
"COMMENT ON TABLE demo.test_profiles IS 'Contains user profiles.';\n", "COMMENT ON TABLE demo.user_profiles IS 'Contains user profiles.';\n",
"\n", "\n",
"================================\u001b[1m Human Message \u001b[0m=================================\n", "================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n", "\n",
@ -480,7 +300,7 @@
"==================================\u001b[1m Ai Message \u001b[0m==================================\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n", "\n",
"select count(1) as num_users\n", "select count(1) as num_users\n",
" from demo.test_profiles\n", " from demo.user_profiles\n",
" where sex = 'M';\n", " where sex = 'M';\n",
"\n", "\n",
"================================\u001b[1m Human Message \u001b[0m=================================\n", "================================\u001b[1m Human Message \u001b[0m=================================\n",
@ -545,78 +365,16 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"SQL: SELECT username, name\n", "SQL: SELECT username, name\n",
" FROM demo.test_profiles\n", " FROM demo.user_profiles\n",
" WHERE sex = 'F'\n", " WHERE sex = 'F'\n",
" ORDER BY username;\n" " ORDER BY username;\n",
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>username</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>alexander40</td>\n",
" <td>Tina Ramirez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>bburton</td>\n",
" <td>Paula Kaiser</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>brian12</td>\n",
" <td>Stefanie Williams</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>brownanna</td>\n",
" <td>Jennifer Rowe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>carl19</td>\n",
" <td>Amanda Potts</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" username name\n", " username name\n",
"0 alexander40 Tina Ramirez\n", "0 alexander40 Tina Ramirez\n",
"1 bburton Paula Kaiser\n", "1 bburton Paula Kaiser\n",
"2 brian12 Stefanie Williams\n", "2 brian12 Stefanie Williams\n",
"3 brownanna Jennifer Rowe\n", "3 brownanna Jennifer Rowe\n",
"4 carl19 Amanda Potts" "4 carl19 Amanda Potts\n"
] ]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
@ -626,7 +384,7 @@
")\n", ")\n",
"\n", "\n",
"print(f\"SQL: {response.sql}\")\n", "print(f\"SQL: {response.sql}\")\n",
"response.dataframe.head()" "print(response.dataframe.head())"
] ]
} }
], ],
@ -646,7 +404,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.4" "version": "3.8.19"
} }
}, },
"nbformat": 4, "nbformat": 4,