feat: add postgresql support (#626)

- add postgresql support
- dashboard chat prompt update
Author: FangYin Cheng, 2023-09-28 14:27:45 +08:00 (committed by GitHub)
Commit: a2108e51a7
5 changed files with 229 additions and 6 deletions

@@ -16,6 +16,7 @@ from pilot.connections.rdbms.conn_sqlite import SQLiteConnect
from pilot.connections.rdbms.conn_mssql import MSSQLConnect
from pilot.connections.rdbms.base import RDBMSDatabase
from pilot.connections.rdbms.conn_clickhouse import ClickhouseConnect
+from pilot.connections.rdbms.conn_postgresql import PostgreSQLDatabase
from pilot.singleton import Singleton
from pilot.common.sql_database import Database
from pilot.connections.db_conn_info import DBConfig

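The hunk above appears to be the connection manager module, which now imports the new PostgreSQLDatabase connector alongside the existing ones. The sketch below shows one way such a registry could dispatch on the db_type class attribute each connector declares; the SUPPORTED_CONNECTORS mapping and get_connector helper are hypothetical and are not DB-GPT's actual API.

# Hypothetical dispatch sketch, not the project's real connection manager.
from pilot.connections.rdbms.conn_postgresql import PostgreSQLDatabase

SUPPORTED_CONNECTORS = {cls.db_type: cls for cls in (PostgreSQLDatabase,)}

def get_connector(db_type: str):
    """Look up a connector class by its db_type, e.g. "postgresql"."""
    return SUPPORTED_CONNECTORS[db_type]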
@@ -0,0 +1,217 @@
from typing import Iterable, Optional, Any
from sqlalchemy import text
from urllib.parse import quote
from pilot.connections.rdbms.base import RDBMSDatabase


class PostgreSQLDatabase(RDBMSDatabase):
    driver = "postgresql+psycopg2"
    db_type = "postgresql"
    db_dialect = "postgresql"

    @classmethod
    def from_uri_db(
        cls,
        host: str,
        port: int,
        user: str,
        pwd: str,
        db_name: str,
        engine_args: Optional[dict] = None,
        **kwargs: Any,
    ) -> RDBMSDatabase:
        db_url: str = (
            cls.driver
            + "://"
            + quote(user)
            + ":"
            + quote(pwd)
            + "@"
            + host
            + ":"
            + str(port)
            + "/"
            + db_name
        )
        return cls.from_uri(db_url, engine_args, **kwargs)

    def _sync_tables_from_db(self) -> Iterable[str]:
        table_results = self.session.execute(
            text(
                "SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'"
            )
        )
        view_results = self.session.execute(
            text(
                "SELECT viewname FROM pg_catalog.pg_views WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'"
            )
        )
        table_results = set(row[0] for row in table_results)
        view_results = set(row[0] for row in view_results)
        self._all_tables = table_results.union(view_results)
        self._metadata.reflect(bind=self._engine)
        return self._all_tables

    def get_grants(self):
        session = self._db_sessions()
        cursor = session.execute(
            text(
                f"""
                SELECT DISTINCT grantee, privilege_type
                FROM information_schema.role_table_grants
                WHERE grantee = CURRENT_USER;"""
            )
        )
        grants = cursor.fetchall()
        return grants

    def get_collation(self):
        """Get collation."""
        try:
            session = self._db_sessions()
            cursor = session.execute(
                text(
                    "SELECT datcollate AS collation FROM pg_database WHERE datname = current_database();"
                )
            )
            collation = cursor.fetchone()[0]
            return collation
        except Exception as e:
            print("postgresql get collation error: ", e)
            return None

    def get_users(self):
        """Get user info."""
        try:
            cursor = self.session.execute(
                text("SELECT rolname FROM pg_roles WHERE rolname NOT LIKE 'pg_%';")
            )
            users = cursor.fetchall()
            return [user[0] for user in users]
        except Exception as e:
            print("postgresql get users error: ", e)
            return []

    def get_fields(self, table_name):
        """Get column fields about specified table."""
        session = self._db_sessions()
        cursor = session.execute(
            text(
                f"SELECT column_name, data_type, column_default, is_nullable, column_name as column_comment \
                FROM information_schema.columns WHERE table_name = :table_name",
            ),
            {"table_name": table_name},
        )
        fields = cursor.fetchall()
        return [(field[0], field[1], field[2], field[3], field[4]) for field in fields]

    def get_charset(self):
        """Get character_set."""
        session = self._db_sessions()
        cursor = session.execute(
            text(
                "SELECT pg_encoding_to_char(encoding) FROM pg_database WHERE datname = current_database();"
            )
        )
        character_set = cursor.fetchone()[0]
        return character_set

    def get_show_create_table(self, table_name):
        cur = self.session.execute(
            text(
                f"""
                SELECT a.attname as column_name, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type
                FROM pg_catalog.pg_attribute a
                WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attnum <= (
                    SELECT max(a.attnum)
                    FROM pg_catalog.pg_attribute a
                    WHERE a.attrelid = (SELECT oid FROM pg_catalog.pg_class WHERE relname='{table_name}')
                ) AND a.attrelid = (SELECT oid FROM pg_catalog.pg_class WHERE relname='{table_name}')
                """
            )
        )
        rows = cur.fetchall()
        create_table_query = f"CREATE TABLE {table_name} (\n"
        for row in rows:
            create_table_query += f" {row[0]} {row[1]},\n"
        create_table_query = create_table_query.rstrip(",\n") + "\n)"
        return create_table_query

    def get_table_comments(self, db_name=None):
        tables = self.table_simple_info()
        comments = []
        for table in tables:
            table_name = table[0]
            table_comment = self.get_show_create_table(table_name)
            comments.append((table_name, table_comment))
        return comments

    def get_database_list(self):
        session = self._db_sessions()
        cursor = session.execute(text("SELECT datname FROM pg_database;"))
        results = cursor.fetchall()
        return [
            d[0] for d in results if d[0] not in ["template0", "template1", "postgres"]
        ]

    def get_database_names(self):
        session = self._db_sessions()
        cursor = session.execute(text("SELECT datname FROM pg_database;"))
        results = cursor.fetchall()
        return [
            d[0] for d in results if d[0] not in ["template0", "template1", "postgres"]
        ]

    def get_current_db_name(self) -> str:
        return self.session.execute(text("SELECT current_database()")).scalar()

    def table_simple_info(self):
        _sql = f"""
            SELECT table_name, string_agg(column_name, ', ') AS schema_info
            FROM (
                SELECT c.relname AS table_name, a.attname AS column_name
                FROM pg_catalog.pg_class c
                JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid
                WHERE c.relkind = 'r'
                  AND a.attnum > 0
                  AND NOT a.attisdropped
                  AND n.nspname NOT LIKE 'pg_%'
                  AND n.nspname != 'information_schema'
                ORDER BY c.relname, a.attnum
            ) sub
            GROUP BY table_name;
        """
        cursor = self.session.execute(text(_sql))
        results = cursor.fetchall()
        return results

    def get_fields(self, table_name, schema_name="public"):
        """Get column fields about specified table."""
        session = self._db_sessions()
        cursor = session.execute(
            text(
                f"""
                SELECT c.column_name, c.data_type, c.column_default, c.is_nullable, d.description
                FROM information_schema.columns c
                LEFT JOIN pg_catalog.pg_description d
                ON (c.table_schema || '.' || c.table_name)::regclass::oid = d.objoid AND c.ordinal_position = d.objsubid
                WHERE c.table_name='{table_name}' AND c.table_schema='{schema_name}'
                """
            )
        )
        fields = cursor.fetchall()
        return [(field[0], field[1], field[2], field[3], field[4]) for field in fields]

    def get_indexes(self, table_name):
        """Get table indexes about specified table."""
        session = self._db_sessions()
        cursor = session.execute(
            text(
                f"SELECT indexname, indexdef FROM pg_indexes WHERE tablename = '{table_name}'"
            )
        )
        indexes = cursor.fetchall()
        return [(index[0], index[1]) for index in indexes]

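A usage sketch for the new connector follows; the host, credentials, database, and table names are hypothetical, and from_uri_db simply assembles a SQLAlchemy URL and defers to the inherited from_uri.

# Hedged example, assuming a local PostgreSQL instance with these made-up credentials.
db = PostgreSQLDatabase.from_uri_db(
    host="localhost",
    port=5432,
    user="postgres",
    pwd="p@ssword",  # quote() percent-encodes this to "p%40ssword" in the URL
    db_name="dvdrental",
)
print(db.get_current_db_name())  # "dvdrental"
print(db.table_simple_info())    # [(table_name, "col1, col2, ..."), ...]
print(db.get_fields("actor"))    # [(name, type, default, nullable, comment), ...]

Two details worth noting: the later get_fields(table_name, schema_name="public") definition shadows the earlier one, so the schema-aware variant is the one that actually runs; and most helpers interpolate table_name into SQL via f-strings, while the shadowed get_fields uses the bind-parameter style (:table_name), which is the safer pattern for values that originate outside the application.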
@@ -21,9 +21,9 @@ class ChatDashboardOutputParser(BaseOutputParser):
        super().__init__(sep=sep, is_stream_out=is_stream_out)

    def parse_prompt_response(self, model_out_text):
-        clean_str = super().parse_prompt_response(model_out_text)
-        print("clean prompt response:", clean_str)
-        response = json.loads(clean_str)
+        # clean_str = super().parse_prompt_response(model_out_text)
+        print("clean prompt response:", model_out_text)
+        response = json.loads(model_out_text)
        chart_items: List[ChartItem] = []
        if not isinstance(response, list):
            response = [response]

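The change above stops cleaning the model output before decoding: the raw model_out_text is printed and handed straight to json.loads. A minimal sketch of what the parser now expects (the keys below are illustrative and not necessarily the project's exact ChartItem fields):

import json

model_out_text = '{"title": "Orders per day", "sql": "SELECT ...", "showcase": "LineChart"}'
response = json.loads(model_out_text)  # raises unless the model returned pure JSON
if not isinstance(response, list):
    response = [response]  # a single chart object is wrapped into a one-element list

Because the super().parse_prompt_response cleanup is commented out, any extra text around the JSON (a markdown code fence, for instance) would now make json.loads fail, which is what the stricter prompt wording below guards against.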
@@ -22,9 +22,13 @@ According to the characteristics of the analyzed data, choose the most suitable
Pay attention to the length of the output content of the analysis result, do not exceed 4000 tokens
-Give the correct {dialect} analysis SQL (don't use unprovided values such as 'paid'), analysis title(don't exist the same), display method and summary of brief analysis thinking, and respond in the following json format:
+Give the correct {dialect} analysis SQL
+1.Do not use unprovided values such as 'paid'
+2.All queried values must have aliases, such as select count(*) as count from table
+3.If the table structure definition uses the keywords of {dialect} as field names, you need to use escape characters, such as select `count` from table
+4.Carefully check the correctness of the SQL, the SQL must be correct, display method and summary of brief analysis thinking, and respond in the following json format:
{response}
-Ensure the response is correct json and can be parsed by Python json.loads
+The important thing is: Please make sure to only return the json string, do not add any other content (for direct processing by the program), and the json can be parsed by Python json.loads
"""
RESPONSE_FORMAT = [

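Rule 3 in the updated prompt uses backticks in its example, which is MySQL's identifier quoting; when {dialect} is postgresql, the standard way to escape a column name that collides with a keyword is double quotes. A small illustration (the metrics table and count column are hypothetical):

mysql_sql = "SELECT `count` FROM metrics"      # backtick identifier quoting (MySQL)
postgres_sql = 'SELECT "count" FROM metrics'   # double-quote identifier quoting (PostgreSQL)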
@@ -366,7 +366,8 @@ def all_datasource_requires():
    """
    pip install "db-gpt[datasource]"
    """
-    setup_spec.extras["datasource"] = ["pymssql", "pymysql", "pyspark"]
+    setup_spec.extras["datasource"] = ["pymssql", "pymysql", "pyspark", "psycopg2"]
def openai_requires():
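The datasource extra now pulls in psycopg2, the driver named in PostgreSQLDatabase.driver. Note that the psycopg2 source package needs libpq headers and a C toolchain at install time, whereas psycopg2-binary ships prebuilt wheels. A quick sanity check after pip install "db-gpt[datasource]" (the connection string values below are hypothetical):

from sqlalchemy import create_engine, text

engine = create_engine("postgresql+psycopg2://postgres:secret@localhost:5432/postgres")
with engine.connect() as conn:
    print(conn.execute(text("SELECT version()")).scalar())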