mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-01 00:03:29 +00:00
[Feature] Add Apache doris 2.0+ Database connector (#902)
Co-authored-by: 一帆 <zhang.f@digitalcnzz.com>
This commit is contained in:
parent
18565149ea
commit
a6642dc67f
@ -196,7 +196,7 @@ The core capabilities primarily consist of the following components:
|
||||
- [ ] Redis
|
||||
- [ ] MongoDB
|
||||
- [ ] HBase
|
||||
- [ ] Doris
|
||||
- [x] Doris
|
||||
- [ ] DB2
|
||||
- [ ] Couchbase
|
||||
- [ ] Elasticsearch
|
||||
|
@ -229,7 +229,7 @@ The MIT License (MIT)
|
||||
- [ ] Redis
|
||||
- [ ] MongoDB
|
||||
- [ ] HBase
|
||||
- [ ] Doris
|
||||
- [x] Doris
|
||||
- [ ] DB2
|
||||
- [ ] Couchbase
|
||||
- [ ] Elasticsearch
|
||||
|
@ -3,23 +3,23 @@ The connections module supports connecting to various structured, semi-structure
|
||||
|
||||
The list of data sources we currently support is as follows.
|
||||
|
||||
| DataSource | support | Notes |
|
||||
| ------------------------------------------------------------------------------ | ----------- | ------------------------------------------- |
|
||||
| [MySQL](https://www.mysql.com/) | Yes | MySQL is the world's most popular open source database. |
|
||||
| [PostgresSQL](https://www.postgresql.org/) | Yes | The World's Most Advanced Open Source Relational Database |
|
||||
| [Spark](https://github.com/apache/spark) | Yes | Unified Engine for large-scale data analytics |
|
||||
| [DuckDB](https://github.com/duckdb/duckdb) | Yes | DuckDB is an in-process SQL OLAP database management system |
|
||||
| [Sqlite](https://github.com/sqlite/sqlite) | Yes | |
|
||||
| [MSSQL](https://github.com/microsoft/mssql-jdbc) | Yes | |
|
||||
| [ClickHouse](https://github.com/ClickHouse/ClickHouse) | Yes | ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics. |
|
||||
| [Oracle](https://github.com/oracle) | No | TODO |
|
||||
| [Redis](https://github.com/redis/redis) | No | The Multi-model NoSQL Database |
|
||||
| [MongoDB](https://github.com/mongodb/mongo) | No | MongoDB is a source-available cross-platform document-oriented database program |
|
||||
| [HBase](https://github.com/apache/hbase) | No | Open-source, distributed, versioned, column-oriented store modeled |
|
||||
| [Doris](https://github.com/apache/doris) | No | Apache Doris is an easy-to-use, high performance and unified analytics database. |
|
||||
| [DB2](https://github.com/IBM/Db2) | No | TODO |
|
||||
| [Couchbase](https://github.com/couchbase) | No | TODO |
|
||||
| [Elasticsearch](https://github.com/elastic/elasticsearch) | No | Free and Open, Distributed, RESTful Search Engine |
|
||||
| [OceanBase](https://github.com/OceanBase) | No | OceanBase is a distributed relational database. |
|
||||
| [TiDB](https://github.com/pingcap/tidb) | No | TODO |
|
||||
| [StarRocks](https://github.com/StarRocks/starrocks) | Yes | StarRocks is a next-gen, high-performance analytical data warehouse |
|
||||
| DataSource | support | Notes |
|
||||
| ------------------------------------------------------------------------------ |---------| ------------------------------------------- |
|
||||
| [MySQL](https://www.mysql.com/) | Yes | MySQL is the world's most popular open source database. |
|
||||
| [PostgreSQL](https://www.postgresql.org/) | Yes | The World's Most Advanced Open Source Relational Database |
|
||||
| [Spark](https://github.com/apache/spark) | Yes | Unified Engine for large-scale data analytics |
|
||||
| [DuckDB](https://github.com/duckdb/duckdb) | Yes | DuckDB is an in-process SQL OLAP database management system |
|
||||
| [Sqlite](https://github.com/sqlite/sqlite) | Yes | |
|
||||
| [MSSQL](https://github.com/microsoft/mssql-jdbc) | Yes | |
|
||||
| [ClickHouse](https://github.com/ClickHouse/ClickHouse) | Yes | ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics. |
|
||||
| [Oracle](https://github.com/oracle) | No | TODO |
|
||||
| [Redis](https://github.com/redis/redis) | No | The Multi-model NoSQL Database |
|
||||
| [MongoDB](https://github.com/mongodb/mongo) | No | MongoDB is a source-available cross-platform document-oriented database program |
|
||||
| [HBase](https://github.com/apache/hbase) | No | Open-source, distributed, versioned, column-oriented store modeled after Google's Bigtable |
|
||||
| [Doris](https://github.com/apache/doris) | Yes | Apache Doris is an easy-to-use, high performance and unified analytics database. |
|
||||
| [DB2](https://github.com/IBM/Db2) | No | TODO |
|
||||
| [Couchbase](https://github.com/couchbase) | No | TODO |
|
||||
| [Elasticsearch](https://github.com/elastic/elasticsearch) | No | Free and Open, Distributed, RESTful Search Engine |
|
||||
| [OceanBase](https://github.com/OceanBase) | No | OceanBase is a distributed relational database. |
|
||||
| [TiDB](https://github.com/pingcap/tidb) | No | TODO |
|
||||
| [StarRocks](https://github.com/StarRocks/starrocks) | Yes | StarRocks is a next-gen, high-performance analytical data warehouse |
|
@ -31,6 +31,7 @@ class DBType(Enum):
|
||||
Clickhouse = DbInfo("clickhouse")
|
||||
StarRocks = DbInfo("starrocks")
|
||||
Spark = DbInfo("spark", True)
|
||||
Doris = DbInfo("doris")
|
||||
|
||||
def value(self):
    """Return the ``name`` attribute of the object stored as this member's value."""
    info = self._value_
    return info.name
|
||||
|
@ -19,6 +19,7 @@ from pilot.connections.rdbms.base import RDBMSDatabase
|
||||
from pilot.connections.rdbms.conn_clickhouse import ClickhouseConnect
|
||||
from pilot.connections.rdbms.conn_postgresql import PostgreSQLDatabase
|
||||
from pilot.connections.rdbms.conn_starrocks import StarRocksConnect
|
||||
from pilot.connections.rdbms.conn_doris import DorisConnect
|
||||
from pilot.singleton import Singleton
|
||||
from pilot.common.sql_database import Database
|
||||
from pilot.connections.db_conn_info import DBConfig
|
||||
|
159
pilot/connections/rdbms/conn_doris.py
Normal file
159
pilot/connections/rdbms/conn_doris.py
Normal file
@ -0,0 +1,159 @@
|
||||
from typing import Iterable, Optional, Any
|
||||
from sqlalchemy import text
|
||||
from urllib.parse import quote
|
||||
from urllib.parse import quote_plus as urlquote
|
||||
from pilot.connections.rdbms.base import RDBMSDatabase
|
||||
|
||||
|
||||
class DorisConnect(RDBMSDatabase):
    """Apache Doris connector built on top of ``RDBMSDatabase``.

    Metadata is read from ``information_schema`` and from Doris ``SHOW``
    statements, all executed on the session returned by ``get_session()``.
    """

    driver = "doris"
    db_type = "doris"
    db_dialect = "doris"

    # Databases that ``get_database_names`` never reports.
    _SYSTEM_DATABASES = frozenset(
        {
            "information_schema",
            "sys",
            "_statistics_",
            "mysql",
            "__internal_schema",
            "doris_audit_db__",
        }
    )

    @classmethod
    def from_uri_db(
        cls,
        host: str,
        port: int,
        user: str,
        pwd: str,
        db_name: str,
        engine_args: Optional[dict] = None,
        **kwargs: Any,
    ) -> RDBMSDatabase:
        """Create a connector from individual connection settings.

        Args:
            host: Server host name or address.
            port: Server port.
            user: User name; encoded with ``urllib.parse.quote``.
            pwd: Password; encoded with ``urllib.parse.quote_plus``.
            db_name: Database to connect to.
            engine_args: Optional engine arguments forwarded to ``from_uri``.
            **kwargs: Extra keyword arguments forwarded to ``from_uri``.

        Returns:
            Whatever ``cls.from_uri`` returns for the assembled URL.
        """
        db_url: str = (
            f"{cls.driver}://{quote(user)}:{urlquote(pwd)}@{host}:{port}/{db_name}"
        )
        return cls.from_uri(db_url, engine_args, **kwargs)

    def _sync_tables_from_db(self) -> Iterable[str]:
        """Reload the table names of the current database and return them.

        The names are stored in ``self._all_tables`` and the metadata is
        reflected against ``self._engine`` afterwards.
        """
        rows = self.get_session().execute(
            text(
                "SELECT TABLE_NAME FROM information_schema.tables "
                "where TABLE_SCHEMA=database()"
            )
        )
        self._all_tables = {row[0] for row in rows}
        self._metadata.reflect(bind=self._engine)
        return self._all_tables

    def get_grants(self):
        """Return one column of every ``SHOW GRANTS`` row.

        When the rows have exactly two columns the second column is used,
        otherwise the third. An empty result yields an empty list.
        """
        grants = self.get_session().execute(text("SHOW GRANTS")).fetchall()
        if not grants:
            return []
        column = 1 if len(grants[0]) == 2 else 2
        return [row[column] for row in grants]

    def _get_current_version(self):
        """Get database current version"""
        return int(
            self.get_session().execute(text("select current_version()")).scalar()
        )

    def get_collation(self):
        """Get collation.

        Returns the first column of the first ``SHOW COLLATION`` row, or an
        empty string when the statement returns no rows.

        ref: https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-reference/Show-Statements/SHOW-COLLATION/
        """
        results = self.get_session().execute(text("SHOW COLLATION")).fetchall()
        return "" if not results else results[0][0]

    def get_users(self):
        """Get user info; this connector always returns an empty list."""
        return []

    def get_fields(self, table_name):
        """Get column fields about specified table.

        Args:
            table_name: Name of a table in the current database.

        Returns:
            A list of ``(name, type, default, is_nullable, comment)`` tuples.
        """
        # The table name is sent as a bound parameter rather than spliced into
        # the SQL text, so quotes inside the name cannot alter the statement.
        cursor = self.get_session().execute(
            text(
                "select COLUMN_NAME, COLUMN_TYPE, COLUMN_DEFAULT, IS_NULLABLE, "
                "COLUMN_COMMENT "
                "from information_schema.columns "
                "where TABLE_NAME=:table_name and TABLE_SCHEMA=database()"
            ),
            {"table_name": table_name},
        )
        fields = cursor.fetchall()
        return [(field[0], field[1], field[2], field[3], field[4]) for field in fields]

    def get_charset(self):
        """Get character_set; this connector always reports ``"utf-8"``."""
        return "utf-8"

    def get_show_create_table(self, table_name):
        """Return the table comment of ``table_name`` in the current database.

        Despite the method name, the table comment is returned instead of the
        CREATE TABLE statement: a table description is what is wanted here,
        and the full DDL makes the prompt exceed the token limit.

        Returns:
            The comment as a string, or ``""`` when no matching row exists.
        """
        cur = self.get_session().execute(
            text(
                "SELECT TABLE_COMMENT "
                "FROM information_schema.tables "
                "where TABLE_NAME=:table_name and TABLE_SCHEMA=database()"
            ),
            {"table_name": table_name},
        )
        table = cur.fetchone()
        return str(table[0]) if table else ""

    def get_table_comments(self, db_name=None):
        """Return ``(table_name, table_comment)`` pairs for a database.

        Args:
            db_name: Database to inspect; when falsy, the current database
                (``database()``) is used.
        """
        if db_name:
            # Bind the schema name instead of embedding it in the SQL text.
            schema_filter = "where TABLE_SCHEMA=:db_name"
            params = {"db_name": db_name}
        else:
            schema_filter = "where TABLE_SCHEMA=database()"
            params = {}
        cursor = self.get_session().execute(
            text(
                "SELECT TABLE_NAME,TABLE_COMMENT "
                "FROM information_schema.tables "
                f"{schema_filter}"
            ),
            params,
        )
        tables = cursor.fetchall()
        return [(table[0], table[1]) for table in tables]

    def get_database_list(self):
        """Alias of ``get_database_names``."""
        return self.get_database_names()

    def get_database_names(self):
        """Return the ``SHOW DATABASES`` names minus the excluded system ones."""
        results = self.get_session().execute(text("SHOW DATABASES")).fetchall()
        return [d[0] for d in results if d[0] not in self._SYSTEM_DATABASES]

    def get_current_db_name(self) -> str:
        """Return the result of ``select database()``."""
        return self.get_session().execute(text("select database()")).scalar()

    def table_simple_info(self):
        """Return one ``table(col1,col2,...);`` string per table.

        The strings are assembled by the database with ``concat`` and
        ``group_concat`` over the columns of the current database.
        """
        cursor = self.get_session().execute(
            text(
                "SELECT concat(TABLE_NAME,'(',group_concat(COLUMN_NAME,','),');') "
                "FROM information_schema.columns "
                "where TABLE_SCHEMA=database() "
                "GROUP BY TABLE_NAME"
            )
        )
        results = cursor.fetchall()
        return [x[0] for x in results]

    def get_indexes(self, table_name):
        """Get table indexes about specified table.

        Returns:
            A list of tuples holding columns 2 and 4 of every
            ``SHOW INDEX`` row.
        """
        # NOTE(review): an identifier cannot be sent as a bound parameter, so
        # ``table_name`` is still interpolated here; callers must only pass
        # trusted table names.
        cursor = self.get_session().execute(text(f"SHOW INDEX FROM {table_name}"))
        indexes = cursor.fetchall()
        return [(index[2], index[4]) for index in indexes]
|
BIN
pilot/server/static/icons/doris.png
Normal file
BIN
pilot/server/static/icons/doris.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 18 KiB |
5
setup.py
5
setup.py
@ -375,7 +375,10 @@ def core_requires():
|
||||
"auto-gpt-plugin-template",
|
||||
"gTTS==2.3.1",
|
||||
"langchain>=0.0.286",
|
||||
"SQLAlchemy==2.0.22",
|
||||
# Relaxed from the pinned ==2.0.22 to a version range: the other dependencies only need >=1.4, and some (e.g. pydoris) require <2
|
||||
"SQLAlchemy>=1.4,<3",
|
||||
# for doris
|
||||
"pydoris>=1.0.2,<2.0.0",
|
||||
"fastapi==0.98.0",
|
||||
"pymysql",
|
||||
"duckdb==0.8.1",
|
||||
|
Loading…
Reference in New Issue
Block a user