mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-08 03:44:14 +00:00
[Feature] Add Apache doris 2.0+ Database connector (#902)
Co-authored-by: 一帆 <zhang.f@digitalcnzz.com>
This commit is contained in:
parent
18565149ea
commit
a6642dc67f
@ -196,7 +196,7 @@ The core capabilities primarily consist of the following components:
|
|||||||
- [ ] Redis
|
- [ ] Redis
|
||||||
- [ ] MongoDB
|
- [ ] MongoDB
|
||||||
- [ ] HBase
|
- [ ] HBase
|
||||||
- [ ] Doris
|
- [x] Doris
|
||||||
- [ ] DB2
|
- [ ] DB2
|
||||||
- [ ] Couchbase
|
- [ ] Couchbase
|
||||||
- [ ] Elasticsearch
|
- [ ] Elasticsearch
|
||||||
|
@ -229,7 +229,7 @@ The MIT License (MIT)
|
|||||||
- [ ] Redis
|
- [ ] Redis
|
||||||
- [ ] MongoDB
|
- [ ] MongoDB
|
||||||
- [ ] HBase
|
- [ ] HBase
|
||||||
- [ ] Doris
|
- [x] Doris
|
||||||
- [ ] DB2
|
- [ ] DB2
|
||||||
- [ ] Couchbase
|
- [ ] Couchbase
|
||||||
- [ ] Elasticsearch
|
- [ ] Elasticsearch
|
||||||
|
@ -3,23 +3,23 @@ The connections module supports connecting to various structured, semi-structure
|
|||||||
|
|
||||||
The list of data sources we currently support is as follows.
|
The list of data sources we currently support is as follows.
|
||||||
|
|
||||||
| DataSource | support | Notes |
|
| DataSource | support | Notes |
|
||||||
| ------------------------------------------------------------------------------ | ----------- | ------------------------------------------- |
|
| ------------------------------------------------------------------------------ |---------| ------------------------------------------- |
|
||||||
| [MySQL](https://www.mysql.com/) | Yes | MySQL is the world's most popular open source database. |
|
| [MySQL](https://www.mysql.com/) | Yes | MySQL is the world's most popular open source database. |
|
||||||
| [PostgreSQL](https://www.postgresql.org/) | Yes | The World's Most Advanced Open Source Relational Database |
|
| [PostgreSQL](https://www.postgresql.org/) | Yes | The World's Most Advanced Open Source Relational Database |
|
||||||
| [Spark](https://github.com/apache/spark) | Yes | Unified Engine for large-scale data analytics |
|
| [Spark](https://github.com/apache/spark) | Yes | Unified Engine for large-scale data analytics |
|
||||||
| [DuckDB](https://github.com/duckdb/duckdb) | Yes | DuckDB is an in-process SQL OLAP database management system |
|
| [DuckDB](https://github.com/duckdb/duckdb) | Yes | DuckDB is an in-process SQL OLAP database management system |
|
||||||
| [Sqlite](https://github.com/sqlite/sqlite) | Yes | |
|
| [Sqlite](https://github.com/sqlite/sqlite) | Yes | |
|
||||||
| [MSSQL](https://github.com/microsoft/mssql-jdbc) | Yes | |
|
| [MSSQL](https://github.com/microsoft/mssql-jdbc) | Yes | |
|
||||||
| [ClickHouse](https://github.com/ClickHouse/ClickHouse) | Yes | ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics. |
|
| [ClickHouse](https://github.com/ClickHouse/ClickHouse) | Yes | ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics. |
|
||||||
| [Oracle](https://github.com/oracle) | No | TODO |
|
| [Oracle](https://github.com/oracle) | No | TODO |
|
||||||
| [Redis](https://github.com/redis/redis) | No | The Multi-model NoSQL Database |
|
| [Redis](https://github.com/redis/redis) | No | The Multi-model NoSQL Database |
|
||||||
| [MongoDB](https://github.com/mongodb/mongo) | No | MongoDB is a source-available cross-platform document-oriented database program |
|
| [MongoDB](https://github.com/mongodb/mongo) | No | MongoDB is a source-available cross-platform document-oriented database program |
|
||||||
| [HBase](https://github.com/apache/hbase) | No | Open-source, distributed, versioned, column-oriented store modeled after Google's Bigtable |
|
| [HBase](https://github.com/apache/hbase) | No | Open-source, distributed, versioned, column-oriented store modeled after Google's Bigtable |
|
||||||
| [Doris](https://github.com/apache/doris) | No | Apache Doris is an easy-to-use, high performance and unified analytics database. |
|
| [Doris](https://github.com/apache/doris) | Yes | Apache Doris is an easy-to-use, high performance and unified analytics database. |
|
||||||
| [DB2](https://github.com/IBM/Db2) | No | TODO |
|
| [DB2](https://github.com/IBM/Db2) | No | TODO |
|
||||||
| [Couchbase](https://github.com/couchbase) | No | TODO |
|
| [Couchbase](https://github.com/couchbase) | No | TODO |
|
||||||
| [Elasticsearch](https://github.com/elastic/elasticsearch) | No | Free and Open, Distributed, RESTful Search Engine |
|
| [Elasticsearch](https://github.com/elastic/elasticsearch) | No | Free and Open, Distributed, RESTful Search Engine |
|
||||||
| [OceanBase](https://github.com/OceanBase) | No | OceanBase is a distributed relational database. |
|
| [OceanBase](https://github.com/OceanBase) | No | OceanBase is a distributed relational database. |
|
||||||
| [TiDB](https://github.com/pingcap/tidb) | No | TODO |
|
| [TiDB](https://github.com/pingcap/tidb) | No | TODO |
|
||||||
| [StarRocks](https://github.com/StarRocks/starrocks) | Yes | StarRocks is a next-gen, high-performance analytical data warehouse |
|
| [StarRocks](https://github.com/StarRocks/starrocks) | Yes | StarRocks is a next-gen, high-performance analytical data warehouse |
|
@ -31,6 +31,7 @@ class DBType(Enum):
|
|||||||
Clickhouse = DbInfo("clickhouse")
|
Clickhouse = DbInfo("clickhouse")
|
||||||
StarRocks = DbInfo("starrocks")
|
StarRocks = DbInfo("starrocks")
|
||||||
Spark = DbInfo("spark", True)
|
Spark = DbInfo("spark", True)
|
||||||
|
Doris = DbInfo("doris")
|
||||||
|
|
||||||
def value(self):
|
def value(self):
|
||||||
return self._value_.name
|
return self._value_.name
|
||||||
|
@ -19,6 +19,7 @@ from pilot.connections.rdbms.base import RDBMSDatabase
|
|||||||
from pilot.connections.rdbms.conn_clickhouse import ClickhouseConnect
|
from pilot.connections.rdbms.conn_clickhouse import ClickhouseConnect
|
||||||
from pilot.connections.rdbms.conn_postgresql import PostgreSQLDatabase
|
from pilot.connections.rdbms.conn_postgresql import PostgreSQLDatabase
|
||||||
from pilot.connections.rdbms.conn_starrocks import StarRocksConnect
|
from pilot.connections.rdbms.conn_starrocks import StarRocksConnect
|
||||||
|
from pilot.connections.rdbms.conn_doris import DorisConnect
|
||||||
from pilot.singleton import Singleton
|
from pilot.singleton import Singleton
|
||||||
from pilot.common.sql_database import Database
|
from pilot.common.sql_database import Database
|
||||||
from pilot.connections.db_conn_info import DBConfig
|
from pilot.connections.db_conn_info import DBConfig
|
||||||
|
159
pilot/connections/rdbms/conn_doris.py
Normal file
159
pilot/connections/rdbms/conn_doris.py
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
from typing import Iterable, Optional, Any
|
||||||
|
from sqlalchemy import text
|
||||||
|
from urllib.parse import quote
|
||||||
|
from urllib.parse import quote_plus as urlquote
|
||||||
|
from pilot.connections.rdbms.base import RDBMSDatabase
|
||||||
|
|
||||||
|
|
||||||
|
class DorisConnect(RDBMSDatabase):
    """Apache Doris connector built on top of ``RDBMSDatabase``.

    Metadata is read from ``information_schema`` and ``SHOW ...`` statements
    executed through the session returned by ``self.get_session()``.
    User-supplied table and database names are passed as bound parameters
    wherever the statement allows it, instead of being spliced into the SQL
    text.
    """

    driver = "doris"
    db_type = "doris"
    db_dialect = "doris"

    # Databases filtered out of the result of get_database_names().
    _EXCLUDED_DATABASES = frozenset(
        {
            "information_schema",
            "sys",
            "_statistics_",
            "mysql",
            "__internal_schema",
            "doris_audit_db__",
        }
    )

    @classmethod
    def from_uri_db(
        cls,
        host: str,
        port: int,
        user: str,
        pwd: str,
        db_name: str,
        engine_args: Optional[dict] = None,
        **kwargs: Any,
    ) -> RDBMSDatabase:
        """Build a connection URL from its parts and delegate to ``from_uri``.

        Args:
            host: Server host name or address.
            port: Server port.
            user: Login user; percent-encoded with ``quote``.
            pwd: Login password; encoded with ``quote_plus``.
            db_name: Database to connect to (inserted into the URL as-is).
            engine_args: Optional arguments forwarded to ``from_uri``.
            **kwargs: Extra keyword arguments forwarded to ``from_uri``.

        Returns:
            Whatever ``cls.from_uri`` returns for the assembled URL.
        """
        db_url: str = (
            f"{cls.driver}://{quote(user)}:{urlquote(pwd)}@{host}:{port}/{db_name}"
        )
        return cls.from_uri(db_url, engine_args, **kwargs)

    def _sync_tables_from_db(self) -> Iterable[str]:
        """Refresh ``self._all_tables`` from the current database.

        Also re-reflects ``self._metadata`` against ``self._engine``.

        Returns:
            The set of table names found in the current database.
        """
        rows = self.get_session().execute(
            text(
                "SELECT TABLE_NAME FROM information_schema.tables "
                "where TABLE_SCHEMA=database()"
            )
        )
        self._all_tables = {row[0] for row in rows}
        self._metadata.reflect(bind=self._engine)
        return self._all_tables

    def get_grants(self):
        """Return one value per row of ``SHOW GRANTS``.

        Takes column 1 when the rows have exactly two columns and column 2
        otherwise; returns an empty list when there are no rows.
        """
        grants = self.get_session().execute(text("SHOW GRANTS")).fetchall()
        if not grants:
            return []
        # The result layout differs between servers; pick the column by width.
        column = 1 if len(grants[0]) == 2 else 2
        return [row[column] for row in grants]

    def _get_current_version(self):
        """Return the result of ``select current_version()`` as an int."""
        return int(
            self.get_session().execute(text("select current_version()")).scalar()
        )

    def get_collation(self):
        """Return the first column of the first ``SHOW COLLATION`` row.

        Returns an empty string when the statement yields no rows.

        ref: https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-reference/Show-Statements/SHOW-COLLATION/
        """
        results = self.get_session().execute(text("SHOW COLLATION")).fetchall()
        return "" if not results else results[0][0]

    def get_users(self):
        """Get user info; this connector always returns an empty list."""
        return []

    def get_fields(self, table_name):
        """Get column fields of ``table_name`` in the current database.

        Args:
            table_name: Table whose columns are listed; sent as a bound
                parameter, so quotes in the name cannot alter the statement.

        Returns:
            A list of ``(COLUMN_NAME, COLUMN_TYPE, COLUMN_DEFAULT,
            IS_NULLABLE, COLUMN_COMMENT)`` tuples.
        """
        cursor = self.get_session().execute(
            text(
                "select COLUMN_NAME, COLUMN_TYPE, COLUMN_DEFAULT, IS_NULLABLE, "
                "COLUMN_COMMENT "
                "from information_schema.columns "
                "where TABLE_NAME=:table_name and TABLE_SCHEMA=database()"
            ),
            {"table_name": table_name},
        )
        fields = cursor.fetchall()
        return [(field[0], field[1], field[2], field[3], field[4]) for field in fields]

    def get_charset(self):
        """Get character_set; this connector always reports ``"utf-8"``."""
        return "utf-8"

    def get_show_create_table(self, table_name):
        """Return the table comment of ``table_name`` in the current database.

        Despite the name, this does not return the CREATE TABLE statement:
        a table description is what is wanted here, and returning the full
        DDL makes the prompt exceed the token limit and fail.

        Args:
            table_name: Table to describe; sent as a bound parameter.

        Returns:
            ``TABLE_COMMENT`` converted with ``str``, or ``""`` when no row
            matches.
        """
        row = (
            self.get_session()
            .execute(
                text(
                    "SELECT TABLE_COMMENT "
                    "FROM information_schema.tables "
                    "where TABLE_NAME=:table_name and TABLE_SCHEMA=database()"
                ),
                {"table_name": table_name},
            )
            .fetchone()
        )
        return str(row[0]) if row else ""

    def get_table_comments(self, db_name=None):
        """Return ``(TABLE_NAME, TABLE_COMMENT)`` tuples for a database.

        Args:
            db_name: Database to inspect. When falsy, the current database
                (``database()``) is used. Otherwise the name is sent as a
                bound parameter.
        """
        session = self.get_session()
        if db_name:
            cursor = session.execute(
                text(
                    "SELECT TABLE_NAME,TABLE_COMMENT "
                    "FROM information_schema.tables "
                    "where TABLE_SCHEMA=:db_name"
                ),
                {"db_name": db_name},
            )
        else:
            cursor = session.execute(
                text(
                    "SELECT TABLE_NAME,TABLE_COMMENT "
                    "FROM information_schema.tables "
                    "where TABLE_SCHEMA=database()"
                )
            )
        return [(table[0], table[1]) for table in cursor.fetchall()]

    def get_database_list(self):
        """Alias of ``get_database_names``."""
        return self.get_database_names()

    def get_database_names(self):
        """Return ``SHOW DATABASES`` names minus ``_EXCLUDED_DATABASES``."""
        results = self.get_session().execute(text("SHOW DATABASES")).fetchall()
        return [d[0] for d in results if d[0] not in self._EXCLUDED_DATABASES]

    def get_current_db_name(self) -> str:
        """Return the result of ``select database()``."""
        return self.get_session().execute(text("select database()")).scalar()

    def table_simple_info(self):
        """Return one ``table(col1,col2,...);`` string per table.

        The strings are assembled server-side with ``concat`` and
        ``group_concat`` over ``information_schema.columns`` for the current
        database.
        """
        cursor = self.get_session().execute(
            text(
                "SELECT concat(TABLE_NAME,'(',group_concat(COLUMN_NAME,','),');') "
                "FROM information_schema.columns "
                "where TABLE_SCHEMA=database() "
                "GROUP BY TABLE_NAME"
            )
        )
        return [row[0] for row in cursor.fetchall()]

    def get_indexes(self, table_name):
        """Get table indexes about specified table.

        Returns:
            ``(row[2], row[4])`` for each ``SHOW INDEX`` row; presumably the
            key name and column name of the MySQL-compatible result layout -
            TODO confirm against the Doris version in use.
        """
        # NOTE(review): an identifier cannot be sent as a bound parameter, so
        # table_name is still interpolated here; callers must pass a trusted
        # table name.
        cursor = self.get_session().execute(text(f"SHOW INDEX FROM {table_name}"))
        return [(index[2], index[4]) for index in cursor.fetchall()]
|
BIN
pilot/server/static/icons/doris.png
Normal file
BIN
pilot/server/static/icons/doris.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 18 KiB |
5
setup.py
5
setup.py
@ -375,7 +375,10 @@ def core_requires():
|
|||||||
"auto-gpt-plugin-template",
|
"auto-gpt-plugin-template",
|
||||||
"gTTS==2.3.1",
|
"gTTS==2.3.1",
|
||||||
"langchain>=0.0.286",
|
"langchain>=0.0.286",
|
||||||
"SQLAlchemy==2.0.22",
|
# Relaxed from the pinned ==2.0.22 to a version range because the other dependencies only require >=1.4, while e.g. pydoris requires <2
|
||||||
|
"SQLAlchemy>=1.4,<3",
|
||||||
|
# for doris
|
||||||
|
"pydoris>=1.0.2,<2.0.0",
|
||||||
"fastapi==0.98.0",
|
"fastapi==0.98.0",
|
||||||
"pymysql",
|
"pymysql",
|
||||||
"duckdb==0.8.1",
|
"duckdb==0.8.1",
|
||||||
|
Loading…
Reference in New Issue
Block a user