diff --git a/README.md b/README.md index 5a743d0a3..369e85599 100644 --- a/README.md +++ b/README.md @@ -196,7 +196,7 @@ The core capabilities primarily consist of the following components: - [ ] Redis - [ ] MongoDB - [ ] HBase - - [ ] Doris + - [x] Doris - [ ] DB2 - [ ] Couchbase - [ ] Elasticsearch diff --git a/README.zh.md b/README.zh.md index 2e1b86699..60e6ef679 100644 --- a/README.zh.md +++ b/README.zh.md @@ -229,7 +229,7 @@ The MIT License (MIT) - [ ] Redis - [ ] MongoDB - [ ] HBase - - [ ] Doris + - [x] Doris - [ ] DB2 - [ ] Couchbase - [ ] Elasticsearch diff --git a/docs/docs/modules/connections.md b/docs/docs/modules/connections.md index 4babddb41..2a1b1b282 100644 --- a/docs/docs/modules/connections.md +++ b/docs/docs/modules/connections.md @@ -3,23 +3,23 @@ The connections module supports connecting to various structured, semi-structure The list of data sources we currently support is as follows. -| DataSource | support | Notes | -| ------------------------------------------------------------------------------ | ----------- | ------------------------------------------- | -| [MySQL](https://www.mysql.com/) | Yes | MySQL is the world's most popular open source database. | -| [PostgresSQL](https://www.postgresql.org/) | Yes | The World's Most Advanced Open Source Relational Database | -| [Spark](https://github.com/apache/spark) | Yes | Unified Engine for large-scale data analytics | -| [DuckDB](https://github.com/duckdb/duckdb) | Yes | DuckDB is an in-process SQL OLAP database management system | -| [Sqlite](https://github.com/sqlite/sqlite) | Yes | | -| [MSSQL](https://github.com/microsoft/mssql-jdbc) | Yes | | -| [ClickHouse](https://github.com/ClickHouse/ClickHouse) | Yes | ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics. 
| -| [Oracle](https://github.com/oracle) | No | TODO | -| [Redis](https://github.com/redis/redis) | No | The Multi-model NoSQL Database | -| [MongoDB](https://github.com/mongodb/mongo) | No | MongoDB is a source-available cross-platform document-oriented database program | -| [HBase](https://github.com/apache/hbase) | No | Open-source, distributed, versioned, column-oriented store modeled | -| [Doris](https://github.com/apache/doris) | No | Apache Doris is an easy-to-use, high performance and unified analytics database. | -| [DB2](https://github.com/IBM/Db2) | No | TODO | -| [Couchbase](https://github.com/couchbase) | No | TODO | -| [Elasticsearch](https://github.com/elastic/elasticsearch) | No | Free and Open, Distributed, RESTful Search Engine | -| [OceanBase](https://github.com/OceanBase) | No | OceanBase is a distributed relational database. | -| [TiDB](https://github.com/pingcap/tidb) | No | TODO | -| [StarRocks](https://github.com/StarRocks/starrocks) | Yes | StarRocks is a next-gen, high-performance analytical data warehouse | \ No newline at end of file +| DataSource | support | Notes | +| ------------------------------------------------------------------------------ |---------| ------------------------------------------- | +| [MySQL](https://www.mysql.com/) | Yes | MySQL is the world's most popular open source database. | +| [PostgreSQL](https://www.postgresql.org/) | Yes | The World's Most Advanced Open Source Relational Database | +| [Spark](https://github.com/apache/spark) | Yes | Unified Engine for large-scale data analytics | +| [DuckDB](https://github.com/duckdb/duckdb) | Yes | DuckDB is an in-process SQL OLAP database management system | +| [SQLite](https://github.com/sqlite/sqlite) | Yes | | +| [MSSQL](https://github.com/microsoft/mssql-jdbc) | Yes | | +| [ClickHouse](https://github.com/ClickHouse/ClickHouse) | Yes | ClickHouse is the fastest and most resource efficient open-source database for real-time apps and analytics. 
| +| [Oracle](https://github.com/oracle) | No | TODO | +| [Redis](https://github.com/redis/redis) | No | The Multi-model NoSQL Database | +| [MongoDB](https://github.com/mongodb/mongo) | No | MongoDB is a source-available cross-platform document-oriented database program | +| [HBase](https://github.com/apache/hbase) | No | Open-source, distributed, versioned, column-oriented store modeled | +| [Doris](https://github.com/apache/doris) | Yes | Apache Doris is an easy-to-use, high performance and unified analytics database. | +| [DB2](https://github.com/IBM/Db2) | No | TODO | +| [Couchbase](https://github.com/couchbase) | No | TODO | +| [Elasticsearch](https://github.com/elastic/elasticsearch) | No | Free and Open, Distributed, RESTful Search Engine | +| [OceanBase](https://github.com/OceanBase) | No | OceanBase is a distributed relational database. | +| [TiDB](https://github.com/pingcap/tidb) | No | TODO | +| [StarRocks](https://github.com/StarRocks/starrocks) | Yes | StarRocks is a next-gen, high-performance analytical data warehouse | \ No newline at end of file diff --git a/pilot/common/schema.py b/pilot/common/schema.py index 561b019dc..a118c3737 100644 --- a/pilot/common/schema.py +++ b/pilot/common/schema.py @@ -31,6 +31,7 @@ class DBType(Enum): Clickhouse = DbInfo("clickhouse") StarRocks = DbInfo("starrocks") Spark = DbInfo("spark", True) + Doris = DbInfo("doris") def value(self): return self._value_.name diff --git a/pilot/connections/manages/connection_manager.py b/pilot/connections/manages/connection_manager.py index b7f4c749d..b55c3a608 100644 --- a/pilot/connections/manages/connection_manager.py +++ b/pilot/connections/manages/connection_manager.py @@ -19,6 +19,7 @@ from pilot.connections.rdbms.base import RDBMSDatabase from pilot.connections.rdbms.conn_clickhouse import ClickhouseConnect from pilot.connections.rdbms.conn_postgresql import PostgreSQLDatabase from pilot.connections.rdbms.conn_starrocks import StarRocksConnect +from 
from typing import Any, Iterable, Optional
from urllib.parse import quote
from urllib.parse import quote_plus as urlquote

from sqlalchemy import text

from pilot.connections.rdbms.base import RDBMSDatabase


class DorisConnect(RDBMSDatabase):
    """Doris connector built on top of ``RDBMSDatabase``.

    Metadata (tables, columns, comments) is read from ``information_schema``
    for the database returned by ``database()``, i.e. the current one.
    """

    driver = "doris"
    db_type = "doris"
    db_dialect = "doris"

    # Databases filtered out of ``get_database_names``. Kept as a class-level
    # frozenset so the collection is built once instead of per row.
    _SYSTEM_DATABASES = frozenset(
        {
            "information_schema",
            "sys",
            "_statistics_",
            "mysql",
            "__internal_schema",
            "doris_audit_db__",
        }
    )

    @classmethod
    def from_uri_db(
        cls,
        host: str,
        port: int,
        user: str,
        pwd: str,
        db_name: str,
        engine_args: Optional[dict] = None,
        **kwargs: Any,
    ) -> RDBMSDatabase:
        """Build a connection from discrete connection parameters.

        The user name and password are URL-escaped before being embedded in
        the ``doris://user:pwd@host:port/db_name`` URL, which is then handed
        to ``cls.from_uri`` together with ``engine_args`` and ``kwargs``.
        """
        db_url: str = (
            f"{cls.driver}://{quote(user)}:{urlquote(pwd)}@{host}:{str(port)}/{db_name}"
        )
        return cls.from_uri(db_url, engine_args, **kwargs)

    def _sync_tables_from_db(self) -> Iterable[str]:
        """Refresh the cached table names and reflect the engine metadata.

        Returns:
            The set of table names found in the current database.
        """
        rows = self.get_session().execute(
            text(
                "SELECT TABLE_NAME FROM information_schema.tables "
                "where TABLE_SCHEMA=database()"
            )
        )
        self._all_tables = {row[0] for row in rows}
        self._metadata.reflect(bind=self._engine)
        return self._all_tables

    def get_grants(self):
        """Return one value per row of ``SHOW GRANTS``.

        The second column is used when rows have exactly two columns,
        otherwise the third.
        NOTE(review): presumably this covers differing result layouts across
        server versions; confirm against the Doris versions in use.
        """
        grants = self.get_session().execute(text("SHOW GRANTS")).fetchall()
        if not grants:
            return []
        column = 1 if len(grants[0]) == 2 else 2
        return [row[column] for row in grants]

    def _get_current_version(self):
        """Return the result of ``select current_version()`` as an int.

        NOTE(review): confirm that the target Doris release provides
        ``current_version()`` and that its result is integer-convertible.
        """
        return int(
            self.get_session().execute(text("select current_version()")).scalar()
        )

    def get_collation(self):
        """Return the first column of the first ``SHOW COLLATION`` row.

        Returns an empty string when the statement yields no rows.

        ref: https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-reference/Show-Statements/SHOW-COLLATION/
        """
        results = self.get_session().execute(text("SHOW COLLATION")).fetchall()
        return "" if not results else results[0][0]

    def get_users(self):
        """Return user info; this connector always returns an empty list."""
        return []

    def get_fields(self, table_name):
        """Return column metadata for ``table_name`` in the current database.

        Args:
            table_name: Name of the table to describe. It is passed as a
                bound parameter rather than interpolated into the SQL text.

        Returns:
            A list of ``(name, type, default, is_nullable, comment)`` tuples.
        """
        cursor = self.get_session().execute(
            text(
                "select COLUMN_NAME, COLUMN_TYPE, COLUMN_DEFAULT, IS_NULLABLE, "
                "COLUMN_COMMENT "
                "from information_schema.columns "
                "where TABLE_NAME=:table_name and TABLE_SCHEMA=database()"
            ),
            {"table_name": table_name},
        )
        fields = cursor.fetchall()
        return [(field[0], field[1], field[2], field[3], field[4]) for field in fields]

    def get_charset(self):
        """Return the character set name; always ``"utf-8"``."""
        return "utf-8"

    def get_show_create_table(self, table_name):
        """Return the table comment for ``table_name`` (not its DDL).

        Only the table description is wanted here: returning the full
        CREATE TABLE statement makes the prompt too long (token limit) and
        the request fails.

        Args:
            table_name: Name of the table. It is passed as a bound parameter
                rather than interpolated into the SQL text.

        Returns:
            The table comment as a string, or ``""`` when no row matches.
        """
        cur = self.get_session().execute(
            text(
                "SELECT TABLE_COMMENT "
                "FROM information_schema.tables "
                "where TABLE_NAME=:table_name and TABLE_SCHEMA=database()"
            ),
            {"table_name": table_name},
        )
        table = cur.fetchone()
        return str(table[0]) if table else ""

    def get_table_comments(self, db_name=None):
        """Return ``(table_name, table_comment)`` pairs for a database.

        Args:
            db_name: Database to inspect. When falsy, the current database
                (``database()``) is used. A given name is passed as a bound
                parameter rather than interpolated into the SQL text.
        """
        select = (
            "SELECT TABLE_NAME,TABLE_COMMENT "
            "FROM information_schema.tables "
        )
        session = self.get_session()
        if db_name:
            cursor = session.execute(
                text(select + "where TABLE_SCHEMA=:db_name"),
                {"db_name": db_name},
            )
        else:
            cursor = session.execute(text(select + "where TABLE_SCHEMA=database()"))
        tables = cursor.fetchall()
        return [(table[0], table[1]) for table in tables]

    def get_database_list(self):
        """Alias of ``get_database_names``."""
        return self.get_database_names()

    def get_database_names(self):
        """Return ``SHOW DATABASES`` names minus ``_SYSTEM_DATABASES``."""
        results = self.get_session().execute(text("SHOW DATABASES")).fetchall()
        return [d[0] for d in results if d[0] not in self._SYSTEM_DATABASES]

    def get_current_db_name(self) -> str:
        """Return the result of ``select database()``."""
        return self.get_session().execute(text("select database()")).scalar()

    def table_simple_info(self):
        """Return one ``table(col1,col2,...);`` string per table.

        The strings are assembled by the database from
        ``information_schema.columns`` of the current database.
        """
        cursor = self.get_session().execute(
            text(
                "SELECT concat(TABLE_NAME,'(',group_concat(COLUMN_NAME,','),');') "
                "FROM information_schema.columns "
                "where TABLE_SCHEMA=database() "
                "GROUP BY TABLE_NAME"
            )
        )
        results = cursor.fetchall()
        return [x[0] for x in results]

    def get_indexes(self, table_name):
        """Return pairs built from columns 2 and 4 of ``SHOW INDEX``.

        NOTE(review): presumably these are the key name and column name;
        confirm against the Doris ``SHOW INDEX`` result layout.
        """
        # NOTE(review): identifiers cannot be sent as bound parameters, so
        # ``table_name`` is interpolated as-is; callers must pass a trusted
        # table name.
        cursor = self.get_session().execute(text(f"SHOW INDEX FROM {table_name}"))
        indexes = cursor.fetchall()
        return [(index[2], index[4]) for index in indexes]