feat:update spark connection

This commit is contained in:
aries_ckt 2023-09-26 17:53:13 +08:00
parent 8c4accb09e
commit b39ba51bb7
10 changed files with 127 additions and 75 deletions

View File

@ -35,6 +35,7 @@ now DB-GPT support Datasource Type
* DuckDB
* Clickhouse
* Mssql
* Spark
```
### 2.ChatData

View File

@ -80,4 +80,16 @@ Download and install `Microsoft C++ Build Tools` from [visual-cpp-build-tools](h
```
1. Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive)
2. Reinstall PyTorch [start-locally](https://pytorch.org/get-started/locally/#start-locally) with CUDA support.
2. Reinstall PyTorch [start-locally](https://pytorch.org/get-started/locally/#start-locally) with CUDA support.
##### Q7: ImportError: cannot import name 'PersistentClient' from 'chromadb'.
```commandline
pip install chromadb==0.4.10
```
##### Q8: pydantic.error_wrappers.ValidationError:1 validation error for HuggingFaceEmbeddings.model_kwargs extra not permitted
```commandline
pip install "langchain>=0.0.286"
```

View File

@ -86,8 +86,9 @@ pip install -e "pip install -e ".[default]"
##### Q8 When starting the worker_manager on a cloud server and registering it with the controller, it is noticed that the worker's exposed IP is a private IP instead of a public IP, which leads to the inability to access the service.
```commandline
--worker_register_host public_ip The ip address of current worker to register
to ModelController. If None, the address is
to ModelController. If None, the address is
automatically determined
```

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-08-29 20:30+0800\n"
"POT-Creation-Date: 2023-09-26 17:51+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@ -20,12 +20,12 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/application/chatdb/chatdb.md:1
#: 46745445059c40848770d89655d4452a
#: 0cf45852c1fd430090da81836bc961c7
msgid "ChatData & ChatDB"
msgstr "ChatData & ChatDB"
#: ../../getting_started/application/chatdb/chatdb.md:3
#: 494c5e475fbb420eaab49739b696a2ce
#: 6dc94a787ff844caa21074d71aaf351a
#, fuzzy
msgid ""
"ChatData generates SQL from natural language and executes it. ChatDB "
@ -41,119 +41,127 @@ msgstr ""
#: ../../getting_started/application/chatdb/chatdb.md:20
#: ../../getting_started/application/chatdb/chatdb.md:24
#: ../../getting_started/application/chatdb/chatdb.md:28
#: ../../getting_started/application/chatdb/chatdb.md:42
#: ../../getting_started/application/chatdb/chatdb.md:47
#: ../../getting_started/application/chatdb/chatdb.md:53
#: 0b20219c11a14f9ebdfac5ebabcdcd8d 0f8e5d9baaec4602ae57b55b4db286cf
#: 3a2ef73b33c74d838b5c0ea41b83430d 9de27f6a12dd447eb9434c3b10dce97e
#: 9fc2b16790534cf9a79ac57d7b54ff27
#: ../../getting_started/application/chatdb/chatdb.md:43
#: ../../getting_started/application/chatdb/chatdb.md:48
#: ../../getting_started/application/chatdb/chatdb.md:54
#: 826032e82a0a40b2bd122a90a35d0161 91652ef9e3224290b0c89112bcca4474
#: d396ffa33eef4bef8471040369414420 d7f176a7794048d3ac3573970db86d9d
#: f80e5611eca64f86baeeed6c860061f9
msgid "db plugins demonstration"
msgstr "db plugins demonstration"
#: ../../getting_started/application/chatdb/chatdb.md:7
#: 8d4f856b1b734434a80d1a9cc43b1611
#: aa0f978d3ad34b71aacf143a4c807ba1
msgid "1.Choose Datasource"
msgstr "1.Choose Datasource"
#: ../../getting_started/application/chatdb/chatdb.md:9
#: 9218c985e6e24cebab8c098bc49119a3
#: 8a2338e2fbae44f1b61b2fcf062499d3
msgid ""
"If you are using DB-GPT for the first time, you need to add a data source"
" and set the relevant connection information for the data source."
msgstr "如果你是第一次使用DB-GPT, 首先需要添加数据源,设置数据源的相关连接信息"
#: ../../getting_started/application/chatdb/chatdb.md:13
#: ec508b8298bf4657aca722875d34d858
#: f1d165ab8b564445880e581a2e554434
msgid "there are some example data in DB-GPT-NEW/DB-GPT/docker/examples"
msgstr "在DB-GPT-NEW/DB-GPT/docker/examples有数据示例"
#: ../../getting_started/application/chatdb/chatdb.md:15
#: c92428030b914053ad4c01ab9d78ccff
#: dd390cb518094c96bf5430bfa821830f
msgid "you can execute sql script to generate data."
msgstr "你可以通过执行sql脚本生成测试数据"
#: ../../getting_started/application/chatdb/chatdb.md:18
#: fa5f5b1bf8994d349ba80b63be472c7f
#: aebd974d23124daa80af6d74431d1ce3
msgid "1.1 Datasource management"
msgstr "1.1 Datasource management"
#: ../../getting_started/application/chatdb/chatdb.md:20
#: 6bb044bd35b3469ebee61baf394ce613
#: af4d12aaed5c4fc484a3e7a755a666c2
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/7678f07e-9eee-40a9-b980-5b3978a0ed52)"
msgstr "![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
msgstr ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/7678f07e-9eee-40a9-b980-5b3978a0ed52)"
#: ../../getting_started/application/chatdb/chatdb.md:22
#: 23254a25f3464970a7b3e3d7dafa832a
#: 34b7b9ce0f0142af8179a8e1763a32f8
msgid "1.2 Connection management"
msgstr "1.2 Connection管理"
#: ../../getting_started/application/chatdb/chatdb.md:24
#: e244169193dc48fab1b692f7410aed0b
#: 00a1af9f4e0a45b9a398f641c8198114
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/25b8f5a9-d322-459e-a8b2-bfe8cb42bdd6)"
msgstr "![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
msgstr ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/25b8f5a9-d322-459e-a8b2-bfe8cb42bdd6)"
#: ../../getting_started/application/chatdb/chatdb.md:26
#: 32507323a3884f35991f60646b6077bb
#: 3b8efc25b482480b8d0f4afe5304ece0
msgid "1.3 Add Datasource"
msgstr "1.3 添加Datasource"
#: ../../getting_started/application/chatdb/chatdb.md:28
#: 3665c149527b4fc3944549454ce81bcf
#: d36a476e1eb34a46b2d35e6c1c4c39dd
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/19ce31a7-4061-4da8-a9cb-efca396cc085)"
msgstr "![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
msgstr ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/19ce31a7-4061-4da8-a9cb-efca396cc085)"
#: ../../getting_started/application/chatdb/chatdb.md:31
#: 23100fa4b1b642699f1faae80f78419b
#: 9205388f91404099bf1add6d55f33801
msgid "now DB-GPT support Datasource Type"
msgstr "DB-GPT支持数据源类型"
#: ../../getting_started/application/chatdb/chatdb.md:33
#: 54f6ac1232294e72975f2ec8f92a19fd
#: 197722ccd9e54f8196e3037f0ebd4165
msgid "Mysql"
msgstr "Mysql"
#: ../../getting_started/application/chatdb/chatdb.md:34
#: e2aff57c70fd4f6b81da9548f59e97b7
#: e859c194648440b19941a42635f37ac5
msgid "Sqlite"
msgstr "Sqlite"
#: ../../getting_started/application/chatdb/chatdb.md:35
#: fc2a02bf5b004896a3c68b0f27f82c7b
#: 91c695f437064f01bf1d7c85a0ecf5b4
msgid "DuckDB"
msgstr "DuckDB"
#: ../../getting_started/application/chatdb/chatdb.md:36
#: 1c97c47b248741b290265c7d72875d7a
#: 0a8ff591969c4944890415a84aa64173
msgid "Clickhouse"
msgstr "Clickhouse"
#: ../../getting_started/application/chatdb/chatdb.md:37
#: 5ebd3d4f0ca94f50b5f536f673d68610
#: d52ec849653141dc95862e82ce5777e0
#, fuzzy
msgid "Mssql"
msgstr "Mssql"
#: ../../getting_started/application/chatdb/chatdb.md:40
#: dcdac0c0e6e24305ad601e5ccd82c877
#: ../../getting_started/application/chatdb/chatdb.md:38
#: 430a72d857114422aeecd5595df41881
msgid "Spark"
msgstr "Spark"
#: ../../getting_started/application/chatdb/chatdb.md:41
#: b615a70971e7443291ba33e8bc12b437
msgid "2.ChatData"
msgstr "2.ChatData"
#: ../../getting_started/application/chatdb/chatdb.md:41
#: c15bd38f6f754e0b8820a8afc0a8358b
#: ../../getting_started/application/chatdb/chatdb.md:42
#: e3542c64926143958e71c7cb21d25c78
msgid "Preview Mode"
msgstr "Preview Mode"
#: ../../getting_started/application/chatdb/chatdb.md:42
#: b43ffb3cf0734fc8b17ab3865856eda8
#: ../../getting_started/application/chatdb/chatdb.md:43
#: e32f26b7c22141e181b5345a644dffd5
#, fuzzy
msgid ""
"After successfully setting up the data source, you can start conversing "
@ -165,32 +173,34 @@ msgstr ""
"设置数据源成功后就可以和数据库进行对话了。你可以让它帮你生成SQL,也可以问它数据库元数据的相关信息。 ![db plugins "
"demonstration](../../../../assets/chat_data/chatdata_eg.png)"
#: ../../getting_started/application/chatdb/chatdb.md:46
#: 3f31a98fbf804b3495344ee95505e037
#: ../../getting_started/application/chatdb/chatdb.md:47
#: 4d5c0465a01b4f5a964d0e803f9cbc89
msgid "Editor Mode"
msgstr "Editor Mode"
#: ../../getting_started/application/chatdb/chatdb.md:47
#: e3c071be4daa40d0b03af97dbafe1713
#: ../../getting_started/application/chatdb/chatdb.md:48
#: 79b088787e8f43258bcc4292c89ad1b0
msgid ""
"In Editor Mode, you can edit your sql and execute it. ![db plugins "
"demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/1a896dc1-7c0e-4354-8629-30357ffd8d7f)"
msgstr "编辑器模式下可以在线编辑sql进行调试. ![db plugins "
"demonstration](https://github.com/eosphoros-ai/DB-"
msgstr ""
"编辑器模式下可以在线编辑sql进行调试. ![db plugins demonstration](https://github.com"
"/eosphoros-ai/DB-"
"GPT/assets/13723926/1a896dc1-7c0e-4354-8629-30357ffd8d7f)"
#: ../../getting_started/application/chatdb/chatdb.md:51
#: 6c694afb12dc4ef28bb58db80d15190c
#: ../../getting_started/application/chatdb/chatdb.md:52
#: 9efaf27749614cd4bea07146edddf558
msgid "3.ChatDB"
msgstr "3.ChatDB"
#: ../../getting_started/application/chatdb/chatdb.md:53
#: 631503240cf64cc8b80a9f5e43aae0dd
#: ../../getting_started/application/chatdb/chatdb.md:54
#: b2dc15f067064c60974e532c3e2f5893
msgid ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/e04bc1b1-2c58-4b33-af62-97e89098ace7)"
msgstr "![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
msgstr ""
"![db plugins demonstration](https://github.com/eosphoros-ai/DB-"
"GPT/assets/13723926/e04bc1b1-2c58-4b33-af62-97e89098ace7)"
#~ msgid "![db plugins demonstration](../../../../assets/chat_data/db_entry.png)"

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-08-29 20:50+0800\n"
"POT-Creation-Date: 2023-09-26 17:47+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@ -20,12 +20,12 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"
#: ../../getting_started/faq/deploy/deploy_faq.md:1
#: 0baefc753798469588ea011c12a0bfd3
#: ca823e9d6d1d433db7ed15c8273e1b00
msgid "Installation FAQ"
msgstr "Installation FAQ"
#: ../../getting_started/faq/deploy/deploy_faq.md:5
#: 013bf01a02c842ee8bc576f85d127e22
#: 3803d098c534434f9f513b3a62de54a4
#, fuzzy
msgid ""
"Q1: execute `pip install -e .` error, found some package cannot find "
@ -35,18 +35,18 @@ msgstr ""
"cannot find correct version."
#: ../../getting_started/faq/deploy/deploy_faq.md:6
#: 2729928139484def827143c17f2d968c
#: b785864f47e643df9a4669d8da6167d6
msgid "change the pip source."
msgstr "替换pip源."
#: ../../getting_started/faq/deploy/deploy_faq.md:13
#: ../../getting_started/faq/deploy/deploy_faq.md:20
#: 6e8bf02d7117454fbcc28c7ec27e055a acd2186c0320466f95b500dade75591b
#: c41f026fb1464c71a45d0746c224ecce f70fb69b568d4fc4ad4c4731b2032eaf
msgid "or"
msgstr "或者"
#: ../../getting_started/faq/deploy/deploy_faq.md:27
#: c5aab9455827416084a1ea6792263add
#: d179e3d695764f838dc354eb0d978bb3
msgid ""
"Q2: sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to"
" open database file"
@ -55,73 +55,86 @@ msgstr ""
" open database file"
#: ../../getting_started/faq/deploy/deploy_faq.md:29
#: 29880cfc6c7f4f6fb14a9cbb9eed07ad
#: 55174e8d247a414e8c6c8861d4707a55
msgid "make sure you pull latest code or create directory with mkdir pilot/data"
msgstr "make sure you pull latest code or create directory with mkdir pilot/data"
#: ../../getting_started/faq/deploy/deploy_faq.md:31
#: 36264030f5cd41bebd17beae12d9be51
#: dbce9e9cae734a5083a6f0fc28bce7cd
msgid "Q3: The model keeps getting killed."
msgstr "Q3: The model keeps getting killed."
#: ../../getting_started/faq/deploy/deploy_faq.md:33
#: 0cbf6ae0fee14d239cb1cc6ddba134d7
#: 2de5648d2e7546bf85f20f4162003298
msgid ""
"your GPU VRAM size is not enough, try replace your hardware or replace "
"other llms."
msgstr "GPU显存不够, 增加显存或者换一个显存小的模型"
#: ../../getting_started/faq/deploy/deploy_faq.md:35
#: 6f4ce365d20843529195aa6970d6074e
#: 47810771cd364964b9b5b8fd85bca4ee
msgid "Q4: How to access website on the public network"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:37
#: 9f4a119e64c74a0693fa067cd35cd833
#: e8c5bac6680648509d528ea6aaf5994e
msgid ""
"You can try to use gradio's [network](https://github.com/gradio-"
"app/gradio/blob/main/gradio/networking.py) to achieve."
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:48
#: 4c09cfb493ba41fb8590954b986e949d
#: bb75ec127f574c00a09d92d5206e9357
msgid "Open `url` with your browser to see the website."
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:50
#: 7d905a99d1c547eb95d9c619c70bf221
#: 5fdb87b84bd94385a1a93dab8d41ebe8
msgid "Q5: (Windows) execute `pip install -e .` error"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:52
#: fe26218168c4447a8dc89e436cdd1000
#: 31eef51e044044f29f3ad08defa9c305
msgid "The error log like the following:"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:71
#: d15615f7798d4dc0ad49d9b28926fe32
#: aaba0c3060b443e4b9877f70d78321ce
msgid ""
"Download and install `Microsoft C++ Build Tools` from [visual-cpp-build-"
"tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:75
#: 60ef06d3f99c44c1b568ec7c652905ee
#: 4c8137546e5c4240884f7ea6d9d922bf
msgid "Q6: `Torch not compiled with CUDA enabled`"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:82
#: 830e63627d2c48b8987ed20db3405c41
#: 01daf14f8c494219b1d9a5af4449951e
msgid "Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive)"
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:83
#: 50a1c244ddf747d797825158550026b9
#: c75e6371911e4d5ca6859e51501c9679
msgid ""
"Reinstall PyTorch [start-locally](https://pytorch.org/get-started/locally"
"/#start-locally) with CUDA support."
msgstr ""
#: ../../getting_started/faq/deploy/deploy_faq.md:85
#: 7cfb9003e505445ebb9ed3d015e184e2
msgid "Q7: ImportError: cannot import name 'PersistentClient' from 'chromadb'."
msgstr "Q7: ImportError: cannot import name 'PersistentClient' from 'chromadb'."
#: ../../getting_started/faq/deploy/deploy_faq.md:91
#: e1d5d5d85ddc480d8d81f7b550848cbf
msgid ""
"Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
"HuggingFaceEmbeddings.model_kwargs extra not permitted"
msgstr "Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
"HuggingFaceEmbeddings.model_kwargs extra not permitted"
#~ msgid ""
#~ "Q2: When use Mysql, Access denied "
#~ "for user 'root@localhost'(using password :NO)"

View File

@ -29,6 +29,7 @@ class DBType(Enum):
MSSQL = DbInfo("mssql")
Postgresql = DbInfo("postgresql")
Clickhouse = DbInfo("clickhouse")
Spark = DbInfo("spark", True)
def value(self):
return self._value_.name

View File

@ -6,9 +6,11 @@ from pilot.connections.base import BaseConnect
class SparkConnect(BaseConnect):
"""Spark Connect
Args:
Usage:
"""
Spark Connect supports operating on a variety of data sources through the DataFrame interface.
A DataFrame can be operated on using relational transformations and can also be used to create a temporary view.
Registering a DataFrame as a temporary view allows you to run SQL queries over its data.
Supported data source formats: parquet, jdbc, orc, libsvm, csv, text, json.
"""
"""db type"""
@ -29,7 +31,7 @@ class SparkConnect(BaseConnect):
return: Spark DataFrame
"""
self.spark_session = (
spark_session or SparkSession.builder.appName("dbgpt").getOrCreate()
spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
)
self.path = file_path
self.table_name = "temp"
@ -46,14 +48,19 @@ class SparkConnect(BaseConnect):
print("load spark datasource error" + str(e))
def create_df(self, path) -> DataFrame:
"""Create a Spark DataFrame from Datasource path
"""Create a Spark DataFrame from a data source path (currently supports parquet, jdbc, orc, libsvm, csv, text, json).
return: Spark DataFrame
reference:https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html
"""
return self.spark_session.read.option("header", "true").csv(path)
extension = (
"text" if path.rsplit(".", 1)[-1] == "txt" else path.rsplit(".", 1)[-1]
)
return self.spark_session.read.load(
path, format=extension, inferSchema="true", header="true"
)
def run(self, sql):
# self.log(f"llm ingestion sql query is :\n{sql}")
# self.df = self.create_df(self.path)
print(f"spark sql to run is {sql}")
self.df.createOrReplaceTempView(self.table_name)
df = self.spark_session.sql(sql)
first_row = df.first()

View File

@ -19,6 +19,7 @@ from pilot.connections.rdbms.conn_clickhouse import ClickhouseConnect
from pilot.singleton import Singleton
from pilot.common.sql_database import Database
from pilot.connections.db_conn_info import DBConfig
from pilot.connections.conn_spark import SparkConnect
from pilot.summary.db_summary_client import DBSummaryClient
CFG = Config()

View File

@ -45,6 +45,8 @@ class ChatWithDbAutoExecute(BaseChat):
except Exception as e:
print("db summary find error!" + str(e))
table_infos = self.database.table_simple_info()
if not table_infos:
table_infos = self.database.table_simple_info()
# table_infos = self.database.table_simple_info()

View File

@ -66,9 +66,13 @@ async def model_list():
last_heartbeat=model.last_heartbeat,
prompt_template=model.prompt_template,
)
response.manager_host = model.host if manager_map.get(model.host) else None
response.manager_host = (
model.host if manager_map.get(model.host) else None
)
response.manager_port = (
manager_map[model.host].port if manager_map.get(model.host) else None
manager_map[model.host].port
if manager_map.get(model.host)
else None
)
responses.append(response)
return Result.succ(responses)