mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-05 18:33:52 +00:00
fix(ChatData):pyspark lazy load (#633)
1. fix: lazy-load pyspark; 2. fix: pin the chromadb version
This commit is contained in:
commit
20bdddec51
@ -1,6 +1,4 @@
|
|||||||
from typing import Optional, Any
|
from typing import Optional, Any
|
||||||
from pyspark.sql import SparkSession, DataFrame
|
|
||||||
from sqlalchemy import text
|
|
||||||
|
|
||||||
from pilot.connections.base import BaseConnect
|
from pilot.connections.base import BaseConnect
|
||||||
|
|
||||||
@ -23,13 +21,15 @@ class SparkConnect(BaseConnect):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
file_path: str,
|
file_path: str,
|
||||||
spark_session: Optional[SparkSession] = None,
|
spark_session: Optional = None,
|
||||||
engine_args: Optional[dict] = None,
|
engine_args: Optional[dict] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the Spark DataFrame from Datasource path
|
"""Initialize the Spark DataFrame from Datasource path
|
||||||
return: Spark DataFrame
|
return: Spark DataFrame
|
||||||
"""
|
"""
|
||||||
|
from pyspark.sql import SparkSession
|
||||||
|
|
||||||
self.spark_session = (
|
self.spark_session = (
|
||||||
spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
|
spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
|
||||||
)
|
)
|
||||||
@ -47,7 +47,7 @@ class SparkConnect(BaseConnect):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("load spark datasource error" + str(e))
|
print("load spark datasource error" + str(e))
|
||||||
|
|
||||||
def create_df(self, path) -> DataFrame:
|
def create_df(self, path):
|
||||||
"""Create a Spark DataFrame from Datasource path(now support parquet, jdbc, orc, libsvm, csv, text, json.).
|
"""Create a Spark DataFrame from Datasource path(now support parquet, jdbc, orc, libsvm, csv, text, json.).
|
||||||
return: Spark DataFrame
|
return: Spark DataFrame
|
||||||
reference:https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html
|
reference:https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html
|
||||||
|
4
setup.py
4
setup.py
@ -281,7 +281,7 @@ def core_requires():
|
|||||||
"importlib-resources==5.12.0",
|
"importlib-resources==5.12.0",
|
||||||
"psutil==5.9.4",
|
"psutil==5.9.4",
|
||||||
"python-dotenv==1.0.0",
|
"python-dotenv==1.0.0",
|
||||||
"colorama==0.4.10",
|
"colorama==0.4.6",
|
||||||
"prettytable",
|
"prettytable",
|
||||||
"cachetools",
|
"cachetools",
|
||||||
]
|
]
|
||||||
@ -312,7 +312,7 @@ def knowledge_requires():
|
|||||||
setup_spec.extras["knowledge"] = [
|
setup_spec.extras["knowledge"] = [
|
||||||
"spacy==3.5.3",
|
"spacy==3.5.3",
|
||||||
# "chromadb==0.3.22",
|
# "chromadb==0.3.22",
|
||||||
"chromadb",
|
"chromadb==0.4.10",
|
||||||
"markdown",
|
"markdown",
|
||||||
"bs4",
|
"bs4",
|
||||||
"python-pptx",
|
"python-pptx",
|
||||||
|
Loading…
Reference in New Issue
Block a user