mirror of https://github.com/csunny/DB-GPT.git (synced 2025-07-31 15:47:05 +00:00)
fix(ChatData):pyspark lazy load (#633)
1. fix: pyspark lazy load
2. fix: chromadb version
This commit is contained in:
commit 20bdddec51
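The point of the first change is to stop importing pyspark at module import time and only pull it in when a Spark connection is actually constructed, so ChatData scenarios that never touch Spark do not need pyspark installed. Below is a minimal sketch of that lazy-import pattern; the names mirror the diff that follows, but the class is illustrative, not the repository's file:

    from typing import Optional

    # Before this commit the connector module did, at top level:
    #     from pyspark.sql import SparkSession, DataFrame
    # which made pyspark a hard requirement for anything that merely imported the module.


    class SparkConnectSketch:
        """Illustrative sketch of the lazy-import pattern applied by this commit."""

        def __init__(self, file_path: str, spark_session: Optional = None) -> None:
            # Bare Optional because SparkSession is no longer available at module scope.
            # pyspark is imported only when a Spark connection is actually created.
            from pyspark.sql import SparkSession

            self.spark_session = (
                spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
            )
            self.file_path = file_path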
@@ -1,6 +1,4 @@
 from typing import Optional, Any
-
-from pyspark.sql import SparkSession, DataFrame
 from sqlalchemy import text
 
 from pilot.connections.base import BaseConnect
@@ -23,13 +21,15 @@ class SparkConnect(BaseConnect):
     def __init__(
         self,
         file_path: str,
-        spark_session: Optional[SparkSession] = None,
+        spark_session: Optional = None,
         engine_args: Optional[dict] = None,
         **kwargs: Any,
     ) -> None:
         """Initialize the Spark DataFrame from Datasource path
         return: Spark DataFrame
         """
+        from pyspark.sql import SparkSession
+
         self.spark_session = (
             spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
         )
@@ -47,7 +47,7 @@ class SparkConnect(BaseConnect):
         except Exception as e:
             print("load spark datasource error" + str(e))
 
-    def create_df(self, path) -> DataFrame:
+    def create_df(self, path):
         """Create a Spark DataFrame from Datasource path(now support parquet, jdbc, orc, libsvm, csv, text, json.).
         return: Spark DataFrame
         reference:https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html
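A side effect of dropping the top-level import is that the annotations referencing SparkSession and DataFrame had to be loosened (a bare Optional, and no return type on create_df). This commit does not do so, but a common way to keep the hints while still deferring the pyspark import is a typing.TYPE_CHECKING guard with string annotations; the sketch below is a hypothetical variant, not the repository's code:

    from typing import Any, Optional, TYPE_CHECKING

    if TYPE_CHECKING:
        # Evaluated only by static type checkers, never at runtime,
        # so pyspark is not required just to import this module.
        from pyspark.sql import DataFrame, SparkSession


    class SparkConnectTyped:
        """Hypothetical variant that keeps the type hints; not what this commit does."""

        def __init__(
            self,
            file_path: str,
            spark_session: Optional["SparkSession"] = None,
            **kwargs: Any,
        ) -> None:
            from pyspark.sql import SparkSession  # runtime import stays lazy

            self.spark_session = (
                spark_session or SparkSession.builder.appName("dbgpt_spark").getOrCreate()
            )
            self.file_path = file_path

        def create_df(self, path: str) -> "DataFrame":
            # Minimal body for illustration; the real method handles multiple formats.
            return self.spark_session.read.load(path)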
setup.py (4 changes)
@@ -281,7 +281,7 @@ def core_requires():
         "importlib-resources==5.12.0",
         "psutil==5.9.4",
         "python-dotenv==1.0.0",
-        "colorama==0.4.10",
+        "colorama==0.4.6",
         "prettytable",
         "cachetools",
     ]
@@ -312,7 +312,7 @@ def knowledge_requires():
     setup_spec.extras["knowledge"] = [
         "spacy==3.5.3",
         # "chromadb==0.3.22",
-        "chromadb",
+        "chromadb==0.4.10",
         "markdown",
         "bs4",
         "python-pptx",
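On the dependency side, colorama has no 0.4.10 release on PyPI (0.4.6 is the most recent), so the previous pin could not be installed, and chromadb is now pinned to 0.4.10 in the "knowledge" extra instead of floating. A quick way to confirm which versions are actually present in an environment, using only the standard library (package names taken from the diff above):

    from importlib.metadata import PackageNotFoundError, version

    for pkg in ("colorama", "chromadb"):
        try:
            # Expect colorama 0.4.6, and chromadb 0.4.10 if the "knowledge" extra was installed.
            print(pkg, version(pkg))
        except PackageNotFoundError:
            print(pkg, "is not installed")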