Fix SQLite bug and split some main dependencies into optional dependencies (#453)

- Fix #445 

- Split some main dependencies into optional dependencies; llama.cpp now runs
  on both Apple Silicon (M1) and x86 Macs.
Aries-ckt 2023-08-16 15:24:33 +08:00 committed by GitHub
commit f968427690
8 changed files with 84 additions and 42 deletions


@@ -11,6 +11,16 @@ ARG LANGUAGE="en"
 ARG PIP_INDEX_URL="https://pypi.org/simple"
 ENV PIP_INDEX_URL=$PIP_INDEX_URL
+RUN pip3 install --upgrade pip -i $PIP_INDEX_URL \
+    && (if [ "${LANGUAGE}" = "zh" ]; \
+    # language is zh, download zh_core_web_sm from github
+    then wget https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.5.0/zh_core_web_sm-3.5.0-py3-none-any.whl -O /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
+    && pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl -i $PIP_INDEX_URL \
+    && rm /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl; \
+    # not zh, download directly
+    else python3 -m spacy download zh_core_web_sm; \
+    fi;)
 RUN mkdir -p /app
 # COPY only requirements.txt first to leverage Docker cache
@@ -29,21 +39,11 @@ WORKDIR /app
 # && pip3 install -r /tmp/requirements.txt -i $PIP_INDEX_URL --no-cache-dir \
 # && rm /tmp/requirements.txt
-RUN pip3 install --upgrade pip -i $PIP_INDEX_URL \
-    && cd /app && pip3 install -i $PIP_INDEX_URL .
+RUN pip3 install -i $PIP_INDEX_URL .
 # ENV CMAKE_ARGS="-DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DLLAMA_F16C=OFF -DLLAMA_FMA=OFF"
 # ENV FORCE_CMAKE=1
-RUN cd /app && pip3 install -i $PIP_INDEX_URL .[llama_cpp]
-RUN (if [ "${LANGUAGE}" = "zh" ]; \
-    # language is zh, download zh_core_web_sm from github
-    then wget https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.5.0/zh_core_web_sm-3.5.0-py3-none-any.whl -O /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
-    && pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl -i $PIP_INDEX_URL \
-    && rm /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl; \
-    # not zh, download directly
-    else python3 -m spacy download zh_core_web_sm; \
-    fi;) \
+RUN pip3 install -i $PIP_INDEX_URL .[llama_cpp] \
+    && rm -rf `pip3 cache dir`
 ARG BUILD_LOCAL_CODE="false"


@@ -70,10 +70,10 @@ class SQLiteConnect(RDBMSDatabase):
     def _sync_tables_from_db(self) -> Iterable[str]:
         table_results = self.session.execute(
-            "SELECT name FROM sqlite_master WHERE type='table'"
+            text("SELECT name FROM sqlite_master WHERE type='table'")
         )
         view_results = self.session.execute(
-            "SELECT name FROM sqlite_master WHERE type='view'"
+            text("SELECT name FROM sqlite_master WHERE type='view'")
         )
         table_results = set(row[0] for row in table_results)
         view_results = set(row[0] for row in view_results)
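
This is the SQLite fix from the commit title: SQLAlchemy 1.4/2.0 no longer coerces plain strings passed to `Session.execute()` into SQL, so the raw statements must be wrapped in `text()`. A minimal standalone sketch of the same pattern, using an in-memory SQLite engine rather than this project's connector class:

```python
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session

engine = create_engine("sqlite:///:memory:")
with Session(engine) as session:
    # In SQLAlchemy 2.0 a plain string here raises ObjectNotExecutableError;
    # wrapping it in text() turns it into an executable statement again.
    rows = session.execute(text("SELECT name FROM sqlite_master WHERE type='table'"))
    table_names = {row[0] for row in rows}
    print(table_names)  # set() for a fresh in-memory database
```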


@@ -121,3 +121,17 @@ def test_get_database_list(db):
 def test_get_database_names(db):
     db.get_database_names() == []
+
+
+def test_db_dir_exist_dir():
+    with tempfile.TemporaryDirectory() as temp_dir:
+        new_dir = os.path.join(temp_dir, "new_dir")
+        file_path = os.path.join(new_dir, "sqlite.db")
+        db = SQLiteConnect.from_file_path(file_path)
+        assert os.path.exists(new_dir) == True
+        assert list(db.get_table_names()) == []
+    with tempfile.TemporaryDirectory() as existing_dir:
+        file_path = os.path.join(existing_dir, "sqlite.db")
+        db = SQLiteConnect.from_file_path(file_path)
+        assert os.path.exists(existing_dir) == True
+        assert list(db.get_table_names()) == []
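
The new test covers the case where the database file's parent directory does not exist yet, so `from_file_path` is expected to create it. A hypothetical sketch of that behavior follows; the helper name is made up for illustration and this is not the project's actual implementation:

```python
import os

from sqlalchemy import create_engine


def sqlite_engine_from_file_path(file_path: str):
    """Create missing parent directories, then open a SQLite engine on the file."""
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)  # no-op when the directory already exists
    return create_engine(f"sqlite:///{file_path}")
```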


@@ -1,9 +1,15 @@
 from pilot.vector_store.chroma_store import ChromaStore
-from pilot.vector_store.milvus_store import MilvusStore
 from pilot.vector_store.weaviate_store import WeaviateStore

-connector = {"Chroma": ChromaStore, "Milvus": MilvusStore, "Weaviate": WeaviateStore}
+connector = {"Chroma": ChromaStore, "Weaviate": WeaviateStore}
+
+try:
+    from pilot.vector_store.milvus_store import MilvusStore
+
+    connector["Milvus"] = MilvusStore
+except:
+    pass


 class VectorStoreConnector:
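
With this change Milvus becomes an optional backend: it is only registered in `connector` when `pymilvus` can be imported, so the base install no longer hard-depends on it. One way a caller could surface a readable error when an unregistered backend is requested is sketched below against the `connector` registry shown above; the function name and message are assumptions, not project code:

```python
def resolve_vector_store(name: str):
    """Look up a registered store class, pointing at the optional extra when missing."""
    try:
        return connector[name]
    except KeyError:
        raise ValueError(
            f"Vector store '{name}' is not registered. For optional backends such as "
            'Milvus, install the extra first, e.g. pip install "db-gpt[vstore]".'
        ) from None
```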


@@ -5,8 +5,8 @@ async-timeout==4.0.2
attrs==22.2.0
cchardet==2.1.7
chardet==5.1.0
contourpy==1.0.7
cycler==0.11.0
# contourpy==1.0.7
# cycler==0.11.0
filelock==3.9.0
fonttools==4.38.0
frozenlist==1.3.3
@@ -14,20 +14,20 @@ huggingface-hub==0.14.1
importlib-resources==5.12.0
sqlparse==0.4.4
kiwisolver==1.4.4
matplotlib==3.7.1
# kiwisolver==1.4.4
# matplotlib==3.7.1
multidict==6.0.4
packaging==23.0
psutil==5.9.4
pycocotools==2.0.6
pyparsing==3.0.9
# pycocotools==2.0.6
# pyparsing==3.0.9
python-dateutil==2.8.2
pyyaml==6.0
tokenizers==0.13.2
tqdm==4.64.1
transformers>=4.31.0
transformers_stream_generator
timm==0.6.13
# timm==0.6.13
spacy==3.5.3
webdataset==0.2.48
yarl==1.8.2
@@ -40,18 +40,17 @@ peft
pycocoevalcap
cpm_kernels
umap-learn
notebook
# notebook
gradio==3.23
gradio-client==0.0.8
wandb
llama-index==0.5.27
# wandb
# llama-index==0.5.27
# TODO move bitsandbytes to optional
bitsandbytes
accelerate>=0.20.3
unstructured==0.6.3
grpcio==1.47.5
gpt4all==0.3.0
diskcache==5.6.1
@@ -61,7 +60,7 @@ gTTS==2.3.1
langchain
nltk
python-dotenv==1.0.0
pymilvus==2.2.1
vcrpy
chromadb==0.3.22
markdown2
@@ -74,18 +73,7 @@ bardapi==0.1.29
# database
# TODO moved to optional dependencies
pymysql
duckdb
duckdb-engine
pymssql
# Testing dependencies
pytest
asynctest
pytest-asyncio
pytest-benchmark
pytest-cov
pytest-integration
pytest-mock
pytest-recording
pytesseract==0.3.10
duckdb-engine


@@ -0,0 +1,10 @@
# Testing dependencies
pytest
asynctest
pytest-asyncio
pytest-benchmark
pytest-cov
pytest-integration
pytest-mock
pytest-recording
pytesseract==0.3.10


@@ -117,10 +117,30 @@ def llama_cpp_python_cuda_requires():
 def llama_cpp_requires():
     """
     pip install "db-gpt[llama_cpp]"
     """
     setup_spec.extras["llama_cpp"] = ["llama-cpp-python"]
     llama_cpp_python_cuda_requires()


+def all_vector_store_requires():
+    """
+    pip install "db-gpt[vstore]"
+    """
+    setup_spec.extras["vstore"] = [
+        "grpcio==1.47.5",  # maybe delete it
+        "pymilvus==2.2.1",
+    ]
+
+
+def all_datasource_requires():
+    """
+    pip install "db-gpt[datasource]"
+    """
+    setup_spec.extras["datasource"] = ["pymssql", "pymysql"]
+
+
 def all_requires():
     requires = set()
     for _, pkgs in setup_spec.extras.items():
@@ -130,11 +150,15 @@ def all_requires():
 llama_cpp_requires()
+all_vector_store_requires()
+all_datasource_requires()
+
 # must be last
 all_requires()

 setuptools.setup(
     name="db-gpt",
-    packages=find_packages(),
+    packages=find_packages(exclude=("tests", "*.tests", "*.tests.*", "examples")),
     version="0.3.5",
     author="csunny",
     author_email="cfqcsunny@gmail.com",
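
Taken together, these helpers register each package group as a setuptools extra, and `all_requires()` runs last so it can fold every registered extra into one aggregate. A simplified sketch of that flow is below; the `"all"` key and the `extras_require` hand-off are assumptions based on the docstrings above, not a copy of the real file:

```python
extras = {
    "llama_cpp": ["llama-cpp-python"],
    "vstore": ["grpcio==1.47.5", "pymilvus==2.2.1"],
    "datasource": ["pymssql", "pymysql"],
}
# "must be last": the aggregate extra can only be built once every group is registered.
extras["all"] = sorted({pkg for pkgs in extras.values() for pkg in pkgs})

# Something equivalent is handed to setuptools.setup(..., extras_require=extras),
# which is what makes pip install "db-gpt[llama_cpp]" or "db-gpt[vstore]" work.
```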