diff --git a/docker/base/Dockerfile b/docker/base/Dockerfile
index 4075444a9..3cb063e47 100644
--- a/docker/base/Dockerfile
+++ b/docker/base/Dockerfile
@@ -11,6 +11,16 @@ ARG LANGUAGE="en"
 ARG PIP_INDEX_URL="https://pypi.org/simple"
 ENV PIP_INDEX_URL=$PIP_INDEX_URL
 
+RUN pip3 install --upgrade pip -i $PIP_INDEX_URL \
+    && (if [ "${LANGUAGE}" = "zh" ]; \
+    # language is zh, download zh_core_web_sm from github
+    then wget https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.5.0/zh_core_web_sm-3.5.0-py3-none-any.whl -O /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \
+    && pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl -i $PIP_INDEX_URL \
+    && rm /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl; \
+    # not zh, download directly
+    else python3 -m spacy download zh_core_web_sm; \
+    fi;)
+
 RUN mkdir -p /app
 
 # COPY only requirements.txt first to leverage Docker cache
@@ -29,21 +39,11 @@ WORKDIR /app
 #     && pip3 install -r /tmp/requirements.txt -i $PIP_INDEX_URL --no-cache-dir \
 #     && rm /tmp/requirements.txt
 
-RUN pip3 install --upgrade pip -i $PIP_INDEX_URL \
-    && cd /app && pip3 install -i $PIP_INDEX_URL .
+RUN pip3 install -i $PIP_INDEX_URL .
 
# ENV CMAKE_ARGS="-DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DLLAMA_F16C=OFF -DLLAMA_FMA=OFF" # ENV FORCE_CMAKE=1 -RUN cd /app && pip3 install -i $PIP_INDEX_URL .[llama_cpp] - -RUN (if [ "${LANGUAGE}" = "zh" ]; \ - # language is zh, download zh_core_web_sm from github - then wget https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.5.0/zh_core_web_sm-3.5.0-py3-none-any.whl -O /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl \ - && pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl -i $PIP_INDEX_URL \ - && rm /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl; \ - # not zh, download directly - else python3 -m spacy download zh_core_web_sm; \ - fi;) \ +RUN pip3 install -i $PIP_INDEX_URL .[llama_cpp] \ && rm -rf `pip3 cache dir` ARG BUILD_LOCAL_CODE="false" diff --git a/pilot/connections/rdbms/conn_sqlite.py b/pilot/connections/rdbms/conn_sqlite.py index 339af025a..1740537cf 100644 --- a/pilot/connections/rdbms/conn_sqlite.py +++ b/pilot/connections/rdbms/conn_sqlite.py @@ -70,10 +70,10 @@ class SQLiteConnect(RDBMSDatabase): def _sync_tables_from_db(self) -> Iterable[str]: table_results = self.session.execute( - "SELECT name FROM sqlite_master WHERE type='table'" + text("SELECT name FROM sqlite_master WHERE type='table'") ) view_results = self.session.execute( - "SELECT name FROM sqlite_master WHERE type='view'" + text("SELECT name FROM sqlite_master WHERE type='view'") ) table_results = set(row[0] for row in table_results) view_results = set(row[0] for row in view_results) diff --git a/pilot/connections/rdbms/tests/test_conn_sqlite.py b/pilot/connections/rdbms/tests/test_conn_sqlite.py index efe4ddf76..01ef51878 100644 --- a/pilot/connections/rdbms/tests/test_conn_sqlite.py +++ b/pilot/connections/rdbms/tests/test_conn_sqlite.py @@ -121,3 +121,17 @@ def test_get_database_list(db): def test_get_database_names(db): db.get_database_names() == [] + + +def test_db_dir_exist_dir(): + with tempfile.TemporaryDirectory() as temp_dir: + new_dir = os.path.join(temp_dir, 
"new_dir") + file_path = os.path.join(new_dir, "sqlite.db") + db = SQLiteConnect.from_file_path(file_path) + assert os.path.exists(new_dir) == True + assert list(db.get_table_names()) == [] + with tempfile.TemporaryDirectory() as existing_dir: + file_path = os.path.join(existing_dir, "sqlite.db") + db = SQLiteConnect.from_file_path(file_path) + assert os.path.exists(existing_dir) == True + assert list(db.get_table_names()) == [] diff --git a/pilot/openapi/api_v1/__init__.py b/pilot/openapi/api_v1/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pilot/vector_store/connector.py b/pilot/vector_store/connector.py index ca56986c8..eaa202e72 100644 --- a/pilot/vector_store/connector.py +++ b/pilot/vector_store/connector.py @@ -1,9 +1,15 @@ from pilot.vector_store.chroma_store import ChromaStore -from pilot.vector_store.milvus_store import MilvusStore from pilot.vector_store.weaviate_store import WeaviateStore -connector = {"Chroma": ChromaStore, "Milvus": MilvusStore, "Weaviate": WeaviateStore} +connector = {"Chroma": ChromaStore, "Weaviate": WeaviateStore} + +try: + from pilot.vector_store.milvus_store import MilvusStore + + connector["Milvus"] = MilvusStore +except: + pass class VectorStoreConnector: diff --git a/requirements.txt b/requirements.txt index 6340dcac0..55fdbadfb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,8 +5,8 @@ async-timeout==4.0.2 attrs==22.2.0 cchardet==2.1.7 chardet==5.1.0 -contourpy==1.0.7 -cycler==0.11.0 +# contourpy==1.0.7 +# cycler==0.11.0 filelock==3.9.0 fonttools==4.38.0 frozenlist==1.3.3 @@ -14,20 +14,20 @@ huggingface-hub==0.14.1 importlib-resources==5.12.0 sqlparse==0.4.4 -kiwisolver==1.4.4 -matplotlib==3.7.1 +# kiwisolver==1.4.4 +# matplotlib==3.7.1 multidict==6.0.4 packaging==23.0 psutil==5.9.4 -pycocotools==2.0.6 -pyparsing==3.0.9 +# pycocotools==2.0.6 +# pyparsing==3.0.9 python-dateutil==2.8.2 pyyaml==6.0 tokenizers==0.13.2 tqdm==4.64.1 transformers>=4.31.0 transformers_stream_generator 
-timm==0.6.13
+# timm==0.6.13
 spacy==3.5.3
 webdataset==0.2.48
 yarl==1.8.2
@@ -40,18 +40,17 @@ peft
 pycocoevalcap
 cpm_kernels
 umap-learn
-notebook
+# notebook
 gradio==3.23
 gradio-client==0.0.8
-wandb
-llama-index==0.5.27
+# wandb
+# llama-index==0.5.27
 
 # TODO move bitsandbytes to optional
 bitsandbytes
 accelerate>=0.20.3
 
 unstructured==0.6.3
-grpcio==1.47.5
 
 gpt4all==0.3.0
 diskcache==5.6.1
@@ -61,7 +60,7 @@ gTTS==2.3.1
 langchain
 nltk
 python-dotenv==1.0.0
-pymilvus==2.2.1
+
 vcrpy
 chromadb==0.3.22
 markdown2
@@ -74,18 +73,7 @@ bardapi==0.1.29
 
 # database
 
+# TODO moved to optional dependencies
 pymysql
 duckdb
-duckdb-engine
-pymssql
-
-# Testing dependencies
-pytest
-asynctest
-pytest-asyncio
-pytest-benchmark
-pytest-cov
-pytest-integration
-pytest-mock
-pytest-recording
-pytesseract==0.3.10
+duckdb-engine
\ No newline at end of file
diff --git a/requirements/test-requirements.txt b/requirements/test-requirements.txt
new file mode 100644
index 000000000..c2fb321a5
--- /dev/null
+++ b/requirements/test-requirements.txt
@@ -0,0 +1,10 @@
+# Testing dependencies
+pytest
+asynctest
+pytest-asyncio
+pytest-benchmark
+pytest-cov
+pytest-integration
+pytest-mock
+pytest-recording
+pytesseract==0.3.10
\ No newline at end of file
diff --git a/setup.py b/setup.py
index b33979f78..5136f4fb8 100644
--- a/setup.py
+++ b/setup.py
@@ -117,10 +117,30 @@ def llama_cpp_python_cuda_requires():
 
 
 def llama_cpp_requires():
+    """
+    pip install "db-gpt[llama_cpp]"
+    """
     setup_spec.extras["llama_cpp"] = ["llama-cpp-python"]
     llama_cpp_python_cuda_requires()
 
 
+def all_vector_store_requires():
+    """
+    pip install "db-gpt[vstore]"
+    """
+    setup_spec.extras["vstore"] = [
+        "grpcio==1.47.5",  # maybe delete it
+        "pymilvus==2.2.1",
+    ]
+
+
+def all_datasource_requires():
+    """
+    pip install "db-gpt[datasource]"
+    """
+    setup_spec.extras["datasource"] = ["pymssql", "pymysql"]
+
+
 def all_requires():
     requires = set()
     for _, pkgs in setup_spec.extras.items():
@@ -130,11 +150,15 @@ def all_requires():
llama_cpp_requires() +all_vector_store_requires() +all_datasource_requires() + +# must be last all_requires() setuptools.setup( name="db-gpt", - packages=find_packages(), + packages=find_packages(exclude=("tests", "*.tests", "*.tests.*", "examples")), version="0.3.5", author="csunny", author_email="cfqcsunny@gmail.com",