diff --git a/Dockerfile.llamacpp-cpu b/Dockerfile.llamacpp-cpu
index fce9915a..feab1659 100644
--- a/Dockerfile.llamacpp-cpu
+++ b/Dockerfile.llamacpp-cpu
@@ -1,6 +1,6 @@
 ### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
 ### You will run into a segfault in mac
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base
 
 # Install poetry
 RUN pip install pipx
@@ -20,14 +20,14 @@ RUN apt update && apt install -y \
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 
-FROM base as dependencies
+FROM base AS dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
 ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
 RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
 
-FROM base as app
+FROM base AS app
 
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
diff --git a/Dockerfile.ollama b/Dockerfile.ollama
index 5a8b73ac..83fd1297 100644
--- a/Dockerfile.ollama
+++ b/Dockerfile.ollama
@@ -1,4 +1,4 @@
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base
 
 # Install poetry
 RUN pip install pipx
@@ -10,14 +10,14 @@ ENV PATH=".venv/bin/:$PATH"
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 
-FROM base as dependencies
+FROM base AS dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
 ARG POETRY_EXTRAS="ui vector-stores-qdrant llms-ollama embeddings-ollama"
 RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
 
-FROM base as app
+FROM base AS app
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 ENV APP_ENV=prod
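The Dockerfile changes above are purely cosmetic: uppercasing `as` to `AS` silences BuildKit's stage-casing warning without changing the build. As a quick way to verify a rebuilt image still comes up, here is a minimal smoke-test sketch; it assumes the port 8001 mapping used by the compose file below and the app's default `/health` route, so adjust both if your setup differs:

```python
# smoke_test.py -- poll the containerized API until it responds.
# Assumptions: the service publishes port 8001 (as in docker-compose.yaml
# below) and the default /health route is enabled; adjust if yours differ.
import time

import httpx


def wait_until_healthy(base_url: str = "http://localhost:8001", timeout: float = 60.0) -> bool:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            if httpx.get(f"{base_url}/health", timeout=5.0).status_code == 200:
                return True
        except httpx.HTTPError:
            pass  # container still starting; keep polling
        time.sleep(2.0)
    return False


if __name__ == "__main__":
    print("healthy" if wait_until_healthy() else "timed out")
```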
diff --git a/docker-compose.yaml b/docker-compose.yaml
index c2ef0f6d..2d6dff2e 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -8,11 +8,12 @@ services:
   # This service builds from an external Dockerfile and runs the Ollama mode.
   private-gpt-ollama:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.ollama
     volumes:
-      - ./local_data/:/home/worker/app/local_data
+      - ./local_data:/home/worker/app/local_data
     ports:
       - "8001:8001"
     environment:
@@ -27,11 +28,14 @@ services:
       - ollama-cpu
       - ollama-cuda
       - ollama-api
+    depends_on:
+      - ollama
 
   # Private-GPT service for the local mode
   # This service builds from a local Dockerfile and runs the application in local mode.
   private-gpt-llamacpp-cpu:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.llamacpp-cpu
@@ -44,7 +48,7 @@ services:
     environment:
       PORT: 8001
       PGPT_PROFILES: local
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN:-}
     profiles:
       - llamacpp-cpu
@@ -57,7 +61,7 @@ services:
   ollama:
     image: traefik:v2.10
     ports:
-      - "8081:8080"
+      - "11434:11434"
     command:
       - "--providers.file.filename=/etc/router.yml"
       - "--log.level=ERROR"
@@ -98,4 +102,4 @@ services:
               count: 1
               capabilities: [gpu]
     profiles:
-      - ollama-cuda
\ No newline at end of file
+      - ollama-cuda
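Republishing the Traefik proxy on 11434 (Ollama's native port) instead of 8081 means host-side tools can reach the proxied Ollama API at the address they already expect. A quick sanity check, sketched against the `ollama` 0.3.x client pinned by this project; 0.3.x returns plain dicts, so the key access below assumes that version:

```python
# Verify the Traefik-proxied Ollama API on the remapped port.
# Assumes `docker compose --profile ollama-cpu up` (or -cuda/-api) is running.
from ollama import Client

client = Client(host="http://localhost:11434")
models = client.list()  # same call check_connection() makes below
print([m["name"] for m in models["models"]])  # dict-style access per ollama 0.3.x
```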
diff --git a/poetry.lock b/poetry.lock
index df765336..25e0bbda 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1060,6 +1060,17 @@ files = [
 marshmallow = ">=3.18.0,<4.0.0"
 typing-inspect = ">=0.4.0,<1"
 
+[[package]]
+name = "decorator"
+version = "5.1.1"
+description = "Decorators for Humans"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
+    {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
+]
+
 [[package]]
 name = "deprecated"
 version = "1.2.14"
@@ -1250,7 +1261,7 @@ standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"]
 name = "ffmpy"
 version = "0.4.0"
 description = "A simple Python wrapper for FFmpeg"
-optional = true
+optional = false
 python-versions = "<4.0.0,>=3.8.1"
 files = [
     {file = "ffmpy-0.4.0-py3-none-any.whl", hash = "sha256:39c0f20c5b465e7f8d29a5191f3a7d7675a8c546d9d985de8921151cd9b59e14"},
@@ -2227,6 +2238,17 @@ files = [
     {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
 ]
 
+[[package]]
+name = "jsonpath-python"
+version = "1.0.6"
+description = "A more powerful JSONPath implementation in modern python"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"},
+    {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"},
+]
+
 [[package]]
 name = "kiwisolver"
 version = "1.4.5"
@@ -2470,6 +2492,21 @@ huggingface-hub = {version = ">=0.19.0", extras = ["inference"]}
 llama-index-core = ">=0.10.1,<0.11.0"
 sentence-transformers = ">=2.6.1"
 
+[[package]]
+name = "llama-index-embeddings-mistralai"
+version = "0.1.6"
+description = "llama-index embeddings mistralai integration"
+optional = true
+python-versions = "<4.0,>=3.9"
+files = [
+    {file = "llama_index_embeddings_mistralai-0.1.6-py3-none-any.whl", hash = "sha256:d69d6fc0be8a1772aaf890bc036f2d575af46070b375a2649803c0eb9736ea1b"},
+    {file = "llama_index_embeddings_mistralai-0.1.6.tar.gz", hash = "sha256:7c9cbf974b1e7d14ded34d3eb749a0d1a379fb151ab75115cc1ffdd08a96a045"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.10.1,<0.11.0"
+mistralai = ">=1.0.0"
+
 [[package]]
 name = "llama-index-embeddings-ollama"
 version = "0.1.2"
@@ -2991,6 +3028,27 @@ files = [
     {file = "minijinja-2.0.1.tar.gz", hash = "sha256:e774beffebfb8a1ad17e638ef70917cf5e94593f79acb8a8fff7d983169f3a4e"},
 ]
 
+[[package]]
+name = "mistralai"
+version = "1.0.3"
+description = "Python Client SDK for the Mistral AI API."
+optional = true
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "mistralai-1.0.3-py3-none-any.whl", hash = "sha256:64af7c9192e64dc66b2da6d1c4d54a1324a881c21665a2f93d6b35d9de9f87c8"},
+    {file = "mistralai-1.0.3.tar.gz", hash = "sha256:84f1a217666c76fec9d477ae266399b813c3ac32a4a348d2ecd5fe1c039b0667"},
+]
+
+[package.dependencies]
+httpx = ">=0.27.0,<0.28.0"
+jsonpath-python = ">=1.0.6,<2.0.0"
+pydantic = ">=2.8.2,<2.9.0"
+python-dateutil = ">=2.9.0.post0,<3.0.0"
+typing-inspect = ">=0.9.0,<0.10.0"
+
+[package.extras]
+gcp = ["google-auth (==2.27.0)", "requests (>=2.32.3,<3.0.0)"]
+
 [[package]]
 name = "mmh3"
 version = "4.1.0"
@@ -3850,8 +3908,6 @@ files = [
     {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
     {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
     {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
-    {file = "orjson-3.10.6-cp313-none-win32.whl", hash = "sha256:efdf2c5cde290ae6b83095f03119bdc00303d7a03b42b16c54517baa3c4ca3d0"},
-    {file = "orjson-3.10.6-cp313-none-win_amd64.whl", hash = "sha256:8e190fe7888e2e4392f52cafb9626113ba135ef53aacc65cd13109eb9746c43e"},
     {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
@@ -4970,6 +5026,20 @@ requests = ">=2.0.0"
 [package.extras]
 rsa = ["oauthlib[signedtoken] (>=3.0.0)"]
 
+[[package]]
+name = "retry-async"
+version = "0.1.4"
+description = ""
+optional = false
+python-versions = ">=3.10,<4.0"
+files = [
+    {file = "retry_async-0.1.4-py3-none-any.whl", hash = "sha256:21b383c7bc52013478337b894f476c9f106485cfeeb5d449abe5f745be2da219"},
+    {file = "retry_async-0.1.4.tar.gz", hash = "sha256:8414d69b20920a1d700de34b68c0f972fa36a0158450a6f6abc5b45a241ac6b6"},
+]
+
+[package.dependencies]
+decorator = ">=5.1.1,<6.0.0"
+
 [[package]]
 name = "rich"
 version = "13.7.1"
@@ -6669,6 +6739,7 @@ cffi = ["cffi (>=1.11)"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
+embeddings-mistral = ["llama-index-embeddings-mistralai"]
 embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
@@ -6691,4 +6762,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "2eaa56bf185723ad028f5221675f1ee070bc70ba7d606ebe28dcfe276a3c9dca"
+content-hash = "3fa6ef447847895b1a16b8b0422dd9e4fda1aaaadef3af71971eb412da89bf67"
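The lockfile changes above add `llama-index-embeddings-mistralai` (with its `mistralai` and `jsonpath-python` dependencies) behind the new `embeddings-mistral` extra, plus `retry-async` and `decorator` as regular dependencies for the retry helper later in this patch. Below is a minimal sketch of the embedding class the extra pulls in; the `model_name` parameter follows the 0.1.x release of the package, and the key/model values are placeholders, so treat the exact signature as an assumption:

```python
# Requires: poetry install --extras embeddings-mistral
from llama_index.embeddings.mistralai import MistralAIEmbedding

embed_model = MistralAIEmbedding(
    model_name="mistral-embed",      # Mistral's hosted embedding model
    api_key="your-mistral-api-key",  # placeholder; use a real key
)
vector = embed_model.get_text_embedding("hello world")
print(len(vector))  # dimensionality of the returned embedding
```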
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
index 5d3e9974..b55cef87 100644
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@@ -144,6 +144,23 @@ class EmbeddingComponent:
                     api_key=settings.gemini.api_key,
                     model_name=settings.gemini.embedding_model,
                 )
+            case "mistralai":
+                try:
+                    from llama_index.embeddings.mistralai import (  # type: ignore
+                        MistralAIEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`"
+                    ) from e
+
+                api_key = settings.openai.api_key
+                model = settings.openai.embedding_model
+
+                self.embedding_model = MistralAIEmbedding(
+                    api_key=api_key,
+                    model=model,
+                )
             case "mock":
                 # Not a random number, is the dimensionality used by
                 # the default embedding model
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 4cf192a3..abe56627 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -197,7 +197,14 @@ class HuggingFaceSettings(BaseModel):
 
 class EmbeddingSettings(BaseModel):
     mode: Literal[
-        "huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock", "gemini"
+        "huggingface",
+        "openai",
+        "azopenai",
+        "sagemaker",
+        "ollama",
+        "mock",
+        "gemini",
+        "mistralai",
     ]
     ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
         "simple",
@@ -350,6 +357,10 @@ class AzureOpenAISettings(BaseModel):
 class UISettings(BaseModel):
     enabled: bool
     path: str
+    default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
+        "RAG",
+        description="The default mode.",
+    )
     default_chat_system_prompt: str = Field(
         None,
         description="The default system prompt to use for the chat mode.",
diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py
index e61a8827..7f96a4f2 100644
--- a/private_gpt/ui/ui.py
+++ b/private_gpt/ui/ui.py
@@ -107,8 +107,11 @@ class PrivateGptUi:
         self._selected_filename = None
 
         # Initialize system prompt based on default mode
-        self.mode = MODES[0]
-        self._system_prompt = self._get_default_system_prompt(self.mode)
+        default_mode_map = {mode.value: mode for mode in Modes}
+        self._default_mode = default_mode_map.get(
+            settings().ui.default_mode, Modes.RAG_MODE
+        )
+        self._system_prompt = self._get_default_system_prompt(self._default_mode)
 
         # Initialize default response style: Streaming
         self.response_style = STYLES[0]
@@ -425,7 +428,7 @@ class PrivateGptUi:
 
         with gr.Row(equal_height=False):
             with gr.Column(scale=3):
-                default_mode = MODES[0]
+                default_mode = self._default_mode
                 mode = gr.Radio(
                     [mode.value for mode in MODES],
                     label="Mode",
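The three changes above thread `ui.default_mode` from YAML to the browser: `UISettings` constrains the value to the four mode literals, and `PrivateGptUi` resolves it against the `Modes` enum with a silent fallback to RAG for anything unrecognized. A self-contained sketch of that resolution step follows; the enum members mirror the UI's `Modes` enum, whose names (other than `RAG_MODE`, which appears in the diff) are assumed here:

```python
from enum import Enum


class Modes(str, Enum):
    # Stand-in mirroring private_gpt/ui/ui.py's Modes enum; member names
    # other than RAG_MODE are assumptions for illustration.
    RAG_MODE = "RAG"
    SEARCH_MODE = "Search"
    BASIC_CHAT_MODE = "Basic"
    SUMMARIZE_MODE = "Summarize"


def resolve_default_mode(configured: str) -> Modes:
    # Same pattern as the ui.py hunk: map the settings string onto the enum,
    # falling back to RAG when the configured value is unknown.
    default_mode_map = {mode.value: mode for mode in Modes}
    return default_mode_map.get(configured, Modes.RAG_MODE)


assert resolve_default_mode("Summarize") is Modes.SUMMARIZE_MODE
assert resolve_default_mode("bogus") is Modes.RAG_MODE  # silent fallback, no error
```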
logger.error(f"Failed to connect to Ollama: {type(e).__name__}: {e!s}") return False diff --git a/private_gpt/utils/retry.py b/private_gpt/utils/retry.py new file mode 100644 index 00000000..614b6803 --- /dev/null +++ b/private_gpt/utils/retry.py @@ -0,0 +1,31 @@ +import logging +from collections.abc import Callable +from typing import Any + +from retry_async import retry as retry_untyped # type: ignore + +retry_logger = logging.getLogger(__name__) + + +def retry( + exceptions: Any = Exception, + *, + is_async: bool = False, + tries: int = -1, + delay: float = 0, + max_delay: float | None = None, + backoff: float = 1, + jitter: float | tuple[float, float] = 0, + logger: logging.Logger = retry_logger, +) -> Callable[..., Any]: + wrapped = retry_untyped( + exceptions=exceptions, + is_async=is_async, + tries=tries, + delay=delay, + max_delay=max_delay, + backoff=backoff, + jitter=jitter, + logger=logger, + ) + return wrapped # type: ignore diff --git a/pyproject.toml b/pyproject.toml index 17a7c698..afbb83cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true} llama-index-embeddings-openai = {version ="^0.1.10", optional = true} llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true} llama-index-embeddings-gemini = {version ="^0.1.8", optional = true} +llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true} llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true} llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true} llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true} @@ -66,6 +67,7 @@ ollama = {version ="^0.3.0", optional = true} # Optional HF Transformers einops = {version = "^0.8.0", optional = true} +retry-async = "^0.1.4" [tool.poetry.extras] ui = ["gradio", "ffmpy"] @@ -82,6 +84,7 @@ embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-gemini = ["llama-index-embeddings-gemini"] +embeddings-mistral = ["llama-index-embeddings-mistralai"] vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"] vector-stores-chroma = ["llama-index-vector-stores-chroma"] diff --git a/settings.yaml b/settings.yaml index 04592ebe..eda1af86 100644 --- a/settings.yaml +++ b/settings.yaml @@ -25,6 +25,8 @@ data: ui: enabled: true path: / + # "RAG", "Search", "Basic", or "Summarize" + default_mode: "RAG" default_chat_system_prompt: > You are a helpful, respectful and honest assistant. Always answer as helpfully as possible and follow ALL given instructions.