Mirror of https://github.com/imartinez/privateGPT.git (synced 2025-07-03 18:47:30 +00:00)

Merge branch 'zylon-ai:main' into streaming-choice-feature

Commit 41bfe234c6
Dockerfile.llamacpp-cpu

@@ -1,6 +1,6 @@
 ### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
 ### You will run into a segfault in mac
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base

 # Install poetry
 RUN pip install pipx
@@ -20,14 +20,14 @@ RUN apt update && apt install -y \
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true

-FROM base as dependencies
+FROM base AS dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./

 ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
 RUN poetry install --no-root --extras "${POETRY_EXTRAS}"

-FROM base as app
+FROM base AS app

 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
Dockerfile.ollama

@@ -1,4 +1,4 @@
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base

 # Install poetry
 RUN pip install pipx
@@ -10,14 +10,14 @@ ENV PATH=".venv/bin/:$PATH"
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true

-FROM base as dependencies
+FROM base AS dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./

 ARG POETRY_EXTRAS="ui vector-stores-qdrant llms-ollama embeddings-ollama"
 RUN poetry install --no-root --extras "${POETRY_EXTRAS}"

-FROM base as app
+FROM base AS app
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 ENV APP_ENV=prod
docker-compose.yaml

@@ -8,11 +8,12 @@ services:
   # This service builds from an external Dockerfile and runs the Ollama mode.
   private-gpt-ollama:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.ollama
     volumes:
-      - ./local_data/:/home/worker/app/local_data
+      - ./local_data:/home/worker/app/local_data
     ports:
       - "8001:8001"
     environment:
@@ -27,11 +28,14 @@ services:
       - ollama-cpu
       - ollama-cuda
      - ollama-api
+    depends_on:
+      - ollama

   # Private-GPT service for the local mode
   # This service builds from a local Dockerfile and runs the application in local mode.
   private-gpt-llamacpp-cpu:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.llamacpp-cpu
@@ -44,7 +48,7 @@ services:
     environment:
       PORT: 8001
       PGPT_PROFILES: local
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN:-}
     profiles:
       - llamacpp-cpu

@@ -57,7 +61,7 @@ services:
   ollama:
     image: traefik:v2.10
     ports:
-      - "8081:8080"
+      - "11434:11434"
     command:
       - "--providers.file.filename=/etc/router.yml"
       - "--log.level=ERROR"
@@ -98,4 +102,4 @@ services:
               count: 1
               capabilities: [gpu]
     profiles:
-      - ollama-cuda
+      - ollama-cuda
poetry.lock (generated, 79 changed lines)

@@ -1060,6 +1060,17 @@ files = [
 marshmallow = ">=3.18.0,<4.0.0"
 typing-inspect = ">=0.4.0,<1"

+[[package]]
+name = "decorator"
+version = "5.1.1"
+description = "Decorators for Humans"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
+    {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
+]
+
 [[package]]
 name = "deprecated"
 version = "1.2.14"
@@ -1250,7 +1261,7 @@ standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"]
 name = "ffmpy"
 version = "0.4.0"
 description = "A simple Python wrapper for FFmpeg"
-optional = true
+optional = false
 python-versions = "<4.0.0,>=3.8.1"
 files = [
     {file = "ffmpy-0.4.0-py3-none-any.whl", hash = "sha256:39c0f20c5b465e7f8d29a5191f3a7d7675a8c546d9d985de8921151cd9b59e14"},
@@ -2227,6 +2238,17 @@ files = [
     {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
 ]

+[[package]]
+name = "jsonpath-python"
+version = "1.0.6"
+description = "A more powerful JSONPath implementation in modern python"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"},
+    {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"},
+]
+
 [[package]]
 name = "kiwisolver"
 version = "1.4.5"
@@ -2470,6 +2492,21 @@ huggingface-hub = {version = ">=0.19.0", extras = ["inference"]}
 llama-index-core = ">=0.10.1,<0.11.0"
 sentence-transformers = ">=2.6.1"

+[[package]]
+name = "llama-index-embeddings-mistralai"
+version = "0.1.6"
+description = "llama-index embeddings mistralai integration"
+optional = true
+python-versions = "<4.0,>=3.9"
+files = [
+    {file = "llama_index_embeddings_mistralai-0.1.6-py3-none-any.whl", hash = "sha256:d69d6fc0be8a1772aaf890bc036f2d575af46070b375a2649803c0eb9736ea1b"},
+    {file = "llama_index_embeddings_mistralai-0.1.6.tar.gz", hash = "sha256:7c9cbf974b1e7d14ded34d3eb749a0d1a379fb151ab75115cc1ffdd08a96a045"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.10.1,<0.11.0"
+mistralai = ">=1.0.0"
+
 [[package]]
 name = "llama-index-embeddings-ollama"
 version = "0.1.2"
@@ -2991,6 +3028,27 @@ files = [
     {file = "minijinja-2.0.1.tar.gz", hash = "sha256:e774beffebfb8a1ad17e638ef70917cf5e94593f79acb8a8fff7d983169f3a4e"},
 ]

+[[package]]
+name = "mistralai"
+version = "1.0.3"
+description = "Python Client SDK for the Mistral AI API."
+optional = true
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "mistralai-1.0.3-py3-none-any.whl", hash = "sha256:64af7c9192e64dc66b2da6d1c4d54a1324a881c21665a2f93d6b35d9de9f87c8"},
+    {file = "mistralai-1.0.3.tar.gz", hash = "sha256:84f1a217666c76fec9d477ae266399b813c3ac32a4a348d2ecd5fe1c039b0667"},
+]
+
+[package.dependencies]
+httpx = ">=0.27.0,<0.28.0"
+jsonpath-python = ">=1.0.6,<2.0.0"
+pydantic = ">=2.8.2,<2.9.0"
+python-dateutil = ">=2.9.0.post0,<3.0.0"
+typing-inspect = ">=0.9.0,<0.10.0"
+
+[package.extras]
+gcp = ["google-auth (==2.27.0)", "requests (>=2.32.3,<3.0.0)"]
+
 [[package]]
 name = "mmh3"
 version = "4.1.0"
@@ -3850,8 +3908,6 @@ files = [
     {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"},
     {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"},
     {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"},
-    {file = "orjson-3.10.6-cp313-none-win32.whl", hash = "sha256:efdf2c5cde290ae6b83095f03119bdc00303d7a03b42b16c54517baa3c4ca3d0"},
-    {file = "orjson-3.10.6-cp313-none-win_amd64.whl", hash = "sha256:8e190fe7888e2e4392f52cafb9626113ba135ef53aacc65cd13109eb9746c43e"},
     {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"},
     {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"},
@@ -4970,6 +5026,20 @@ requests = ">=2.0.0"
 [package.extras]
 rsa = ["oauthlib[signedtoken] (>=3.0.0)"]

+[[package]]
+name = "retry-async"
+version = "0.1.4"
+description = ""
+optional = false
+python-versions = ">=3.10,<4.0"
+files = [
+    {file = "retry_async-0.1.4-py3-none-any.whl", hash = "sha256:21b383c7bc52013478337b894f476c9f106485cfeeb5d449abe5f745be2da219"},
+    {file = "retry_async-0.1.4.tar.gz", hash = "sha256:8414d69b20920a1d700de34b68c0f972fa36a0158450a6f6abc5b45a241ac6b6"},
+]
+
+[package.dependencies]
+decorator = ">=5.1.1,<6.0.0"
+
 [[package]]
 name = "rich"
 version = "13.7.1"
@@ -6669,6 +6739,7 @@ cffi = ["cffi (>=1.11)"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
+embeddings-mistral = ["llama-index-embeddings-mistralai"]
 embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
@@ -6691,4 +6762,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "2eaa56bf185723ad028f5221675f1ee070bc70ba7d606ebe28dcfe276a3c9dca"
+content-hash = "3fa6ef447847895b1a16b8b0422dd9e4fda1aaaadef3af71971eb412da89bf67"
private_gpt/components/embedding/embedding_component.py

@@ -144,6 +144,23 @@ class EmbeddingComponent:
                     api_key=settings.gemini.api_key,
                     model_name=settings.gemini.embedding_model,
                 )
+            case "mistralai":
+                try:
+                    from llama_index.embeddings.mistralai import (  # type: ignore
+                        MistralAIEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`"
+                    ) from e
+
+                api_key = settings.openai.api_key
+                model = settings.openai.embedding_model
+
+                self.embedding_model = MistralAIEmbedding(
+                    api_key=api_key,
+                    model=model,
+                )
             case "mock":
                 # Not a random number, is the dimensionality used by
                 # the default embedding model
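For context, here is a minimal sketch of what the new "mistralai" case wires up, assuming the llama-index-embeddings-mistralai 0.1.x public API; the API key is a placeholder and "mistral-embed" is the library's default model name, not a value taken from this diff. Note that the committed case above reads its api_key and embedding_model from the openai settings block rather than a dedicated Mistral section.

# Hedged sketch, not code from the repo.
from llama_index.embeddings.mistralai import MistralAIEmbedding

embedding = MistralAIEmbedding(
    api_key="YOUR_MISTRAL_API_KEY",  # placeholder, not a real key
    model_name="mistral-embed",      # the library's default model
)
vector = embedding.get_text_embedding("PrivateGPT is a local RAG stack.")
print(len(vector))  # mistral-embed returns 1024-dimensional vectors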
private_gpt/settings/settings.py

@@ -197,7 +197,14 @@ class HuggingFaceSettings(BaseModel):

 class EmbeddingSettings(BaseModel):
     mode: Literal[
-        "huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock", "gemini"
+        "huggingface",
+        "openai",
+        "azopenai",
+        "sagemaker",
+        "ollama",
+        "mock",
+        "gemini",
+        "mistralai",
     ]
     ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
         "simple",
@@ -350,6 +357,10 @@ class AzureOpenAISettings(BaseModel):
 class UISettings(BaseModel):
     enabled: bool
     path: str
+    default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
+        "RAG",
+        description="The default mode.",
+    )
     default_chat_system_prompt: str = Field(
         None,
         description="The default system prompt to use for the chat mode.",
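A hedged sketch of what widening the mode Literal buys: pydantic rejects any value outside the list when the settings are loaded. DemoEmbeddingSettings below is a stand-in for the real class, not code from the repo.

from typing import Literal

from pydantic import BaseModel, ValidationError


class DemoEmbeddingSettings(BaseModel):  # stand-in for EmbeddingSettings
    mode: Literal[
        "huggingface", "openai", "azopenai", "sagemaker",
        "ollama", "mock", "gemini", "mistralai",
    ]


DemoEmbeddingSettings(mode="mistralai")    # accepted after this change
try:
    DemoEmbeddingSettings(mode="mistral")  # not a member of the Literal
except ValidationError as err:
    print(err.errors()[0]["type"])         # "literal_error" under pydantic v2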
private_gpt/ui/ui.py

@@ -107,8 +107,11 @@ class PrivateGptUi:
         self._selected_filename = None

         # Initialize system prompt based on default mode
-        self.mode = MODES[0]
-        self._system_prompt = self._get_default_system_prompt(self.mode)
+        default_mode_map = {mode.value: mode for mode in Modes}
+        self._default_mode = default_mode_map.get(
+            settings().ui.default_mode, Modes.RAG_MODE
+        )
+        self._system_prompt = self._get_default_system_prompt(self._default_mode)

         # Initialize default response style: Streaming
         self.response_style = STYLES[0]
@@ -425,7 +428,7 @@ class PrivateGptUi:

         with gr.Row(equal_height=False):
             with gr.Column(scale=3):
-                default_mode = MODES[0]
+                default_mode = self._default_mode
                 mode = gr.Radio(
                     [mode.value for mode in MODES],
                     label="Mode",
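The default-mode lookup above is a plain dict keyed by the enum's string values, with RAG as the fallback for unknown settings. A reduced sketch follows; the Modes enum is simplified from the UI module and a hard-coded string stands in for settings().ui.default_mode.

from enum import Enum


class Modes(str, Enum):  # simplified from private_gpt/ui/ui.py
    RAG_MODE = "RAG"
    SEARCH_MODE = "Search"
    BASIC_CHAT_MODE = "Basic"
    SUMMARIZE_MODE = "Summarize"


configured = "Search"  # stand-in for settings().ui.default_mode
default_mode_map = {mode.value: mode for mode in Modes}

print(default_mode_map.get(configured, Modes.RAG_MODE))  # -> Modes.SEARCH_MODE
print(default_mode_map.get("bogus", Modes.RAG_MODE))     # falls back to RAG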
private_gpt/utils/ollama.py

@@ -3,10 +3,13 @@ from collections import deque
 from collections.abc import Iterator, Mapping
 from typing import Any

+from httpx import ConnectError
 from tqdm import tqdm  # type: ignore

+from private_gpt.utils.retry import retry
+
 try:
-    from ollama import Client  # type: ignore
+    from ollama import Client, ResponseError  # type: ignore
 except ImportError as e:
     raise ImportError(
         "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
@@ -14,13 +17,25 @@ except ImportError as e:

 logger = logging.getLogger(__name__)

+_MAX_RETRIES = 5
+_JITTER = (3.0, 10.0)
+
+
+@retry(
+    is_async=False,
+    exceptions=(ConnectError, ResponseError),
+    tries=_MAX_RETRIES,
+    jitter=_JITTER,
+    logger=logger,
+)
 def check_connection(client: Client) -> bool:
     try:
         client.list()
         return True
+    except (ConnectError, ResponseError) as e:
+        raise e
     except Exception as e:
-        logger.error(f"Failed to connect to Ollama: {e!s}")
+        logger.error(f"Failed to connect to Ollama: {type(e).__name__}: {e!s}")
         return False
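A hedged sketch of the retried connection check in action, driving it with a stub instead of a live Ollama server. FlakyClient and the import path are assumptions for illustration, and a real run would sleep 3-10 seconds between attempts because of the jitter.

from httpx import ConnectError

from private_gpt.utils.ollama import check_connection  # assumed module path


class FlakyClient:
    """Stub that raises ConnectError twice, then answers like Client.list()."""

    def __init__(self) -> None:
        self.calls = 0

    def list(self) -> dict:
        self.calls += 1
        if self.calls < 3:
            raise ConnectError("connection refused")
        return {"models": []}


# The decorator re-raises and retries the two ConnectErrors (up to
# _MAX_RETRIES attempts in total), so the third attempt returns True.
print(check_connection(FlakyClient()))  # True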
private_gpt/utils/retry.py (new file, 31 lines)

@@ -0,0 +1,31 @@
+import logging
+from collections.abc import Callable
+from typing import Any
+
+from retry_async import retry as retry_untyped  # type: ignore
+
+retry_logger = logging.getLogger(__name__)
+
+
+def retry(
+    exceptions: Any = Exception,
+    *,
+    is_async: bool = False,
+    tries: int = -1,
+    delay: float = 0,
+    max_delay: float | None = None,
+    backoff: float = 1,
+    jitter: float | tuple[float, float] = 0,
+    logger: logging.Logger = retry_logger,
+) -> Callable[..., Any]:
+    wrapped = retry_untyped(
+        exceptions=exceptions,
+        is_async=is_async,
+        tries=tries,
+        delay=delay,
+        max_delay=max_delay,
+        backoff=backoff,
+        jitter=jitter,
+        logger=logger,
+    )
+    return wrapped  # type: ignore
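A small usage sketch for the typed wrapper above; the flaky function and its failure pattern are invented. With tries=3, the first two ConnectionErrors are swallowed and retried, and a third consecutive failure would propagate to the caller.

import logging

from private_gpt.utils.retry import retry

logging.basicConfig(level=logging.WARNING)

attempts = 0


@retry(exceptions=(ConnectionError,), is_async=False, tries=3, jitter=(0.1, 0.3))
def flaky() -> str:
    global attempts
    attempts += 1
    if attempts < 3:
        raise ConnectionError("transient failure")
    return "ok"


print(flaky())  # two retried failures, then prints "ok"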
pyproject.toml

@@ -30,6 +30,7 @@ llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
 llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
 llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
 llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
+llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true}
 llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
 llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
 llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
@@ -66,6 +67,7 @@ ollama = {version ="^0.3.0", optional = true}

 # Optional HF Transformers
 einops = {version = "^0.8.0", optional = true}
+retry-async = "^0.1.4"

 [tool.poetry.extras]
 ui = ["gradio", "ffmpy"]
@@ -82,6 +84,7 @@ embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
+embeddings-mistral = ["llama-index-embeddings-mistralai"]
 vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
 vector-stores-chroma = ["llama-index-vector-stores-chroma"]
settings.yaml

@@ -25,6 +25,8 @@ data:
 ui:
   enabled: true
   path: /
+  # "RAG", "Search", "Basic", or "Summarize"
+  default_mode: "RAG"
   default_chat_system_prompt: >
     You are a helpful, respectful and honest assistant.
     Always answer as helpfully as possible and follow ALL given instructions.