From 77461b96cf2e18b88b592fff441206a49826db97 Mon Sep 17 00:00:00 2001 From: Javier Martinez Date: Mon, 16 Sep 2024 16:43:05 +0200 Subject: [PATCH 1/4] feat: add retry connection to ollama (#2084) * feat: add retry connection to ollama When Ollama is running in docker-compose, Traefik is sometimes not yet ready to route the request, and the request fails * fix: mypy --- poetry.lock | 31 +++++++++++++++++++++++++++---- private_gpt/utils/ollama.py | 19 +++++++++++++++++-- private_gpt/utils/retry.py | 31 +++++++++++++++++++++++++++++++ pyproject.toml | 1 + 4 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 private_gpt/utils/retry.py diff --git a/poetry.lock b/poetry.lock index df765336..8310e1ec 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1060,6 +1060,17 @@ files = [ marshmallow = ">=3.18.0,<4.0.0" typing-inspect = ">=0.4.0,<1" +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + [[package]] name = "deprecated" version = "1.2.14" @@ -1250,7 +1261,7 @@ standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"] name = "ffmpy" version = "0.4.0" description = "A simple Python wrapper for FFmpeg" -optional = true +optional = false python-versions = "<4.0.0,>=3.8.1" files = [ {file = "ffmpy-0.4.0-py3-none-any.whl", hash = "sha256:39c0f20c5b465e7f8d29a5191f3a7d7675a8c546d9d985de8921151cd9b59e14"}, @@ -3850,8 +3861,6 @@ files = [ {file = "orjson-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:960db0e31c4e52fa0fc3ecbaea5b2d3b58f379e32a95ae6b0ebeaa25b93dfd34"}, {file = "orjson-3.10.6-cp312-none-win32.whl", hash = "sha256:a6ea7afb5b30b2317e0bee03c8d34c8181bc5a36f2afd4d0952f378972c4efd5"}, {file = "orjson-3.10.6-cp312-none-win_amd64.whl", hash = "sha256:874ce88264b7e655dde4aeaacdc8fd772a7962faadfb41abe63e2a4861abc3dc"}, - {file = "orjson-3.10.6-cp313-none-win32.whl", hash = "sha256:efdf2c5cde290ae6b83095f03119bdc00303d7a03b42b16c54517baa3c4ca3d0"}, - {file = "orjson-3.10.6-cp313-none-win_amd64.whl", hash = "sha256:8e190fe7888e2e4392f52cafb9626113ba135ef53aacc65cd13109eb9746c43e"}, {file = "orjson-3.10.6-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:66680eae4c4e7fc193d91cfc1353ad6d01b4801ae9b5314f17e11ba55e934183"}, {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caff75b425db5ef8e8f23af93c80f072f97b4fb3afd4af44482905c9f588da28"}, {file = "orjson-3.10.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3722fddb821b6036fd2a3c814f6bd9b57a89dc6337b9924ecd614ebce3271394"}, @@ -4970,6 +4979,20 @@ requests = ">=2.0.0" [package.extras] rsa = ["oauthlib[signedtoken] (>=3.0.0)"] +[[package]] +name = "retry-async" +version = "0.1.4" +description = "" +optional = false +python-versions = ">=3.10,<4.0" +files = [ + {file = "retry_async-0.1.4-py3-none-any.whl", hash = "sha256:21b383c7bc52013478337b894f476c9f106485cfeeb5d449abe5f745be2da219"}, + {file = "retry_async-0.1.4.tar.gz", hash = "sha256:8414d69b20920a1d700de34b68c0f972fa36a0158450a6f6abc5b45a241ac6b6"}, +] + +[package.dependencies] +decorator = ">=5.1.1,<6.0.0" + [[package]] name = "rich" version = "13.7.1" @@ -6691,4 +6714,4 @@ vector-stores-qdrant = 
["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "2eaa56bf185723ad028f5221675f1ee070bc70ba7d606ebe28dcfe276a3c9dca" +content-hash = "45264d80672084e35ca0ea11b368a29001a3b9003822bddc67fb18489a8fe519" diff --git a/private_gpt/utils/ollama.py b/private_gpt/utils/ollama.py index 9c75a875..da9107bf 100644 --- a/private_gpt/utils/ollama.py +++ b/private_gpt/utils/ollama.py @@ -3,10 +3,13 @@ from collections import deque from collections.abc import Iterator, Mapping from typing import Any +from httpx import ConnectError from tqdm import tqdm # type: ignore +from private_gpt.utils.retry import retry + try: - from ollama import Client # type: ignore + from ollama import Client, ResponseError # type: ignore except ImportError as e: raise ImportError( "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`" @@ -14,13 +17,25 @@ except ImportError as e: logger = logging.getLogger(__name__) +_MAX_RETRIES = 5 +_JITTER = (3.0, 10.0) + +@retry( + is_async=False, + exceptions=(ConnectError, ResponseError), + tries=_MAX_RETRIES, + jitter=_JITTER, + logger=logger, +) def check_connection(client: Client) -> bool: try: client.list() return True + except (ConnectError, ResponseError) as e: + raise e except Exception as e: - logger.error(f"Failed to connect to Ollama: {e!s}") + logger.error(f"Failed to connect to Ollama: {type(e).__name__}: {e!s}") return False diff --git a/private_gpt/utils/retry.py b/private_gpt/utils/retry.py new file mode 100644 index 00000000..614b6803 --- /dev/null +++ b/private_gpt/utils/retry.py @@ -0,0 +1,31 @@ +import logging +from collections.abc import Callable +from typing import Any + +from retry_async import retry as retry_untyped # type: ignore + +retry_logger = logging.getLogger(__name__) + + +def retry( + exceptions: Any = Exception, + *, + is_async: bool = False, + tries: int = -1, + delay: float = 0, + max_delay: float | None = None, + backoff: float = 1, + jitter: float | tuple[float, float] = 0, + logger: logging.Logger = retry_logger, +) -> Callable[..., Any]: + wrapped = retry_untyped( + exceptions=exceptions, + is_async=is_async, + tries=tries, + delay=delay, + max_delay=max_delay, + backoff=backoff, + jitter=jitter, + logger=logger, + ) + return wrapped # type: ignore diff --git a/pyproject.toml b/pyproject.toml index 17a7c698..da9fab80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ ollama = {version ="^0.3.0", optional = true} # Optional HF Transformers einops = {version = "^0.8.0", optional = true} +retry-async = "^0.1.4" [tool.poetry.extras] ui = ["gradio", "ffmpy"] From 8c12c6830b37851cccb3fea75faa820fce49284a Mon Sep 17 00:00:00 2001 From: Javier Martinez Date: Tue, 24 Sep 2024 08:30:58 +0200 Subject: [PATCH 2/4] fix: docker permissions (#2059) * fix: missing depends_on * chore: update copy permissions * chore: update entrypoint * Revert "chore: update entrypoint" This reverts commit f73a36af2f3b8b385f99a84d0eb020899f3e0e80. * Revert "chore: update copy permissions" This reverts commit fabc3f66bba9bd066a3f67bcaf5f9bfb5f6e031b. 
* style: fix docker warning * fix: multiple fixes * fix: user permissions writing local_data folder --- Dockerfile.llamacpp-cpu | 6 +++--- Dockerfile.ollama | 6 +++--- docker-compose.yaml | 12 ++++++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Dockerfile.llamacpp-cpu b/Dockerfile.llamacpp-cpu index fce9915a..feab1659 100644 --- a/Dockerfile.llamacpp-cpu +++ b/Dockerfile.llamacpp-cpu @@ -1,6 +1,6 @@ ### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER ### You will run into a segfault in mac -FROM python:3.11.6-slim-bookworm as base +FROM python:3.11.6-slim-bookworm AS base # Install poetry RUN pip install pipx @@ -20,14 +20,14 @@ RUN apt update && apt install -y \ # https://python-poetry.org/docs/configuration/#virtualenvsin-project ENV POETRY_VIRTUALENVS_IN_PROJECT=true -FROM base as dependencies +FROM base AS dependencies WORKDIR /home/worker/app COPY pyproject.toml poetry.lock ./ ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant" RUN poetry install --no-root --extras "${POETRY_EXTRAS}" -FROM base as app +FROM base AS app ENV PYTHONUNBUFFERED=1 ENV PORT=8080 diff --git a/Dockerfile.ollama b/Dockerfile.ollama index 5a8b73ac..83fd1297 100644 --- a/Dockerfile.ollama +++ b/Dockerfile.ollama @@ -1,4 +1,4 @@ -FROM python:3.11.6-slim-bookworm as base +FROM python:3.11.6-slim-bookworm AS base # Install poetry RUN pip install pipx @@ -10,14 +10,14 @@ ENV PATH=".venv/bin/:$PATH" # https://python-poetry.org/docs/configuration/#virtualenvsin-project ENV POETRY_VIRTUALENVS_IN_PROJECT=true -FROM base as dependencies +FROM base AS dependencies WORKDIR /home/worker/app COPY pyproject.toml poetry.lock ./ ARG POETRY_EXTRAS="ui vector-stores-qdrant llms-ollama embeddings-ollama" RUN poetry install --no-root --extras "${POETRY_EXTRAS}" -FROM base as app +FROM base AS app ENV PYTHONUNBUFFERED=1 ENV PORT=8080 ENV APP_ENV=prod diff --git a/docker-compose.yaml b/docker-compose.yaml index c2ef0f6d..2d6dff2e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -8,11 +8,12 @@ services: # This service builds from an external Dockerfile and runs the Ollama mode. private-gpt-ollama: image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version + user: root build: context: . dockerfile: Dockerfile.ollama volumes: - - ./local_data/:/home/worker/app/local_data + - ./local_data:/home/worker/app/local_data ports: - "8001:8001" environment: @@ -27,11 +28,14 @@ - ollama-cpu - ollama-cuda - ollama-api + depends_on: + - ollama # Private-GPT service for the local mode # This service builds from a local Dockerfile and runs the application in local mode. private-gpt-llamacpp-cpu: image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version + user: root build: context: .
dockerfile: Dockerfile.llamacpp-cpu @@ -44,7 +48,7 @@ services: environment: PORT: 8001 PGPT_PROFILES: local - HF_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN:-} profiles: - llamacpp-cpu @@ -57,7 +61,7 @@ services: ollama: image: traefik:v2.10 ports: - - "8081:8080" + - "11434:11434" command: - "--providers.file.filename=/etc/router.yml" - "--log.level=ERROR" @@ -98,4 +102,4 @@ services: count: 1 capabilities: [gpu] profiles: - - ollama-cuda \ No newline at end of file + - ollama-cuda From f9182b3a86d88af7c699b41b3a5f21401117acfc Mon Sep 17 00:00:00 2001 From: Liam Dowd <101684827+itsliamdowd@users.noreply.github.com> Date: Tue, 24 Sep 2024 07:31:30 +0100 Subject: [PATCH 3/4] feat: Adding MistralAI mode (#2065) * Adding MistralAI mode * Update embedding_component.py * Update ui.py * Update settings.py * Update embedding_component.py * Update settings.py * Update settings.py * Update settings-mistral.yaml * Update llm_component.py * Update settings-mistral.yaml * Update settings.py * Update settings.py * Update ui.py * Update embedding_component.py * Delete settings-mistral.yaml --------- Co-authored-by: SkiingIsFun123 <101684827+SkiingIsFun123@users.noreply.github.com> Co-authored-by: Javier Martinez --- poetry.lock | 50 ++++++++++++++++++- .../embedding/embedding_component.py | 17 +++++++ private_gpt/settings/settings.py | 9 +++- pyproject.toml | 2 + 4 files changed, 76 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8310e1ec..25e0bbda 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2238,6 +2238,17 @@ files = [ {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] +[[package]] +name = "jsonpath-python" +version = "1.0.6" +description = "A more powerful JSONPath implementation in modern python" +optional = true +python-versions = ">=3.6" +files = [ + {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"}, + {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, +] + [[package]] name = "kiwisolver" version = "1.4.5" @@ -2481,6 +2492,21 @@ huggingface-hub = {version = ">=0.19.0", extras = ["inference"]} llama-index-core = ">=0.10.1,<0.11.0" sentence-transformers = ">=2.6.1" +[[package]] +name = "llama-index-embeddings-mistralai" +version = "0.1.6" +description = "llama-index embeddings mistralai integration" +optional = true +python-versions = "<4.0,>=3.9" +files = [ + {file = "llama_index_embeddings_mistralai-0.1.6-py3-none-any.whl", hash = "sha256:d69d6fc0be8a1772aaf890bc036f2d575af46070b375a2649803c0eb9736ea1b"}, + {file = "llama_index_embeddings_mistralai-0.1.6.tar.gz", hash = "sha256:7c9cbf974b1e7d14ded34d3eb749a0d1a379fb151ab75115cc1ffdd08a96a045"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +mistralai = ">=1.0.0" + [[package]] name = "llama-index-embeddings-ollama" version = "0.1.2" @@ -3002,6 +3028,27 @@ files = [ {file = "minijinja-2.0.1.tar.gz", hash = "sha256:e774beffebfb8a1ad17e638ef70917cf5e94593f79acb8a8fff7d983169f3a4e"}, ] +[[package]] +name = "mistralai" +version = "1.0.3" +description = "Python Client SDK for the Mistral AI API." 
+optional = true +python-versions = "<4.0,>=3.8" +files = [ + {file = "mistralai-1.0.3-py3-none-any.whl", hash = "sha256:64af7c9192e64dc66b2da6d1c4d54a1324a881c21665a2f93d6b35d9de9f87c8"}, + {file = "mistralai-1.0.3.tar.gz", hash = "sha256:84f1a217666c76fec9d477ae266399b813c3ac32a4a348d2ecd5fe1c039b0667"}, +] + +[package.dependencies] +httpx = ">=0.27.0,<0.28.0" +jsonpath-python = ">=1.0.6,<2.0.0" +pydantic = ">=2.8.2,<2.9.0" +python-dateutil = ">=2.9.0.post0,<3.0.0" +typing-inspect = ">=0.9.0,<0.10.0" + +[package.extras] +gcp = ["google-auth (==2.27.0)", "requests (>=2.32.3,<3.0.0)"] + [[package]] name = "mmh3" version = "4.1.0" @@ -6692,6 +6739,7 @@ cffi = ["cffi (>=1.11)"] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-gemini = ["llama-index-embeddings-gemini"] embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"] +embeddings-mistral = ["llama-index-embeddings-mistralai"] embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] @@ -6714,4 +6762,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "45264d80672084e35ca0ea11b368a29001a3b9003822bddc67fb18489a8fe519" +content-hash = "3fa6ef447847895b1a16b8b0422dd9e4fda1aaaadef3af71971eb412da89bf67" diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index 5d3e9974..b55cef87 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -144,6 +144,23 @@ class EmbeddingComponent: api_key=settings.gemini.api_key, model_name=settings.gemini.embedding_model, ) + case "mistralai": + try: + from llama_index.embeddings.mistralai import ( # type: ignore + MistralAIEmbedding, + ) + except ImportError as e: + raise ImportError( + "Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`" + ) from e + + api_key = settings.openai.api_key + model = settings.openai.embedding_model + + self.embedding_model = MistralAIEmbedding( + api_key=api_key, + model=model, + ) case "mock": # Not a random number, is the dimensionality used by # the default embedding model diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 4cf192a3..9b4238dd 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -197,7 +197,14 @@ class HuggingFaceSettings(BaseModel): class EmbeddingSettings(BaseModel): mode: Literal[ - "huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock", "gemini" + "huggingface", + "openai", + "azopenai", + "sagemaker", + "ollama", + "mock", + "gemini", + "mistralai", ] ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field( "simple", diff --git a/pyproject.toml b/pyproject.toml index da9fab80..afbb83cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true} llama-index-embeddings-openai = {version ="^0.1.10", optional = true} llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true} llama-index-embeddings-gemini = {version ="^0.1.8", optional = true} +llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true} llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true} llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true} 
llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true} @@ -83,6 +84,7 @@ embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-gemini = ["llama-index-embeddings-gemini"] +embeddings-mistral = ["llama-index-embeddings-mistralai"] vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"] vector-stores-chroma = ["llama-index-vector-stores-chroma"] From fa3c30661d2ab04634361e20e7819365e3dd351a Mon Sep 17 00:00:00 2001 From: J Date: Tue, 24 Sep 2024 02:33:02 -0400 Subject: [PATCH 4/4] fix: Add default mode option to settings (#2078) * Add default mode option to settings * Revise default_mode to Literal (enum) and add to settings.yaml * Revise to pass make check/test * Default mode: RAG --------- Co-authored-by: Jason --- private_gpt/settings/settings.py | 4 ++++ private_gpt/ui/ui.py | 9 ++++++--- settings.yaml | 14 ++++++++------ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 9b4238dd..abe56627 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -357,6 +357,10 @@ class AzureOpenAISettings(BaseModel): class UISettings(BaseModel): enabled: bool path: str + default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field( + "RAG", + description="The default mode.", + ) default_chat_system_prompt: str = Field( None, description="The default system prompt to use for the chat mode.", diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py index 2c1dcd3e..04258ac3 100644 --- a/private_gpt/ui/ui.py +++ b/private_gpt/ui/ui.py @@ -99,8 +99,11 @@ class PrivateGptUi: self._selected_filename = None # Initialize system prompt based on default mode - self.mode = MODES[0] - self._system_prompt = self._get_default_system_prompt(self.mode) + default_mode_map = {mode.value: mode for mode in Modes} + self._default_mode = default_mode_map.get( + settings().ui.default_mode, Modes.RAG_MODE + ) + self._system_prompt = self._get_default_system_prompt(self._default_mode) def _chat( self, message: str, history: list[list[str]], mode: Modes, *_: Any @@ -390,7 +393,7 @@ class PrivateGptUi: with gr.Row(equal_height=False): with gr.Column(scale=3): - default_mode = MODES[0] + default_mode = self._default_mode mode = gr.Radio( [mode.value for mode in MODES], label="Mode", diff --git a/settings.yaml b/settings.yaml index f030604a..eda1af86 100644 --- a/settings.yaml +++ b/settings.yaml @@ -25,21 +25,23 @@ data: ui: enabled: true path: / + # "RAG", "Search", "Basic", or "Summarize" + default_mode: "RAG" default_chat_system_prompt: > - You are a helpful, respectful and honest assistant. + You are a helpful, respectful and honest assistant. Always answer as helpfully as possible and follow ALL given instructions. Do not speculate or make up information. Do not reference any given instructions or context. default_query_system_prompt: > - You can only answer questions about the provided context. - If you know the answer but it is not based in the provided context, don't provide + You can only answer questions about the provided context. + If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided. default_summarization_system_prompt: > - Provide a comprehensive summary of the provided context information. 
+ Provide a comprehensive summary of the provided context information. The summary should cover all the key points and main ideas presented in - the original text, while also condensing the information into a concise + the original text, while also condensing the information into a concise and easy-to-understand format. Please ensure that the summary includes - relevant details and examples that support the main ideas, while avoiding + relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. delete_file_button_enabled: true delete_all_files_button_enabled: true
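
A note on the default-mode resolution in PATCH 4/4: ui.py now builds a map from each Modes member's string value to the member itself and looks up settings().ui.default_mode in it, falling back to Modes.RAG_MODE, so a missing or misspelled setting degrades to RAG instead of crashing the UI. Below is a self-contained sketch of that lookup; the Modes enum here is a stand-in (only RAG_MODE and the four string values appear in the patch, the other member names are illustrative).

from enum import Enum


class Modes(str, Enum):
    # Stand-in mirroring Literal["RAG", "Search", "Basic", "Summarize"];
    # member names other than RAG_MODE are illustrative.
    RAG_MODE = "RAG"
    SEARCH_MODE = "Search"
    BASIC_CHAT_MODE = "Basic"
    SUMMARIZE_MODE = "Summarize"


default_mode_map = {mode.value: mode for mode in Modes}


def resolve_default_mode(configured: str) -> Modes:
    # Same shape as the ui.py change: unknown values fall back to RAG.
    return default_mode_map.get(configured, Modes.RAG_MODE)


assert resolve_default_mode("Summarize") is Modes.SUMMARIZE_MODE
assert resolve_default_mode("typo") is Modes.RAG_MODE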
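
A note on the retry behavior in PATCH 1/4: check_connection re-raises ConnectError and ResponseError so that the retry decorator (tries=5, jitter=(3.0, 10.0)) can retry them while Traefik is still warming up, with the last error propagating once the budget is exhausted; any other exception is logged and reported as False without retrying. The sketch below approximates the assumed synchronous semantics of retry_async (jitter as a uniform sleep range, tries=-1 meaning retry forever) using only the standard library; it is not the library's actual implementation, and flaky_list is a hypothetical stand-in for client.list().

import logging
import random
import time
from collections.abc import Callable
from functools import wraps
from typing import Any

logger = logging.getLogger(__name__)


def retry_sketch(
    exceptions: Any = Exception,
    *,
    tries: int = -1,
    jitter: float | tuple[float, float] = 0,
) -> Callable[..., Any]:
    # Retry `func` on the given exceptions; sleep a uniform random delay
    # drawn from `jitter` when it is a (low, high) tuple.
    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            remaining = tries
            while True:
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    remaining -= 1
                    if remaining == 0:
                        raise  # retry budget exhausted
                    delay = random.uniform(*jitter) if isinstance(jitter, tuple) else jitter
                    logger.warning("%s, retrying in %.1fs...", e, delay)
                    time.sleep(delay)

        return wrapper

    return decorator


_attempts = {"n": 0}


@retry_sketch(exceptions=(ConnectionError,), tries=5, jitter=(0.1, 0.3))
def flaky_list() -> bool:
    # Hypothetical stand-in for client.list(): fails twice, as if the
    # Traefik gateway were not ready yet, then succeeds.
    _attempts["n"] += 1
    if _attempts["n"] < 3:
        raise ConnectionError("gateway not ready")
    return True


assert flaky_list() is True  # succeeds on the third attempt, after two waits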