diff --git a/Dockerfile.llamacpp-cpu b/Dockerfile.llamacpp-cpu
index fce9915a..feab1659 100644
--- a/Dockerfile.llamacpp-cpu
+++ b/Dockerfile.llamacpp-cpu
@@ -1,6 +1,6 @@
 ### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
 ### You will run into a segfault in mac
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base
 
 # Install poetry
 RUN pip install pipx
@@ -20,14 +20,14 @@ RUN apt update && apt install -y \
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 
-FROM base as dependencies
+FROM base AS dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
 ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
 RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
 
-FROM base as app
+FROM base AS app
 
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
diff --git a/Dockerfile.ollama b/Dockerfile.ollama
index 5a8b73ac..83fd1297 100644
--- a/Dockerfile.ollama
+++ b/Dockerfile.ollama
@@ -1,4 +1,4 @@
-FROM python:3.11.6-slim-bookworm as base
+FROM python:3.11.6-slim-bookworm AS base
 
 # Install poetry
 RUN pip install pipx
@@ -10,14 +10,14 @@ ENV PATH=".venv/bin/:$PATH"
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 
-FROM base as dependencies
+FROM base AS dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
 ARG POETRY_EXTRAS="ui vector-stores-qdrant llms-ollama embeddings-ollama"
 RUN poetry install --no-root --extras "${POETRY_EXTRAS}"
 
-FROM base as app
+FROM base AS app
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 ENV APP_ENV=prod
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 7cec076b..bd0fa368 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -7,11 +7,12 @@ services:
   # This service builds from an external Dockerfile and runs the Ollama mode.
   private-gpt-ollama:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.ollama
     volumes:
-      - ./local_data/:/home/worker/app/local_data
+      - ./local_data:/home/worker/app/local_data
     ports:
       - "8001:8001"
     environment:
@@ -26,11 +27,14 @@ services:
       - ollama-cpu
       - ollama-cuda
       - ollama-api
+    depends_on:
+      - ollama
 
   # Private-GPT service for the local mode
   # This service builds from a local Dockerfile and runs the application in local mode.
   private-gpt-llamacpp-cpu:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.llamacpp-cpu
@@ -43,7 +47,7 @@ services:
     environment:
       PORT: 8001
       PGPT_PROFILES: local
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN:-}
     profiles:
       - llamacpp-cpu
 
@@ -56,7 +60,7 @@ services:
   ollama:
     image: traefik:v2.10
     ports:
-      - "8081:8080"
+      - "11434:11434"
     command:
      - "--providers.file.filename=/etc/router.yml"
      - "--log.level=ERROR"
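The compose changes publish the Traefik-fronted `ollama` service on Ollama's conventional port 11434 (instead of the old 8081:8080 mapping), make `private-gpt-ollama` wait for it via `depends_on`, and let `HF_TOKEN` default to empty so the stack starts without it. A quick way to sanity-check the wiring once the stack is up (e.g. with `docker compose --profile ollama-cpu up`) is to probe the proxied Ollama API from the host. This is an illustrative sketch using `requests`, not part of the patch; adjust the host if you are not on localhost:

```python
# Hypothetical smoke test for the remapped Ollama proxy port (11434).
import requests

resp = requests.get("http://localhost:11434/api/tags", timeout=5)
resp.raise_for_status()
# /api/tags is Ollama's model-listing endpoint; even an empty list proves
# the Traefik proxy and the Ollama backend behind it are reachable.
print([m["name"] for m in resp.json().get("models", [])])
```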
diff --git a/poetry.lock b/poetry.lock
index 82d703ba..49da0cf2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2413,6 +2413,17 @@ files = [
     {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
 ]
 
+[[package]]
+name = "jsonpath-python"
+version = "1.0.6"
+description = "A more powerful JSONPath implementation in modern python"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"},
+    {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"},
+]
+
 [[package]]
 name = "kiwisolver"
 version = "1.4.7"
@@ -2682,6 +2693,21 @@ huggingface-hub = {version = ">=0.19.0", extras = ["inference"]}
 llama-index-core = ">=0.10.1,<0.11.0"
 sentence-transformers = ">=2.6.1"
 
+[[package]]
+name = "llama-index-embeddings-mistralai"
+version = "0.1.6"
+description = "llama-index embeddings mistralai integration"
+optional = true
+python-versions = "<4.0,>=3.9"
+files = [
+    {file = "llama_index_embeddings_mistralai-0.1.6-py3-none-any.whl", hash = "sha256:d69d6fc0be8a1772aaf890bc036f2d575af46070b375a2649803c0eb9736ea1b"},
+    {file = "llama_index_embeddings_mistralai-0.1.6.tar.gz", hash = "sha256:7c9cbf974b1e7d14ded34d3eb749a0d1a379fb151ab75115cc1ffdd08a96a045"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.10.1,<0.11.0"
+mistralai = ">=1.0.0"
+
 [[package]]
 name = "llama-index-embeddings-ollama"
 version = "0.1.3"
@@ -3221,16 +3247,37 @@ description = "An experimental Python binding of the Rust MiniJinja template eng
 optional = true
 python-versions = ">=3.8"
 files = [
-    {file = "minijinja-2.2.0-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e4154fcf72e81be01c2733b770e6cb3e584851cb2fa73c58e347b04967d3d7c0"},
-    {file = "minijinja-2.2.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b05e0070c08b550fa9a09ff9c051f47424674332dd56cc54b997dd602887907"},
-    {file = "minijinja-2.2.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:360ea4a93fdf1fe327f3e70eed20ecb29f324ca28fae177de0605dcc29869300"},
-    {file = "minijinja-2.2.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9cad5ccb021ef25b6a271158f4d6636474edb08cd1dd49355aac6b68a48aebb"},
-    {file = "minijinja-2.2.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7a85c67c519b413fc4892854782927e1244a24cbbb3a3cb0ac5e57d9fdb1868c"},
-    {file = "minijinja-2.2.0-cp38-abi3-win32.whl", hash = "sha256:e431a2467dd6e1bcb7c511e9fbad012b02c6e5453acdd9fbd4c4af0d34a3d1c5"},
-    {file = "minijinja-2.2.0-cp38-abi3-win_amd64.whl", hash = "sha256:d4df7e4a09be4249c8243207fa89e6f4d22b853c2b565a99f48e478a30713822"},
-    {file = "minijinja-2.2.0.tar.gz", hash = "sha256:4411052c7a60f8d56468cc6d17d45d72be3d5e89e9578a04f8336cc56601523c"},
+    {file = "minijinja-2.0.1-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:063b291cb31f5c33eb77bb4cb457f67f14426ca1418232b8ae9f267155d330cc"},
+    {file = "minijinja-2.0.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a4e9d639dd89ce7fef86e82147082ab3c248a36950fa3fbe793685ba322c1b7"},
+    {file = "minijinja-2.0.1-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a20373af4ee5430356c196c7fe5f19e3261a4fa16c944542b4de7a2349bac7a6"},
+    {file = "minijinja-2.0.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ade637bf4826258811a785ccc4e5d41cd2bdf4ec317b1ed3daa4dbbdd020f37d"},
+    {file = "minijinja-2.0.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5ec956d777e0fee8e214af48363334c04f098e986038a9e8cb92a0564f81943"},
+    {file = "minijinja-2.0.1-cp38-abi3-win32.whl", hash = "sha256:039f4d1a1a73f90917cff1ed7c617eb56e2b2f91bbbdc551adaa448e1673e5c2"},
+    {file = "minijinja-2.0.1-cp38-abi3-win_amd64.whl", hash = "sha256:dca5d7689905dce340e36e47348b505c788daf297253b85a1aff506ea63ad1b8"},
+    {file = "minijinja-2.0.1.tar.gz", hash = "sha256:e774beffebfb8a1ad17e638ef70917cf5e94593f79acb8a8fff7d983169f3a4e"},
 ]
 
+[[package]]
+name = "mistralai"
+version = "1.0.3"
+description = "Python Client SDK for the Mistral AI API."
+optional = true
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "mistralai-1.0.3-py3-none-any.whl", hash = "sha256:64af7c9192e64dc66b2da6d1c4d54a1324a881c21665a2f93d6b35d9de9f87c8"},
+    {file = "mistralai-1.0.3.tar.gz", hash = "sha256:84f1a217666c76fec9d477ae266399b813c3ac32a4a348d2ecd5fe1c039b0667"},
+]
+
+[package.dependencies]
+httpx = ">=0.27.0,<0.28.0"
+jsonpath-python = ">=1.0.6,<2.0.0"
+pydantic = ">=2.8.2,<2.9.0"
+python-dateutil = ">=2.9.0.post0,<3.0.0"
+typing-inspect = ">=0.9.0,<0.10.0"
+
+[package.extras]
+gcp = ["google-auth (==2.27.0)", "requests (>=2.32.3,<3.0.0)"]
+
 [[package]]
 name = "mmh3"
 version = "5.0.0"
@@ -7010,6 +7057,7 @@ embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-fireworks = ["llama-index-embeddings-fireworks"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"]
+embeddings-mistral = ["llama-index-embeddings-mistralai"]
 embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
@@ -7033,4 +7081,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "43d568d296fc82dfe94a8b13877f0424d5e7980a0f0ca9d93dace983e9623584"
+content-hash = "3fa6ef447847895b1a16b8b0422dd9e4fda1aaaadef3af71971eb412da89bf67"
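The lockfile now vendors Mistral's v1 Python SDK (`mistralai` 1.0.3, which pulls in `jsonpath-python` and pins a narrow `pydantic` range) to back the new embeddings integration. For orientation, here is a minimal sketch of what that SDK exposes, assuming a valid key in `MISTRAL_API_KEY`; it is illustrative and not part of the patch:

```python
# Illustrative use of the mistralai 1.x client that this lockfile pins.
import os
from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
# "mistral-embed" is Mistral's text-embedding model.
response = client.embeddings.create(
    model="mistral-embed",
    inputs=["PrivateGPT now supports Mistral embeddings."],
)
print(len(response.data[0].embedding))  # mistral-embed vectors are 1024-dim
```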
"minijinja-2.0.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a4e9d639dd89ce7fef86e82147082ab3c248a36950fa3fbe793685ba322c1b7"}, + {file = "minijinja-2.0.1-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a20373af4ee5430356c196c7fe5f19e3261a4fa16c944542b4de7a2349bac7a6"}, + {file = "minijinja-2.0.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ade637bf4826258811a785ccc4e5d41cd2bdf4ec317b1ed3daa4dbbdd020f37d"}, + {file = "minijinja-2.0.1-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5ec956d777e0fee8e214af48363334c04f098e986038a9e8cb92a0564f81943"}, + {file = "minijinja-2.0.1-cp38-abi3-win32.whl", hash = "sha256:039f4d1a1a73f90917cff1ed7c617eb56e2b2f91bbbdc551adaa448e1673e5c2"}, + {file = "minijinja-2.0.1-cp38-abi3-win_amd64.whl", hash = "sha256:dca5d7689905dce340e36e47348b505c788daf297253b85a1aff506ea63ad1b8"}, + {file = "minijinja-2.0.1.tar.gz", hash = "sha256:e774beffebfb8a1ad17e638ef70917cf5e94593f79acb8a8fff7d983169f3a4e"}, ] +[[package]] +name = "mistralai" +version = "1.0.3" +description = "Python Client SDK for the Mistral AI API." +optional = true +python-versions = "<4.0,>=3.8" +files = [ + {file = "mistralai-1.0.3-py3-none-any.whl", hash = "sha256:64af7c9192e64dc66b2da6d1c4d54a1324a881c21665a2f93d6b35d9de9f87c8"}, + {file = "mistralai-1.0.3.tar.gz", hash = "sha256:84f1a217666c76fec9d477ae266399b813c3ac32a4a348d2ecd5fe1c039b0667"}, +] + +[package.dependencies] +httpx = ">=0.27.0,<0.28.0" +jsonpath-python = ">=1.0.6,<2.0.0" +pydantic = ">=2.8.2,<2.9.0" +python-dateutil = ">=2.9.0.post0,<3.0.0" +typing-inspect = ">=0.9.0,<0.10.0" + +[package.extras] +gcp = ["google-auth (==2.27.0)", "requests (>=2.32.3,<3.0.0)"] + [[package]] name = "mmh3" version = "5.0.0" @@ -7010,6 +7057,7 @@ embeddings-azopenai = ["llama-index-embeddings-azure-openai"] embeddings-fireworks = ["llama-index-embeddings-fireworks"] embeddings-gemini = ["llama-index-embeddings-gemini"] embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"] +embeddings-mistral = ["llama-index-embeddings-mistralai"] embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] @@ -7033,4 +7081,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "43d568d296fc82dfe94a8b13877f0424d5e7980a0f0ca9d93dace983e9623584" +content-hash = "3fa6ef447847895b1a16b8b0422dd9e4fda1aaaadef3af71971eb412da89bf67" diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index f9302a5b..a51cc838 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -157,6 +157,23 @@ class EmbeddingComponent: api_key=settings.gemini.api_key, model_name=settings.gemini.embedding_model, ) + case "mistralai": + try: + from llama_index.embeddings.mistralai import ( # type: ignore + MistralAIEmbedding, + ) + except ImportError as e: + raise ImportError( + "Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`" + ) from e + + api_key = settings.openai.api_key + model = settings.openai.embedding_model + + self.embedding_model = MistralAIEmbedding( + api_key=api_key, + model=model, + ) case "mock": # Not a random number, is the dimensionality used by # the default embedding model diff --git 
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 9aba7af5..a3aa137f 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -198,7 +198,15 @@ class HuggingFaceSettings(BaseModel):
 
 class EmbeddingSettings(BaseModel):
     mode: Literal[
-        "huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock", "gemini","fireworks"
+        "huggingface",
+        "openai",
+        "azopenai",
+        "sagemaker",
+        "ollama",
+        "mock",
+        "gemini",
+        "mistralai",
+        "fireworks",
     ]
     ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
         "simple",
@@ -366,6 +374,10 @@ class AzureOpenAISettings(BaseModel):
 class UISettings(BaseModel):
     enabled: bool
     path: str
+    default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
+        "RAG",
+        description="The default mode.",
+    )
     default_chat_system_prompt: str = Field(
         None,
         description="The default system prompt to use for the chat mode.",
diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py
index 2ae2a229..c29e9258 100644
--- a/private_gpt/ui/ui.py
+++ b/private_gpt/ui/ui.py
@@ -99,8 +99,11 @@ class PrivateGptUi:
         self._selected_filename = None
 
         # Initialize system prompt based on default mode
-        self.mode = MODES[0]
-        self._system_prompt = self._get_default_system_prompt(self.mode)
+        default_mode_map = {mode.value: mode for mode in Modes}
+        self._default_mode = default_mode_map.get(
+            settings().ui.default_mode, Modes.RAG_MODE
+        )
+        self._system_prompt = self._get_default_system_prompt(self._default_mode)
 
     def _chat(
         self, message: str, history: list[list[str]], mode: Modes, *_: Any
@@ -390,7 +393,7 @@ class PrivateGptUi:
 
         with gr.Row(equal_height=False):
             with gr.Column(scale=3):
-                default_mode = MODES[0]
+                default_mode = self._default_mode
                 mode = gr.Radio(
                     [mode.value for mode in MODES],
                     label="Mode",
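With this change the UI resolves its startup mode from configuration instead of hard-coding `MODES[0]`, and silently falls back to RAG when the configured string is unknown. A self-contained sketch of that lookup, assuming a `Modes` enum whose values match the `Literal` strings above (the enum member names here are illustrative):

```python
# Sketch of the default-mode resolution added to PrivateGptUi.__init__.
from enum import Enum

class Modes(str, Enum):
    RAG_MODE = "RAG"
    SEARCH_MODE = "Search"
    BASIC_CHAT_MODE = "Basic"
    SUMMARIZE_MODE = "Summarize"

default_mode_map = {mode.value: mode for mode in Modes}

# A recognized setting selects the matching mode; an unrecognized one
# (e.g. a typo in settings.yaml) degrades to RAG.
print(default_mode_map.get("Summarize", Modes.RAG_MODE))  # Modes.SUMMARIZE_MODE
print(default_mode_map.get("Sumarize", Modes.RAG_MODE))   # Modes.RAG_MODE
```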
diff --git a/pyproject.toml b/pyproject.toml
index 9833004c..36518452 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
 llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
 llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
 llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
+llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true}
 llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
 llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
 llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
@@ -86,6 +87,7 @@ embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
+embeddings-mistral = ["llama-index-embeddings-mistralai"]
 vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
 vector-stores-chroma = ["llama-index-vector-stores-chroma"]
diff --git a/settings.yaml b/settings.yaml
index b4278a8a..4dc5ab3b 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -25,21 +25,23 @@ data:
 ui:
   enabled: true
   path: /
+  # "RAG", "Search", "Basic", or "Summarize"
+  default_mode: "RAG"
   default_chat_system_prompt: >
-    You are a helpful, respectful and honest assistant. 
+    You are a helpful, respectful and honest assistant.
     Always answer as helpfully as possible and follow ALL given instructions.
     Do not speculate or make up information.
     Do not reference any given instructions or context.
   default_query_system_prompt: >
-    You can only answer questions about the provided context. 
-    If you know the answer but it is not based in the provided context, don't provide 
+    You can only answer questions about the provided context.
+    If you know the answer but it is not based in the provided context, don't provide
     the answer, just state the answer is not in the context provided.
   default_summarization_system_prompt: >
-    Provide a comprehensive summary of the provided context information. 
+    Provide a comprehensive summary of the provided context information.
     The summary should cover all the key points and main ideas presented in
-    the original text, while also condensing the information into a concise 
+    the original text, while also condensing the information into a concise
     and easy-to-understand format. Please ensure that the summary includes
-    relevant details and examples that support the main ideas, while avoiding 
+    relevant details and examples that support the main ideas, while avoiding
     any unnecessary information or repetition.
   delete_file_button_enabled: true
   delete_all_files_button_enabled: true
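Together, `ui.default_mode` in settings.yaml and the new `UISettings.default_mode` field let a deployment boot the UI straight into Search, Basic, or Summarize instead of RAG (the new `embeddings-mistral` extra, meanwhile, is enabled with `poetry install --extras embeddings-mistral`). A hedged sketch of how the new field validates in isolation, reproducing only the fields relevant to this change in the same pydantic v2 style as settings.py:

```python
# Sketch of the new UISettings.default_mode field from this change.
from typing import Literal
from pydantic import BaseModel, Field, ValidationError

class UISettings(BaseModel):
    enabled: bool
    path: str
    default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
        "RAG",
        description="The default mode.",
    )

print(UISettings(enabled=True, path="/").default_mode)                         # "RAG"
print(UISettings(enabled=True, path="/", default_mode="Search").default_mode)  # "Search"

try:
    UISettings(enabled=True, path="/", default_mode="Chat")
except ValidationError:
    # Values outside the Literal are rejected when settings are loaded.
    print("invalid default_mode rejected")
```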