From dae0727a1b4abd35d2b0851fe30e0a4ed67e0fbb Mon Sep 17 00:00:00 2001
From: Javier Martinez
Date: Mon, 5 Aug 2024 16:17:36 +0200
Subject: [PATCH] fix(deploy): improve Docker-Compose and quickstart on Docker (#2037)

* chore: update docker-compose with profiles

* docs: add quick start doc
---
 .docker/router.yml                        | 16 ++++
 docker-compose.yaml                       | 90 +++++++++++++++++++++--
 fern/docs.yml                             |  9 +++
 fern/docs/pages/quickstart/quickstart.mdx | 85 +++++++++++++++++++++
 4 files changed, 195 insertions(+), 5 deletions(-)
 create mode 100644 .docker/router.yml
 create mode 100644 fern/docs/pages/quickstart/quickstart.mdx

diff --git a/.docker/router.yml b/.docker/router.yml
new file mode 100644
index 00000000..3b55df9e
--- /dev/null
+++ b/.docker/router.yml
@@ -0,0 +1,16 @@
+http:
+  services:
+    ollama:
+      loadBalancer:
+        healthCheck:
+          interval: 5s
+          path: /
+        servers:
+          - url: http://ollama-cpu:11434
+          - url: http://ollama-cuda:11434
+          - url: http://host.docker.internal:11434
+
+  routers:
+    ollama-router:
+      rule: "PathPrefix(`/`)"
+      service: ollama
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 517af659..63913678 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,19 +1,99 @@
 services:
-  private-gpt:
+
+  #-----------------------------------
+  #---- Private-GPT services ---------
+  #-----------------------------------
+
+  # Private-GPT service for the Ollama CPU and GPU modes
+  # This service builds from an external Dockerfile and runs the Ollama mode.
+  private-gpt-ollama:
     build:
+      context: .
       dockerfile: Dockerfile.external
     volumes:
       - ./local_data/:/home/worker/app/local_data
     ports:
-      - 8001:8001
+      - "8001:8001"
     environment:
       PORT: 8001
       PGPT_PROFILES: docker
       PGPT_MODE: ollama
       PGPT_EMBED_MODE: ollama
-  ollama:
-    image: ollama/ollama:latest
+      PGPT_OLLAMA_API_BASE: http://ollama:11434
+    profiles:
+      - ""
+      - ollama
+      - ollama-cuda
+      - ollama-host
+
+  # Private-GPT service for the local mode
+  # This service builds from a local Dockerfile and runs the application in local mode.
+  private-gpt-local:
+    build:
+      context: .
+      dockerfile: Dockerfile.local
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+      - ./models/:/home/worker/app/models
+    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
     ports:
-      - 11434:11434
+      - "8001:8001"
+    environment:
+      PORT: 8001
+      PGPT_PROFILES: local
+      HF_TOKEN: ${HF_TOKEN}
+    profiles:
+      - local
+
+  #-----------------------------------
+  #---- Ollama services --------------
+  #-----------------------------------
+
+  # Traefik reverse proxy for the Ollama service
+  # This will route requests to the Ollama service based on the profile.
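+  # The Private-GPT services above always talk to http://ollama:11434 (PGPT_OLLAMA_API_BASE);
+  # this proxy answers on that service name and forwards requests to whichever Ollama backend
+  # below is healthy, as configured in .docker/router.yml.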
+  ollama:
+    image: traefik:v2.10
+    ports:
+      - "11435:11434"
+      - "8081:8080"
+    command:
+      - "--providers.file.filename=/etc/router.yml"
+      - "--log.level=ERROR"
+      - "--api.insecure=true"
+      - "--providers.docker=true"
+      - "--providers.docker.exposedbydefault=false"
+      - "--entrypoints.web.address=:11434"
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./.docker/router.yml:/etc/router.yml:ro
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    profiles:
+      - ""
+      - ollama
+      - ollama-cuda
+      - ollama-host
+
+  # Ollama service for the CPU mode
+  ollama-cpu:
+    image: ollama/ollama:latest
     volumes:
       - ./models:/root/.ollama
+    profiles:
+      - ""
+      - ollama
+
+  # Ollama service for the CUDA mode
+  ollama-cuda:
+    image: ollama/ollama:latest
+    volumes:
+      - ./models:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    profiles:
+      - ollama-cuda
\ No newline at end of file
diff --git a/fern/docs.yml b/fern/docs.yml
index e0a5c423..d13822c8 100644
--- a/fern/docs.yml
+++ b/fern/docs.yml
@@ -10,6 +10,9 @@ tabs:
   overview:
     display-name: Overview
     icon: "fa-solid fa-home"
+  quickstart:
+    display-name: Quickstart
+    icon: "fa-solid fa-rocket"
   installation:
     display-name: Installation
     icon: "fa-solid fa-download"
@@ -32,6 +35,12 @@ navigation:
     contents:
       - page: Introduction
         path: ./docs/pages/overview/welcome.mdx
+  - tab: quickstart
+    layout:
+      - section: Getting started
+        contents:
+          - page: Quickstart
+            path: ./docs/pages/quickstart/quickstart.mdx
   # How to install PrivateGPT, with FAQ and troubleshooting
   - tab: installation
     layout:
diff --git a/fern/docs/pages/quickstart/quickstart.mdx b/fern/docs/pages/quickstart/quickstart.mdx
new file mode 100644
index 00000000..702d8ed7
--- /dev/null
+++ b/fern/docs/pages/quickstart/quickstart.mdx
@@ -0,0 +1,85 @@
+This guide provides a quick start for running different profiles of PrivateGPT using Docker Compose.
+The profiles cater to various environments, including Ollama setups (CPU, CUDA, macOS) and a fully local setup.
+
+If you want to run PrivateGPT locally without Docker, refer to the [Local Installation Guide](/installation).
+
+#### Prerequisites
+- **Docker and Docker Compose:** Ensure both are installed on your system.
+[Installation Guide for Docker](https://docs.docker.com/get-docker/), [Installation Guide for Docker Compose](https://docs.docker.com/compose/install/).
+- **Clone PrivateGPT Repository:** Clone the PrivateGPT repository to your machine and navigate to the directory:
+  ```sh
+  git clone https://github.com/zylon-ai/private-gpt.git
+  cd private-gpt
+  ```
+
+---
+
+## Ollama Setups (Recommended)
+
+Ollama setups are recommended for their ease of use and optimized configurations. Ollama offers different profiles depending on your hardware capabilities and operating system.
+
+### 1. Default/Ollama CPU
+
+**Description:**
+This profile runs the Ollama service using CPU resources. It is the standard configuration for running Ollama-based PrivateGPT services without GPU acceleration.
+
+**Run:**
+To start the services, use either of the following commands:
+```sh
+docker-compose up
+```
+or
+```sh
+docker-compose --profile ollama up
+```
+
+### 2. Ollama NVIDIA CUDA
+
+**Description:**
+This profile leverages GPU acceleration with CUDA support, suitable for computationally intensive tasks that benefit from GPU resources.
+
+**Requirements:**
+- Ensure that your system has compatible GPU hardware and the necessary NVIDIA drivers installed. The installation process is detailed [here](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html). A quick way to confirm that Docker can reach the GPU is shown below.
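+
+As a quick sanity check (this assumes the NVIDIA Container Toolkit is already installed and configured for Docker), you can verify that containers can see the GPU before starting the stack:
+```sh
+docker run --rm --gpus all ubuntu nvidia-smi
+```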
+
+**Run:**
+To start the services with CUDA support, use:
+```sh
+docker-compose --profile ollama-cuda up
+```
+
+### 3. Ollama Host
+
+**Description:**
+This profile is designed for running PrivateGPT using Ollama installed on the host machine. This setup is particularly useful for macOS users, as Docker does not yet support Metal GPU acceleration.
+
+**Requirements:**
+- Install Ollama on your machine by following the instructions at [ollama.ai](https://ollama.ai/).
+- Start the Ollama service with the command:
+```sh
+OLLAMA_HOST=0.0.0.0 ollama serve
+```
+
+**Run:**
+To start the services with the host configuration, use:
+```sh
+docker-compose --profile ollama-host up
+```
+
+---
+
+## Fully Local Setups
+
+### LlamaCPP + HuggingFace Embeddings
+
+**Description:**
+This profile runs the PrivateGPT services locally using `llama-cpp` and Hugging Face models.
+
+**Requirements:**
+- **Hugging Face Token (HF_TOKEN):** Required for accessing Hugging Face models. Obtain your token by following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).
+
+**Run:**
+Start the services with your Hugging Face token:
+```sh
+HF_TOKEN=<your_hf_token> docker-compose --profile local up
+```
+Replace `<your_hf_token>` with your actual Hugging Face token.
\ No newline at end of file
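Whichever profile you start, the PrivateGPT container publishes port 8001 on the host (see the `ports` mapping in `docker-compose.yaml` above), so the web UI and API should be reachable at http://localhost:8001 once the containers are up. A minimal smoke test sketch, assuming your build exposes the default `/health` route:

```sh
# Probe the API (expected response when ready: {"status":"ok"}; adjust the path if your build differs)
curl -s http://localhost:8001/health
# Or open the web UI in a browser
open http://localhost:8001    # use xdg-open on Linux
```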