privateGPT/docker-compose.yaml

services:

  #-----------------------------------
  #---- Private-GPT services ---------
  #-----------------------------------

  # Private-GPT service for the Ollama CPU and GPU modes
  # This service builds from an external Dockerfile and runs the Ollama mode.
  private-gpt-ollama:
    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama  # x-release-please-version
    user: root
    build:
      context: .
      dockerfile: Dockerfile.ollama
    volumes:
      - ./local_data:/home/worker/app/local_data
    ports:
      - "8001:8001"
    environment:
      PORT: 8001
      PGPT_PROFILES: docker
      PGPT_MODE: ollama
      PGPT_EMBED_MODE: ollama
      PGPT_OLLAMA_API_BASE: http://ollama:11434
      HF_TOKEN: ${HF_TOKEN:-}
    profiles:
      - ""
      - ollama-cpu
      - ollama-cuda
      - ollama-api
    depends_on:
      ollama:
        condition: service_healthy

  # Private-GPT service for the local mode
  # This service builds from a local Dockerfile and runs the application in local mode.
  private-gpt-llamacpp-cpu:
    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version
    user: root
    build:
      context: .
      dockerfile: Dockerfile.llamacpp-cpu
    volumes:
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
    ports:
      - "8001:8001"
    environment:
      PORT: 8001
      PGPT_PROFILES: local
      HF_TOKEN: ${HF_TOKEN:-}
    profiles:
      - llamacpp-cpu

  #-----------------------------------
  #---- Ollama services --------------
  #-----------------------------------

  # Traefik reverse proxy for the Ollama service
  # This will route requests to the Ollama service based on the profile.
  ollama:
    image: traefik:v2.10
    healthcheck:
      test: ["CMD", "sh", "-c", "wget -q --spider http://ollama:11434 || exit 1"]
      interval: 10s
      retries: 3
      start_period: 5s
      timeout: 5s
    ports:
      - "8080:8080"
    command:
      - "--providers.file.filename=/etc/router.yml"
      - "--log.level=ERROR"
      - "--api.insecure=true"
      - "--providers.docker=true"
      - "--providers.docker.exposedbydefault=false"
      - "--entrypoints.web.address=:11434"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./.docker/router.yml:/etc/router.yml:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    profiles:
      - ""
      - ollama-cpu
      - ollama-cuda
      - ollama-api

  # Ollama service for the CPU mode
  ollama-cpu:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ./models:/root/.ollama
    profiles:
      - ""
      - ollama-cpu

  # Ollama service for the CUDA mode
  ollama-cuda:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ./models:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles:
      - ollama-cuda