# privateGPT/docker-compose.yaml

services:

  #-----------------------------------
  #---- Private-GPT services ---------
  #-----------------------------------

  # Private-GPT application wired to an Ollama backend (the profiles below
  # cover the default, CPU, CUDA and host variants).
  # Built from Dockerfile.external; it reaches Ollama through the `ollama`
  # service defined in this file.
  private-gpt-ollama:
    build:
      context: .
      dockerfile: Dockerfile.external
    volumes:
      # Persist application data on the host.
      - ./local_data/:/home/worker/app/local_data
    ports:
      - "8001:8001"
    environment:
      # Quoted so the value is a string rather than a YAML integer.
      PORT: "8001"
      PGPT_PROFILES: docker
      PGPT_MODE: ollama
      PGPT_EMBED_MODE: ollama
      # `ollama` is the Compose service name of the proxy in front of Ollama.
      PGPT_OLLAMA_API_BASE: http://ollama:11434
    # Start the `ollama` proxy before this service. This only orders container
    # start-up; it does not wait for the backend to be ready to serve requests.
    depends_on:
      - ollama
    profiles:
      - ""
      - ollama
      - ollama-cuda
      - ollama-host

  # Private-GPT application in local mode.
  # Built from Dockerfile.local; on start it runs scripts/setup and then
  # launches the private_gpt module.
  private-gpt-local:
    build:
      context: .
      dockerfile: Dockerfile.local
    volumes:
      # Application data and model files are kept on the host.
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    # Explicit list form: a single shell runs the setup script and starts the
    # application only if setup exits successfully (`&&`).
    entrypoint:
      - sh
      - "-c"
      - ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
    ports:
      - "8001:8001"
    environment:
      # Quoted so the value is a string rather than a YAML integer.
      PORT: "8001"
      PGPT_PROFILES: local
      # Forwarded from the host environment. The `:-` default makes the empty
      # fallback explicit and avoids Compose's "variable is not set" warning.
      HF_TOKEN: "${HF_TOKEN:-}"
    profiles:
      - local

  #-----------------------------------
  #---- Ollama services --------------
  #-----------------------------------

  # Traefik reverse proxy published under the service name `ollama`.
  # Routing rules come from the mounted router.yml (file provider) and from
  # the Docker provider; other services reach it at http://ollama:11434.
  ollama:
    image: 'traefik:v2.10'
    command:
      # Static routing configuration, mounted read-only below.
      - '--providers.file.filename=/etc/router.yml'
      - '--log.level=ERROR'
      # NOTE(review): insecure API mode is unauthenticated and, per the Traefik
      # docs, served on port 8080 (published as host port 8081 below) — confirm
      # this is intended outside of development.
      - '--api.insecure=true'
      # Docker provider is on, but containers must opt in explicitly.
      - '--providers.docker=true'
      - '--providers.docker.exposedbydefault=false'
      # The `web` entrypoint listens on the standard Ollama port.
      - '--entrypoints.web.address=:11434'
    ports:
      # Host 11435 -> proxy entrypoint 11434.
      - '11435:11434'
      # Host 8081 -> container 8080.
      - '8081:8080'
    volumes:
      # Read-only Docker socket for the Docker provider.
      - '/var/run/docker.sock:/var/run/docker.sock:ro'
      - './.docker/router.yml:/etc/router.yml:ro'
    extra_hosts:
      # Makes the Docker host resolvable from inside the container
      # (presumably used by the `ollama-host` profile — verify in router.yml).
      - 'host.docker.internal:host-gateway'
    profiles:
      - ''
      - ollama
      - ollama-cuda
      - ollama-host

  # Ollama backend without GPU reservations (CPU mode).
  # Enabled for the default ("") and `ollama` profiles.
  ollama-cpu:
    profiles:
      - ''
      - ollama
    image: 'ollama/ollama:latest'
    volumes:
      # Host ./models directory is mounted as Ollama's data directory.
      - './models:/root/.ollama'

  # Ollama backend with one NVIDIA GPU reserved (CUDA mode).
  # Enabled only for the `ollama-cuda` profile.
  ollama-cuda:
    profiles:
      - ollama-cuda
    image: 'ollama/ollama:latest'
    volumes:
      # Host ./models directory is mounted as Ollama's data directory.
      - './models:/root/.ollama'
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a single device from the nvidia driver with GPU capability.
            - driver: nvidia
              count: 1
              capabilities:
                - gpu