privateGPT/docker-compose.yaml
meng-hui 940bdd49af
fix: 503 when private gpt gets ollama service (#2104)
When running private gpt with external ollama API, ollama service
returns 503 on startup because ollama service (traefik) might not be
ready.

- Add healthcheck to ollama service to test for connection to external
ollama
- private-gpt-ollama service depends on ollama being service_healthy

Co-authored-by: Koh Meng Hui <kohmh@duck.com>
2024-10-17 12:44:28 +02:00

117 lines
3.1 KiB
YAML

services:
#-----------------------------------
#---- Private-GPT services ---------
#-----------------------------------
# Private-GPT service for the Ollama CPU and GPU modes
# This service builds from an external Dockerfile and runs the Ollama mode.
private-gpt-ollama:
image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version
user: root
build:
context: .
dockerfile: Dockerfile.ollama
volumes:
- ./local_data:/home/worker/app/local_data
ports:
- "8001:8001"
environment:
PORT: 8001
PGPT_PROFILES: docker
PGPT_MODE: ollama
PGPT_EMBED_MODE: ollama
PGPT_OLLAMA_API_BASE: http://ollama:11434
HF_TOKEN: ${HF_TOKEN:-}
profiles:
- ""
- ollama-cpu
- ollama-cuda
- ollama-api
depends_on:
ollama:
condition: service_healthy
# Private-GPT service for the local mode
# This service builds from a local Dockerfile and runs the application in local mode.
private-gpt-llamacpp-cpu:
image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version
user: root
build:
context: .
dockerfile: Dockerfile.llamacpp-cpu
volumes:
- ./local_data/:/home/worker/app/local_data
- ./models/:/home/worker/app/models
entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
ports:
- "8001:8001"
environment:
PORT: 8001
PGPT_PROFILES: local
HF_TOKEN: ${HF_TOKEN:-}
profiles:
- llamacpp-cpu
#-----------------------------------
#---- Ollama services --------------
#-----------------------------------
# Traefik reverse proxy for the Ollama service
# This will route requests to the Ollama service based on the profile.
ollama:
image: traefik:v2.10
healthcheck:
test: ["CMD", "sh", "-c", "wget -q --spider http://ollama:11434 || exit 1"]
interval: 10s
retries: 3
start_period: 5s
timeout: 5s
ports:
- "8080:8080"
command:
- "--providers.file.filename=/etc/router.yml"
- "--log.level=ERROR"
- "--api.insecure=true"
- "--providers.docker=true"
- "--providers.docker.exposedbydefault=false"
- "--entrypoints.web.address=:11434"
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
- ./.docker/router.yml:/etc/router.yml:ro
extra_hosts:
- "host.docker.internal:host-gateway"
profiles:
- ""
- ollama-cpu
- ollama-cuda
- ollama-api
# Ollama service for the CPU mode
ollama-cpu:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
profiles:
- ""
- ollama-cpu
# Ollama service for the CUDA mode
ollama-cuda:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ./models:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
profiles:
- ollama-cuda