mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-04-27 19:28:38 +00:00
fix(deploy): improve Docker-Compose and quickstart on Docker (#2037)
* chore: update docker-compose with profiles
* docs: add quick start doc
This commit is contained in:
parent 6674b46fea
commit dae0727a1b
.docker/router.yml (new file, 16 lines)
@@ -0,0 +1,16 @@
```yaml
http:
  services:
    ollama:
      loadBalancer:
        healthCheck:
          interval: 5s
          path: /
        servers:
          - url: http://ollama-cpu:11434
          - url: http://ollama-cuda:11434
          - url: http://host.docker.internal:11434

  routers:
    ollama-router:
      rule: "PathPrefix(`/`)"
      service: ollama
```
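With this Traefik file-provider configuration in place, the `ollama` router load-balances requests across whichever backend container the active Compose profile starts. As a quick sanity check (a sketch assuming the default `11435:11434` host port mapping from the compose file below), you can list the models served through the proxy:

```sh
# Query the Ollama API through the Traefik proxy published on host port 11435.
# Adjust the port if you changed the mapping in the compose file.
curl http://localhost:11435/api/tags
```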
docker-compose.yaml
@@ -1,19 +1,99 @@
```yaml
services:
  #-----------------------------------
  #---- Private-GPT services ---------
  #-----------------------------------

  # Private-GPT service for the Ollama CPU and GPU modes
  # This service builds from an external Dockerfile and runs the Ollama mode.
  private-gpt-ollama:
    build:
      context: .
      dockerfile: Dockerfile.external
    volumes:
      - ./local_data/:/home/worker/app/local_data
    ports:
      - "8001:8001"
    environment:
      PORT: 8001
      PGPT_PROFILES: docker
      PGPT_MODE: ollama
      PGPT_EMBED_MODE: ollama
      PGPT_OLLAMA_API_BASE: http://ollama:11434
    profiles:
      - ""
      - ollama
      - ollama-cuda
      - ollama-host

  # Private-GPT service for the local mode
  # This service builds from a local Dockerfile and runs the application in local mode.
  private-gpt-local:
    build:
      context: .
      dockerfile: Dockerfile.local
    volumes:
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
    ports:
      - "8001:8001"
    environment:
      PORT: 8001
      PGPT_PROFILES: local
      HF_TOKEN: ${HF_TOKEN}
    profiles:
      - local

  #-----------------------------------
  #---- Ollama services --------------
  #-----------------------------------

  # Traefik reverse proxy for the Ollama service
  # This will route requests to the Ollama service based on the profile.
  ollama:
    image: traefik:v2.10
    ports:
      - "11435:11434"
      - "8081:8080"
    command:
      - "--providers.file.filename=/etc/router.yml"
      - "--log.level=ERROR"
      - "--api.insecure=true"
      - "--providers.docker=true"
      - "--providers.docker.exposedbydefault=false"
      - "--entrypoints.web.address=:11434"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./.docker/router.yml:/etc/router.yml:ro
    extra_hosts:
      - "host.docker.internal:host-gateway"
    profiles:
      - ""
      - ollama
      - ollama-cuda
      - ollama-host

  # Ollama service for the CPU mode
  ollama-cpu:
    image: ollama/ollama:latest
    volumes:
      - ./models:/root/.ollama
    profiles:
      - ""
      - ollama

  # Ollama service for the CUDA mode
  ollama-cuda:
    image: ollama/ollama:latest
    volumes:
      - ./models:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles:
      - ollama-cuda
```
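Every service now carries a `profiles:` list, so the set of containers that starts is determined by the Compose profile you activate. A quick way to preview which services a given invocation would bring up (standard Docker Compose commands, nothing PrivateGPT-specific):

```sh
# Services started by a plain `docker-compose up` (default profile).
docker-compose config --services

# Services started when the CUDA profile is activated instead.
docker-compose --profile ollama-cuda config --services
```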
Fern docs configuration, tabs section:
@@ -10,6 +10,9 @@ tabs:
```yaml
  overview:
    display-name: Overview
    icon: "fa-solid fa-home"
  quickstart:
    display-name: Quickstart
    icon: "fa-solid fa-rocket"
  installation:
    display-name: Installation
    icon: "fa-solid fa-download"
```

Navigation section:
@@ -32,6 +35,12 @@ navigation:
```yaml
        contents:
          - page: Introduction
            path: ./docs/pages/overview/welcome.mdx
  - tab: quickstart
    layout:
      - section: Getting started
        contents:
          - page: Quickstart
            path: ./docs/pages/quickstart/quickstart.mdx
  # How to install PrivateGPT, with FAQ and troubleshooting
  - tab: installation
    layout:
```
fern/docs/pages/quickstart/quickstart.mdx (new file, 85 lines)
@@ -0,0 +1,85 @@
This guide provides a quick start for running different profiles of PrivateGPT using Docker Compose.
The profiles cater to various environments, including Ollama setups (CPU, CUDA, macOS) and a fully local setup.

If you want to run PrivateGPT locally without Docker, refer to the [Local Installation Guide](/installation).

#### Prerequisites
- **Docker and Docker Compose:** Ensure both are installed on your system (see the quick check after this list).
  [Installation Guide for Docker](https://docs.docker.com/get-docker/), [Installation Guide for Docker Compose](https://docs.docker.com/compose/install/).
- **Clone PrivateGPT Repository:** Clone the PrivateGPT repository to your machine and navigate to the directory:
  ```sh
  git clone https://github.com/zylon-ai/private-gpt.git
  cd private-gpt
  ```
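A minimal way to confirm both prerequisites are available (this guide uses the standalone `docker-compose` binary throughout; with the Compose v2 plugin the equivalent is `docker compose`):

```sh
# Verify Docker and Docker Compose are installed and on the PATH.
docker --version
docker-compose --version
```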
---

## Ollama Setups (Recommended)

Ollama setups are recommended for their ease of use and optimized configurations. Ollama offers different profiles depending on your hardware capabilities and operating system.

### 1. Default/Ollama CPU

**Description:**
This profile runs the Ollama service using CPU resources. It is the standard configuration for running Ollama-based Private-GPT services without GPU acceleration.

**Run:**
To start the services, use either of the following commands:
```sh
docker-compose up
```
or
```sh
docker-compose --profile ollama up
```
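Once the containers are up, PrivateGPT is published on the host port defined in the compose file (8001 by default). A quick reachability check, a sketch you can adapt if you changed the port mapping:

```sh
# PrivateGPT should answer on http://localhost:8001 once startup has finished.
curl -I http://localhost:8001
```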
### 2. Ollama Nvidia CUDA

**Description:**
This profile leverages GPU acceleration with CUDA support, suitable for computationally intensive tasks that benefit from GPU resources.

**Requirements:**
- Ensure that your system has compatible GPU hardware and the necessary NVIDIA drivers installed. The installation process is detailed [here](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).

**Run:**
To start the services with CUDA support, use:
```sh
docker-compose --profile ollama-cuda up
```
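Before starting the CUDA profile, it can help to confirm that Docker can actually see the GPU. A common check (assumes the NVIDIA Container Toolkit is installed; not part of the original guide):

```sh
# If the drivers and container toolkit are set up correctly, this prints the GPU table.
docker run --rm --gpus all ubuntu nvidia-smi
```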
### 3. Ollama Host

**Description:**
This profile is designed for running PrivateGPT using Ollama installed on the host machine. This setup is particularly useful for macOS users, as Docker does not yet support Metal GPU acceleration.

**Requirements:**
- Install Ollama on your machine by following the instructions at [ollama.ai](https://ollama.ai/).
- Start the Ollama service with the command:
  ```sh
  OLLAMA_HOST=0.0.0.0 ollama serve
  ```

**Run:**
To start the services with the host configuration, use:
```sh
docker-compose --profile ollama-host up
```
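Because the compose file reaches the host's Ollama through `host.docker.internal` (routed via the Traefik service), it is worth confirming the host instance is listening before bringing the stack up. A minimal check, run on the host and assuming Ollama's default port 11434:

```sh
# Returns a JSON list of locally pulled models if `ollama serve` is running.
curl http://localhost:11434/api/tags
```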
---

## Fully Local Setups

### LlamaCPP + HuggingFace Embeddings

**Description:**
This profile runs the Private-GPT services locally using `llama-cpp` and Hugging Face models.

**Requirements:**
- **Hugging Face Token (HF_TOKEN):** Required for accessing Hugging Face models. Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).

**Run:**
Start the services with your Hugging Face token:
```sh
HF_TOKEN=<your_hf_token> docker-compose --profile local up
```
Replace `<your_hf_token>` with your actual Hugging Face token.
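If you prefer not to pass the token on the command line, Docker Compose also reads variables from a `.env` file in the project directory, which the `HF_TOKEN: ${HF_TOKEN}` entry in the compose file will pick up. A minimal sketch (`.env` is the Compose default file name; keep it out of version control):

```sh
# Create a .env file next to the compose file, then start the local profile.
echo "HF_TOKEN=<your_hf_token>" > .env
docker-compose --profile local up
```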