Mirror of https://github.com/imartinez/privateGPT.git (synced 2025-08-31 23:03:45 +00:00)

Merge 23704d23ad into b7ee43788d
Dockerfile.local-cuda (new file, 84 lines)
@@ -0,0 +1,84 @@
FROM nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 as base

# For tzdata
ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"

RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y git build-essential \
    python3 python3-pip python3.11-venv gcc wget \
    ocl-icd-opencl-dev opencl-headers clinfo \
    libclblast-dev libopenblas-dev \
    && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && python3 --version

# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry==1.8.3
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"

# Dependencies to build llama-cpp
RUN apt update && apt install -y \
    libopenblas-dev\
    ninja-build\
    build-essential\
    pkg-config\
    wget

# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
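As a quick sanity check, the `base` stage can be built on its own before the heavier dependency stage runs. The commands below are illustrative only: the `-f` path uses the filename under which the file appears in this commit, and the image tag is arbitrary.

```sh
# Build only the "base" stage of the multi-stage Dockerfile
docker build --target base -f Dockerfile.local-cuda -t private-gpt-cuda-base .

# Confirm that /usr/bin/python3 now points at Python 3.11 inside the stage
docker run --rm private-gpt-cuda-base python3 --version
```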
FROM base as dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"

# Enable GPU support
ENV CUDA_DOCKER_ARCH=all
ENV GGML_CUDA=1
ENV TOKENIZERS_PARALLELISM=true
RUN CMAKE_ARGS="-DGGML_CUDA=on" \
    poetry run pip install \
    --force-reinstall \
    --no-cache-dir \
    --verbose \
    llama-cpp-python==0.2.84 \
    numpy==1.26.0
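Because the extras are exposed through the `POETRY_EXTRAS` build argument, the installed components can be changed without editing the Dockerfile, and the CUDA-enabled `llama-cpp-python` build can be checked afterwards. Both commands are a sketch, not part of this change; the image tag is arbitrary and `llama_supports_gpu_offload` is assumed to be available in this `llama-cpp-python` version.

```sh
# Build the full image with a different set of Poetry extras (example value)
docker build -f Dockerfile.local-cuda \
  --build-arg POETRY_EXTRAS="ui llms-llama-cpp vector-stores-qdrant" \
  -t private-gpt-cuda .

# Rough check that llama-cpp-python was compiled with GPU offload support
docker run --rm --gpus all --entrypoint /home/worker/app/.venv/bin/python private-gpt-cuda \
  -c "import llama_cpp; print(llama_cpp.llama_supports_gpu_offload())"
```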
FROM base as app

ENV PYTHONUNBUFFERED=1
ENV PORT=8080
ENV APP_ENV=prod
ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
EXPOSE 8080

# Prepare a non-root user
# More info about how to configure UIDs and GIDs in Docker:
# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md

# Define the User ID (UID) for the non-root user
# UID 1000 is chosen to avoid conflicts with existing system users
ARG UID=1000

# Define the Group ID (GID) for the non-root user
# GID 65534 is often used for the 'nogroup' or 'nobody' group
ARG GID=65534

RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
WORKDIR /home/worker/app

RUN chown worker /home/worker/app
RUN mkdir local_data && chown worker local_data
RUN mkdir models && chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker *.yaml ./
COPY --chown=worker scripts/ scripts

USER worker
ENTRYPOINT python -m private_gpt
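Outside of Compose, the resulting image can also be run directly. This is only a sketch: it assumes the NVIDIA Container Toolkit is installed so that `--gpus all` works, and that a model has already been downloaded into `./models` (the Compose service runs `scripts/setup` for that step).

```sh
# Run the app stage directly, mounting the same host directories the Compose service uses
docker run --rm --gpus all \
  -p 8080:8080 \
  -v "$(pwd)/local_data:/home/worker/app/local_data" \
  -v "$(pwd)/models:/home/worker/app/models" \
  -e PGPT_PROFILES=local \
  -e HF_TOKEN="$HF_TOKEN" \
  private-gpt-cuda
```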
@@ -53,6 +53,26 @@ services:
    profiles:
      - llamacpp-cpu

  # Private-GPT service for the local mode (with CUDA support)
  # This service builds from a local Dockerfile and runs the application in local mode.
  private-gpt-llamacpp-cuda:
    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.1}-llamacpp-cuda
    build:
      context: .
      dockerfile: Dockerfile.llamacpp-cuda
    volumes:
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
    ports:
      - "8001:8001"
    environment:
      PORT: 8001
      PGPT_PROFILES: local
      HF_TOKEN: ${HF_TOKEN}
    profiles:
      - llamacpp-cuda

  #-----------------------------------
  #---- Ollama services --------------
  #-----------------------------------
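Before starting the new service, it can help to let Compose resolve the `llamacpp-cuda` profile and print the final configuration. Note that, depending on the Docker setup, GPU access may additionally require an NVIDIA runtime or an explicit device reservation, which this service definition does not include. These commands are a verification aid, not part of the change:

```sh
# Render the resolved configuration for the CUDA profile
HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cuda config

# Build from the local Dockerfile and start the service
HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cuda up --build
```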
@@ -82,6 +82,21 @@ HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cpu up
```
Replace `<your_hf_token>` with your actual Hugging Face token.

#### 2. LlamaCPP CUDA

**Description:**
This profile runs the Private-GPT services locally using `llama-cpp` and Hugging Face models.

**Requirements:**
A **Hugging Face Token (HF_TOKEN)** is required for accessing Hugging Face models. Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).

**Run:**
Start the services with your Hugging Face token using pre-built images:
```sh
HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cuda up
```
Replace `<your_hf_token>` with your actual Hugging Face token.
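Once the service is running, two quick checks help confirm that inference is actually using the GPU. The log line format comes from llama.cpp and can vary between versions, so the grep below is a heuristic, not an exact contract:

```sh
# On the host: watch GPU memory and utilization while a prompt is processed
nvidia-smi

# In the service logs: look for llama.cpp layer-offload messages
docker-compose logs private-gpt-llamacpp-cuda | grep -i "offloaded"
```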
## Building Locally

If you prefer to build Docker images locally, which is useful when making changes to the codebase or the Dockerfiles, follow these steps: