GPU support in Docker, other Docker updates

commit d4e5feb43c (parent 1b03b369c0)
Dockerfile.local

@@ -1,5 +1,3 @@
-### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
-### You will run into a segfault in mac
 FROM python:3.11.6-slim-bookworm as base
 
 # Install poetry
@@ -20,31 +18,41 @@ RUN apt update && apt install -y \
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 
 #########################
 FROM base as dependencies
 #########################
 
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
 RUN poetry config installer.max-workers 10
 RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
 
 ################
 FROM base as app
 ################
 
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 EXPOSE 8080
 
 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser worker
 WORKDIR /home/worker/app
 
-RUN mkdir local_data; chown worker local_data
-RUN mkdir models; chown worker models
+RUN mkdir -p local_data; chown -R worker local_data
+RUN mkdir -p models; chown -R worker models
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
 COPY --chown=worker fern/ fern
 COPY --chown=worker *.yaml *.md ./
 COPY --chown=worker scripts/ scripts
 COPY --chown=worker pyproject.toml poetry.lock ./
 
+# Copy the entry point script into the container and make it executable
+COPY --chown=worker entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
 ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
 
 USER worker
-ENTRYPOINT python -m private_gpt
+ENTRYPOINT /entrypoint.sh python -m private_gpt
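
The notable change here is the ENTRYPOINT: instead of launching the server directly, the image now runs entrypoint.sh, which downloads the model files before handing off. A minimal sketch of building and running the image by hand, assuming the modified file is the repository's Dockerfile.local (the image tag is hypothetical), with the same mounts the compose file uses:

  # Build the CPU image
  docker build -f Dockerfile.local -t privategpt:local .

  # Run it with the compose file's volumes, profile and port mapping
  docker run --rm -p 8001:8080 \
    -e PGPT_PROFILES=docker \
    -v "$(pwd)/local_data:/home/worker/app/local_data" \
    -v "$(pwd)/models:/home/worker/app/models" \
    privategpt:local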

Dockerfile.local.gpu (new file, 75 lines)

@@ -0,0 +1,75 @@
+FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as base
+
+# For tzdata
+ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"
+
+# Install Python 3.11 and set it as default
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y python3.11 python3.11-venv python3-pip && \
+    ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
+    python3 --version
+
+# Install poetry
+RUN pip install pipx
+RUN python3 -m pipx ensurepath
+RUN pipx install poetry
+ENV PATH="/root/.local/bin:$PATH"
+ENV PATH=".venv/bin/:$PATH"
+
+# Dependencies to build llama-cpp
+RUN apt update && apt install -y \
+    libopenblas-dev\
+    ninja-build\
+    build-essential\
+    pkg-config\
+    wget\
+    gcc
+
+# https://python-poetry.org/docs/configuration/#virtualenvsin-project
+ENV POETRY_VIRTUALENVS_IN_PROJECT=true
+
+#########################
+FROM base as dependencies
+#########################
+
+WORKDIR /home/worker/app
+COPY pyproject.toml poetry.lock ./
+
+RUN poetry config installer.max-workers 10
+RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
+
+# Enable GPU support
+ENV LLAMA_CUBLAS=1
+RUN CMAKE_ARGS='-DLLAMA_CUBLAS=on' FORCE_CMAKE=1 poetry run pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python
+
+################
+FROM base as app
+################
+
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8080
+EXPOSE 8080
+
+# Prepare a non-root user
+RUN adduser worker
+WORKDIR /home/worker/app
+
+RUN mkdir -p local_data; chown -R worker local_data
+RUN mkdir -p models; chown -R worker models
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker fern/ fern
+COPY --chown=worker *.yaml *.md ./
+COPY --chown=worker scripts/ scripts
+COPY --chown=worker pyproject.toml poetry.lock ./
+
+# Copy the entry point script into the container and make it executable
+COPY --chown=worker entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
+
+ENTRYPOINT /entrypoint.sh python -m private_gpt
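
The GPU enablement happens in the dependencies stage: on top of the nvidia/cuda devel base image, llama-cpp-python is force-reinstalled with CMAKE_ARGS='-DLLAMA_CUBLAS=on' so the wheel is compiled against CUDA rather than the prebuilt CPU binary. A quick smoke test that the container can see the GPU at all — a sketch, assuming the NVIDIA Container Toolkit is installed on the host and using a hypothetical image tag:

  # Build the GPU image, then bypass the entrypoint to run nvidia-smi inside it
  docker build -f Dockerfile.local.gpu -t privategpt:gpu .
  docker run --rm --gpus all --entrypoint nvidia-smi privategpt:gpu

If the CUDA build took effect, llama.cpp should also report offloaded layers in the server's startup log.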

docker-compose-gpu.yaml (new file, 21 lines)

@@ -0,0 +1,21 @@
+services:
+  private-gpt-gpu:
+    build:
+      dockerfile: Dockerfile.local.gpu
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+      - ./models/:/home/worker/app/models
+    ports:
+      - 8001:8080
+    environment:
+      PORT: 8080
+      PGPT_PROFILES: docker
+      PGPT_LLM_MODE: llamacpp
+      PGPT_EMBEDDING_MODE: huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
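
The deploy.resources device reservation is what passes the GPU through to the container; Docker Compose honors it when the NVIDIA Container Toolkit is installed on the host. A typical invocation might be:

  # Build and start the GPU service; the UI then listens on host port 8001
  docker compose -f docker-compose-gpu.yaml up --build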

docker-compose.yaml

@@ -10,5 +10,6 @@ services:
     environment:
       PORT: 8080
       PGPT_PROFILES: docker
-      PGPT_MODE: local
+      PGPT_LLM_MODE: llamacpp
+      PGPT_EMBEDDING_MODE: huggingface
 

entrypoint.sh (new file, 17 lines)

@@ -0,0 +1,17 @@
+#!/bin/sh
+
+## Choose the model, tokenizer and prompt style
+export PGPT_HF_REPO_ID="TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+export PGPT_HF_MODEL_FILE="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+export PGPT_TOKENIZER="mistralai/Mistral-7B-Instruct-v0.2"
+export PGPT_PROMPT_STYLE="mistral"
+
+## Optionally, choose a different embedding model
+# export PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-small-en-v1.5"
+
+## Download the embedding and model files
+echo "Running setup script"
+poetry run python scripts/setup
+
+## Execute the main container command
+exec "$@"
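
Because the ENTRYPOINT passes `python -m private_gpt` as the script's arguments, `exec "$@"` replaces the shell with the server once setup finishes, so it runs as PID 1 and receives signals directly. Note that the unconditional exports overwrite any PGPT_HF_* values passed in with `docker run -e`; a hypothetical variant that lets run-time overrides win would use defaulted expansions:

  ## Hypothetical variant: keep values already present in the environment
  export PGPT_HF_REPO_ID="${PGPT_HF_REPO_ID:-TheBloke/Mistral-7B-Instruct-v0.2-GGUF}"
  export PGPT_HF_MODEL_FILE="${PGPT_HF_MODEL_FILE:-mistral-7b-instruct-v0.2.Q4_K_M.gguf}"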

settings-docker.yaml

@@ -3,14 +3,16 @@ server:
   port: ${PORT:8080}
 
 llm:
-  mode: ${PGPT_MODE:mock}
+  mode: ${PGPT_LLM_MODE:mock}
+  tokenizer: ${PGPT_TOKENIZER:mistralai/Mistral-7B-Instruct-v0.2}
 
 embedding:
-  mode: ${PGPT_MODE:sagemaker}
+  mode: ${PGPT_EMBEDDING_MODE:sagemaker}
 
 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  prompt_style: ${PGPT_PROMPT_STYLE:mistral}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.2-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.2.Q4_K_M.gguf}
 
 huggingface:
   embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
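
privateGPT's settings files expand ${ENV_VAR:default} placeholders at load time, so every value above can be overridden from the environment. Splitting the old single PGPT_MODE into PGPT_LLM_MODE and PGPT_EMBEDDING_MODE lets the two subsystems be configured independently; a sketch of a bare-metal run with the same profile:

  # Run the LLM through llama.cpp while embeddings use huggingface,
  # a combination the old single PGPT_MODE switch could not express
  export PGPT_PROFILES=docker
  export PGPT_LLM_MODE=llamacpp
  export PGPT_EMBEDDING_MODE=huggingface
  poetry run python -m private_gpt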