This commit is contained in:
Luka Boljevic 2024-03-20 20:58:30 +01:00 committed by GitHub
commit 732367a23e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 136 additions and 12 deletions

View File

@ -1,5 +1,3 @@
### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
### You will run into a segfault in mac
FROM python:3.11.6-slim-bookworm as base
# Install poetry
@ -20,31 +18,41 @@ RUN apt update && apt install -y \
# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
#########################
FROM base as dependencies
#########################
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./
RUN poetry config installer.max-workers 10
RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
################
FROM base as app
################
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080
# Prepare a non-root user
RUN adduser --system worker
RUN adduser worker
WORKDIR /home/worker/app
RUN mkdir local_data; chown worker local_data
RUN mkdir models; chown worker models
RUN mkdir -p local_data; chown -R worker local_data
RUN mkdir -p models; chown -R worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
COPY --chown=worker pyproject.toml poetry.lock ./
# Copy the entry point script into the container and make it executable
COPY --chown=worker entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
USER worker
ENTRYPOINT python -m private_gpt
ENTRYPOINT ["/entrypoint.sh", "python", "-m", "private_gpt"]

75
Dockerfile.local.gpu Normal file
View File

@ -0,0 +1,75 @@
### GPU-enabled image. Requires the NVIDIA Container Toolkit on the host.
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as base

# tzdata (a transitive dependency) blocks the build waiting for interactive
# input unless the frontend and timezone are preset.
ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"

# Install Python 3.11 from the deadsnakes PPA and make it the default
# python3, together with the toolchain needed to build llama-cpp.
# update + install + cleanup happen in one layer so the apt lists never
# persist in the image (and the cache can't go stale between layers).
RUN apt-get update && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        gcc \
        libopenblas-dev \
        ninja-build \
        pkg-config \
        python3-pip \
        python3.11 \
        python3.11-venv \
        wget && \
    ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
    python3 --version && \
    rm -rf /var/lib/apt/lists/*

# Install poetry via pipx so it stays isolated from the project virtualenv
RUN pip install --no-cache-dir pipx && \
    python3 -m pipx ensurepath && \
    pipx install poetry
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"

# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true

#########################
FROM base as dependencies
#########################
WORKDIR /home/worker/app
# Copy only the lockfiles first so the dependency layer is cached until
# they change.
COPY pyproject.toml poetry.lock ./
RUN poetry config installer.max-workers 10
RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"

# Enable GPU support: rebuild llama-cpp-python with cuBLAS so inference
# runs on the GPU instead of the CPU.
ENV LLAMA_CUBLAS=1
RUN CMAKE_ARGS='-DLLAMA_CUBLAS=on' FORCE_CMAKE=1 poetry run pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python

################
FROM base as app
################
ENV PYTHONUNBUFFERED=1
ENV PORT=8080
EXPOSE 8080

# Prepare a non-root user. --disabled-password/--gecos keep adduser from
# prompting interactively during the build.
RUN adduser --disabled-password --gecos "" worker
WORKDIR /home/worker/app

RUN mkdir -p local_data; chown -R worker local_data
RUN mkdir -p models; chown -R worker models

COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker fern/ fern
COPY --chown=worker *.yaml *.md ./
COPY --chown=worker scripts/ scripts
COPY --chown=worker pyproject.toml poetry.lock ./

# Copy the entry point script into the container and make it executable
COPY --chown=worker entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"

# Drop root before running the app (mirrors Dockerfile.local, which also
# switches to the worker user before its ENTRYPOINT).
USER worker
ENTRYPOINT ["/entrypoint.sh", "python", "-m", "private_gpt"]

21
docker-compose-gpu.yaml Normal file
View File

@ -0,0 +1,21 @@
services:
  private-gpt-gpu:
    build:
      # Build context defaults to the directory containing this file
      dockerfile: Dockerfile.local.gpu
    volumes:
      # Persist ingested documents / vector store and downloaded model files
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    ports:
      # Host 8001 -> container 8080. Quoted so YAML never parses the
      # "xx:yy" form as a base-60 number.
      - "8001:8080"
    environment:
      PORT: 8080
      PGPT_PROFILES: docker
      PGPT_LLM_MODE: llamacpp
      PGPT_EMBEDDING_MODE: huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU (requires the NVIDIA Container Toolkit)
            - driver: nvidia
              count: 1
              capabilities: [gpu]

View File

@ -10,5 +10,6 @@ services:
environment:
PORT: 8080
PGPT_PROFILES: docker
PGPT_MODE: local
PGPT_LLM_MODE: llamacpp
PGPT_EMBEDDING_MODE: huggingface

17
entrypoint.sh Normal file
View File

@ -0,0 +1,17 @@
#!/bin/sh
# Container entry point: exports the model configuration, downloads the
# model and embedding files if needed, then hands control to the command
# passed as arguments (see ENTRYPOINT in the Dockerfiles).

# Fail fast: abort startup if the setup below fails instead of launching
# the app with a half-finished environment.
set -e

## Choose the model, tokenizer and prompt style
export PGPT_HF_REPO_ID="TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
export PGPT_HF_MODEL_FILE="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
export PGPT_TOKENIZER="mistralai/Mistral-7B-Instruct-v0.2"
export PGPT_PROMPT_STYLE="mistral"

## Optionally, choose a different embedding model
# export PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-small-en-v1.5"

## Download the embedding and model files
echo "Running setup script"
poetry run python scripts/setup

## Execute the main container command; exec replaces this shell so the
## app becomes PID 1 and receives signals from `docker stop`.
exec "$@"

View File

@ -3,14 +3,16 @@ server:
port: ${PORT:8080}
llm:
mode: ${PGPT_MODE:mock}
mode: ${PGPT_LLM_MODE:mock}
tokenizer: ${PGPT_TOKENIZER:mistralai/Mistral-7B-Instruct-v0.2}
embedding:
mode: ${PGPT_MODE:sagemaker}
mode: ${PGPT_EMBEDDING_MODE:sagemaker}
llamacpp:
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
prompt_style: ${PGPT_PROMPT_STYLE:mistral}
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.2-GGUF}
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.2.Q4_K_M.gguf}
huggingface:
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}