GPU support in Docker, other Docker updates

commit d4e5feb43c (parent 1b03b369c0)
Dockerfile.local

@@ -1,5 +1,3 @@
-### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
-### You will run into a segfault in mac
 FROM python:3.11.6-slim-bookworm as base
 
 # Install poetry
@@ -20,31 +18,41 @@ RUN apt update && apt install -y \
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 
 #########################
 FROM base as dependencies
 #########################
 
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
 RUN poetry config installer.max-workers 10
 RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
 
 ################
 FROM base as app
 ################
 
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 EXPOSE 8080
 
 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser worker
 WORKDIR /home/worker/app
 
-RUN mkdir local_data; chown worker local_data
-RUN mkdir models; chown worker models
+RUN mkdir -p local_data; chown -R worker local_data
+RUN mkdir -p models; chown -R worker models
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
 COPY --chown=worker fern/ fern
 COPY --chown=worker *.yaml *.md ./
 COPY --chown=worker scripts/ scripts
 COPY --chown=worker pyproject.toml poetry.lock ./
 
+# Copy the entry point script into the container and make it executable
+COPY --chown=worker entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
 ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
 
 USER worker
-ENTRYPOINT python -m private_gpt
+ENTRYPOINT /entrypoint.sh python -m private_gpt
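
The notable change here is the ENTRYPOINT: instead of launching the server directly, the image now runs entrypoint.sh, which downloads the model files before handing off. A minimal sketch of building and running the image by hand, assuming the modified file is the repository's Dockerfile.local (the image tag is hypothetical), with the same mounts the compose file uses:

  # Build the CPU image
  docker build -f Dockerfile.local -t privategpt:local .

  # Run it with the compose file's volumes, profile and port mapping
  docker run --rm -p 8001:8080 \
    -e PGPT_PROFILES=docker \
    -v "$(pwd)/local_data:/home/worker/app/local_data" \
    -v "$(pwd)/models:/home/worker/app/models" \
    privategpt:local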

Dockerfile.local.gpu (new file, 75 lines)

@@ -0,0 +1,75 @@
+FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as base
+
+# For tzdata
+ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"
+
+# Install Python 3.11 and set it as default
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y python3.11 python3.11-venv python3-pip && \
+    ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
+    python3 --version
+
+# Install poetry
+RUN pip install pipx
+RUN python3 -m pipx ensurepath
+RUN pipx install poetry
+ENV PATH="/root/.local/bin:$PATH"
+ENV PATH=".venv/bin/:$PATH"
+
+# Dependencies to build llama-cpp
+RUN apt update && apt install -y \
+    libopenblas-dev\
+    ninja-build\
+    build-essential\
+    pkg-config\
+    wget\
+    gcc
+
+# https://python-poetry.org/docs/configuration/#virtualenvsin-project
+ENV POETRY_VIRTUALENVS_IN_PROJECT=true
+
+#########################
+FROM base as dependencies
+#########################
+
+WORKDIR /home/worker/app
+COPY pyproject.toml poetry.lock ./
+
+RUN poetry config installer.max-workers 10
+RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
+
+# Enable GPU support
+ENV LLAMA_CUBLAS=1
+RUN CMAKE_ARGS='-DLLAMA_CUBLAS=on' FORCE_CMAKE=1 poetry run pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python
+
+################
+FROM base as app
+################
+
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8080
+EXPOSE 8080
+
+# Prepare a non-root user
+RUN adduser worker
+WORKDIR /home/worker/app
+
+RUN mkdir -p local_data; chown -R worker local_data
+RUN mkdir -p models; chown -R worker models
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker fern/ fern
+COPY --chown=worker *.yaml *.md ./
+COPY --chown=worker scripts/ scripts
+COPY --chown=worker pyproject.toml poetry.lock ./
+
+# Copy the entry point script into the container and make it executable
+COPY --chown=worker entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
+
+ENTRYPOINT /entrypoint.sh python -m private_gpt
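
The GPU enablement happens in the dependencies stage: on top of the nvidia/cuda devel base image, llama-cpp-python is force-reinstalled with CMAKE_ARGS='-DLLAMA_CUBLAS=on' so the wheel is compiled against CUDA rather than the prebuilt CPU binary. A quick smoke test that the container can see the GPU at all — a sketch, assuming the NVIDIA Container Toolkit is installed on the host and using a hypothetical image tag:

  # Build the GPU image, then bypass the entrypoint to run nvidia-smi inside it
  docker build -f Dockerfile.local.gpu -t privategpt:gpu .
  docker run --rm --gpus all --entrypoint nvidia-smi privategpt:gpu

If the CUDA build took effect, llama.cpp should also report offloaded layers in the server's startup log.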

docker-compose-gpu.yaml (new file, 21 lines)

@@ -0,0 +1,21 @@
+services:
+  private-gpt-gpu:
+    build:
+      dockerfile: Dockerfile.local.gpu
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+      - ./models/:/home/worker/app/models
+    ports:
+      - 8001:8080
+    environment:
+      PORT: 8080
+      PGPT_PROFILES: docker
+      PGPT_LLM_MODE: llamacpp
+      PGPT_EMBEDDING_MODE: huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
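
The deploy.resources device reservation is what passes the GPU through to the container; Docker Compose honors it when the NVIDIA Container Toolkit is installed on the host. A typical invocation might be:

  # Build and start the GPU service; the UI then listens on host port 8001
  docker compose -f docker-compose-gpu.yaml up --build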

docker-compose.yaml

@@ -10,5 +10,6 @@ services:
     environment:
       PORT: 8080
       PGPT_PROFILES: docker
-      PGPT_MODE: local
+      PGPT_LLM_MODE: llamacpp
+      PGPT_EMBEDDING_MODE: huggingface
 

entrypoint.sh (new file, 17 lines)

@@ -0,0 +1,17 @@
+#!/bin/sh
+
+## Choose the model, tokenizer and prompt style
+export PGPT_HF_REPO_ID="TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+export PGPT_HF_MODEL_FILE="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+export PGPT_TOKENIZER="mistralai/Mistral-7B-Instruct-v0.2"
+export PGPT_PROMPT_STYLE="mistral"
+
+## Optionally, choose a different embedding model
+# export PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-small-en-v1.5"
+
+## Download the embedding and model files
+echo "Running setup script"
+poetry run python scripts/setup
+
+## Execute the main container command
+exec "$@"
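
Because the ENTRYPOINT passes `python -m private_gpt` as the script's arguments, `exec "$@"` replaces the shell with the server once setup finishes, so it runs as PID 1 and receives signals directly. Note that the unconditional exports overwrite any PGPT_HF_* values passed in with `docker run -e`; a hypothetical variant that lets run-time overrides win would use defaulted expansions:

  ## Hypothetical variant: keep values already present in the environment
  export PGPT_HF_REPO_ID="${PGPT_HF_REPO_ID:-TheBloke/Mistral-7B-Instruct-v0.2-GGUF}"
  export PGPT_HF_MODEL_FILE="${PGPT_HF_MODEL_FILE:-mistral-7b-instruct-v0.2.Q4_K_M.gguf}"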

settings-docker.yaml

@@ -3,14 +3,16 @@ server:
   port: ${PORT:8080}
 
 llm:
-  mode: ${PGPT_MODE:mock}
+  mode: ${PGPT_LLM_MODE:mock}
+  tokenizer: ${PGPT_TOKENIZER:mistralai/Mistral-7B-Instruct-v0.2}
 
 embedding:
-  mode: ${PGPT_MODE:sagemaker}
+  mode: ${PGPT_EMBEDDING_MODE:sagemaker}
 
 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  prompt_style: ${PGPT_PROMPT_STYLE:mistral}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.2-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.2.Q4_K_M.gguf}
 
 huggingface:
   embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
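
privateGPT's settings files expand ${ENV_VAR:default} placeholders at load time, so every value above can be overridden from the environment. Splitting the old single PGPT_MODE into PGPT_LLM_MODE and PGPT_EMBEDDING_MODE lets the two subsystems be configured independently; a sketch of a bare-metal run with the same profile:

  # Run the LLM through llama.cpp while embeddings use huggingface,
  # a combination the old single PGPT_MODE switch could not express
  export PGPT_PROFILES=docker
  export PGPT_LLM_MODE=llamacpp
  export PGPT_EMBEDDING_MODE=huggingface
  poetry run python -m private_gpt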