diff --git a/Dockerfile.local b/Dockerfile.local
index a5222116..6cb44cf2 100644
--- a/Dockerfile.local
+++ b/Dockerfile.local
@@ -1,5 +1,3 @@
-### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
-### You will run into a segfault in mac
 FROM python:3.11.6-slim-bookworm as base
 
 # Install poetry
@@ -20,31 +18,41 @@ RUN apt update && apt install -y \
 # https://python-poetry.org/docs/configuration/#virtualenvsin-project
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 
+#########################
 FROM base as dependencies
+#########################
+
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
+RUN poetry config installer.max-workers 10
 RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
 
+################
 FROM base as app
+################
 
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 EXPOSE 8080
 
 # Prepare a non-root user
-RUN adduser --system worker
+RUN adduser worker
 WORKDIR /home/worker/app
 
-RUN mkdir local_data; chown worker local_data
-RUN mkdir models; chown worker models
+RUN mkdir -p local_data; chown -R worker local_data
+RUN mkdir -p models; chown -R worker models
 COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
 COPY --chown=worker private_gpt/ private_gpt
 COPY --chown=worker fern/ fern
 COPY --chown=worker *.yaml *.md ./
 COPY --chown=worker scripts/ scripts
+COPY --chown=worker pyproject.toml poetry.lock ./
+
+# Copy the entry point script into the container and make it executable
+COPY --chown=worker entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
 
 ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
 
-USER worker
-ENTRYPOINT python -m private_gpt
\ No newline at end of file
+ENTRYPOINT /entrypoint.sh python -m private_gpt
\ No newline at end of file
diff --git a/Dockerfile.local.gpu b/Dockerfile.local.gpu
new file mode 100644
index 00000000..da6a73b6
--- /dev/null
+++ b/Dockerfile.local.gpu
@@ -0,0 +1,75 @@
+FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as base
+
+# For tzdata
+ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"
+
+# Install Python 3.11 and set it as default
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y python3.11 python3.11-venv python3-pip && \
+    ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
+    python3 --version
+
+# Install poetry
+RUN pip install pipx
+RUN python3 -m pipx ensurepath
+RUN pipx install poetry
+ENV PATH="/root/.local/bin:$PATH"
+ENV PATH=".venv/bin/:$PATH"
+
+# Dependencies to build llama-cpp
+RUN apt update && apt install -y \
+    libopenblas-dev\
+    ninja-build\
+    build-essential\
+    pkg-config\
+    wget\
+    gcc
+
+# https://python-poetry.org/docs/configuration/#virtualenvsin-project
+ENV POETRY_VIRTUALENVS_IN_PROJECT=true
+
+#########################
+FROM base as dependencies
+#########################
+
+WORKDIR /home/worker/app
+COPY pyproject.toml poetry.lock ./
+
+RUN poetry config installer.max-workers 10
+RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
+
+# Enable GPU support
+ENV LLAMA_CUBLAS=1
+RUN CMAKE_ARGS='-DLLAMA_CUBLAS=on' FORCE_CMAKE=1 poetry run pip install --upgrade --force-reinstall --no-cache-dir llama-cpp-python
+
+################
+FROM base as app
+################
+
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8080
+EXPOSE 8080
+
+# Prepare a non-root user
+RUN adduser worker
+WORKDIR /home/worker/app
+
+RUN mkdir -p local_data; chown -R worker local_data
+RUN mkdir -p models; chown -R worker models
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker fern/ fern
+COPY --chown=worker *.yaml *.md ./
+COPY --chown=worker scripts/ scripts
+COPY --chown=worker pyproject.toml poetry.lock ./
+
+# Copy the entry point script into the container and make it executable
+COPY --chown=worker entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
+
+ENTRYPOINT /entrypoint.sh python -m private_gpt
\ No newline at end of file
diff --git a/docker-compose-gpu.yaml b/docker-compose-gpu.yaml
new file mode 100644
index 00000000..d0271873
--- /dev/null
+++ b/docker-compose-gpu.yaml
@@ -0,0 +1,21 @@
+services:
+  private-gpt-gpu:
+    build:
+      dockerfile: Dockerfile.local.gpu
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+      - ./models/:/home/worker/app/models
+    ports:
+      - 8001:8080
+    environment:
+      PORT: 8080
+      PGPT_PROFILES: docker
+      PGPT_LLM_MODE: llamacpp
+      PGPT_EMBEDDING_MODE: huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index f86d2380..9c3bab2e 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -10,5 +10,6 @@ services:
     environment:
       PORT: 8080
       PGPT_PROFILES: docker
-      PGPT_MODE: local
+      PGPT_LLM_MODE: llamacpp
+      PGPT_EMBEDDING_MODE: huggingface
 
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 00000000..1a782f9d
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+## Choose the model, tokenizer and prompt style
+export PGPT_HF_REPO_ID="TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+export PGPT_HF_MODEL_FILE="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+export PGPT_TOKENIZER="mistralai/Mistral-7B-Instruct-v0.2"
+export PGPT_PROMPT_STYLE="mistral"
+
+## Optionally, choose a different embedding model
+# export PGPT_EMBEDDING_HF_MODEL_NAME="BAAI/bge-small-en-v1.5"
+
+## Download the embedding and model files
+echo "Running setup script"
+poetry run python scripts/setup
+
+## Execute the main container command
+exec "$@"
\ No newline at end of file
diff --git a/settings-docker.yaml b/settings-docker.yaml
index d71c4070..3269d45a 100644
--- a/settings-docker.yaml
+++ b/settings-docker.yaml
@@ -3,14 +3,16 @@ server:
   port: ${PORT:8080}
 
 llm:
-  mode: ${PGPT_MODE:mock}
+  mode: ${PGPT_LLM_MODE:mock}
+  tokenizer: ${PGPT_TOKENIZER:mistralai/Mistral-7B-Instruct-v0.2}
 
 embedding:
-  mode: ${PGPT_MODE:sagemaker}
+  mode: ${PGPT_EMBEDDING_MODE:sagemaker}
 
 llamacpp:
-  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  prompt_style: ${PGPT_PROMPT_STYLE:mistral}
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.2-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.2.Q4_K_M.gguf}
 
 huggingface:
   embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
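Usage sketch, as an illustration only: the commands below assume Docker Compose v2 (the "docker compose" plugin) is installed and, for the GPU variant, that the host has NVIDIA drivers and the NVIDIA Container Toolkit; none of that is established by the diff itself.

    # CPU image: the default docker-compose.yaml builds Dockerfile.local
    docker compose up --build

    # GPU image: docker-compose-gpu.yaml builds Dockerfile.local.gpu and reserves one NVIDIA GPU
    docker compose -f docker-compose-gpu.yaml up --build

On first start, entrypoint.sh runs "poetry run python scripts/setup" to download the embedding and model files, which persist across restarts through the ./models and ./local_data bind mounts; the GPU service is then reachable on host port 8001 (container port 8080).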