Mirror of https://github.com/imartinez/privateGPT.git (synced 2025-08-31 23:03:45 +00:00)

Merge 23704d23ad into b7ee43788d
Dockerfile.local-cuda (new file, 84 lines)
@@ -0,0 +1,84 @@
FROM nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 as base

# For tzdata
ENV DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC"

RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y git build-essential \
    python3 python3-pip python3.11-venv gcc wget \
    ocl-icd-opencl-dev opencl-headers clinfo \
    libclblast-dev libopenblas-dev \
    && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && python3 --version

# Install poetry
RUN pip install pipx
RUN python3 -m pipx ensurepath
RUN pipx install poetry==1.8.3
ENV PATH="/root/.local/bin:$PATH"
ENV PATH=".venv/bin/:$PATH"

# Dependencies to build llama-cpp
RUN apt update && apt install -y \
    libopenblas-dev\
    ninja-build\
    build-essential\
    pkg-config\
    wget

# https://python-poetry.org/docs/configuration/#virtualenvsin-project
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
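As a quick sanity check, the `base` stage can be built on its own before the heavier dependency stage runs. The commands below are illustrative only: the `-f` path uses the filename under which the file appears in this commit, and the image tag is arbitrary.

```sh
# Build only the "base" stage of the multi-stage Dockerfile
docker build --target base -f Dockerfile.local-cuda -t private-gpt-cuda-base .

# Confirm that /usr/bin/python3 now points at Python 3.11 inside the stage
docker run --rm private-gpt-cuda-base python3 --version
```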
FROM base as dependencies
WORKDIR /home/worker/app
COPY pyproject.toml poetry.lock ./

ARG POETRY_EXTRAS="ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
RUN poetry install --no-root --extras "${POETRY_EXTRAS}"

# Enable GPU support
ENV CUDA_DOCKER_ARCH=all
ENV GGML_CUDA=1
ENV TOKENIZERS_PARALLELISM=true
RUN CMAKE_ARGS="-DGGML_CUDA=on" \
    poetry run pip install \
    --force-reinstall \
    --no-cache-dir \
    --verbose \
    llama-cpp-python==0.2.84 \
    numpy==1.26.0
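Because the extras are exposed through the `POETRY_EXTRAS` build argument, the installed components can be changed without editing the Dockerfile, and the CUDA-enabled `llama-cpp-python` build can be checked afterwards. Both commands are a sketch, not part of this change; the image tag is arbitrary and `llama_supports_gpu_offload` is assumed to be available in this `llama-cpp-python` version.

```sh
# Build the full image with a different set of Poetry extras (example value)
docker build -f Dockerfile.local-cuda \
  --build-arg POETRY_EXTRAS="ui llms-llama-cpp vector-stores-qdrant" \
  -t private-gpt-cuda .

# Rough check that llama-cpp-python was compiled with GPU offload support
docker run --rm --gpus all --entrypoint /home/worker/app/.venv/bin/python private-gpt-cuda \
  -c "import llama_cpp; print(llama_cpp.llama_supports_gpu_offload())"
```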
FROM base as app

ENV PYTHONUNBUFFERED=1
ENV PORT=8080
ENV APP_ENV=prod
ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/"
EXPOSE 8080

# Prepare a non-root user
# More info about how to configure UIDs and GIDs in Docker:
# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md

# Define the User ID (UID) for the non-root user
# UID 1000 is chosen to avoid conflicts with existing system users
ARG UID=1000

# Define the Group ID (GID) for the non-root user
# GID 65534 is often used for the 'nogroup' or 'nobody' group
ARG GID=65534

RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker
WORKDIR /home/worker/app

RUN chown worker /home/worker/app
RUN mkdir local_data && chown worker local_data
RUN mkdir models && chown worker models
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
COPY --chown=worker private_gpt/ private_gpt
COPY --chown=worker *.yaml ./
COPY --chown=worker scripts/ scripts

USER worker
ENTRYPOINT python -m private_gpt
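Outside of Compose, the resulting image can also be run directly. This is only a sketch: it assumes the NVIDIA Container Toolkit is installed so that `--gpus all` works, and that a model has already been downloaded into `./models` (the Compose service runs `scripts/setup` for that step).

```sh
# Run the app stage directly, mounting the same host directories the Compose service uses
docker run --rm --gpus all \
  -p 8080:8080 \
  -v "$(pwd)/local_data:/home/worker/app/local_data" \
  -v "$(pwd)/models:/home/worker/app/models" \
  -e PGPT_PROFILES=local \
  -e HF_TOKEN="$HF_TOKEN" \
  private-gpt-cuda
```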
@@ -53,6 +53,26 @@ services:
    profiles:
      - llamacpp-cpu

  # Private-GPT service for the local mode (with CUDA support)
  # This service builds from a local Dockerfile and runs the application in local mode.
  private-gpt-llamacpp-cuda:
    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.1}-llamacpp-cuda
    build:
      context: .
      dockerfile: Dockerfile.llamacpp-cuda
    volumes:
      - ./local_data/:/home/worker/app/local_data
      - ./models/:/home/worker/app/models
    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
    ports:
      - "8001:8001"
    environment:
      PORT: 8001
      PGPT_PROFILES: local
      HF_TOKEN: ${HF_TOKEN}
    profiles:
      - llamacpp-cuda

  #-----------------------------------
  #---- Ollama services --------------
  #-----------------------------------
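Before starting the new service, it can help to let Compose resolve the `llamacpp-cuda` profile and print the final configuration. Note that, depending on the Docker setup, GPU access may additionally require an NVIDIA runtime or an explicit device reservation, which this service definition does not include. These commands are a verification aid, not part of the change:

```sh
# Render the resolved configuration for the CUDA profile
HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cuda config

# Build from the local Dockerfile and start the service
HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cuda up --build
```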
@@ -82,6 +82,21 @@ HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cpu up
```
Replace `<your_hf_token>` with your actual Hugging Face token.

#### 2. LlamaCPP CUDA

**Description:**
This profile runs the Private-GPT services locally using `llama-cpp` and Hugging Face models.

**Requirements:**
A **Hugging Face Token (HF_TOKEN)** is required for accessing Hugging Face models. Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models).

**Run:**
Start the services with your Hugging Face token using pre-built images:
```sh
HF_TOKEN=<your_hf_token> docker-compose --profile llamacpp-cuda up
```
Replace `<your_hf_token>` with your actual Hugging Face token.
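Once the service is running, two quick checks help confirm that inference is actually using the GPU. The log line format comes from llama.cpp and can vary between versions, so the grep below is a heuristic, not an exact contract:

```sh
# On the host: watch GPU memory and utilization while a prompt is processed
nvidia-smi

# In the service logs: look for llama.cpp layer-offload messages
docker-compose logs private-gpt-llamacpp-cuda | grep -i "offloaded"
```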
## Building Locally

If you prefer to build Docker images locally, which is useful when making changes to the codebase or the Dockerfiles, follow these steps: