From 23704d23ad867a182c368b52f9b30af3ab1e0196 Mon Sep 17 00:00:00 2001
From: Javier Martinez
Date: Mon, 5 Aug 2024 17:48:14 +0200
Subject: [PATCH] feat: add new cuda profile

---
 docker-compose.yaml                       | 28 ++++++++++++++++++++++++++++
 fern/docs/pages/quickstart/quickstart.mdx | 15 +++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index a5df4647..3a021f8f 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -48,6 +48,34 @@ services:
     profiles:
       - llamacpp-cpu
 
+  # Private-GPT service for the local mode (with CUDA support)
+  # This service builds from a local Dockerfile and runs the application in local mode.
+  private-gpt-llamacpp-cuda:
+    image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.1}-llamacpp-cuda
+    build:
+      context: .
+      dockerfile: Dockerfile.llamacpp-cuda
+    volumes:
+      - ./local_data/:/home/worker/app/local_data
+      - ./models/:/home/worker/app/models
+    entrypoint: sh -c ".venv/bin/python scripts/setup && .venv/bin/python -m private_gpt"
+    ports:
+      - "8001:8001"
+    environment:
+      PORT: 8001
+      PGPT_PROFILES: local
+      HF_TOKEN: ${HF_TOKEN}
+    # Reserve an NVIDIA GPU for the container; without this the CUDA image has
+    # no GPU access at runtime (host needs the NVIDIA Container Toolkit).
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
+    profiles:
+      - llamacpp-cuda
+
   #-----------------------------------
   #---- Ollama services --------------
   #-----------------------------------
diff --git a/fern/docs/pages/quickstart/quickstart.mdx b/fern/docs/pages/quickstart/quickstart.mdx
index 09877ce2..f0c05684 100644
--- a/fern/docs/pages/quickstart/quickstart.mdx
+++ b/fern/docs/pages/quickstart/quickstart.mdx
@@ -82,6 +82,21 @@ HF_TOKEN=<your-token> docker-compose --profile llamacpp-cpu up
 ```
 Replace `<your-token>` with your actual Hugging Face token.
 
+#### 2. LlamaCPP CUDA
+
+**Description:**
+This profile runs the Private-GPT services locally using `llama-cpp` with CUDA GPU acceleration and Hugging Face models.
+
+**Requirements:**
+A **Hugging Face Token (HF_TOKEN)** is required for accessing Hugging Face models. Obtain your token following [this guide](/installation/getting-started/troubleshooting#downloading-gated-and-private-models). A CUDA-capable NVIDIA GPU and the NVIDIA Container Toolkit are also required on the host.
+
+**Run:**
+Start the services with your Hugging Face token using pre-built images:
+```sh
+HF_TOKEN=<your-token> docker-compose --profile llamacpp-cuda up
+```
+Replace `<your-token>` with your actual Hugging Face token.
+
 ## Building Locally
 
 If you prefer to build Docker images locally, which is useful when making changes to the codebase or the Dockerfiles, follow these steps: