fix: Docker and sagemaker setup (#1118)

* fix: docker copying extra files
* feat: allow configuring mode through env vars
* feat: Attempt to build and tag a docker image
* fix: run docker on release
* fix: typing in prompt transformation
* chore: remove tutorial comments
2025-08-22 09:17:04 +00:00 · 2023-10-27 13:29:29 +02:00 · 2023-10-27 13:29:29 +02:00 · 895588b82a
commit 895588b82a
parent 768e5ff505
7 changed files with 67 additions and 15 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -3,6 +3,9 @@ models
 .github
 .vscode
 .DS_Store
+.mypy_cache
+.ruff_cache
+local_data
 terraform
 tests
 Dockerfile
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -0,0 +1,47 @@
+name: Create and publish a Docker image
+# Builds on releases, pushes to main and pull requests; publishes unless the PR comes from a fork.
+on:
+  release:
+    types: [ published ]
+  push:
+    branches:
+      - main
+  pull_request:  # fork PRs are built but not pushed (see the push input of the last step)
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}  # image is named after the owner/repo slug
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write  # needed to push to ghcr.io; GitHub caps this to read-only for fork PRs
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta  # outputs are consumed below as steps.meta.outputs.tags / .labels
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: ${{ !github.event.pull_request.head.repo.fork }}  # fork tokens cannot write packages
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
--- a/Dockerfile
+++ b/Dockerfile
@ -23,10 +23,7 @@ FROM base as dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./

-RUN poetry install --with local
 RUN poetry install --with ui
-RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"\
-    poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python

 FROM base as app

@ -39,9 +36,11 @@ EXPOSE 8080
 RUN adduser --system worker
 WORKDIR /home/worker/app

-# Copy everything, including the virtual environment
-COPY --chown=worker --from=dependencies /home/worker/app .
-COPY --chown=worker . .
+RUN mkdir "local_data"; chown worker local_data
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker docs/ docs
+COPY --chown=worker *.yaml *.md ./

 USER worker
 ENTRYPOINT .venv/bin/python -m private_gpt
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@ -13,7 +13,7 @@ class EmbeddingComponent:
    @inject
    def __init__(self) -> None:
        match settings.llm.mode:
-            case "local":
+            case "local" | "sagemaker":
                from llama_index.embeddings import HuggingFaceEmbedding

                self.embedding_model = HuggingFaceEmbedding(
--- a/private_gpt/components/llm/custom/sagemaker.py
+++ b/private_gpt/components/llm/custom/sagemaker.py
@ -21,8 +21,6 @@ from llama_index.llms.llama_utils import (
 )

 if TYPE_CHECKING:
-    from collections.abc import Callable
-
    from llama_index.callbacks import CallbackManager
    from llama_index.llms import (
        CompletionResponseGen,
@ -113,10 +111,10 @@ class SagemakerLLM(CustomLLM):
    context_window: int = Field(
        description="The maximum number of context tokens for the model."
    )
-    messages_to_prompt: Callable[..., str] = Field(
+    messages_to_prompt: Any = Field(
        description="The function to convert messages to a prompt.", exclude=True
    )
-    completion_to_prompt: Callable[..., str] = Field(
+    completion_to_prompt: Any = Field(
        description="The function to convert a completion to a prompt.", exclude=True
    )
    generate_kwargs: dict[str, Any] = Field(
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@ -37,6 +37,8 @@ class LLMComponent:

                self.llm = SagemakerLLM(
                    endpoint_name=settings.sagemaker.endpoint_name,
+                    messages_to_prompt=messages_to_prompt,
+                    completion_to_prompt=completion_to_prompt,
                )
            case "openai":
                from llama_index.llms import OpenAI
--- a/settings-docker.yaml
+++ b/settings-docker.yaml
@ -3,12 +3,15 @@ server:
  port: ${PORT:8080}

 llm:
-  mode: local
+  mode: ${PGPT_MODE:mock}

 local:
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+
+sagemaker:
+  endpoint_name: ${PGPT_SAGEMAKER_ENDPOINT_NAME:}

 ui:
  enabled: true