fix: Docker and sagemaker setup (#1118)

* fix: docker copying extra files
* feat: allow configuring mode through env vars
* feat: Attempt to build and tag a docker image
* fix: run docker on release
* fix: typing in prompt transformation
* chore: remove tutorial comments
2025-07-13 07:04:10 +00:00 · 2023-10-27 13:29:29 +02:00 · 2023-10-27 13:29:29 +02:00 · 895588b82a
commit 895588b82a
parent 768e5ff505
7 changed files with 67 additions and 15 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -3,6 +3,9 @@ models
 .github
 .vscode
 .DS_Store
 .mypy_cache
 .ruff_cache
 local_data
 terraform
 tests
 Dockerfile
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -0,0 +1,47 @@
 # Builds the repository's Docker image and publishes it to the GitHub
 # Container Registry. Releases and pushes to main publish the image; pull
 # requests only build it, as a check that the Dockerfile still works.
 name: Create and publish a Docker image
 on:
  release:
    types: [ published ]
  push:
    branches:
      - main
  pull_request:
 env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
 jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    # GITHUB_TOKEN needs read access to check out the code and write access
    # to packages in order to push the image to the registry.
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # Skipped on pull requests: nothing is pushed for them, and the token
      # handed to pull requests from forks cannot write packages.
      - name: Log in to the Container registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Derives image tags and labels from the triggering event (branch name,
      # PR number, release version and commit SHA).
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          # Build on every event, but publish only for pushes and releases so
          # unreviewed pull-request code never lands in the registry.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/11
+++ b/11
@ -23,10 +23,7 @@ FROM base as dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 RUN poetry install --with local
 RUN poetry install --with ui
 RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"\
    poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
 FROM base as app
@ -39,9 +36,11 @@ EXPOSE 8080
 RUN adduser --system worker
 WORKDIR /home/worker/app
-# Copy everything, including the virtual environment
+RUN mkdir "local_data"; chown worker local_data
-COPY --chown=worker --from=dependencies /home/worker/app .
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
-COPY --chown=worker . .
+COPY --chown=worker private_gpt/ private_gpt
 COPY --chown=worker docs/ docs
 COPY --chown=worker *.yaml *.md ./
 USER worker
 ENTRYPOINT .venv/bin/python -m private_gpt
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@ -13,7 +13,7 @@ class EmbeddingComponent:
    @inject
    def __init__(self) -> None:
        match settings.llm.mode:
-            case "local":
+            case "local" | "sagemaker":
                from llama_index.embeddings import HuggingFaceEmbedding
                self.embedding_model = HuggingFaceEmbedding(
--- a/private_gpt/components/llm/custom/sagemaker.py
+++ b/private_gpt/components/llm/custom/sagemaker.py
@ -21,8 +21,6 @@ from llama_index.llms.llama_utils import (
 )
 if TYPE_CHECKING:
    from collections.abc import Callable
    from llama_index.callbacks import CallbackManager
    from llama_index.llms import (
        CompletionResponseGen,
@ -113,10 +111,10 @@ class SagemakerLLM(CustomLLM):
    context_window: int = Field(
        description="The maximum number of context tokens for the model."
    )
-    messages_to_prompt: Callable[..., str] = Field(
+    messages_to_prompt: Any = Field(
        description="The function to convert messages to a prompt.", exclude=True
    )
-    completion_to_prompt: Callable[..., str] = Field(
+    completion_to_prompt: Any = Field(
        description="The function to convert a completion to a prompt.", exclude=True
    )
    generate_kwargs: dict[str, Any] = Field(
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@ -37,6 +37,8 @@ class LLMComponent:
                self.llm = SagemakerLLM(
                    endpoint_name=settings.sagemaker.endpoint_name,
                    messages_to_prompt=messages_to_prompt,
                    completion_to_prompt=completion_to_prompt,
                )
            case "openai":
                from llama_index.llms import OpenAI
--- a/settings-docker.yaml
+++ b/settings-docker.yaml
@ -3,12 +3,15 @@ server:
  port: ${PORT:8080}
 llm:
-  mode: local
+  mode: ${PGPT_MODE:mock}
 local:
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
-  llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
 sagemaker:
  endpoint_name: ${PGPT_SAGEMAKER_ENDPOINT_NAME:}
 ui:
  enabled: true