mirror of
https://github.com/imartinez/privateGPT.git
synced 2025-05-01 04:53:37 +00:00
fix: Docker and sagemaker setup (#1118)
* fix: docker copying extra files
* feat: allow configuring mode through env vars
* feat: Attempt to build and tag a docker image
* fix: run docker on release
* fix: typing in prompt transformation
* chore: remove tutorial comments
This commit is contained in:
parent
768e5ff505
commit
895588b82a
@ -3,6 +3,9 @@ models
|
|||||||
.github
|
.github
|
||||||
.vscode
|
.vscode
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
.mypy_cache
|
||||||
|
.ruff_cache
|
||||||
|
local_data
|
||||||
terraform
|
terraform
|
||||||
tests
|
tests
|
||||||
Dockerfile
|
Dockerfile
|
||||||
|
47
.github/workflows/docker.yml
vendored
Normal file
47
.github/workflows/docker.yml
vendored
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
name: Create and publish a Docker image
|
||||||
|
|
||||||
|
on:
|
||||||
|
release:
|
||||||
|
types: [ published ]
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: ghcr.io
|
||||||
|
IMAGE_NAME: ${{ github.repository }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-push-image:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
- name: Log in to the Container registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.REGISTRY }}
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
- name: Extract metadata (tags, labels) for Docker
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||||
|
tags: |
|
||||||
|
type=ref,event=branch
|
||||||
|
type=ref,event=pr
|
||||||
|
type=semver,pattern={{version}}
|
||||||
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
|
type=sha
|
||||||
|
- name: Build and push Docker image
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
11
Dockerfile
11
Dockerfile
@ -23,10 +23,7 @@ FROM base as dependencies
|
|||||||
WORKDIR /home/worker/app
|
WORKDIR /home/worker/app
|
||||||
COPY pyproject.toml poetry.lock ./
|
COPY pyproject.toml poetry.lock ./
|
||||||
|
|
||||||
RUN poetry install --with local
|
|
||||||
RUN poetry install --with ui
|
RUN poetry install --with ui
|
||||||
RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"\
|
|
||||||
poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
|
|
||||||
|
|
||||||
FROM base as app
|
FROM base as app
|
||||||
|
|
||||||
@ -39,9 +36,11 @@ EXPOSE 8080
|
|||||||
RUN adduser --system worker
|
RUN adduser --system worker
|
||||||
WORKDIR /home/worker/app
|
WORKDIR /home/worker/app
|
||||||
|
|
||||||
# Copy everything, including the virtual environment
|
RUN mkdir "local_data"; chown worker local_data
|
||||||
COPY --chown=worker --from=dependencies /home/worker/app .
|
COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
|
||||||
COPY --chown=worker . .
|
COPY --chown=worker private_gpt/ private_gpt
|
||||||
|
COPY --chown=worker docs/ docs
|
||||||
|
COPY --chown=worker *.yaml *.md ./
|
||||||
|
|
||||||
USER worker
|
USER worker
|
||||||
ENTRYPOINT .venv/bin/python -m private_gpt
|
ENTRYPOINT .venv/bin/python -m private_gpt
|
@ -13,7 +13,7 @@ class EmbeddingComponent:
|
|||||||
@inject
|
@inject
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
match settings.llm.mode:
|
match settings.llm.mode:
|
||||||
case "local":
|
case "local" | "sagemaker":
|
||||||
from llama_index.embeddings import HuggingFaceEmbedding
|
from llama_index.embeddings import HuggingFaceEmbedding
|
||||||
|
|
||||||
self.embedding_model = HuggingFaceEmbedding(
|
self.embedding_model = HuggingFaceEmbedding(
|
||||||
|
@ -21,8 +21,6 @@ from llama_index.llms.llama_utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Callable
|
|
||||||
|
|
||||||
from llama_index.callbacks import CallbackManager
|
from llama_index.callbacks import CallbackManager
|
||||||
from llama_index.llms import (
|
from llama_index.llms import (
|
||||||
CompletionResponseGen,
|
CompletionResponseGen,
|
||||||
@ -113,10 +111,10 @@ class SagemakerLLM(CustomLLM):
|
|||||||
context_window: int = Field(
|
context_window: int = Field(
|
||||||
description="The maximum number of context tokens for the model."
|
description="The maximum number of context tokens for the model."
|
||||||
)
|
)
|
||||||
messages_to_prompt: Callable[..., str] = Field(
|
messages_to_prompt: Any = Field(
|
||||||
description="The function to convert messages to a prompt.", exclude=True
|
description="The function to convert messages to a prompt.", exclude=True
|
||||||
)
|
)
|
||||||
completion_to_prompt: Callable[..., str] = Field(
|
completion_to_prompt: Any = Field(
|
||||||
description="The function to convert a completion to a prompt.", exclude=True
|
description="The function to convert a completion to a prompt.", exclude=True
|
||||||
)
|
)
|
||||||
generate_kwargs: dict[str, Any] = Field(
|
generate_kwargs: dict[str, Any] = Field(
|
||||||
|
@ -37,6 +37,8 @@ class LLMComponent:
|
|||||||
|
|
||||||
self.llm = SagemakerLLM(
|
self.llm = SagemakerLLM(
|
||||||
endpoint_name=settings.sagemaker.endpoint_name,
|
endpoint_name=settings.sagemaker.endpoint_name,
|
||||||
|
messages_to_prompt=messages_to_prompt,
|
||||||
|
completion_to_prompt=completion_to_prompt,
|
||||||
)
|
)
|
||||||
case "openai":
|
case "openai":
|
||||||
from llama_index.llms import OpenAI
|
from llama_index.llms import OpenAI
|
||||||
|
@ -3,12 +3,15 @@ server:
|
|||||||
port: ${PORT:8080}
|
port: ${PORT:8080}
|
||||||
|
|
||||||
llm:
|
llm:
|
||||||
mode: local
|
mode: ${PGPT_MODE:mock}
|
||||||
|
|
||||||
local:
|
local:
|
||||||
llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
|
llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
|
||||||
llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
|
llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
|
||||||
embedding_hf_model_name: BAAI/bge-small-en-v1.5
|
embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
|
||||||
|
|
||||||
|
sagemaker:
|
||||||
|
endpoint_name: ${PGPT_SAGEMAKER_ENDPOINT_NAME:}
|
||||||
|
|
||||||
ui:
|
ui:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
Loading…
Reference in New Issue
Block a user