diff --git a/.github/workflows/apolo-release.yml b/.github/workflows/apolo-release.yml
new file mode 100644
index 00000000..76a41646
--- /dev/null
+++ b/.github/workflows/apolo-release.yml
@@ -0,0 +1,32 @@
+name: CI
+
+on:
+  push:
+    branches: [master]
+    tags: ["v*"]
+  pull_request:
+    branches: [master]
+
+jobs:
+  release:
+    name: Release image
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - name: Checkout commit
+        uses: actions/checkout@v4
+      - name: Login to ghcr.io
+        uses: docker/login-action@v3.1.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ github.token }}
+      - name: Build Docker image
+        run: make build-image
+      - name: Push image
+        run: |
+          export IMAGE_TAG=${GITHUB_REF#refs/tags/v}
+          make push-image
+
+          export IMAGE_TAG=latest
+          make push-image
diff --git a/.neuro/live.yaml b/.neuro/live.yaml
index 2bc9bf37..07f321ca 100644
--- a/.neuro/live.yaml
+++ b/.neuro/live.yaml
@@ -66,6 +66,7 @@ jobs:
       OLLAMA_API_BASE: http://${{ inspect_job('ollama').internal_hostname_named }}:11434
       POSTGRES_HOST: ${{ inspect_job('pgvector').internal_hostname_named }}
       VLLM_MODEL: stabilityai/StableBeluga-13B
+      VLLM_TOKENIZER: stabilityai/StableBeluga-13B

   vllm:
     image: vllm/vllm-openai:v0.4.0
diff --git a/Dockerfile.external b/Dockerfile.external
index a6daaa7f..e1e2ed1d 100644
--- a/Dockerfile.external
+++ b/Dockerfile.external
@@ -19,6 +19,8 @@ RUN poetry install --extras "$POETRY_EXTRAS"

 FROM base as app

+LABEL org.opencontainers.image.source="https://github.com/neuro-inc/private-gpt"
+
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 EXPOSE 8080
diff --git a/Makefile b/Makefile
index 7b9a8469..1dc7dd83 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,11 @@
 # Any args passed to the make script, use with $(call args, default_value)
 args = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}`

+IMAGE_REPO ?= ghcr.io/neuro-inc
+IMAGE_NAME ?= private-gpt
+IMAGE_TAG ?= latest
+IMAGE_REF = $(IMAGE_REPO)/$(IMAGE_NAME):$(IMAGE_TAG)
+
 ########################################################################################################################
 # Quality checks
 ########################################################################################################################
@@ -76,3 +81,12 @@ list:
 	@echo "  ingest          : Ingest data using specified script"
 	@echo "  wipe            : Wipe data using specified script"
 	@echo "  setup           : Setup the application"
+
+.PHONY: build-image
+build-image:
+	docker build -t $(IMAGE_NAME):latest .
+
+.PHONY: push-image
+push-image:
+	docker tag $(IMAGE_NAME):latest $(IMAGE_REF)
+	docker push $(IMAGE_REF)
diff --git a/settings/settings-vllm-pgvector.yaml b/settings/settings-vllm-pgvector.yaml
index 35001aa4..9a671066 100644
--- a/settings/settings-vllm-pgvector.yaml
+++ b/settings/settings-vllm-pgvector.yaml
@@ -4,14 +4,14 @@ server:

 llm:
   mode: openailike
-  tokenizer: ${VLLM_MODEL:lmsys/vicuna-7b-v1.5}
-  max_new_tokens: 5000
-  context_window: 2048
-  temperature: 0.1
+  tokenizer: ${VLLM_TOKENIZER:lmsys/vicuna-7b-v1.5}
+  max_new_tokens: ${VLLM_MAX_NEW_TOKENS:5000}
+  context_window: ${VLLM_CONTEXT_WINDOW:2048}
+  temperature: ${VLLM_TEMPERATURE:0.1}

 openai:
   api_base: ${VLLM_API_BASE:http://localhost:8000/v1}
-  api_key: EMPTY
+  api_key: ${VLLM_API_KEY:EMPTY}
   model: ${VLLM_MODEL:lmsys/vicuna-7b-v1.5}

 embedding:
@@ -21,8 +21,7 @@ embedding:

 ollama:
   # Note: if you change embedding model, you'll need to use a dedicated DB for ingest storage
-  embedding_model: nomic-embed-text
-  # api_base: ${OLLAMA_API_BASE:http://localhost:11434}
+  embedding_model: ${OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
   embedding_api_base: ${OLLAMA_API_BASE:http://localhost:11434}
   request_timeout: 300.0
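For reference, the release flow these changes add, as a minimal sketch; it assumes the default IMAGE_REPO/IMAGE_NAME values above, a Docker client already logged in to ghcr.io, and a placeholder version v1.2.3:

    # Build the image locally under its short name (private-gpt:latest).
    make build-image

    # Retag and push a specific version, e.g. ghcr.io/neuro-inc/private-gpt:1.2.3.
    # A command-line IMAGE_TAG overrides the Makefile's "?= latest" default.
    make push-image IMAGE_TAG=1.2.3

    # CI does the same automatically: pushing a v-prefixed git tag triggers the
    # "Release image" job, which strips the prefix (refs/tags/v1.2.3 -> 1.2.3)
    # and pushes both that version tag and "latest".
    git tag v1.2.3
    git push origin v1.2.3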
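The ${VAR:default} placeholders added to the settings file are resolved from the environment when PrivateGPT loads its configuration, falling back to the value after the colon. A usage sketch follows; the PGPT_PROFILES variable and the make run target are assumed from upstream PrivateGPT conventions, with this file mapping to the vllm-pgvector profile:

    # Override individual vLLM settings without editing the YAML; anything
    # left unset falls back to the default after the ":".
    export VLLM_TOKENIZER=stabilityai/StableBeluga-13B
    export VLLM_MAX_NEW_TOKENS=2048
    export VLLM_TEMPERATURE=0.0

    # Select settings-vllm-pgvector.yaml and start the app (assumed
    # invocation; adjust to how you actually run the service).
    PGPT_PROFILES=vllm-pgvector make run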