diff --git a/.github/workflows/apolo-release.yml b/.github/workflows/apolo-release.yml
new file mode 100644
index 00000000..76a41646
--- /dev/null
+++ b/.github/workflows/apolo-release.yml
@@ -0,0 +1,32 @@
+name: CI
+
+on:
+  push:
+    branches: [master]
+    tags: ["v*"]
+  pull_request:
+    branches: [master]
+
+jobs:
+  release:
+    name: Release image
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - name: Checkout commit
+        uses: actions/checkout@v4
+      - name: Login to ghcr.io
+        uses: docker/login-action@v3.1.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ github.token }}
+      - name: Build Docker image
+        run: make build-image
+      - name: Push image
+        run: |
+          export IMAGE_TAG=${GITHUB_REF#refs/tags/v}
+          make push-image
+
+          export IMAGE_TAG=latest
+          make push-image
diff --git a/.neuro/live.yaml b/.neuro/live.yaml
index 2bc9bf37..07f321ca 100644
--- a/.neuro/live.yaml
+++ b/.neuro/live.yaml
@@ -66,6 +66,7 @@ jobs:
       OLLAMA_API_BASE: http://${{ inspect_job('ollama').internal_hostname_named }}:11434
       POSTGRES_HOST: ${{ inspect_job('pgvector').internal_hostname_named }}
       VLLM_MODEL: stabilityai/StableBeluga-13B
+      VLLM_TOKENIZER: stabilityai/StableBeluga-13B

   vllm:
     image: vllm/vllm-openai:v0.4.0
diff --git a/Dockerfile.external b/Dockerfile.external
index a6daaa7f..e1e2ed1d 100644
--- a/Dockerfile.external
+++ b/Dockerfile.external
@@ -19,6 +19,8 @@ RUN poetry install --extras "$POETRY_EXTRAS"

 FROM base as app

+LABEL org.opencontainers.image.source="https://github.com/neuro-inc/private-gpt"
+
 ENV PYTHONUNBUFFERED=1
 ENV PORT=8080
 EXPOSE 8080
diff --git a/Makefile b/Makefile
index 7b9a8469..1dc7dd83 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,11 @@
 # Any args passed to the make script, use with $(call args, default_value)
 args = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}`

+IMAGE_REPO ?= ghcr.io/neuro-inc
+IMAGE_NAME ?= private-gpt
+IMAGE_TAG ?= latest
+IMAGE_REF = $(IMAGE_REPO)/$(IMAGE_NAME):$(IMAGE_TAG)
+
 ########################################################################################################################
 # Quality checks
 ########################################################################################################################
@@ -76,3 +81,12 @@ list:
 	@echo "  ingest          : Ingest data using specified script"
 	@echo "  wipe            : Wipe data using specified script"
 	@echo "  setup           : Setup the application"
+
+.PHONY: build-image
+build-image:
+	docker build -t $(IMAGE_NAME):latest .
+
+.PHONY: push-image
+push-image:
+	docker tag $(IMAGE_NAME):latest $(IMAGE_REF)
+	docker push $(IMAGE_REF)
diff --git a/settings/settings-vllm-pgvector.yaml b/settings/settings-vllm-pgvector.yaml
index 35001aa4..9a671066 100644
--- a/settings/settings-vllm-pgvector.yaml
+++ b/settings/settings-vllm-pgvector.yaml
@@ -4,14 +4,14 @@ server:

 llm:
   mode: openailike
-  tokenizer: ${VLLM_MODEL:lmsys/vicuna-7b-v1.5}
-  max_new_tokens: 5000
-  context_window: 2048
-  temperature: 0.1
+  tokenizer: ${VLLM_TOKENIZER:lmsys/vicuna-7b-v1.5}
+  max_new_tokens: ${VLLM_MAX_NEW_TOKENS:5000}
+  context_window: ${VLLM_CONTEXT_WINDOW:2048}
+  temperature: ${VLLM_TEMPERATURE:0.1}

 openai:
   api_base: ${VLLM_API_BASE:http://localhost:8000/v1}
-  api_key: EMPTY
+  api_key: ${VLLM_API_KEY:EMPTY}
   model: ${VLLM_MODEL:lmsys/vicuna-7b-v1.5}

 embedding:
@@ -21,8 +21,7 @@ embedding:

 ollama:
   # Note: if you change embedding model, you'll need to use a dedicated DB for ingest storage
-  embedding_model: nomic-embed-text
-  # api_base: ${OLLAMA_API_BASE:http://localhost:11434}
+  embedding_model: ${OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
   embedding_api_base: ${OLLAMA_API_BASE:http://localhost:11434}
   request_timeout: 300.0
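For reference, the release flow these changes add, as a minimal sketch; it assumes the default IMAGE_REPO/IMAGE_NAME values above, a Docker client already logged in to ghcr.io, and a placeholder version v1.2.3:

    # Build the image locally under its short name (private-gpt:latest).
    make build-image

    # Retag and push a specific version, e.g. ghcr.io/neuro-inc/private-gpt:1.2.3.
    # A command-line IMAGE_TAG overrides the Makefile's "?= latest" default.
    make push-image IMAGE_TAG=1.2.3

    # CI does the same automatically: pushing a v-prefixed git tag triggers the
    # "Release image" job, which strips the prefix (refs/tags/v1.2.3 -> 1.2.3)
    # and pushes both that version tag and "latest".
    git tag v1.2.3
    git push origin v1.2.3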
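The ${VAR:default} placeholders added to the settings file are resolved from the environment when PrivateGPT loads its configuration, falling back to the value after the colon. A usage sketch follows; the PGPT_PROFILES variable and the make run target are assumed from upstream PrivateGPT conventions, with this file mapping to the vllm-pgvector profile:

    # Override individual vLLM settings without editing the YAML; anything
    # left unset falls back to the default after the ":".
    export VLLM_TOKENIZER=stabilityai/StableBeluga-13B
    export VLLM_MAX_NEW_TOKENS=2048
    export VLLM_TEMPERATURE=0.0

    # Select settings-vllm-pgvector.yaml and start the app (assumed
    # invocation; adjust to how you actually run the service).
    PGPT_PROFILES=vllm-pgvector make run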