tests: run kata-monitor functional tests against the dedicated image

Exercise the published kata-monitor container image (the one built by publish-kata-monitor-image-amd64) rather than the on-disk binary, so integration regressions like the recent glibc/musl mismatch surface at PR time. The kata-monitor-tests.sh script keeps the binary fallback for ad-hoc local runs.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: OpenAI Codex <codex@openai.com>
2026-07-01 14:38:33 +00:00 · 2026-06-03 13:03:37 +02:00
parent d5bc1177c0
commit 63fec205fe
3 changed files with 77 additions and 11 deletions
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -297,12 +297,13 @@ jobs:

  run-kata-monitor-tests:
    if: ${{ inputs.skip-test != 'yes' }}
-    needs: build-kata-static-tarball-amd64
+    needs: [build-kata-static-tarball-amd64, publish-kata-monitor-image-amd64]
    uses: ./.github/workflows/run-kata-monitor-tests.yaml
    with:
      tarball-suffix: -${{ inputs.tag }}
      commit-hash: ${{ inputs.commit-hash }}
      target-branch: ${{ inputs.target-branch }}
+      kata-monitor-image: ghcr.io/${{ github.repository_owner }}/kata-monitor-ci:${{ inputs.tag }}-amd64

  run-k8s-tests-on-aks:
    if: ${{ inputs.skip-test != 'yes' }}
--- a/.github/workflows/run-kata-monitor-tests.yaml
+++ b/.github/workflows/run-kata-monitor-tests.yaml
@@ -12,6 +12,14 @@ on:
        required: false
        type: string
        default: ""
+      kata-monitor-image:
+        description: >-
+          Container image reference for kata-monitor to be exercised by
+          the tests. When unset the tests fall back to running the
+          kata-monitor binary installed from the kata-static tarball.
+        required: false
+        type: string
+        default: ""

 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-kata-monitor
@@ -39,6 +47,7 @@ jobs:
      CONTAINER_ENGINE: ${{ matrix.container_engine }}
      CONTAINERD_VERSION: ${{ matrix.containerd_version }}
      KATA_HYPERVISOR: ${{ matrix.vmm }}
+      KATA_MONITOR_IMAGE: ${{ inputs.kata-monitor-image }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
@@ -66,5 +75,9 @@ jobs:
      - name: Install kata
        run: bash tests/functional/kata-monitor/gha-run.sh install-kata kata-artifacts

+      - name: Pre-pull kata-monitor image
+        if: ${{ inputs.kata-monitor-image != '' }}
+        run: sudo docker pull "${KATA_MONITOR_IMAGE}"
+
      - name: Run kata-monitor tests
        run: bash tests/functional/kata-monitor/gha-run.sh run
--- a/tests/functional/kata-monitor/kata-monitor-tests.sh
+++ b/tests/functional/kata-monitor/kata-monitor-tests.sh
@@ -27,7 +27,13 @@ readonly MONITOR_MIN_METRICS_NUM=200
 readonly TIMEOUT="20s"
 CONTAINER_ENGINE=${CONTAINER_ENGINE:-"containerd"}
 CRICTL_RUNTIME=${CRICTL_RUNTIME:-"kata"}
+# When KATA_MONITOR_IMAGE is set, kata-monitor runs inside that container
+# image instead of as an on-disk binary. This is what CI does — it
+# validates the actual image we ship. Manual runs without the env var
+# keep the previous behaviour of executing ${KATA_MONITOR_BIN}.
+KATA_MONITOR_IMAGE="${KATA_MONITOR_IMAGE:-}"
 KATA_MONITOR_BIN="${KATA_MONITOR_BIN:-$(command -v kata-monitor || true)}"
+readonly KATA_MONITOR_CONTAINER_NAME="kata-monitor-test"
 KATA_MONITOR_PID=""
 TMPATH=$(mktemp -d -t kata-monitor-test-XXXXXXXXX)
 METRICS_FILE="${TMPATH}/metrics.txt"
@@ -83,11 +89,59 @@ cleanup() {
 	stop_workload
 	stop_workload "${RUNC_CID}" "${RUNC_POD_ID}"

+	stop_kata_monitor
+
+	rm -rf "${TMPATH}"
+}
+
+start_kata_monitor() {
+	local args="$1"
+
+	if [[ -n "${KATA_MONITOR_IMAGE}" ]]; then
+		# `--network host` keeps the default 127.0.0.1:8090 bind
+		# reachable from the host-side test code without having to
+		# publish a port. Mount /run/containerd so the monitor can
+		# reach containerd's CRI socket, plus the kata sandbox base
+		# path for the per-sandbox shim-monitor sockets.
+
+		# Ensure /run/vc/sbs/ exists on the host so the readonly mount
+		# does not fail before the first kata sandbox is created.
+		sudo mkdir -p /run/vc/sbs
+
+		# shellcheck disable=SC2086
+		sudo docker run --rm -d \
+			--name "${KATA_MONITOR_CONTAINER_NAME}" \
+			--network host \
+			-v /run/containerd:/run/containerd:ro \
+			-v /run/vc/sbs:/run/vc/sbs:ro \
+			"${KATA_MONITOR_IMAGE}" \
+			${args} --log-level trace > /dev/null
+		# Stream container logs into the same file the binary path
+		# writes to, so error_with_msg's dump works identically in
+		# both modes. The redirect target lives under our own
+		# ${TMPATH}, so SC2024 (sudo doesn't affect redirects) is a
+		# false positive here.
+		# shellcheck disable=SC2024
+		sudo docker logs -f "${KATA_MONITOR_CONTAINER_NAME}" \
+			> "${MONITOR_LOG_FILE}" 2>&1 &
+		return
+	fi
+
+	[[ ! -x "${KATA_MONITOR_BIN}" ]] && error_with_msg "kata-monitor binary not found"
+	# shellcheck disable=SC2024,SC2086
+	sudo "${KATA_MONITOR_BIN}" ${args} --log-level trace > "${MONITOR_LOG_FILE}" 2>&1 &
+	KATA_MONITOR_PID="$!"
+}
+
+stop_kata_monitor() {
+	if [[ -n "${KATA_MONITOR_IMAGE}" ]]; then
+		sudo docker stop "${KATA_MONITOR_CONTAINER_NAME}" > /dev/null 2>&1 || true
+		return
+	fi
+
 	[[ -n "${KATA_MONITOR_PID}" ]] \
 		&& [[ -d "/proc/${KATA_MONITOR_PID}" ]] \
 		&& kill -9 "${KATA_MONITOR_PID}"
-
-	rm -rf "${TMPATH}"
 }

 create_sandbox_json() {
@@ -243,15 +297,13 @@ main() {
 	###########################
 	title "start kata-monitor"

-	[[ ! -x "${KATA_MONITOR_BIN}" ]] && error_with_msg "kata-monitor binary not found"
-
-	[[ "${CONTAINER_ENGINE}" = "crio" ]] && args="--runtime-endpoint /run/crio/crio.sock"
-
 	CURRENT_TASK="start kata-monitor"
-	# shellcheck disable=SC2024,SC2086
-	sudo "${KATA_MONITOR_BIN}" ${args} --log-level trace > "${MONITOR_LOG_FILE}" 2>&1 &
-	KATA_MONITOR_PID="$!"
-	echo_ok "${CURRENT_TASK} (${KATA_MONITOR_PID})"
+	start_kata_monitor "${args}"
+	if [[ -n "${KATA_MONITOR_IMAGE}" ]]; then
+		echo_ok "${CURRENT_TASK} (image ${KATA_MONITOR_IMAGE})"
+	else
+		echo_ok "${CURRENT_TASK} (pid ${KATA_MONITOR_PID})"
+	fi

 	###########################
 	title "kata-monitor cache update checks"