From 63fec205feef4f0f65b148f055410e41c107d488 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?=
Date: Wed, 3 Jun 2026 13:03:37 +0200
Subject: [PATCH] tests: run kata-monitor functional tests against the dedicated image
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exercise the published kata-monitor container image (the one built by
publish-kata-monitor-payload-amd64) rather than the on-disk binary, so
integration regressions like the recent glibc/musl mismatch surface at
PR time.

The kata-monitor-tests.sh script keeps the binary fallback for ad-hoc
local runs.

Signed-off-by: Fabiano Fidêncio
Assisted-by: OpenAI Codex
---
Review notes (this section is not part of the commit message):
- stop_kata_monitor: the binary-mode teardown is an explicit `if` instead
  of a trailing `&&` chain, so the function no longer returns non-zero
  when there is nothing to kill (which would abort cleanup() under
  errexit before ${TMPATH} is removed).
- main(): the crio `args=` line and the CURRENT_TASK assignment are kept;
  start_kata_monitor and echo_ok still consume both.
- NOTE(review): the commit message names the image job
  publish-kata-monitor-payload-amd64 while ci.yaml `needs` references
  publish-kata-monitor-image-amd64 - confirm which job name exists.
- NOTE(review): line breaks and indentation were reconstructed from a
  whitespace-collapsed copy; the post-image blob id on the
  kata-monitor-tests.sh `index` line is stale. Regenerate with
  `git format-patch` after applying.

 .github/workflows/ci.yaml                     |  3 +-
 .github/workflows/run-kata-monitor-tests.yaml | 13 +++
 .../kata-monitor/kata-monitor-tests.sh        | 87 ++++++++++++++++---
 3 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index b89957464b..51a036731c 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -297,12 +297,13 @@ jobs:
 
   run-kata-monitor-tests:
     if: ${{ inputs.skip-test != 'yes' }}
-    needs: build-kata-static-tarball-amd64
+    needs: [build-kata-static-tarball-amd64, publish-kata-monitor-image-amd64]
     uses: ./.github/workflows/run-kata-monitor-tests.yaml
     with:
       tarball-suffix: -${{ inputs.tag }}
       commit-hash: ${{ inputs.commit-hash }}
       target-branch: ${{ inputs.target-branch }}
+      kata-monitor-image: ghcr.io/${{ github.repository_owner }}/kata-monitor-ci:${{ inputs.tag }}-amd64
 
   run-k8s-tests-on-aks:
     if: ${{ inputs.skip-test != 'yes' }}
diff --git a/.github/workflows/run-kata-monitor-tests.yaml b/.github/workflows/run-kata-monitor-tests.yaml
index c39c1d9ac0..aa32e82f7c 100644
--- a/.github/workflows/run-kata-monitor-tests.yaml
+++ b/.github/workflows/run-kata-monitor-tests.yaml
@@ -12,6 +12,14 @@ on:
         required: false
         type: string
         default: ""
+      kata-monitor-image:
+        description: >-
+          Container image reference for kata-monitor to be exercised by
+          the tests. When unset the tests fall back to running the
+          kata-monitor binary installed from the kata-static tarball.
+        required: false
+        type: string
+        default: ""
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-kata-monitor
@@ -39,6 +47,7 @@ jobs:
       CONTAINER_ENGINE: ${{ matrix.container_engine }}
       CONTAINERD_VERSION: ${{ matrix.containerd_version }}
       KATA_HYPERVISOR: ${{ matrix.vmm }}
+      KATA_MONITOR_IMAGE: ${{ inputs.kata-monitor-image }}
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
         with:
@@ -66,5 +75,9 @@ jobs:
       - name: Install kata
         run: bash tests/functional/kata-monitor/gha-run.sh install-kata kata-artifacts
 
+      - name: Pre-pull kata-monitor image
+        if: ${{ inputs.kata-monitor-image != '' }}
+        run: sudo docker pull "${KATA_MONITOR_IMAGE}"
+
       - name: Run kata-monitor tests
         run: bash tests/functional/kata-monitor/gha-run.sh run
diff --git a/tests/functional/kata-monitor/kata-monitor-tests.sh b/tests/functional/kata-monitor/kata-monitor-tests.sh
index 2134fb08d2..41da8c3c61 100755
--- a/tests/functional/kata-monitor/kata-monitor-tests.sh
+++ b/tests/functional/kata-monitor/kata-monitor-tests.sh
@@ -27,7 +27,13 @@ readonly MONITOR_MIN_METRICS_NUM=200
 readonly TIMEOUT="20s"
 CONTAINER_ENGINE=${CONTAINER_ENGINE:-"containerd"}
 CRICTL_RUNTIME=${CRICTL_RUNTIME:-"kata"}
+# When KATA_MONITOR_IMAGE is set, kata-monitor runs inside that container
+# image instead of as an on-disk binary. This is what CI does — it
+# validates the actual image we ship. Manual runs without the env var
+# keep the previous behaviour of executing ${KATA_MONITOR_BIN}.
+KATA_MONITOR_IMAGE="${KATA_MONITOR_IMAGE:-}"
 KATA_MONITOR_BIN="${KATA_MONITOR_BIN:-$(command -v kata-monitor || true)}"
+readonly KATA_MONITOR_CONTAINER_NAME="kata-monitor-test"
 KATA_MONITOR_PID=""
 TMPATH=$(mktemp -d -t kata-monitor-test-XXXXXXXXX)
 METRICS_FILE="${TMPATH}/metrics.txt"
@@ -83,11 +89,70 @@ cleanup() {
 	stop_workload
 	stop_workload "${RUNC_CID}" "${RUNC_POD_ID}"
 
+	stop_kata_monitor
+
+	rm -rf "${TMPATH}"
+}
+
+# Start kata-monitor and send its output to ${MONITOR_LOG_FILE}.
+#   $1: extra kata-monitor command line arguments (may be empty).
+# With KATA_MONITOR_IMAGE set the monitor runs as the docker container
+# ${KATA_MONITOR_CONTAINER_NAME}; otherwise ${KATA_MONITOR_BIN} is run
+# in the background and its PID is stored in KATA_MONITOR_PID.
+start_kata_monitor() {
+	local args="$1"
+
+	if [[ -n "${KATA_MONITOR_IMAGE}" ]]; then
+		# `--network host` keeps the default 127.0.0.1:8090 bind
+		# reachable from the host-side test code without having to
+		# publish a port. Mount /run/containerd so the monitor can
+		# reach containerd's CRI socket, plus the kata sandbox base
+		# path for the per-sandbox shim-monitor sockets.
+
+		# Ensure /run/vc/sbs/ exists on the host so the readonly mount
+		# does not fail before the first kata sandbox is created.
+		sudo mkdir -p /run/vc/sbs
+
+		# shellcheck disable=SC2086
+		sudo docker run --rm -d \
+			--name "${KATA_MONITOR_CONTAINER_NAME}" \
+			--network host \
+			-v /run/containerd:/run/containerd:ro \
+			-v /run/vc/sbs:/run/vc/sbs:ro \
+			"${KATA_MONITOR_IMAGE}" \
+			${args} --log-level trace > /dev/null
+		# Stream container logs into the same file the binary path
+		# writes to, so error_with_msg's dump works identically in
+		# both modes. The redirect target lives under our own
+		# ${TMPATH}, so SC2024 (sudo doesn't affect redirects) is a
+		# false positive here.
+		# shellcheck disable=SC2024
+		sudo docker logs -f "${KATA_MONITOR_CONTAINER_NAME}" \
+			> "${MONITOR_LOG_FILE}" 2>&1 &
+		return
+	fi
+
+	[[ ! -x "${KATA_MONITOR_BIN}" ]] && error_with_msg "kata-monitor binary not found"
+	# shellcheck disable=SC2024,SC2086
+	sudo "${KATA_MONITOR_BIN}" ${args} --log-level trace > "${MONITOR_LOG_FILE}" 2>&1 &
+	KATA_MONITOR_PID="$!"
+}
+
+# Stop whatever start_kata_monitor launched. Having nothing to stop is
+# not an error, so callers such as cleanup() can carry on with the rest
+# of their teardown.
+stop_kata_monitor() {
+	if [[ -n "${KATA_MONITOR_IMAGE}" ]]; then
+		sudo docker stop "${KATA_MONITOR_CONTAINER_NAME}" > /dev/null 2>&1 || true
+		return
+	fi
+
+	# Explicit `if` instead of a trailing `&&` chain: the chain leaves
+	# the function with a non-zero status when there is nothing to kill,
+	# which under errexit aborts cleanup() before it removes ${TMPATH}.
+	if [[ -n "${KATA_MONITOR_PID}" ]] && [[ -d "/proc/${KATA_MONITOR_PID}" ]]; then
+		kill -9 "${KATA_MONITOR_PID}"
+	fi
-	[[ -n "${KATA_MONITOR_PID}" ]] \
-		&& [[ -d "/proc/${KATA_MONITOR_PID}" ]] \
-		&& kill -9 "${KATA_MONITOR_PID}"
-
-	rm -rf "${TMPATH}"
 }
 
 create_sandbox_json() {
@@ -243,15 +308,15 @@ main() {
 	###########################
 	title "start kata-monitor"
 
-	[[ ! -x "${KATA_MONITOR_BIN}" ]] && error_with_msg "kata-monitor binary not found"
-
 	[[ "${CONTAINER_ENGINE}" = "crio" ]] && args="--runtime-endpoint /run/crio/crio.sock"
 
 	CURRENT_TASK="start kata-monitor"
-	# shellcheck disable=SC2024,SC2086
-	sudo "${KATA_MONITOR_BIN}" ${args} --log-level trace > "${MONITOR_LOG_FILE}" 2>&1 &
-	KATA_MONITOR_PID="$!"
-	echo_ok "${CURRENT_TASK} (${KATA_MONITOR_PID})"
+	start_kata_monitor "${args}"
+	if [[ -n "${KATA_MONITOR_IMAGE}" ]]; then
+		echo_ok "${CURRENT_TASK} (image ${KATA_MONITOR_IMAGE})"
+	else
+		echo_ok "${CURRENT_TASK} (pid ${KATA_MONITOR_PID})"
+	fi
 
 	###########################
 	title "kata-monitor cache update checks"