workflows: Update metrics jobs

Currently the run-metrics job performs a manual install, and does so in a separate job before the metrics tests run. This doesn't make sense: if we have multiple CI runs in parallel (as we often do), there is a high chance that the setup for another PR runs between the metrics setup and the metrics test runs, meaning the tests are not exercising the correct version of the code. We want to prevent this from happening, so install (and delete, to clean up) kata as part of the metrics test jobs. Also switch to kata-deploy rather than a manual install, for simplicity and in order to test what we recommend to users. Signed-off-by: stevenhorsman <steven@uk.ibm.com>
2025-09-02 09:24:35 +00:00 · 2025-02-27 11:27:25 +00:00
parent 3f13023f5f
commit 6f918d71f5
4 changed files with 49 additions and 34 deletions
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -288,8 +288,11 @@ jobs:
    needs: build-kata-static-tarball-amd64
    uses: ./.github/workflows/run-metrics.yaml
    with:
-      tarball-suffix: -${{ inputs.tag }}
+      registry: ghcr.io
+      repo: ${{ github.repository_owner }}/kata-deploy-ci
+      tag: ${{ inputs.tag }}-amd64
      commit-hash: ${{ inputs.commit-hash }}
+      pr-number: ${{ inputs.pr-number }}
      target-branch: ${{ inputs.target-branch }}

  run-basic-amd64-tests:
--- a/.github/workflows/run-metrics.yaml
+++ b/.github/workflows/run-metrics.yaml
@@ -2,8 +2,17 @@ name: CI | Run test metrics
 on:
  workflow_call:
    inputs:
-      tarball-suffix:
-        required: false
+      registry:
+        required: true
+        type: string
+      repo:
+        required: true
+        type: string
+      tag:
+        required: true
+        type: string
+      pr-number:
+        required: true
        type: string
      commit-hash:
        required: false
@@ -14,34 +23,7 @@ on:
        default: ""

 jobs:
-  setup-kata:
-    name: Kata Setup
-    runs-on: metrics
-    env:
-      GOPATH: ${{ github.workspace }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.commit-hash }}
-          fetch-depth: 0
-
-      - name: Rebase atop of the latest target branch
-        run: |
-          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
-        env:
-          TARGET_BRANCH: ${{ inputs.target-branch }}
-
-      - name: get-kata-tarball
-        uses: actions/download-artifact@v4
-        with:
-          name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
-          path: kata-artifacts
-
-      - name: Install kata
-        run: bash tests/metrics/gha-run.sh install-kata kata-artifacts
-
  run-metrics:
-    needs: setup-kata
    strategy:
      # We can set this to true whenever we're 100% sure that
      # the all the tests are not flaky, otherwise we'll fail
@@ -54,7 +36,32 @@ jobs:
    env:
      GOPATH: ${{ github.workspace }}
      KATA_HYPERVISOR: ${{ matrix.vmm }}
+      DOCKER_REGISTRY: ${{ inputs.registry }}
+      DOCKER_REPO: ${{ inputs.repo }}
+      DOCKER_TAG: ${{ inputs.tag }}
+      GH_PR_NUMBER: ${{ inputs.pr-number }}
+      K8S_TEST_HOST_TYPE: "baremetal"
+      USING_NFD: "false"
+      KUBERNETES: kubeadm
    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.commit-hash }}
+          fetch-depth: 0
+
+      - name: Rebase atop of the latest target branch
+        run: |
+          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
+        env:
+          TARGET_BRANCH: ${{ inputs.target-branch }}
+
+      - name: Deploy Kata
+        timeout-minutes: 10
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-kubeadm
+
+      - name: Install check metrics
+        run: bash tests/metrics/gha-run.sh install-checkmetrics
+
      - name: enabling the hypervisor
        run: bash tests/metrics/gha-run.sh enabling-hypervisor

@@ -92,3 +99,7 @@ jobs:
          path: results-${{ matrix.vmm }}.tar.gz
          retention-days: 1
          if-no-files-found: error
+
+      - name: Delete kata-deploy
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh cleanup-kubeadm
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -433,8 +433,8 @@ function cleanup() {
 		return
 	fi

-	# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests, 
-	# resulting in the CI being in an unexpected state. So we need kill all running test scripts before cleaning up the node. 
+	# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests,
+	# resulting in the CI being in an unexpected state. So we need kill all running test scripts before cleaning up the node.
 	# See issue https://github.com/kata-containers/kata-containers/issues/9980
 	delete_test_runners	|| true
 	# Switch back to the default namespace and delete the tests one
@@ -594,6 +594,7 @@ function main() {
 		collect-artifacts) collect_artifacts ;;
 		cleanup) cleanup ;;
 		cleanup-kcli) cleanup "kcli" ;;
+		cleanup-kubeadm) cleanup "kubeadm" ;;
 		cleanup-sev) cleanup "sev" ;;
 		cleanup-snp) cleanup "snp" ;;
 		cleanup-tdx) cleanup "tdx" ;;
--- a/tests/metrics/gha-run.sh
+++ b/tests/metrics/gha-run.sh
@@ -121,7 +121,7 @@ function run_test_latency() {
 function main() {
 	action="${1:-}"
 	case "${action}" in
-		install-kata) install_kata && install_checkmetrics ;;
+		install-checkmetrics) install_checkmetrics ;;
 		enabling-hypervisor) enabling_hypervisor ;;
 		make-tarball-results) make_tarball_results ;;
 		run-test-launchtimes) run_test_launchtimes ;;
@@ -132,7 +132,7 @@ function main() {
 		run-test-fio) run_test_fio ;;
 		run-test-iperf) run_test_iperf ;;
 		run-test-latency) run_test_latency ;;
-		*) >&2 die "Invalid argument" ;;
+		*) >&2 die "Invalid argument: ${action}" ;;
 	esac
 }