From 6f918d71f53eaded766daa3a20d8f78cede3fa11 Mon Sep 17 00:00:00 2001
From: stevenhorsman <steven@uk.ibm.com>
Date: Thu, 27 Feb 2025 11:27:25 +0000
Subject: [PATCH] workflows: Update metrics jobs

Currently the run-metrics job runs a manual install
and does this in a separate job before the metrics
tests run. This doesn't make sense because, if we have multiple
CI runs in parallel (as we often do), there is a high chance
that the setup for another PR runs between the metrics
setup and the runs, meaning the tests are not run against the
correct version of the code. We want to prevent this from happening,
so install (and delete, to clean up) kata as part of the metrics
test jobs.

Also switch to kata-deploy rather than manual install for
simplicity and in order to test what we recommend to users.

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
---
 .github/workflows/ci.yaml               |  5 +-
 .github/workflows/run-metrics.yaml      | 69 ++++++++++++++-----------
 tests/integration/kubernetes/gha-run.sh |  5 +-
 tests/metrics/gha-run.sh                |  4 +-
 4 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index cd6f15259b..ed7a1eecfb 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -288,8 +288,11 @@ jobs:
     needs: build-kata-static-tarball-amd64
     uses: ./.github/workflows/run-metrics.yaml
     with:
-      tarball-suffix: -${{ inputs.tag }}
+      registry: ghcr.io
+      repo: ${{ github.repository_owner }}/kata-deploy-ci
+      tag: ${{ inputs.tag }}-amd64
       commit-hash: ${{ inputs.commit-hash }}
+      pr-number: ${{ inputs.pr-number }}
       target-branch: ${{ inputs.target-branch }}
 
   run-basic-amd64-tests:
diff --git a/.github/workflows/run-metrics.yaml b/.github/workflows/run-metrics.yaml
index be39105e78..d5dbb32b0a 100644
--- a/.github/workflows/run-metrics.yaml
+++ b/.github/workflows/run-metrics.yaml
@@ -2,8 +2,17 @@ name: CI | Run test metrics
 on:
   workflow_call:
     inputs:
-      tarball-suffix:
-        required: false
+      registry:
+        required: true
+        type: string
+      repo:
+        required: true
+        type: string
+      tag:
+        required: true
+        type: string
+      pr-number:
+        required: true
         type: string
       commit-hash:
         required: false
@@ -14,34 +23,7 @@ on:
         default: ""
 
 jobs:
-  setup-kata:
-    name: Kata Setup
-    runs-on: metrics
-    env:
-      GOPATH: ${{ github.workspace }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          ref: ${{ inputs.commit-hash }}
-          fetch-depth: 0
-
-      - name: Rebase atop of the latest target branch
-        run: |
-          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
-        env:
-          TARGET_BRANCH: ${{ inputs.target-branch }}
-
-      - name: get-kata-tarball
-        uses: actions/download-artifact@v4
-        with:
-          name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
-          path: kata-artifacts
-
-      - name: Install kata
-        run: bash tests/metrics/gha-run.sh install-kata kata-artifacts
-
   run-metrics:
-    needs: setup-kata
     strategy:
       # We can set this to true whenever we're 100% sure that
       # the all the tests are not flaky, otherwise we'll fail
@@ -54,7 +36,32 @@ jobs:
     env:
       GOPATH: ${{ github.workspace }}
       KATA_HYPERVISOR: ${{ matrix.vmm }}
+      DOCKER_REGISTRY: ${{ inputs.registry }}
+      DOCKER_REPO: ${{ inputs.repo }}
+      DOCKER_TAG: ${{ inputs.tag }}
+      GH_PR_NUMBER: ${{ inputs.pr-number }}
+      K8S_TEST_HOST_TYPE: "baremetal"
+      USING_NFD: "false"
+      KUBERNETES: kubeadm
     steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.commit-hash }}
+          fetch-depth: 0
+
+      - name: Rebase atop of the latest target branch
+        run: |
+          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
+        env:
+          TARGET_BRANCH: ${{ inputs.target-branch }}
+
+      - name: Deploy Kata
+        timeout-minutes: 10
+        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-kubeadm
+
+      - name: Install check metrics
+        run: bash tests/metrics/gha-run.sh install-checkmetrics
+
       - name: enabling the hypervisor
         run: bash tests/metrics/gha-run.sh enabling-hypervisor
 
@@ -92,3 +99,7 @@ jobs:
           path: results-${{ matrix.vmm }}.tar.gz
           retention-days: 1
           if-no-files-found: error
+
+      - name: Delete kata-deploy
+        if: always()
+        run: bash tests/integration/kubernetes/gha-run.sh cleanup-kubeadm
diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index f5a8084408..9b7e7e45b6 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -433,8 +433,8 @@ function cleanup() {
 		return
 	fi
 
-	# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests, 
-	# resulting in the CI being in an unexpected state. So we need kill all running test scripts before cleaning up the node. 
+	# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests,
+	# resulting in the CI being in an unexpected state. So we need to kill all running test scripts before cleaning up the node.
 	# See issue https://github.com/kata-containers/kata-containers/issues/9980
 	delete_test_runners	|| true
 	# Switch back to the default namespace and delete the tests one
@@ -594,6 +594,7 @@ function main() {
 		collect-artifacts) collect_artifacts ;;
 		cleanup) cleanup ;;
 		cleanup-kcli) cleanup "kcli" ;;
+		cleanup-kubeadm) cleanup "kubeadm" ;;
 		cleanup-sev) cleanup "sev" ;;
 		cleanup-snp) cleanup "snp" ;;
 		cleanup-tdx) cleanup "tdx" ;;
diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh
index 4edf79f028..7fb55df89e 100755
--- a/tests/metrics/gha-run.sh
+++ b/tests/metrics/gha-run.sh
@@ -121,7 +121,7 @@ function run_test_latency() {
 function main() {
 	action="${1:-}"
 	case "${action}" in
-		install-kata) install_kata && install_checkmetrics ;;
+		install-checkmetrics) install_checkmetrics ;;
 		enabling-hypervisor) enabling_hypervisor ;;
 		make-tarball-results) make_tarball_results ;;
 		run-test-launchtimes) run_test_launchtimes ;;
@@ -132,7 +132,7 @@ function main() {
 		run-test-fio) run_test_fio ;;
 		run-test-iperf) run_test_iperf ;;
 		run-test-latency) run_test_latency ;;
-		*) >&2 die "Invalid argument" ;;
+		*) >&2 die "Invalid argument: ${action}" ;;
 	esac
 }