From fa832f4709f6213946c11601ccd895ab01ee8bab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Wed, 17 May 2023 13:38:08 +0200 Subject: [PATCH] gha: k8s: Make the tests more reliable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whether we like it or not, every now and then we'll have to deal with flaky tests, and our tests using GHA are not exempt from that fact. With this simple commit, we're trying to improve the reliability of the tests on a few different fronts: * Giving enough time for the script used by kata-deploy to be executed * We've hit issues as the kata-deploy pod is considered "Ready" at the moment it starts running, not when it finishes the needed setup. We should also be looking into how to solve this on the kata-deploy side but, for now, let's ensure our tests do not break with the current kata-deploy behavior. * Merging the "Deploy kata-deploy" and "Run tests" steps * We've hit issues re-running tests and seeing even more failures than the ones we're trying to debug, as a step is simply treated as succeeded on the re-run if it was successfully executed as part of the first run. This causes issues with the kata-deploy deployment, as the tests would start running before even having the node set up for running Kata Containers. 
Fixes: #6865 #6649 Signed-off-by: Fabiano Fidêncio --- .github/workflows/run-k8s-tests-on-aks.yaml | 11 +++++++---- .github/workflows/run-k8s-tests-on-sev.yaml | 11 +++++++---- .github/workflows/run-k8s-tests-on-snp.yaml | 11 +++++++---- .github/workflows/run-k8s-tests-on-tdx.yaml | 11 +++++++---- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/.github/workflows/run-k8s-tests-on-aks.yaml b/.github/workflows/run-k8s-tests-on-aks.yaml index f9a26debb6..b9886e47e7 100644 --- a/.github/workflows/run-k8s-tests-on-aks.yaml +++ b/.github/workflows/run-k8s-tests-on-aks.yaml @@ -61,7 +61,8 @@ jobs: run: | az aks get-credentials -g "kataCI" -n ${{ github.event.pull_request.number }}-${{ github.event.pull_request.head.sha }}-${{ matrix.vmm }}-amd64 - - name: Deploy kata-deploy + - name: Run tests + timeout-minutes: 30 run: | sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml @@ -72,9 +73,11 @@ jobs: kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml - - name: Run tests - timeout-minutes: 30 - run: | + # This is needed as the kata-deploy pod will be set to "Ready" when it starts running, + # which may cause issues like not having the node properly labeled or the artefacts + # properly deployed when the tests actually start running. 
+ sleep 60s + pushd tests/integration/kubernetes sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml bash run_kubernetes_tests.sh diff --git a/.github/workflows/run-k8s-tests-on-sev.yaml b/.github/workflows/run-k8s-tests-on-sev.yaml index aeafa242e4..98a6db6107 100644 --- a/.github/workflows/run-k8s-tests-on-sev.yaml +++ b/.github/workflows/run-k8s-tests-on-sev.yaml @@ -27,7 +27,8 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - - name: Deploy kata-deploy + - name: Run tests + timeout-minutes: 30 run: | sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml @@ -38,9 +39,11 @@ jobs: kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml - - name: Run tests - timeout-minutes: 30 - run: | + # This is needed as the kata-deploy pod will be set to "Ready" when it starts running, + # which may cause issues like not having the node properly labeled or the artefacts + # properly deployed when the tests actually start running. 
+ sleep 60s + pushd tests/integration/kubernetes sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml bash run_kubernetes_tests.sh diff --git a/.github/workflows/run-k8s-tests-on-snp.yaml b/.github/workflows/run-k8s-tests-on-snp.yaml index 5bc4aea506..541695e0f4 100644 --- a/.github/workflows/run-k8s-tests-on-snp.yaml +++ b/.github/workflows/run-k8s-tests-on-snp.yaml @@ -27,7 +27,8 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - - name: Deploy kata-deploy + - name: Run tests + timeout-minutes: 30 run: | sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml @@ -38,9 +39,11 @@ jobs: kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml - - name: Run tests - timeout-minutes: 30 - run: | + # This is needed as the kata-deploy pod will be set to "Ready" when it starts running, + # which may cause issues like not having the node properly labeled or the artefacts + # properly deployed when the tests actually start running. 
+ sleep 60s + pushd tests/integration/kubernetes sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml bash run_kubernetes_tests.sh diff --git a/.github/workflows/run-k8s-tests-on-tdx.yaml b/.github/workflows/run-k8s-tests-on-tdx.yaml index 1777a16c80..0b74ecc7bd 100644 --- a/.github/workflows/run-k8s-tests-on-tdx.yaml +++ b/.github/workflows/run-k8s-tests-on-tdx.yaml @@ -27,7 +27,8 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - - name: Deploy kata-deploy + - name: Run tests + timeout-minutes: 30 run: | sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${{ inputs.registry }}/${{ inputs.repo }}:${{ inputs.tag }}|g" tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml cat tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml @@ -38,9 +39,11 @@ jobs: kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod kubectl apply -f tools/packaging/kata-deploy/runtimeclasses/kata-runtimeClasses.yaml - - name: Run tests - timeout-minutes: 30 - run: | + # This is needed as the kata-deploy pod will be set to "Ready" when it starts running, + # which may cause issues like not having the node properly labeled or the artefacts + # properly deployed when the tests actually start running. + sleep 60s + pushd tests/integration/kubernetes sed -i -e 's|runtimeClassName: kata|runtimeClassName: kata-${{ matrix.vmm }}|' runtimeclass_workloads/*.yaml bash run_kubernetes_tests.sh