From 4cd048444d760952cde87c73856cc5a30323553e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sat, 11 May 2024 17:47:42 +0200 Subject: [PATCH 1/2] build: nvidia-gpu: Fix cache usage of the headers tarball MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever we count on having the headers tarball, we must unpack the cached content into the expected directory, otherwise we'd simply fail, as we've been failing in our CI, at the end of the process where we generate the tarball from the cached components. It's weird to me, sincerely, that the headers tarball ends up in such a weird place (build/kernel-nvidia-gpu/builddir/), but I'll leave that to Zvonko to figure out whether something better can be done, as the intent of this PR is simply to unblock Kata Containers CI. Signed-off-by: Fabiano Fidêncio --- .../local-build/kata-deploy-binaries.sh | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index aedd29980c..33fba31863 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -436,15 +436,20 @@ install_cached_kernel_tarball_component() { "${extra_tarballs}" \ || return 1 - if [[ "${kernel_name}" != "kernel"*"-confidential" ]]; then - return 0 - fi + case ${kernel_name} in + "kernel-nvidia-gpu"*"") + local kernel_headers_dir=$(get_kernel_headers_dir "${kernel_name}") + mkdir -p ${kernel_headers_dir} || true + tar xvf ${workdir}/${kernel_name}/builddir/kata-static-${kernel_name}-headers.tar.xz -C "${kernel_headers_dir}" || return 1 + ;;& # fallthrough in the confidential case we need the modules.tar.xz and for every kernel-nvidia-gpu we need the headers + "kernel"*"-confidential") + local modules_dir=$(get_kernel_modules_dir ${kernel_version} 
${kernel_kata_config_version} ${build_target}) + mkdir -p "${modules_dir}" || true + tar xvf "${workdir}/kata-static-${kernel_name}-modules.tar.xz" -C "${modules_dir}" || return 1 + ;; + esac - local modules_dir=$(get_kernel_modules_dir ${kernel_version} ${kernel_kata_config_version} ${build_target}) - mkdir -p "${modules_dir}" || true - tar xvf "${workdir}/kata-static-${kernel_name}-modules.tar.xz" -C "${modules_dir}" && return 0 - - return 1 + return 0 } #Install kernel asset From 9713558477d2fdaa9e4d68cda65c36fb98f64a60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sat, 11 May 2024 23:18:20 +0200 Subject: [PATCH 2/2] k0s: Use a different port for kube-router's metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kube-router decided to use :8080 for its metrics, and this seems to be a change that affected k0s 1.30.0+, leading to kube-router pod crashing all the time and nothing can actually be started after that. Due to this issue, let's simply use a different port (:9999) and move on with our tests. Fixes: #9623 Signed-off-by: Fabiano Fidêncio --- tests/gha-run-k8s-common.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 9971174485..3b3a580ed6 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -193,6 +193,17 @@ function deploy_k0s() { sudo k0s install controller --single ${KUBERNETES_EXTRA_PARAMS:-} + # kube-router decided to use :8080 for its metrics, and this seems + # to be a change that affected k0s 1.30.0+, leading to kube-router + # pod crashing all the time and anything can actually be started + # after that. + # + # Due to this issue, let's simply use a different port (:9999) and + # move on with our tests. 
+ sudo mkdir -p /etc/k0s + k0s config create | sudo tee /etc/k0s/k0s.yaml + sudo sed -i -e "s/metricsPort: 8080/metricsPort: 9999/g" /etc/k0s/k0s.yaml + sudo k0s start # This is an arbitrary value that came up from local tests