From 89bef7d0366555a7011c555428a6bbfd1ba1d1f9 Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Wed, 9 Aug 2023 16:05:13 -0300
Subject: [PATCH 01/14] ci: k8s: create k8s clusters with kcli

Adapted the gha-run.sh script to create a Kubernetes cluster locally
using the kcli tool. Use `./gha-run.sh create-cluster-kcli` to create
it, and `./gha-run.sh delete-cluster-kcli` to delete it.

Fixes #7620
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/gha-run-k8s-common.sh             | 43 +++++++++++++++++++++++++
 tests/integration/kubernetes/gha-run.sh |  2 ++
 2 files changed, 45 insertions(+)

diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh
index 44b02f32ea..4f7de35e14 100644
--- a/tests/gha-run-k8s-common.sh
+++ b/tests/gha-run-k8s-common.sh
@@ -107,6 +107,11 @@ function delete_cluster() {
         --yes
 }
 
+function delete_cluster_kcli() {
+    CLUSTER_NAME="${CLUSTER_NAME:-kata-k8s}"
+    kcli delete -y kube "$CLUSTER_NAME"
+}
+
 function get_nodes_and_pods_info() {
     kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true
     kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true
@@ -165,6 +170,44 @@ function deploy_k3s() {
     cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
 }
 
+function create_cluster_kcli() {
+    CLUSTER_NAME="${CLUSTER_NAME:-kata-k8s}"
+
+    delete_cluster_kcli || true
+
+    kcli create kube "${KUBE_TYPE:-generic}" \
+        -P domain="kata.com" \
+        -P pool="${LIBVIRT_POOL:-default}" \
+        -P ctlplanes="${CLUSTER_CONTROL_NODES:-1}" \
+        -P workers="${CLUSTER_WORKERS:-1}" \
+        -P network="${LIBVIRT_NETWORK:-default}" \
+        -P image="${CLUSTER_IMAGE:-ubuntu2004}" \
+        -P sdn=flannel \
+        -P nfs=false \
+        -P disk_size="${CLUSTER_DISK_SIZE:-20}" \
+        "${CLUSTER_NAME}"
+
+    export KUBECONFIG="$HOME/.kcli/clusters/$CLUSTER_NAME/auth/kubeconfig"
+
+    local cmd="kubectl get nodes | grep '.*worker.*\<Ready\>'"
+    echo "Wait for at least one worker to be Ready"
+    if ! waitForProcess "330" "30" "$cmd"; then
+        echo "ERROR: worker nodes not ready."
+        kubectl get nodes
+        return 1
+    fi
+
+    # Ensure that system pods are running or completed.
+    cmd="[ \$(kubectl get pods -A --no-headers | grep -v 'Running\|Completed' | wc -l) -eq 0 ]"
+    echo "Wait for system pods to be Running or Completed"
+    if ! waitForProcess "90" "30" "$cmd"; then
+        echo "ERROR: not all pods are Running or Completed."
+        kubectl get pods -A
+        kubectl get pods -A
+        return 1
+    fi
+}
+
 function deploy_rke2() {
     curl -sfL https://get.rke2.io | sudo sh -
 
diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index bfde44342c..a53b0fcf62 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -214,6 +214,7 @@ function main() {
         install-azure-cli) install_azure_cli ;;
         login-azure) login_azure ;;
         create-cluster) create_cluster ;;
+        create-cluster-kcli) create_cluster_kcli ;;
         configure-snapshotter) configure_snapshotter ;;
         setup-crio) setup_crio ;;
         deploy-k8s) deploy_k8s ;;
@@ -231,6 +232,7 @@ function main() {
         cleanup-tdx) cleanup "tdx" ;;
         cleanup-garm) cleanup "garm" ;;
         delete-cluster) cleanup "aks" ;;
+        delete-cluster-kcli) delete_cluster_kcli ;;
         *) >&2 echo "Invalid argument"; exit 2 ;;
     esac
 }
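For a local run, the knobs read by create_cluster_kcli() above can be set
per invocation. A hypothetical example follows (the variable names come
from the function above; the values shown are illustrative, and unset
variables fall back to the defaults in the function):

    # Create a cluster with two workers on an illustrative libvirt network
    CLUSTER_WORKERS=2 CLUSTER_IMAGE=ubuntu2004 LIBVIRT_NETWORK=default \
        ./gha-run.sh create-cluster-kcli

    # Once done, tear the cluster down
    ./gha-run.sh delete-cluster-kcli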
From cbb9aa15b6d21e2c98b0e2b60dbf0359ae3c765c Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Wed, 9 Aug 2023 16:22:19 -0300
Subject: [PATCH 02/14] ci: k8s: set default image for deploy_kata()

On CI workflows the variables DOCKER_REGISTRY, DOCKER_REPO and
DOCKER_TAG are exported to match the built image. However, when running
the script outside of the CI context, a developer will likely just want
the latest image, which in this case is
`quay.io/kata-containers/kata-deploy-ci:kata-containers-latest`.

Fixes #7620
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/gha-run.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index a53b0fcf62..87436a7a62 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -12,6 +12,10 @@ kubernetes_dir="$(dirname "$(readlink -f "$0")")"
 source "${kubernetes_dir}/../../gha-run-k8s-common.sh"
 tools_dir="${repo_root_dir}/tools"
 
+DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io}
+DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci}
+DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest}
+
 function configure_devmapper() {
     sudo mkdir -p /var/lib/containerd/devmapper
     sudo truncate --size 10G /var/lib/containerd/devmapper/data-disk.img

From d2be8eef1a43f285e4821a9807b509644ad78168 Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Wed, 9 Aug 2023 16:38:58 -0300
Subject: [PATCH 03/14] ci: k8s: add cleanup-kcli() to gha-run.sh

The cleanup-kcli() command behaves like the other bare-metal cleanups
(e.g. sev, tdx, etc.), except that KUBECONFIG should be exported.

Fixes #7620
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/gha-run.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index 87436a7a62..30b542a294 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -166,6 +166,9 @@ function cleanup() {
     test_type="${2:-k8s}"
     ensure_yq
 
+    [ "$platform" = "kcli" ] && \
+        export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig"
+
     echo "Gather information about the nodes and pods before cleaning up the node"
     get_nodes_and_pods_info
 
@@ -231,6 +234,7 @@ function main() {
         deploy-kata-tdx) deploy_kata "tdx" ;;
         deploy-kata-garm) deploy_kata "garm" ;;
         run-tests) run_tests ;;
+        cleanup-kcli) cleanup "kcli" ;;
         cleanup-sev) cleanup "sev" ;;
         cleanup-snp) cleanup "snp" ;;
         cleanup-tdx) cleanup "tdx" ;;
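The defaults from the previous patch can be overridden per invocation on
a developer's machine, since the cleanup path consumes the same
variables. A hypothetical example (the repo and tag values here are
illustrative, not real images):

    DOCKER_REGISTRY=quay.io DOCKER_REPO=myuser/kata-deploy-ci \
        DOCKER_TAG=my-dev-build ./gha-run.sh cleanup-kcli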
From c2ef1f0fb089c5cce3e2c126b6c544e7e8166a58 Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Wed, 9 Aug 2023 18:18:10 -0300
Subject: [PATCH 04/14] ci: k8s: add deploy-kata-kcli() to gha-run.sh

The deploy-kata-kcli() command behaves like the other bare-metal
deploy-kata variants (e.g. sev, tdx, etc.), except that KUBECONFIG
should be exported.

Fixes #7620
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/gha-run.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index 30b542a294..ac016e4df7 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -95,7 +95,10 @@ function deploy_kata() {
     platform="${1}"
     ensure_yq
 
-    # Emsure we're in the default namespace
+    [ "$platform" = "kcli" ] && \
+        export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig"
+
+    # Ensure we're in the default namespace
     kubectl config set-context --current --namespace=default
 
     sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
@@ -229,6 +232,7 @@ function main() {
         install-kubectl) install_kubectl ;;
         get-cluster-credentials) get_cluster_credentials ;;
         deploy-kata-aks) deploy_kata "aks" ;;
+        deploy-kata-kcli) deploy_kata "kcli" ;;
         deploy-kata-sev) deploy_kata "sev" ;;
         deploy-kata-snp) deploy_kata "snp" ;;
         deploy-kata-tdx) deploy_kata "tdx" ;;

From d54e6d9cdaae6fff1bbb04719f0383be880484ab Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Thu, 10 Aug 2023 19:28:58 -0300
Subject: [PATCH 05/14] ci: k8s: run_tests() for kcli

The only difference from the other platforms is that it needs to export
KUBECONFIG.

Fixes #7620
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/gha-run.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index ac016e4df7..7d368fd0f2 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -151,6 +151,11 @@ function run_tests() {
+    platform="${1:-}"
+
+    [ "$platform" = "kcli" ] && \
+        export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig"
+
     # Delete any spurious tests namespace that was left behind
     kubectl delete namespace kata-containers-k8s-tests &> /dev/null || true
 
@@ -238,6 +243,7 @@ function main() {
         deploy-kata-tdx) deploy_kata "tdx" ;;
         deploy-kata-garm) deploy_kata "garm" ;;
         run-tests) run_tests ;;
+        run-tests-kcli) run_tests "kcli" ;;
         cleanup-kcli) cleanup "kcli" ;;
         cleanup-sev) cleanup "sev" ;;
         cleanup-snp) cleanup "snp" ;;
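Taken together, patches 01-05 enable a full local pass; a rough sketch of
that flow (all entry points are the ones added above, and each of them
exports KUBECONFIG internally):

    ./gha-run.sh create-cluster-kcli
    ./gha-run.sh deploy-kata-kcli
    ./gha-run.sh run-tests-kcli
    ./gha-run.sh cleanup-kcli
    ./gha-run.sh delete-cluster-kcli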
From 4af78be13aa276cfadd4f4b859f62bdf45e5bac7 Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Wed, 9 Aug 2023 18:40:03 -0300
Subject: [PATCH 06/14] kata-deploy: re-format kata-[deploy|cleanup].yaml

The tests/integration/kubernetes/gha-run.sh script runs `yq write` a
couple of times to edit kata-[deploy|cleanup].yaml, resulting in the
files being re-formatted. This is annoying because it leaves the git
tree dirty. So commit both files already in the format that `yq`
emits, so that running the script no longer changes them.

Signed-off-by: Wainer dos Santos Moschetta
---
 .../kata-cleanup/base/kata-cleanup.yaml       | 52 ++++++------
 .../kata-deploy/base/kata-deploy.yaml         | 76 +++++++++----------
 2 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
index 3d9006572e..7cb8756891 100644
--- a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
+++ b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml
@@ -6,39 +6,39 @@ metadata:
   namespace: kube-system
 spec:
   selector:
-      matchLabels:
-        name: kubelet-kata-cleanup
+    matchLabels:
+      name: kubelet-kata-cleanup
   template:
     metadata:
-        labels:
-          name: kubelet-kata-cleanup
+      labels:
+        name: kubelet-kata-cleanup
     spec:
       serviceAccountName: kata-deploy-sa
       hostPID: true
      nodeSelector:
-          katacontainers.io/kata-runtime: cleanup
+        katacontainers.io/kata-runtime: cleanup
       containers:
-        - name: kube-kata-cleanup
-          image: quay.io/kata-containers/kata-deploy:latest
-          imagePullPolicy: Always
-          command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset" ]
-          env:
-            - name: NODE_NAME
-              valueFrom:
-                fieldRef:
-                  fieldPath: spec.nodeName
-            - name: DEBUG
-              value: "false"
-            - name: SHIMS
-              value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu"
-            - name: DEFAULT_SHIM
-              value: "qemu"
-            - name: CREATE_RUNTIMECLASSES
-              value: "false"
-            - name: CREATE_DEFAULT_RUNTIMECLASS
-              value: "false"
-          securityContext:
-            privileged: true
+      - name: kube-kata-cleanup
+        image: quay.io/kata-containers/kata-deploy:latest
+        imagePullPolicy: Always
+        command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset"]
+        env:
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+        - name: DEBUG
+          value: "false"
+        - name: SHIMS
+          value: "clh dragonball fc qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx qemu"
+        - name: DEFAULT_SHIM
+          value: "qemu"
+        - name: CREATE_RUNTIMECLASSES
+          value: "false"
+        - name: CREATE_DEFAULT_RUNTIMECLASS
+          value: "false"
+        securityContext:
+          privileged: true
   updateStrategy:
     rollingUpdate:
       maxUnavailable: 1
diff --git a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
index c10061d902..3b4e8888a3 100644
--- a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
+++ b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml
@@ -6,50 +6,50 @@ metadata:
   namespace: kube-system
 spec:
   selector:
-      matchLabels:
-        name: kata-deploy
+    matchLabels:
+      name: kata-deploy
   template:
     metadata:
-        labels:
-          name: kata-deploy
+      labels:
+        name: kata-deploy
     spec:
       serviceAccountName: kata-deploy-sa
       hostPID: true
       containers:
-        - name: kube-kata
-          image: quay.io/kata-containers/kata-deploy:latest
-          imagePullPolicy: Always
-          lifecycle:
-            preStop:
-              exec:
-                command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"]
-          command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install" ]
-          env:
-            - name: NODE_NAME
-              valueFrom:
-                fieldRef:
-                  fieldPath: spec.nodeName
-            - name: DEBUG
-              value: "false"
-            - name: SHIMS
-              value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx"
-            - name: DEFAULT_SHIM
-              value: "qemu"
-            - name: CREATE_RUNTIMECLASSES
-              value: "false"
-            - name: CREATE_DEFAULT_RUNTIMECLASS
-              value: "false"
-          securityContext:
-            privileged: true
-          volumeMounts:
-            - name: crio-conf
-              mountPath: /etc/crio/
-            - name: containerd-conf
-              mountPath: /etc/containerd/
-            - name: kata-artifacts
-              mountPath: /opt/kata/
-            - name: local-bin
-              mountPath: /usr/local/bin/
+      - name: kube-kata
+        image: quay.io/kata-containers/kata-deploy:latest
+        imagePullPolicy: Always
+        lifecycle:
+          preStop:
+            exec:
+              command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"]
+        command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install"]
+        env:
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+        - name: DEBUG
+          value: "false"
+        - name: SHIMS
+          value: "clh dragonball fc qemu qemu-nvidia-gpu qemu-sev qemu-snp qemu-tdx"
+        - name: DEFAULT_SHIM
+          value: "qemu"
+        - name: CREATE_RUNTIMECLASSES
+          value: "false"
+        - name: CREATE_DEFAULT_RUNTIMECLASS
+          value: "false"
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: crio-conf
+          mountPath: /etc/crio/
+        - name: containerd-conf
+          mountPath: /etc/containerd/
+        - name: kata-artifacts
+          mountPath: /opt/kata/
+        - name: local-bin
+          mountPath: /usr/local/bin/
       volumes:
       - name: crio-conf
         hostPath:

From 200e542921a87f9f99d10c9bfce749d53917edc3 Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Thu, 10 Aug 2023 10:41:57 -0300
Subject: [PATCH 07/14] ci: k8s: shellcheck fixes to gha-run.sh

Fixed a couple of warnings shellcheck emitted and disabled others:
* SC2154 (var is referenced but not assigned)
* SC2086 (Double quote to prevent globbing and word splitting)

Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/gha-run.sh | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index 7d368fd0f2..05a4a74c1e 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -10,6 +10,7 @@ set -o pipefail
 
 kubernetes_dir="$(dirname "$(readlink -f "$0")")"
 source "${kubernetes_dir}/../../gha-run-k8s-common.sh"
+# shellcheck disable=2154
 tools_dir="${repo_root_dir}/tools"
 
 DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io}
@@ -121,7 +122,7 @@ function deploy_kata() {
 
     echo "::group::Final kata-deploy.yaml that is used in the test"
     cat "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml"
-    cat "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" | grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" || die "Failed to setup the tests image"
+    grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" || die "Failed to setup the tests image"
     echo "::endgroup::"
 
     kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml"
@@ -160,7 +161,7 @@ function run_tests() {
     kubectl delete namespace kata-containers-k8s-tests &> /dev/null || true
 
     # Create a new namespace for the tests and switch to it
-    kubectl apply -f ${kubernetes_dir}/runtimeclass_workloads/tests-namespace.yaml
+    kubectl apply -f "${kubernetes_dir}/runtimeclass_workloads/tests-namespace.yaml"
     kubectl config set-context --current --namespace=kata-containers-k8s-tests
 
     pushd "${kubernetes_dir}"
@@ -197,6 +198,7 @@ function cleanup() {
         cleanup_spec="-f "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml""
     fi
 
+    # shellcheck disable=2086
     kubectl delete ${deploy_spec}
     kubectl -n kube-system wait --timeout=10m --for=delete -l name=kata-deploy pod
 
@@ -211,10 +213,12 @@ function cleanup() {
     sed -i -e "s|quay.io/kata-containers/kata-deploy:latest|${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}|g" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml"
"${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" - cat "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" | grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" || die "Failed to setup the tests image" + grep "${DOCKER_REGISTRY}/${DOCKER_REPO}:${DOCKER_TAG}" "${tools_dir}/packaging/kata-deploy/kata-cleanup/base/kata-cleanup.yaml" || die "Failed to setup the tests image" + # shellcheck disable=2086 kubectl apply ${cleanup_spec} sleep 180s + # shellcheck disable=2086 kubectl delete ${cleanup_spec} kubectl delete -f "${tools_dir}/packaging/kata-deploy/kata-rbac/base/kata-rbac.yaml" } From 6677a61fe410d287309f58427150b8237928a8da Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 14:30:54 -0300 Subject: [PATCH 08/14] ci: k8s: configurable deploy kata timeout The deploy-kata() of gha-run.sh will wait for 10 minutes for the kata deploy installation finish. This allow users of the script to overwrite that value by exporting the KATA_DEPLOY_WAIT_TIMEOUT environment variable. Fixes #7620 Signed-off-by: Wainer dos Santos Moschetta --- tests/integration/kubernetes/gha-run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index 05a4a74c1e..51112f1339 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -16,6 +16,7 @@ tools_dir="${repo_root_dir}/tools" DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci} DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest} +KATA_DEPLOY_WAIT_TIMEOUT=${KATA_DEPLOY_WAIT_TIMEOUT:-10m} function configure_devmapper() { sudo mkdir -p /var/lib/containerd/devmapper @@ -131,7 +132,7 @@ function deploy_kata() { else kubectl apply -f "${tools_dir}/packaging/kata-deploy/kata-deploy/base/kata-deploy.yaml" fi - kubectl -n kube-system wait --timeout=10m --for=condition=Ready -l name=kata-deploy pod + kubectl -n kube-system wait --timeout="${KATA_DEPLOY_WAIT_TIMEOUT}" --for=condition=Ready -l name=kata-deploy pod # This is needed as the kata-deploy pod will be set to "Ready" when it starts running, # which may cause issues like not having the node properly labeled or the artefacts From 68f083c4d082c3fe4f5585a46eb4d722171e44ee Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 10 Aug 2023 15:31:29 -0300 Subject: [PATCH 09/14] ci: k8s: set KATA_HYPERVISOR default value Let KATA_HYPERVISOR be qemu by default in gh-run.sh as this variable is required to tweak some configurations of kata-deploy. 
From 68f083c4d082c3fe4f5585a46eb4d722171e44ee Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Thu, 10 Aug 2023 15:31:29 -0300
Subject: [PATCH 09/14] ci: k8s: set KATA_HYPERVISOR default value

Let KATA_HYPERVISOR be qemu by default in gha-run.sh, as this variable
is required to tweak some configurations of kata-deploy.

Fixes #7620
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/gha-run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index 51112f1339..450f331c0d 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -17,6 +17,7 @@ DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io}
 DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci}
 DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest}
 KATA_DEPLOY_WAIT_TIMEOUT=${KATA_DEPLOY_WAIT_TIMEOUT:-10m}
+KATA_HYPERVISOR=${KATA_HYPERVISOR:-qemu}
 
 function configure_devmapper() {
     sudo mkdir -p /var/lib/containerd/devmapper

From 61c9c17bff5511dc419701398bf77aab0eb79b9f Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Thu, 10 Aug 2023 17:30:22 -0300
Subject: [PATCH 10/14] tests: add get_one_kata_node() to tests_common.sh

The introduced get_one_kata_node() returns the first node that has the
kata-runtime=true label, i.e., supposedly a node with kata installed.
This is useful for tests that should run on a specific worker node in a
multi-node cluster.

Fixes #7619
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/tests_common.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh
index fa3e77a152..618920260b 100644
--- a/tests/integration/kubernetes/tests_common.sh
+++ b/tests/integration/kubernetes/tests_common.sh
@@ -38,6 +38,14 @@ get_pod_config_dir() {
     info "k8s configured to use runtimeclass"
 }
 
+# Return the first worker found that is kata-runtime labeled.
+get_one_kata_node() {
+    local resource_name
+    resource_name="$(kubectl get node -l katacontainers.io/kata-runtime=true -o name | head -1)"
+    # Remove the leading "node/" prefix
+    echo "${resource_name/"node/"}"
+}
+
 # Runs a command in the host filesystem.
 exec_host() {
     node="$(kubectl get node -o name)"
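A quick sanity check of the new helper from within a test (the printed
node name is illustrative and depends on the cluster):

    node="$(get_one_kata_node)"
    echo "$node"    # e.g. prints "kata-k8s-worker-0", without the "node/" prefix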
From 3a00fc910122b024ef7e5775ec2793976ad56102 Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Thu, 10 Aug 2023 17:48:25 -0300
Subject: [PATCH 11/14] tests: exec_host() now gets the node name

The exec_host() simply fails on clusters with multiple nodes because
`kubectl get node -o name` will return a list of names. Moreover, it
will return control plane node names, which usually don't have kata
installed.

Fixes #7619
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/tests_common.sh | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh
index 618920260b..922286ada5 100644
--- a/tests/integration/kubernetes/tests_common.sh
+++ b/tests/integration/kubernetes/tests_common.sh
@@ -47,10 +47,14 @@ get_one_kata_node() {
 }
 
 # Runs a command in the host filesystem.
+#
+# Parameters:
+#    $1 - the node name
+#
 exec_host() {
-    node="$(kubectl get node -o name)"
+    node="$1"
     # `kubectl debug` always returns 0, so we hack it to return the right exit code.
-    command="$@"
+    command="${@:2}"
     command+='; echo -en \\n$?'
     # We're trimming the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
     # tl;dr: When testing with CRI-O we're facing the following error:
     # ```
     # (from function `exec_host` in file `tests_common.sh', line 51,
     # in test file `k8s-file-volume.bats', line 25)
     # `exec_host "echo "$file_body" > $tmp_file"' failed with status 127
     # [bats-exec-test:38] INFO: k8s configured to use runtimeclass
     # bash: line 1: $'\r': command not found
     # ```
-    output="$(kubectl debug -qit "${node}" --image=alpine:latest -- chroot /host bash -c "${command}" | tr -d '\r')"
+    output="$(kubectl debug -qit "node/${node}" --image=alpine:latest -- chroot /host bash -c "${command}" | tr -d '\r')"
     kubectl get pods -o name | grep node-debugger | xargs kubectl delete > /dev/null
     exit_code="$(echo "${output}" | tail -1)"
     echo "$(echo "${output}" | head -n -1)"

From 666993da8d75c0c268304442bca82e33ea3e956f Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Thu, 10 Aug 2023 18:42:19 -0300
Subject: [PATCH 12/14] tests: run k8s-file-volume on a given node

This test can give a false positive on a multi-node cluster. Changed it
to use the new get_one_kata_node() and the modified exec_host() to run
the setup commands on a given node (that has kata installed) and ensure
the test pod is scheduled on that same node.

Fixes #7619
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/k8s-file-volume.bats        | 8 +++++---
 .../runtimeclass_workloads/pod-file-volume.yaml          | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/integration/kubernetes/k8s-file-volume.bats b/tests/integration/kubernetes/k8s-file-volume.bats
index d849db2edd..37ccd85f83 100644
--- a/tests/integration/kubernetes/k8s-file-volume.bats
+++ b/tests/integration/kubernetes/k8s-file-volume.bats
@@ -14,7 +14,8 @@ setup() {
     [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}"
     pod_name="test-file-volume"
     container_name="busybox-file-volume-container"
-    tmp_file=$(exec_host mktemp /tmp/file-volume-test-foo.XXXXX)
+    node="$(get_one_kata_node)"
+    tmp_file=$(exec_host "$node" mktemp /tmp/file-volume-test-foo.XXXXX)
     mount_path="/tmp/foo.txt"
     file_body="test"
     get_pod_config_dir
@@ -22,11 +23,12 @@ setup() {
 
 @test "Test readonly volume for pods" {
     # Write test body to temp file
-    exec_host "echo "$file_body" > $tmp_file"
+    exec_host "$node" "echo "$file_body" > $tmp_file"
 
     # Create test yaml
     sed -e "s|HOST_FILE|$tmp_file|" ${pod_config_dir}/pod-file-volume.yaml > ${pod_config_dir}/test-pod-file-volume.yaml
     sed -i "s|MOUNT_PATH|$mount_path|" ${pod_config_dir}/test-pod-file-volume.yaml
+    sed -i "s|NODE|$node|" ${pod_config_dir}/test-pod-file-volume.yaml
 
     # Create pod
     kubectl create -f "${pod_config_dir}/test-pod-file-volume.yaml"
@@ -43,6 +45,6 @@ teardown() {
     [ "${KATA_HYPERVISOR}" == "firecracker" ] && skip "test not working see: ${fc_limitations}"
     [ "${KATA_HYPERVISOR}" == "fc" ] && skip "test not working see: ${fc_limitations}"
     kubectl delete pod "$pod_name"
-    exec_host rm -f $tmp_file
+    exec_host "$node" rm -f $tmp_file
     rm -f ${pod_config_dir}/test-pod-file-volume.yaml
 }
diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml
index 4784b1477c..e7a194f42a 100644
--- a/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml
+++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-file-volume.yaml
@@ -11,6 +11,7 @@ spec:
   terminationGracePeriodSeconds: 0
   runtimeClassName: kata
   restartPolicy: Never
+  nodeName: NODE
   volumes:
   - name: shared-file
     hostPath:
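The node-pinning pattern introduced here, sketched end to end (all names
come from the patch above):

    node="$(get_one_kata_node)"
    sed -i "s|NODE|$node|" "${pod_config_dir}/test-pod-file-volume.yaml"
    kubectl create -f "${pod_config_dir}/test-pod-file-volume.yaml"
    # The rendered spec now carries "nodeName: <node>", so the pod is
    # scheduled exactly where exec_host() created the temporary file.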
From c30c3ff1853c8d8ccb86240fd7fb6fd67d9021a0 Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Thu, 10 Aug 2023 18:45:04 -0300
Subject: [PATCH 13/14] tests: run k8s-volume on a given node

This test can give a false positive on a multi-node cluster. Changed it
to use the new get_one_kata_node() and the modified exec_host() to run
the setup commands on a given node (that has kata installed) and ensure
the test pod is scheduled on that same node.

Fixes #7619
Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/k8s-volume.bats | 19 ++++++++++++-------
 .../runtimeclass_workloads/pv-pod.yaml       |  1 +
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tests/integration/kubernetes/k8s-volume.bats b/tests/integration/kubernetes/k8s-volume.bats
index 705794443c..7bb69f95d1 100644
--- a/tests/integration/kubernetes/k8s-volume.bats
+++ b/tests/integration/kubernetes/k8s-volume.bats
@@ -15,13 +15,17 @@ setup() {
 
     get_pod_config_dir
 
-    tmp_file=$(exec_host mktemp -d /tmp/data.XXXX)
+    node=$(get_one_kata_node)
+    tmp_file=$(exec_host "$node" mktemp -d /tmp/data.XXXX)
+    pv_yaml=$(mktemp --tmpdir pv_config.XXXXXX.yaml)
     pod_yaml=$(mktemp --tmpdir pod_config.XXXXXX.yaml)
     msg="Hello from Kubernetes"
-    exec_host "echo $msg > $tmp_file/index.html"
+    exec_host "$node" "echo $msg > $tmp_file/index.html"
     pod_name="pv-pod"
     # Define temporary file at yaml
-    sed -e "s|tmp_data|${tmp_file}|g" ${pod_config_dir}/pv-volume.yaml > "$pod_yaml"
+    sed -e "s|tmp_data|${tmp_file}|g" ${pod_config_dir}/pv-volume.yaml > "$pv_yaml"
+    sed -e "s|NODE|${node}|g" "${pod_config_dir}/pv-pod.yaml" > "$pod_yaml"
+
 }
 
 @test "Create Persistent Volume" {
@@ -31,7 +35,7 @@
     volume_claim="pv-claim"
 
     # Create the persistent volume
-    kubectl create -f "$pod_yaml"
+    kubectl create -f "$pv_yaml"
 
     # Check the persistent volume is Available
     cmd="kubectl get pv $volume_name | grep Available"
@@ -45,7 +49,7 @@
     waitForProcess "$wait_time" "$sleep_time" "$cmd"
 
     # Create pod
-    kubectl create -f "${pod_config_dir}/pv-pod.yaml"
+    kubectl create -f "$pod_yaml"
 
     # Check pod creation
     kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name"
@@ -62,8 +66,9 @@ teardown() {
     kubectl describe "pod/$pod_name"
 
     kubectl delete pod "$pod_name"
+    rm -f "$pod_yaml"
     kubectl delete pvc "$volume_claim"
     kubectl delete pv "$volume_name"
-    rm -f "$pod_yaml"
-    exec_host rm -rf "$tmp_file"
+    rm -f "$pv_yaml"
+    exec_host "$node" rm -rf "$tmp_file"
 }
diff --git a/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml
index 6a165b9712..c3686a981f 100644
--- a/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml
+++ b/tests/integration/kubernetes/runtimeclass_workloads/pv-pod.yaml
@@ -10,6 +10,7 @@ metadata:
 spec:
   terminationGracePeriodSeconds: 0
   runtimeClassName: kata
+  nodeName: NODE
   volumes:
   - name: pv-storage
     persistentVolumeClaim:
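To verify the pinning took effect, the scheduled node can be compared
against the helper's output; a sketch ("pv-pod" is the pod created by
the test above):

    node="$(get_one_kata_node)"
    kubectl get pod pv-pod -o jsonpath='{.spec.nodeName}'
    # Expected to print the same name stored in "$node".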
From e669282c25f3a928fe51e251079475a40b96cd9c Mon Sep 17 00:00:00 2001
From: Wainer dos Santos Moschetta
Date: Wed, 20 Sep 2023 13:36:34 -0300
Subject: [PATCH 14/14] ci: k8s: set KUBERNETES default value

The KUBERNETES variable is mostly used by kata-deploy to decide whether
to apply k3s-specific deployments or not. It selects the type of
Kubernetes to be installed (k3s, k0s, rancher, etc.) and is always set
on CI. When running the script locally, we want a default value to
avoid `KUBERNETES: unbound variable` errors.

Signed-off-by: Wainer dos Santos Moschetta
---
 tests/integration/kubernetes/gha-run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index 450f331c0d..2c18c66e87 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -18,6 +18,7 @@ DOCKER_REPO=${DOCKER_REPO:-kata-containers/kata-deploy-ci}
 DOCKER_TAG=${DOCKER_TAG:-kata-containers-latest}
 KATA_DEPLOY_WAIT_TIMEOUT=${KATA_DEPLOY_WAIT_TIMEOUT:-10m}
 KATA_HYPERVISOR=${KATA_HYPERVISOR:-qemu}
+KUBERNETES="${KUBERNETES:-}"
 
 function configure_devmapper() {
     sudo mkdir -p /var/lib/containerd/devmapper