diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index ed7a1eecfb..b78f34295d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -283,6 +283,18 @@ jobs:
       pr-number: ${{ inputs.pr-number }}
       target-branch: ${{ inputs.target-branch }}
 
+  run-kata-deploy-tests:
+    if: ${{ inputs.skip-test != 'yes' }}
+    needs: [publish-kata-deploy-payload-amd64]
+    uses: ./.github/workflows/run-kata-deploy-tests.yaml
+    with:
+      registry: ghcr.io
+      repo: ${{ github.repository_owner }}/kata-deploy-ci
+      tag: ${{ inputs.tag }}-amd64
+      commit-hash: ${{ inputs.commit-hash }}
+      pr-number: ${{ inputs.pr-number }}
+      target-branch: ${{ inputs.target-branch }}
+
   run-metrics-tests:
     if: ${{ inputs.skip-test != 'yes' }}
     needs: build-kata-static-tarball-amd64
diff --git a/.github/workflows/run-kata-deploy-tests-on-garm.yaml b/.github/workflows/run-kata-deploy-tests.yaml
similarity index 80%
rename from .github/workflows/run-kata-deploy-tests-on-garm.yaml
rename to .github/workflows/run-kata-deploy-tests.yaml
index ce2ebf5403..2bd73d4b89 100644
--- a/.github/workflows/run-kata-deploy-tests-on-garm.yaml
+++ b/.github/workflows/run-kata-deploy-tests.yaml
@@ -1,4 +1,4 @@
-name: CI | Run kata-deploy tests on GARM
+name: CI | Run kata-deploy tests
 on:
   workflow_call:
     inputs:
@@ -28,18 +28,13 @@ jobs:
       fail-fast: false
       matrix:
         vmm:
-          - clh
           - qemu
         k8s:
           - k0s
           - k3s
           - rke2
           - microk8s
-    # TODO: There are a couple of vmm/k8s combination failing (https://github.com/kata-containers/kata-containers/issues/9854)
-    # and we will put the entire kata-deploy-tests on GARM on maintenance.
-    # TODO: Transition to free runner (see #9940).
-    if: false
-    runs-on: garm-ubuntu-2004-smaller
+    runs-on: ubuntu-22.04
     env:
       DOCKER_REGISTRY: ${{ inputs.registry }}
       DOCKER_REPO: ${{ inputs.repo }}
diff --git a/tests/functional/kata-deploy/gha-run.sh b/tests/functional/kata-deploy/gha-run.sh
index 744e73f1c1..f88c4f2e7d 100755
--- a/tests/functional/kata-deploy/gha-run.sh
+++ b/tests/functional/kata-deploy/gha-run.sh
@@ -24,9 +24,9 @@ function cleanup_runtimeclasses() {
     # Cleanup any runtime class that was left behind in the cluster, in
     # case of a test failure, apart from the default one that comes from
     # AKS
-    for rc in `kubectl get runtimeclass -o name | grep -v "kata-mshv-vm-isolation" | sed 's|runtimeclass.node.k8s.io/||'`
+    for rc in $(kubectl get runtimeclass -o name | grep -v "kata-mshv-vm-isolation" | sed 's|runtimeclass.node.k8s.io/||')
     do
-        kubectl delete runtimeclass $rc;
+        kubectl delete runtimeclass "${rc}";
     done
 }
 
@@ -36,8 +36,8 @@ function cleanup() {
 
     cleanup_runtimeclasses || true
 
-    if [ "${platform}" = "aks" ]; then
-        delete_cluster ${test_type}
+    if [[ "${platform}" = "aks" ]]; then
+        delete_cluster "${test_type}"
     fi
 }
 
@@ -45,7 +45,7 @@ function main() {
     export KATA_HOST_OS="${KATA_HOST_OS:-}"
     platform="aks"
 
-    if [ "${KATA_HYPERVISOR}" = "qemu-tdx" ]; then
+    if [[ "${KATA_HYPERVISOR}" = "qemu-tdx" ]]; then
        platform="tdx"
     fi
     export platform
diff --git a/tests/functional/kata-deploy/run-kata-deploy-tests.sh b/tests/functional/kata-deploy/run-kata-deploy-tests.sh
index 39bfdfa08a..2357e557f7 100644
--- a/tests/functional/kata-deploy/run-kata-deploy-tests.sh
+++ b/tests/functional/kata-deploy/run-kata-deploy-tests.sh
@@ -10,8 +10,8 @@ set -e
 kata_deploy_dir=$(dirname "$(readlink -f "$0")")
 source "${kata_deploy_dir}/../../common.bash"
 
-if [ -n "${KATA_DEPLOY_TEST_UNION:-}" ]; then
-    KATA_DEPLOY_TEST_UNION=($KATA_DEPLOY_TEST_UNION)
+if [[ -n "${KATA_DEPLOY_TEST_UNION:-}" ]]; then
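+    # KATA_DEPLOY_TEST_UNION is a whitespace-separated list of bats files;
+    # split it into one array element per file.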
KATA_DEPLOY_TEST_UNION=("${KATA_DEPLOY_TEST_UNION}") else KATA_DEPLOY_TEST_UNION=( \ "kata-deploy.bats" \ diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index dcea96a264..a326be67b6 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -6,13 +6,21 @@ tests_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${tests_dir}/common.bash" +kubernetes_dir="${tests_dir}/integration/kubernetes" -K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" -GH_PR_NUMBER="${GH_PR_NUMBER:-}" +AZ_APPID="${AZ_APPID:-}" +AZ_PASSWORD="${AZ_PASSWORD:-}" +AZ_SUBSCRIPTION_ID="${AZ_SUBSCRIPTION_ID:-}" +AZ_TENANT_ID="${AZ_TENANT_ID:-}" GENPOLICY_PULL_METHOD="${GENPOLICY_PULL_METHOD:-oci-distribution}" +GH_PR_NUMBER="${GH_PR_NUMBER:-}" +KATA_HOST_OS="${KATA_HOST_OS:-}" +KUBERNETES="${KUBERNETES:-}" +K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" +TEST_CLUSTER_NAMESPACE="${TEST_CLUSTER_NAMESPACE:-}" function _print_instance_type() { - case ${K8S_TEST_HOST_TYPE} in + case "${K8S_TEST_HOST_TYPE}" in small) echo "Standard_D2s_v5" ;; @@ -33,21 +41,21 @@ function _print_cluster_name() { local short_sha local metadata - if [ -n "${AKS_NAME:-}" ]; then - echo "$AKS_NAME" + if [[ -n "${AKS_NAME:-}" ]]; then + echo "${AKS_NAME}" else short_sha="$(git rev-parse --short=12 HEAD)" metadata="${GH_PR_NUMBER}-${short_sha}-${KATA_HYPERVISOR}-${KATA_HOST_OS}-amd64-${K8S_TEST_HOST_TYPE:0:1}-${GENPOLICY_PULL_METHOD:0:1}" # Compute the SHA1 digest of the metadata part to keep the name less # than the limit of 63 chars of AKS - echo "${test_type}-$(sha1sum <<< "$metadata" | cut -d' ' -f1)" + echo "${test_type}-$(sha1sum <<< "${metadata}" | cut -d' ' -f1)" fi } function _print_rg_name() { test_type="${1:-k8s}" - echo "${AZ_RG:-"kataCI-$(_print_cluster_name ${test_type})"}" + echo "${AZ_RG:-"kataCI-$(_print_cluster_name "${test_type}")"}" } # Enable the HTTP application routing add-on to AKS. @@ -61,7 +69,7 @@ function enable_cluster_http_application_routing() { rg="$(_print_rg_name "${test_type}")" cluster_name="$(_print_cluster_name "${test_type}")" - az aks enable-addons -g "$rg" -n "$cluster_name" \ + az aks enable-addons -g "${rg}" -n "${cluster_name}" \ --addons http_application_routing } @@ -86,11 +94,12 @@ function create_cluster() { test_type="${1:-k8s}" local short_sha local tags + local rg # First ensure it didn't fail to get cleaned up from a previous run. delete_cluster "${test_type}" || true - local rg="$(_print_rg_name ${test_type})" + rg="$(_print_rg_name "${test_type}")" short_sha="$(git rev-parse --short=12 HEAD)" tags=("GH_PR_NUMBER=${GH_PR_NUMBER:-}" \ @@ -104,15 +113,19 @@ function create_cluster() { -l eastus \ -n "${rg}" + # Adding a double quote on the last line ends up causing issues + # ine the cbl-mariner installation. Because of that, let's just + # disable the warning for this specific case. 
+    # shellcheck disable=SC2046
     az aks create \
         -g "${rg}" \
         --node-resource-group "node-${rg}" \
-        -n "$(_print_cluster_name ${test_type})" \
+        -n "$(_print_cluster_name "${test_type}")" \
         -s "$(_print_instance_type)" \
         --node-count 1 \
         --generate-ssh-keys \
         --tags "${tags[@]}" \
-        $([ "${KATA_HOST_OS}" = "cbl-mariner" ] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation")
+        $([[ "${KATA_HOST_OS}" = "cbl-mariner" ]] && echo "--os-sku AzureLinux --workload-runtime KataMshvVmIsolation")
 }
 
 function install_bats() {
@@ -145,13 +158,13 @@ function install_kustomize() {
     checksum=$(get_from_kata_deps ".externals.kustomize.checksum.${arch}")
 
     local tarball="kustomize_${version}_linux_${arch}.tar.gz"
-    curl -Lf -o "$tarball" "https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${version}/${tarball}"
+    curl -Lf -o "${tarball}" "https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${version}/${tarball}"
 
     local rc=0
-    echo "${checksum} $tarball" | sha256sum -c || rc=$?
-    [ $rc -eq 0 ] && sudo tar -xvzf "${tarball}" -C /usr/local/bin || rc=$?
-    rm -f "$tarball"
-    [ $rc -eq 0 ]
+    echo "${checksum} ${tarball}" | sha256sum -c || rc=$?
+    [[ ${rc} -eq 0 ]] && sudo tar -xvzf "${tarball}" -C /usr/local/bin || rc=$?
+    rm -f "${tarball}"
+    [[ ${rc} -eq 0 ]]
 }
 
 function get_cluster_credentials() {
@@ -159,8 +172,8 @@ function get_cluster_credentials() {
 
     az aks get-credentials \
         --overwrite-existing \
-        -g "$(_print_rg_name ${test_type})" \
-        -n "$(_print_cluster_name ${test_type})"
+        -g "$(_print_rg_name "${test_type}")" \
+        -n "$(_print_cluster_name "${test_type}")"
 }
 
 
@@ -178,26 +191,26 @@ function get_cluster_specific_dns_zone() {
     rg="$(_print_rg_name "${test_type}")"
     cluster_name="$(_print_cluster_name "${test_type}")"
 
-    az aks show -g "$rg" -n "$cluster_name" --query "$q" | tr -d \"
+    az aks show -g "${rg}" -n "${cluster_name}" --query "${q}" | tr -d \"
 }
 
 function delete_cluster() {
     test_type="${1:-k8s}"
     local rg
-    rg="$(_print_rg_name ${test_type})"
+    rg="$(_print_rg_name "${test_type}")"
 
-    if [ "$(az group exists -g "${rg}")" == "true" ]; then
+    if [[ "$(az group exists -g "${rg}")" == "true" ]]; then
         az group delete -g "${rg}" --yes
     fi
 }
 
 function delete_cluster_kcli() {
     CLUSTER_NAME="${CLUSTER_NAME:-kata-k8s}"
-    kcli delete -y kube "$CLUSTER_NAME"
+    kcli delete -y kube "${CLUSTER_NAME}"
 }
 
 function get_nodes_and_pods_info() {
-    kubectl debug $(kubectl get nodes -o name) -it --image=quay.io/kata-containers/kata-debug:latest || true
+    kubectl debug "$(kubectl get nodes -o name)" -it --image=quay.io/kata-containers/kata-debug:latest || true
     kubectl get pods -o name | grep node-debugger | xargs kubectl delete || true
 }
 
@@ -206,12 +219,15 @@ function deploy_k0s() {
     k0s_version_param=""
     version=$(get_from_kata_deps ".externals.k0s.version")
-    if [ -n "${version}" ]; then
+    if [[ -n "${version}" ]]; then
         k0s_version_param="K0S_VERSION=${version}"
     fi
 
-    curl -sSLf ${url} | sudo ${k0s_version_param} sh
+    # ${k0s_version_param:+...} expands to nothing when the variable is empty,
+    # so sudo never receives an empty string as its first argument.
+    curl -sSLf "${url}" | sudo ${k0s_version_param:+"${k0s_version_param}"} sh
 
+    # In this case we explicitly want word splitting when calling k0s
+    # with extra parameters.
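+    # (e.g. a hypothetical KUBERNETES_EXTRA_PARAMS="--enable-worker --no-taints"
+    # must reach k0s as two separate arguments, hence the unquoted expansion.)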
+    # shellcheck disable=SC2086
     sudo k0s install controller --single ${KUBERNETES_EXTRA_PARAMS:-}
 
     # kube-router decided to use :8080 for its metrics, and this seems
@@ -235,12 +251,12 @@ function deploy_k0s() {
     ARCH=$(arch_to_golang)
 
     kubectl_version=$(sudo k0s kubectl version 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //')
-    sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/${kubectl_version}/bin/linux/${ARCH}/kubectl
+    sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/"${kubectl_version}"/bin/linux/"${ARCH}"/kubectl
     sudo chmod +x /usr/bin/kubectl
 
     mkdir -p ~/.kube
     sudo cp /var/lib/k0s/pki/admin.conf ~/.kube/config
-    sudo chown ${USER}:${USER} ~/.kube/config
+    sudo chown "${USER}":"${USER}" ~/.kube/config
 }
 
 function deploy_k3s() {
@@ -261,7 +277,7 @@ function deploy_k3s() {
     ARCH=$(arch_to_golang)
 
     kubectl_version=$(/usr/local/bin/k3s kubectl version --client=true 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //' -e 's/+k3s[0-9]\+//')
-    sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/${kubectl_version}/bin/linux/${ARCH}/kubectl
+    sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/"${kubectl_version}"/bin/linux/"${ARCH}"/kubectl
     sudo chmod +x /usr/bin/kubectl
 
     sudo rm -rf /usr/local/bin/kubectl
@@ -286,11 +302,11 @@ function create_cluster_kcli() {
         -P disk_size="${CLUSTER_DISK_SIZE:-20}" \
         "${CLUSTER_NAME}"
 
-    export KUBECONFIG="$HOME/.kcli/clusters/$CLUSTER_NAME/auth/kubeconfig"
+    export KUBECONFIG="${HOME}/.kcli/clusters/${CLUSTER_NAME}/auth/kubeconfig"
 
     local cmd="kubectl get nodes | grep '.*worker.*\<Ready\>'"
     echo "Wait at least one worker be Ready"
-    if ! waitForProcess "330" "30" "$cmd"; then
+    if ! waitForProcess "330" "30" "${cmd}"; then
         echo "ERROR: worker nodes not ready."
         kubectl get nodes
         return 1
@@ -299,10 +315,9 @@ function create_cluster_kcli() {
     # Ensure that system pods are running or completed.
     cmd="[ \$(kubectl get pods -A --no-headers | grep -v 'Running\|Completed' | wc -l) -eq 0 ]"
     echo "Wait system pods be running or completed"
-    if ! waitForProcess "90" "30" "$cmd"; then
+    if ! waitForProcess "90" "30" "${cmd}"; then
         echo "ERROR: not all pods are Running or Completed."
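+        # Dump the full pod list so it is clear which pods never settled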
         kubectl get pods -A
-        kubectl get pods -A
         return 1
     fi
 }
@@ -320,27 +335,27 @@ function deploy_rke2() {
 
     mkdir -p ~/.kube
     sudo cp /etc/rancher/rke2/rke2.yaml ~/.kube/config
-    sudo chown ${USER}:${USER} ~/.kube/config
+    sudo chown "${USER}":"${USER}" ~/.kube/config
 }
 
 function deploy_microk8s() {
     sudo snap install microk8s --classic
+    sudo usermod -a -G microk8s "${USER}"
+    mkdir -p ~/.kube
+    # As we want to call microk8s with sudo, we're safe to ignore SC2024 here
+    # shellcheck disable=SC2024
+    sudo microk8s kubectl config view --raw > ~/.kube/config
+    sudo chown "${USER}":"${USER}" ~/.kube/config
 
     # These are arbitrary values
-    sleep 30
-    sudo /snap/bin/microk8s.status --wait-ready --timeout 300
+    sudo microk8s status --wait-ready --timeout 300
 
     # install kubectl
     ARCH=$(arch_to_golang)
-    kubectl_version=$(/snap/bin/microk8s.version | grep -oe 'v[0-9]\+\(\.[0-9]\+\)*')
-    sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/${kubectl_version}/bin/linux/${ARCH}/kubectl
+    kubectl_version=$(sudo microk8s version | grep -oe 'v[0-9]\+\(\.[0-9]\+\)*')
+    sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/"${kubectl_version}"/bin/linux/"${ARCH}"/kubectl
     sudo chmod +x /usr/bin/kubectl
     sudo rm -rf /usr/local/bin/kubectl
-
-    mkdir -p ~/.kube
-    sudo /snap/bin/microk8s.config > ~/.kube/config
-    sudo chown ${USER}:${USER} ~/.kube/config
-    newgrp microk8s
 }
 
 function _get_k0s_kubernetes_version_for_crio() {
@@ -356,25 +371,25 @@ function _get_k0s_kubernetes_version_for_crio() {
     # Remove the 'v'
     crio_version=${crio_version#v}
 
-    echo ${crio_version}
+    echo "${crio_version}"
 }
 
 function setup_crio() {
     # Get the CRI-O version to be installed depending on the version of the
     # "k8s distro" that we are using
-    case ${KUBERNETES} in
+    case "${KUBERNETES}" in
         k0s) crio_version=$(_get_k0s_kubernetes_version_for_crio) ;;
         *) >&2 echo "${KUBERNETES} flavour is not supported with CRI-O"; exit 2 ;;
     esac
 
-    install_crio ${crio_version}
+    install_crio "${crio_version}"
 }
 
 function deploy_k8s() {
     echo "::group::Deploying ${KUBERNETES}"
 
-    case ${KUBERNETES} in
+    case "${KUBERNETES}" in
         k0s) deploy_k0s ;;
         k3s) deploy_k3s ;;
         rke2) deploy_rke2 ;;
@@ -407,9 +422,9 @@ function delete_test_runners(){
     echo "Delete test scripts"
     local scripts_names=( "run_kubernetes_tests.sh" "bats" )
     for script_name in "${scripts_names[@]}"; do
-        pids=$(pgrep -f ${script_name})
-        if [ -n "$pids" ]; then
-            echo "$pids" | xargs sudo kill -SIGTERM >/dev/null 2>&1 || true
+        pids=$(pgrep -f "${script_name}")
+        if [[ -n "${pids}" ]]; then
+            echo "${pids}" | xargs sudo kill -SIGTERM >/dev/null 2>&1 || true
         fi
     done
 }
diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh
index 9b7e7e45b6..4a172e3b30 100755
--- a/tests/integration/kubernetes/gha-run.sh
+++ b/tests/integration/kubernetes/gha-run.sh
@@ -9,7 +9,7 @@ set -o nounset
 set -o pipefail
 
 DEBUG="${DEBUG:-}"
-[ -n "$DEBUG" ] && set -x
+[[ -n "${DEBUG}" ]] && set -x
 
 kubernetes_dir="$(dirname "$(readlink -f "$0")")"
 source "${kubernetes_dir}/../../gha-run-k8s-common.sh"
@@ -118,7 +118,7 @@ EOF
     sleep 60s
     sudo cat "${containerd_config_file}"
 
-    if [ "${KUBERNETES}" = 'k3s' ]
+    if [[ "${KUBERNETES}" = 'k3s' ]]
     then
         local ctr_dm_status
         local result
@@ -128,9 +128,9 @@ EOF
             plugins ls |\
             awk '$2 ~ /^devmapper$/ { print $0 }' || true)
 
-        result=$(echo "$ctr_dm_status" | awk '{print $4}' || true)
+        result=$(echo "${ctr_dm_status}" | awk '{print $4}' || true)
 
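+        # "ctr plugins ls" prints "TYPE ID PLATFORMS STATUS" per plugin, so
+        # column 4 holds the devmapper plugin status ("ok" when usable).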
-        [ "$result" = 'ok' ] || die "k3s containerd device mapper not configured: '$ctr_dm_status'"
+        [[ "${result}" = 'ok' ]] || die "k3s containerd device mapper not configured: '${ctr_dm_status}'"
     fi
 
     info "devicemapper (DM) devices"
@@ -160,7 +160,7 @@ function delete_coco_kbs() {
 # service externally
 #
 function deploy_coco_kbs() {
-    kbs_k8s_deploy "$KBS_INGRESS"
+    kbs_k8s_deploy "${KBS_INGRESS}"
 }
 
 function deploy_kata() {
@@ -168,10 +168,10 @@ function deploy_kata() {
     ensure_helm
     ensure_yq
 
-    [ "$platform" = "kcli" ] && \
-        export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig"
+    [[ "${platform}" = "kcli" ]] && \
+        export KUBECONFIG="${HOME}/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig"
 
-    if [ "${K8S_TEST_HOST_TYPE}" = "baremetal" ]; then
+    if [[ "${K8S_TEST_HOST_TYPE}" = "baremetal" ]]; then
         cleanup_kata_deploy || true
     fi
 
@@ -195,65 +195,76 @@ function deploy_kata() {
     yq -i ".env.pullTypeMapping = \"\"" "${values_yaml}"
     yq -i ".env.hostOS = \"\"" "${values_yaml}"
 
-    if [ -n "${SNAPSHOTTER}" ]; then
+    if [[ -n "${SNAPSHOTTER}" ]]; then
         yq -i ".env.snapshotterHandlerMapping = \"${KATA_HYPERVISOR}:${SNAPSHOTTER}\"" "${values_yaml}"
     fi
 
-    if [ "${KATA_HOST_OS}" = "cbl-mariner" ]; then
+    if [[ "${KATA_HOST_OS}" = "cbl-mariner" ]]; then
         yq -i ".env.allowedHypervisorAnnotations = \"image kernel default_vcpus\"" "${values_yaml}"
         yq -i ".env.hostOS = \"${KATA_HOST_OS}\"" "${values_yaml}"
     fi
 
-    if [ "${KATA_HYPERVISOR}" = "qemu" ]; then
+    if [[ "${KATA_HYPERVISOR}" = "qemu" ]]; then
         yq -i ".env.allowedHypervisorAnnotations = \"image initrd kernel default_vcpus\"" "${values_yaml}"
     fi
 
-    if [ "${KATA_HYPERVISOR}" = "qemu-tdx" ]; then
+    if [[ "${KATA_HYPERVISOR}" = "qemu-tdx" ]]; then
         yq -i ".env.agentHttpsProxy = \"${HTTPS_PROXY}\"" "${values_yaml}"
         yq -i ".env.agentNoProxy = \"${NO_PROXY}\"" "${values_yaml}"
     fi
 
     # Set the PULL_TYPE_MAPPING
-    if [ "${PULL_TYPE}" != "default" ]; then
+    if [[ "${PULL_TYPE}" != "default" ]]; then
         yq -i ".env.pullTypeMapping = \"${KATA_HYPERVISOR}:${PULL_TYPE}\"" "${values_yaml}"
     fi
 
     echo "::group::Final kata-deploy manifests used in the test"
     cat "${values_yaml}"
     helm template "${helm_chart_dir}" --values "${values_yaml}" --namespace kube-system
-    [ "$(yq .image.reference ${values_yaml})" = "${DOCKER_REGISTRY}/${DOCKER_REPO}" ] || die "Failed to set image reference"
-    [ "$(yq .image.tag ${values_yaml})" = "${DOCKER_TAG}" ] || die "Failed to set image tag"
+    [[ "$(yq .image.reference "${values_yaml}")" = "${DOCKER_REGISTRY}/${DOCKER_REPO}" ]] || die "Failed to set image reference"
+    [[ "$(yq .image.tag "${values_yaml}")" = "${DOCKER_TAG}" ]] || die "Failed to set image tag"
     echo "::endgroup::"
 
-    local max_tries=3
-    local interval=10
-    local i=0
+    local max_tries
+    local interval
+    local i
+
+    max_tries=3
+    interval=10
+    i=0
+
+    # Retry loop for the helm install, to tolerate transient failures such as
+    # the freshly created cluster being momentarily unreachable.
     set +e # Disable immediate exit on failure
     while true; do
         helm upgrade --install kata-deploy "${helm_chart_dir}" --values "${values_yaml}" --namespace kube-system --debug
-        if [ $? -eq 0 ]; then
+        ret=${?}
+        if [[ ${ret} -eq 0 ]]; then
             echo "Helm install succeeded!"
             break
         fi
         i=$((i+1))
-        [ $i -lt $max_tries ] && echo "Retrying after $interval seconds (Attempt $i of $(($max_tries - 1)))" || break
-        sleep $interval
+        if [[ ${i} -lt ${max_tries} ]]; then
+            echo "Retrying after ${interval} seconds (Attempt ${i} of $((max_tries - 1)))"
+        else
+            break
+        fi
+        sleep "${interval}"
     done
     set -e # Re-enable immediate exit on failure
 
-    if [ $i -eq $max_tries ]; then
-        die "Failed to deploy kata-deploy after $max_tries tries"
+    if [[ ${i} -eq ${max_tries} ]]; then
+        die "Failed to deploy kata-deploy after ${max_tries} tries"
     fi
 
     # `helm install --wait` does not take effect on single replicas and maxUnavailable=1 DaemonSets
     # like kata-deploy on CI. So wait for pods being Running in the "traditional" way.
-    local cmd="kubectl -n kube-system get -l name=kata-deploy pod 2>/dev/null | grep '\<Running\>'"
-    waitForProcess "${KATA_DEPLOY_WAIT_TIMEOUT}" 10 "$cmd"
+    local cmd
+    cmd="kubectl -n kube-system get -l name=kata-deploy pod 2>/dev/null | grep '\<Running\>'"
+    waitForProcess "${KATA_DEPLOY_WAIT_TIMEOUT}" 10 "${cmd}"
 
     # This is needed as the kata-deploy pod will be set to "Ready" when it starts running,
     # which may cause issues like not having the node properly labeled or the artefacts
     # properly deployed when the tests actually start running.
-    if [ "${platform}" = "aks" ]; then
+    if [[ "${platform}" = "aks" ]]; then
         sleep 240s
     else
         sleep 60s
@@ -277,7 +288,7 @@ function uninstall_kbs_client() {
 }
 
 function run_tests() {
-    if [ "${K8S_TEST_HOST_TYPE}" = "baremetal" ]; then
+    if [[ "${K8S_TEST_HOST_TYPE}" = "baremetal" ]]; then
         # Baremetal self-hosted runners end up accumulating way too much log
         # and when those get displayed it's very hard to understand what's
         # part of the current run and what's something from the past coming
@@ -291,10 +302,10 @@ function run_tests() {
     ensure_yq
 
     platform="${1:-}"
-    [ "$platform" = "kcli" ] && \
-        export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig"
+    [[ "${platform}" = "kcli" ]] && \
+        export KUBECONFIG="${HOME}/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig"
 
-    if [ "${AUTO_GENERATE_POLICY}" = "yes" ] && [ "${GENPOLICY_PULL_METHOD}" = "containerd" ]; then
+    if [[ "${AUTO_GENERATE_POLICY}" = "yes" ]] && [[ "${GENPOLICY_PULL_METHOD}" = "containerd" ]]; then
         # containerd's config on the local machine (where kubectl and genpolicy are executed by CI),
         # might have been provided by a distro-specific package that disables the cri plug-in by using:
         #
@@ -309,10 +320,15 @@ function run_tests() {
         sudo systemctl restart containerd && sudo systemctl is-active containerd
 
         # Allow genpolicy to access the containerd image pull APIs without sudo.
-        local socket_wait_time=30
-        local socket_sleep_time=3
-        local cmd="sudo chmod a+rw /var/run/containerd/containerd.sock"
-        waitForProcess "${socket_wait_time}" "${socket_sleep_time}" "$cmd"
+        local socket_wait_time
+        local socket_sleep_time
+        local cmd
+
+        socket_wait_time=30
+        socket_sleep_time=3
+        cmd="sudo chmod a+rw /var/run/containerd/containerd.sock"
+
+        waitForProcess "${socket_wait_time}" "${socket_sleep_time}" "${cmd}"
     fi
 
     set_test_cluster_namespace
@@ -323,22 +339,22 @@ function run_tests() {
 
     # In case of running on Github workflow it needs to save the start time
     # on the environment variables file so that the variable is exported on
     # next workflow steps.
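+    # (GITHUB_ENV is the file GitHub Actions uses to pass environment
+    # variables on to later steps; it is only set when running under Actions.)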
- if [ -n "${GITHUB_ENV:-}" ]; then + if [[ -n "${GITHUB_ENV:-}" ]]; then start_time=$(date '+%Y-%m-%d %H:%M:%S') export start_time - echo "start_time=${start_time}" >> "$GITHUB_ENV" + echo "start_time=${start_time}" >> "${GITHUB_ENV}" fi if [[ "${KATA_HYPERVISOR}" = "cloud-hypervisor" ]] && [[ "${SNAPSHOTTER}" = "devmapper" ]]; then - if [ -n "$GITHUB_ENV" ]; then + if [[ -n "${GITHUB_ENV}" ]]; then KATA_TEST_VERBOSE=true export KATA_TEST_VERBOSE - echo "KATA_TEST_VERBOSE=${KATA_TEST_VERBOSE}" >> "$GITHUB_ENV" + echo "KATA_TEST_VERBOSE=${KATA_TEST_VERBOSE}" >> "${GITHUB_ENV}" fi fi if [[ "${KATA_HYPERVISOR}" = "dragonball" ]] && [[ "${SNAPSHOTTER}" = "devmapper" ]]; then - echo "Skipping tests for $KATA_HYPERVISOR using devmapper" + echo "Skipping tests for ${KATA_HYPERVISOR} using devmapper" else bash run_kubernetes_tests.sh fi @@ -346,24 +362,32 @@ function run_tests() { } function collect_artifacts() { - if [ -z "${start_time:-}" ]; then + if [[ -z "${start_time:-}" ]]; then warn "tests start time is not defined. Cannot gather journal information" return fi - local artifacts_dir="/tmp/artifacts" - if [ -d "${artifacts_dir}" ]; then + local artifacts_dir + artifacts_dir="/tmp/artifacts" + if [[ -d "${artifacts_dir}" ]]; then rm -rf "${artifacts_dir}" fi mkdir -p "${artifacts_dir}" info "Collecting artifacts using ${KATA_HYPERVISOR} hypervisor" - local journalctl_log_filename="journalctl-$RANDOM.log" - local journalctl_log_path="${artifacts_dir}/${journalctl_log_filename}" - sudo journalctl --since="$start_time" > "${journalctl_log_path}" + local journalctl_log_filename + local journalctl_log_path - local k3s_dir='/var/lib/rancher/k3s/agent' + journalctl_log_filename="journalctl-${RANDOM}.log" + journalctl_log_path="${artifacts_dir}/${journalctl_log_filename}" - if [ -d "$k3s_dir" ] + # As we want to call journalctl with sudo, we're safe to ignore SC2024 here + # shellcheck disable=SC2024 + sudo journalctl --since="${start_time}" > "${journalctl_log_path}" + + local k3s_dir + k3s_dir='/var/lib/rancher/k3s/agent' + + if [[ -d "${k3s_dir}" ]] then info "Collecting k3s artifacts" @@ -375,36 +399,35 @@ function collect_artifacts() { files+=('containerd/containerd.log') # Add any rotated containerd logs - files+=( $(sudo find \ - "${k3s_dir}/containerd/" \ - -type f \ - -name 'containerd*\.log\.gz') ) + files+=("$(sudo find "${k3s_dir}/containerd/" -type f -name 'containerd*\.log\.gz')") local file for file in "${files[@]}" do - local path="$k3s_dir/$file" - sudo [ ! -e "$path" ] && continue + local path="${k3s_dir}/${file}" + sudo [[ ! 
-e "${path}" ]] && continue local encoded - encoded=$(echo "$path" | tr '/' '-' | sed 's/^-//g') - - local from="$path" + encoded="$(echo "${path}" | tr '/' '-' | sed 's/^-//g')" + local from local to + from="${path}" to="${artifacts_dir}/${encoded}" - if [[ $path = *.gz ]] + if [[ ${path} = *.gz ]] then - sudo cp "$from" "$to" + sudo cp "${from}" "${to}" else to="${to}.gz" - sudo gzip -c "$from" > "$to" + # As we want to call gzip with sudo, we're safe to ignore SC2024 here + # shellcheck disable=SC2024 + sudo gzip -c "${from}" > "${to}" fi - info " Collected k3s file '$from' to '$to'" + info " Collected k3s file '${from}' to '${to}'" done fi } @@ -422,13 +445,13 @@ function cleanup() { test_type="${2:-k8s}" ensure_yq - [ "$platform" = "kcli" ] && \ - export KUBECONFIG="$HOME/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" + [[ "${platform}" = "kcli" ]] && \ + export KUBECONFIG="${HOME}/.kcli/clusters/${CLUSTER_NAME:-kata-k8s}/auth/kubeconfig" echo "Gather information about the nodes and pods before cleaning up the node" get_nodes_and_pods_info - if [ "${platform}" = "aks" ]; then + if [[ "${platform}" = "aks" ]]; then delete_cluster "${test_type}" return fi @@ -436,7 +459,7 @@ function cleanup() { # In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests, # resulting in the CI being in an unexpected state. So we need kill all running test scripts before cleaning up the node. # See issue https://github.com/kata-containers/kata-containers/issues/9980 - delete_test_runners || true + delete_test_runners || true # Switch back to the default namespace and delete the tests one delete_test_cluster_namespace || true @@ -475,8 +498,9 @@ function deploy_nydus_snapshotter() { echo "::group::deploy_nydus_snapshotter" ensure_yq - local nydus_snapshotter_install_dir="/tmp/nydus-snapshotter" - if [ -d "${nydus_snapshotter_install_dir}" ]; then + local nydus_snapshotter_install_dir + nydus_snapshotter_install_dir="/tmp/nydus-snapshotter" + if [[ -d "${nydus_snapshotter_install_dir}" ]]; then rm -rf "${nydus_snapshotter_install_dir}" fi mkdir -p "${nydus_snapshotter_install_dir}" @@ -484,11 +508,11 @@ function deploy_nydus_snapshotter() { nydus_snapshotter_version=$(get_from_kata_deps ".externals.nydus-snapshotter.version") git clone -b "${nydus_snapshotter_version}" "${nydus_snapshotter_url}" "${nydus_snapshotter_install_dir}" - pushd "$nydus_snapshotter_install_dir" - if [ "${K8S_TEST_HOST_TYPE}" = "baremetal" ]; then + pushd "${nydus_snapshotter_install_dir}" + if [[ "${K8S_TEST_HOST_TYPE}" = "baremetal" ]]; then cleanup_nydus_snapshotter || true fi - if [ "${PULL_TYPE}" == "guest-pull" ]; then + if [[ "${PULL_TYPE}" == "guest-pull" ]]; then # Enable guest pull feature in nydus snapshotter yq -i \ 'select(.kind == "ConfigMap").data.FS_DRIVER = "proxy"' \ @@ -518,14 +542,14 @@ function deploy_nydus_snapshotter() { # Deploy nydus snapshotter as a daemonset kubectl_retry create -f "misc/snapshotter/nydus-snapshotter-rbac.yaml" - if [ "${KUBERNETES}" = "k3s" ]; then + if [[ "${KUBERNETES}" = "k3s" ]]; then kubectl_retry apply -k "misc/snapshotter/overlays/k3s" else kubectl_retry apply -f "misc/snapshotter/base/nydus-snapshotter.yaml" fi popd - kubectl rollout status daemonset nydus-snapshotter -n nydus-system --timeout ${SNAPSHOTTER_DEPLOY_WAIT_TIMEOUT} + kubectl rollout status daemonset nydus-snapshotter -n nydus-system --timeout "${SNAPSHOTTER_DEPLOY_WAIT_TIMEOUT}" echo "::endgroup::" echo "::group::nydus snapshotter logs" @@ -538,15 +562,16 @@ 
 function cleanup_nydus_snapshotter() {
     echo "cleanup_nydus_snapshotter"
 
-    local nydus_snapshotter_install_dir="/tmp/nydus-snapshotter"
-    if [ ! -d "${nydus_snapshotter_install_dir}" ]; then
+    local nydus_snapshotter_install_dir
+    nydus_snapshotter_install_dir="/tmp/nydus-snapshotter"
+    if [[ ! -d "${nydus_snapshotter_install_dir}" ]]; then
         >&2 echo "nydus snapshotter dir not found"
         exit 1
     fi
 
-    pushd "$nydus_snapshotter_install_dir"
+    pushd "${nydus_snapshotter_install_dir}"
 
-    if [ "${KUBERNETES}" = "k3s" ]; then
+    if [[ "${KUBERNETES}" = "k3s" ]]; then
         kubectl_retry delete --ignore-not-found -k "misc/snapshotter/overlays/k3s"
     else
         kubectl_retry delete --ignore-not-found -f "misc/snapshotter/base/nydus-snapshotter.yaml"
@@ -567,7 +592,7 @@ function main() {
     case "${action}" in
         install-azure-cli) install_azure_cli ;;
         login-azure) login_azure ;;
-        create-cluster) create_cluster ;;
+        create-cluster) create_cluster "" ;;
         create-cluster-kcli) create_cluster_kcli ;;
         configure-snapshotter) configure_snapshotter ;;
         setup-crio) setup_crio ;;
@@ -577,7 +602,7 @@ function main() {
         install-kata-tools) install_kata_tools ;;
         install-kbs-client) install_kbs_client ;;
         install-kubectl) install_kubectl ;;
-        get-cluster-credentials) get_cluster_credentials ;;
+        get-cluster-credentials) get_cluster_credentials "" ;;
         deploy-csi-driver) return 0 ;;
         deploy-kata) deploy_kata ;;
         deploy-kata-aks) deploy_kata "aks" ;;