diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 7479e67777..35317c528f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -124,6 +124,14 @@ jobs:
       pr-number: ${{ inputs.pr-number }}
       target-branch: ${{ inputs.target-branch }}
 
+  run-kata-monitor-tests:
+    needs: build-kata-static-tarball-amd64
+    uses: ./.github/workflows/run-kata-monitor-tests.yaml
+    with:
+      tarball-suffix: -${{ inputs.tag }}
+      commit-hash: ${{ inputs.commit-hash }}
+      target-branch: ${{ inputs.target-branch }}
+
   run-k8s-tests-on-aks:
     needs: publish-kata-deploy-payload-amd64
     uses: ./.github/workflows/run-k8s-tests-on-aks.yaml
diff --git a/.github/workflows/run-kata-monitor-tests.yaml b/.github/workflows/run-kata-monitor-tests.yaml
new file mode 100644
index 0000000000..98e2a2276e
--- /dev/null
+++ b/.github/workflows/run-kata-monitor-tests.yaml
@@ -0,0 +1,59 @@
+name: CI | Run kata-monitor tests
+on:
+  workflow_call:
+    inputs:
+      tarball-suffix:
+        required: false
+        type: string
+      commit-hash:
+        required: false
+        type: string
+      target-branch:
+        required: false
+        type: string
+        default: ""
+
+jobs:
+  run-monitor:
+    strategy:
+      fail-fast: false
+      matrix:
+        vmm:
+          - qemu
+        container_engine:
+          - crio
+          - containerd
+        include:
+          - container_engine: containerd
+            containerd_version: lts
+    runs-on: garm-ubuntu-2204-smaller
+    env:
+      CONTAINER_ENGINE: ${{ matrix.container_engine }}
+      CONTAINERD_VERSION: ${{ matrix.containerd_version }}
+      KATA_HYPERVISOR: ${{ matrix.vmm }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.commit-hash }}
+          fetch-depth: 0
+
+      - name: Rebase atop of the latest target branch
+        run: |
+          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
+        env:
+          TARGET_BRANCH: ${{ inputs.target-branch }}
+
+      - name: Install dependencies
+        run: bash tests/functional/kata-monitor/gha-run.sh install-dependencies
+
+      - name: get-kata-tarball
+        uses: actions/download-artifact@v3
+        with:
+          name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
+          path: kata-artifacts
+
+      - name: Install kata
+        run: bash tests/functional/kata-monitor/gha-run.sh install-kata kata-artifacts
+
+      - name: Run kata-monitor tests
+        run: bash tests/functional/kata-monitor/gha-run.sh run
diff --git a/tests/common.bash b/tests/common.bash
index c6fbf71a78..a111445749 100644
--- a/tests/common.bash
+++ b/tests/common.bash
@@ -257,25 +257,71 @@ function restart_containerd_service() {
 	return 0
 }
 
+function restart_crio_service() {
+	sudo systemctl restart crio
+}
+
 # Configures containerd
 function overwrite_containerd_config() {
 	containerd_config="/etc/containerd/config.toml"
 	sudo rm -f "${containerd_config}"
 	sudo tee "${containerd_config}" << EOF
 version = 2
-[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
-  SystemdCgroup = true
 
 [plugins]
   [plugins."io.containerd.grpc.v1.cri"]
     [plugins."io.containerd.grpc.v1.cri".containerd]
-      default_runtime_name = "kata"
       [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+          base_runtime_spec = ""
+          cni_conf_dir = ""
+          cni_max_conf_num = 0
+          container_annotations = []
+          pod_annotations = []
+          privileged_without_host_devices = false
+          runtime_engine = ""
+          runtime_path = ""
+          runtime_root = ""
+          runtime_type = "io.containerd.runc.v2"
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+            BinaryName = ""
+            CriuImagePath = ""
+            CriuPath = ""
+            CriuWorkPath = ""
+            IoGid = 0
+            IoUid = 0
+            NoNewKeyring = false
+            NoPivotRoot = false
+            Root = ""
+            ShimCgroup = ""
+            SystemdCgroup = false
         [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
           runtime_type = "io.containerd.kata.v2"
 EOF
 }
 
+# Configures CRI-O: writes drop-ins that register the "kata" runtime and enable debug logging
+function overwrite_crio_config() {
+	crio_conf_d="/etc/crio/crio.conf.d"
+	sudo mkdir -p "${crio_conf_d}"
+
+	kata_config="${crio_conf_d}/99-kata-containers"
+	sudo tee "${kata_config}" << EOF
+[crio.runtime.runtimes.kata]
+runtime_path = "/usr/local/bin/containerd-shim-kata-v2"
+runtime_type = "vm"
+runtime_root = "/run/vc"
+runtime_config_path = "/opt/kata/share/defaults/kata-containers/configuration.toml"
+privileged_without_host_devices = true
+EOF
+
+	debug_config="${crio_conf_d}/100-debug"
+	sudo tee "${debug_config}" << EOF
+[crio]
+log_level = "debug"
+EOF
+}
+
 function install_kata() {
 	local kata_tarball="kata-static.tar.xz"
 	declare -r katadir="/opt/kata"
@@ -294,8 +340,14 @@ function install_kata() {
 		sudo ln -sf "${b}" "${local_bin_dir}/$(basename $b)"
 	done
 
-	check_containerd_config_for_kata
-	restart_containerd_service
+	if [ "${CONTAINER_ENGINE:=containerd}" = "containerd" ]; then
+		check_containerd_config_for_kata
+		restart_containerd_service
+	else
+		overwrite_crio_config
+		restart_crio_service
+	fi
+
 }
 
 # creates a new kata configuration.toml hard link that
@@ -383,6 +435,19 @@ function download_github_project_tarball() {
 	wget https://github.com/${project}/releases/download/${version}/${tarball_name}
 }
 
+# version: The version to be installed
+function install_cni_plugins() {
+	version="${1}"
+
+	project="containernetworking/plugins"
+	tarball_name="cni-plugins-linux-$(${repo_root_dir}/tests/kata-arch.sh -g)-${version}.tgz"
+
+	download_github_project_tarball "${project}" "${version}" "${tarball_name}"
+	sudo mkdir -p /opt/cni/bin
+	sudo tar -xvf "${tarball_name}" -C /opt/cni/bin
+	rm -f "${tarball_name}"
+}
+
 # base_version: The version to be intalled in the ${major}.${minor} format
 function install_cri_containerd() {
 	base_version="${1}"
@@ -436,6 +501,52 @@ function install_nydus_snapshotter() {
 	rm -f "${tarball_name}"
 }
 
+function _get_os_for_crio() {
+	source /etc/os-release
+
+	if [ "${NAME}" != "Ubuntu" ]; then
+		echo "Only Ubuntu is supported for now" >&2 # stderr: callers capture stdout via $(...)
+		exit 2
+	fi
+
+	echo "x${NAME}_${VERSION_ID}"
+}
+
+# version: the CRI-O version to be installed
+function install_crio() {
+	local version=${1}
+
+	os=$(_get_os_for_crio)
+
+	echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list
+	echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/${version}/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:${version}.list
+	curl -L https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:${version}/${os}/Release.key | sudo apt-key add -
+	curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/Release.key | sudo apt-key add -
+	sudo apt update
+	sudo apt install -y cri-o cri-o-runc
+
+	# We need to set the default capabilities to ensure our tests will pass
+	# See: https://github.com/kata-containers/kata-containers/issues/8034
+	sudo mkdir -p /etc/crio/crio.conf.d/
+	cat <&2 die "Invalid argument" ;;
+	esac
+}
+
+main "$@"
diff --git a/tests/functional/kata-monitor/kata-monitor-tests.sh b/tests/functional/kata-monitor/kata-monitor-tests.sh
new file mode 100755
index 0000000000..fe95cb867d
--- /dev/null
+++ b/tests/functional/kata-monitor/kata-monitor-tests.sh
@@ -0,0 +1,294 @@
+#!/bin/bash
+#
+# Copyright (c) 2022 Red Hat
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This test file will test kata-monitor for basic functionality (retrieve kata sandboxes)
+# It will assume an environment where:
+# - a CRI container manager (container engine) will be up and running
+# - crictl is installed and configured
+# - the kata-monitor binary is available on the host
+#
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+source "/etc/os-release" || source "/usr/lib/os-release"
+
+[ -n "${BASH_VERSION:-}" ] && set -o errtrace
+[ -n "${DEBUG:-}" ] && set -o xtrace
+
+readonly MONITOR_HTTP_ENDPOINT="127.0.0.1:8090"
+# we should collect few hundred metrics, let's put a reasonable minimum
+readonly MONITOR_MIN_METRICS_NUM=200
+CONTAINER_ENGINE=${CONTAINER_ENGINE:-"containerd"}
+CRICTL_RUNTIME=${CRICTL_RUNTIME:-"kata"}
+KATA_MONITOR_BIN="${KATA_MONITOR_BIN:-$(command -v kata-monitor || true)}"
+KATA_MONITOR_PID=""
+TMPATH=$(mktemp -d -t kata-monitor-test-XXXXXXXXX)
+METRICS_FILE="${TMPATH}/metrics.txt"
+MONITOR_LOG_FILE="${TMPATH}/kata-monitor.log"
+CACHE_UPD_TIMEOUT_SEC=${CACHE_UPD_TIMEOUT_SEC:-20}
+POD_ID=""
+CID=""
+RUNC_POD_ID=""
+RUNC_CID=""
+CURRENT_TASK=""
+
+FALSE=1
+TRUE=0
+
+trap error_with_msg ERR
+
+title() {
+	local step="$1"
+	echo -e "\n* STEP: $step"
+}
+
+echo_ok() {
+	local msg="$1"
+
+	echo "OK: $msg"
+}
+
+# quiet crictl: runs "sudo crictl" with the given arguments, discarding stdout
+qcrictl() {
+	sudo crictl "$@" > /dev/null
+}
+
+# this is just an hash of current date (+ nanoseconds)
+gen_unique_id() {
+	date +%T:%N | md5sum | cut -d ' ' -f 1
+}
+
+error_with_msg() {
+	local msg=${1:-"cannot $CURRENT_TASK"}
+
+	trap - ERR
+	echo -e "\nERROR: $msg"
+	if [ -f "$MONITOR_LOG_FILE" ]; then
+		echo -e "\nkata-monitor logs:\n----------------"
+		cat "$MONITOR_LOG_FILE"
+	fi
+	echo -e "\nkata-monitor testing: FAILED!"
+	cleanup
+	exit 1
+}
+
+cleanup() {
+	stop_workload
+	stop_workload "$RUNC_CID" "$RUNC_POD_ID"
+
+	[ -n "$KATA_MONITOR_PID" ] \
+		&& [ -d "/proc/$KATA_MONITOR_PID" ] \
+		&& kill -9 "$KATA_MONITOR_PID"
+
+	rm -rf "$TMPATH"
+}
+
+create_sandbox_json() {
+	local uid_name_suffix="$(gen_unique_id)"
+	local sbfile="$TMPATH/sandbox-$uid_name_suffix.json"
+
+	cat <<EOF >"$sbfile"
+{
+	"metadata": {
+		"name": "nginx-$uid_name_suffix",
+		"namespace": "default",
+		"uid": "nginx-container-uid",
+		"attempt": 1
+	},
+	"logDirectory": "/tmp",
+	"linux": {
+	}
+}
+EOF
+	echo "$sbfile"
+}
+
+create_container_json() {
+	local uid_name_suffix="$(gen_unique_id)"
+	local cntfile="$TMPATH/container-$uid_name_suffix.json"
+
+	cat <<EOF >"$cntfile"
+{
+	"metadata": {
+		"name": "busybox",
+		"namespace": "default",
+		"uid": "busybox-container-uid"
+	},
+	"image":{
+		"image": "busybox"
+	},
+	"command": [
+		"top"
+	],
+	"log_path":"busybox.log",
+	"linux": {
+	}
+}
+EOF
+	echo "$cntfile"
+}
+
+start_workload() {
+	local runtime=${1:-}
+	local args=""
+	local sbfile=""
+	local cntfile=""
+
+	[ -n "$runtime" ] && args="-r $runtime"
+
+	sbfile="$(create_sandbox_json)"
+	cntfile="$(create_container_json)"
+
+	POD_ID=$(sudo crictl runp $args $sbfile)
+	CID=$(sudo crictl create $POD_ID $cntfile $sbfile)
+	qcrictl start $CID
+}
+
+stop_workload() {
+	local cid="${1:-$CID}"
+	local pod_id="${2:-$POD_ID}"
+	local check
+
+	[ -z "$pod_id" ] && return
+	check=$(sudo crictl pods -q -id $pod_id)
+	[ -z "$check" ] && return
+
+	qcrictl stop $cid
+	qcrictl rm $cid
+
+	qcrictl stopp $pod_id
+	qcrictl rmp $pod_id
+}
+
+is_sandbox_there() {
+	local podid=${1}
+	local sbs s
+
+	sbs=$(sudo curl -s ${MONITOR_HTTP_ENDPOINT}/sandboxes)
+	if [ -n "$sbs" ]; then
+		for s in $sbs; do
+			if [ "$s" = "$podid" ]; then
+				# found: the pod id is listed by the /sandboxes endpoint
+				return $TRUE
+			fi
+		done
+	fi
+	return $FALSE
+}
+
+is_sandbox_there_iterate() {
+	local podid=${1}
+
+	for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do
+		is_sandbox_there "$podid" && return $TRUE
+		echo -n "."
+		sleep 1
+		continue
+	done
+
+	return $FALSE
+}
+
+is_sandbox_missing_iterate() {
+	local podid=${1}
+
+	for i in $(seq 1 $CACHE_UPD_TIMEOUT_SEC); do
+		is_sandbox_there "$podid" || return $TRUE
+		echo -n "."
+		sleep 1
+		continue
+	done
+
+	return $FALSE
+}
+
+main() {
+	local args=""
+
+	###########################
+	title "pre-checks"
+
+	CURRENT_TASK="connect to the container engine"
+	qcrictl pods
+	echo_ok "$CURRENT_TASK"
+
+	###########################
+	title "pull the image to be used"
+	sudo crictl pull busybox
+
+	###########################
+	title "create workloads"
+
+	CURRENT_TASK="start workload (runc)"
+	start_workload
+	RUNC_POD_ID="$POD_ID"
+	RUNC_CID="$CID"
+	echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID"
+
+	CURRENT_TASK="start workload ($CRICTL_RUNTIME)"
+	start_workload "$CRICTL_RUNTIME"
+	echo_ok "$CURRENT_TASK - POD ID:$POD_ID, CID:$CID"
+
+	###########################
+	title "start kata-monitor"
+
+	[ ! -x "$KATA_MONITOR_BIN" ] && error_with_msg "kata-monitor binary not found"
+
+	[ "$CONTAINER_ENGINE" = "crio" ] && args="--runtime-endpoint /run/crio/crio.sock"
+
+	CURRENT_TASK="start kata-monitor"
+	sudo $KATA_MONITOR_BIN $args --log-level trace > "$MONITOR_LOG_FILE" 2>&1 &
+	KATA_MONITOR_PID="$!"
+	echo_ok "$CURRENT_TASK ($KATA_MONITOR_PID)"
+
+	###########################
+	title "kata-monitor cache update checks"
+
+	CURRENT_TASK="retrieve $POD_ID in kata-monitor cache"
+	is_sandbox_there_iterate "$POD_ID" || error_with_msg
+	echo_ok "$CURRENT_TASK"
+
+	CURRENT_TASK="look for runc pod $RUNC_POD_ID in kata-monitor cache"
+	is_sandbox_there_iterate "$RUNC_POD_ID" && error_with_msg "cache: got runc pod $RUNC_POD_ID"
+	echo_ok "runc pod $RUNC_POD_ID skipped from kata-monitor cache"
+
+	###########################
+	title "kata-monitor metrics retrieval"
+
+	CURRENT_TASK="retrieve metrics from kata-monitor"
+	curl -s ${MONITOR_HTTP_ENDPOINT}/metrics > "$METRICS_FILE"
+	echo_ok "$CURRENT_TASK"
+
+	CURRENT_TASK="retrieve metrics for pod $POD_ID"
+	METRICS_COUNT=$(grep -c "$POD_ID" "$METRICS_FILE")
+	[ ${METRICS_COUNT} -lt ${MONITOR_MIN_METRICS_NUM} ] \
+		&& error_with_msg "got too few metrics (#${METRICS_COUNT})"
+	echo_ok "$CURRENT_TASK - found #${METRICS_COUNT} metrics"
+
+	###########################
+	title "remove kata workload"
+
+	CURRENT_TASK="stop workload ($CRICTL_RUNTIME)"
+	stop_workload
+	echo_ok "$CURRENT_TASK"
+
+	###########################
+	title "kata-monitor cache update checks (removal)"
+
+	CURRENT_TASK="verify removal of $POD_ID from kata-monitor cache"
+	is_sandbox_missing_iterate "$POD_ID" || error_with_msg "pod $POD_ID was not removed"
+	echo_ok "$CURRENT_TASK"
+
+	###########################
+	CURRENT_TASK="cleanup"
+	cleanup
+
+	echo -e "\nkata-monitor testing: PASSED!\n"
+}
+
+main "$@"
diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh
index 9c1d9f9be5..2db44cfee6 100644
--- a/tests/gha-run-k8s-common.sh
+++ b/tests/gha-run-k8s-common.sh
@@ -197,17 +197,6 @@ function _get_k0s_kubernetes_version_for_crio() {
 	echo ${crio_version}
 }
 
-function _get_os_for_crio() {
-	source /etc/os-release
-
-	if [ "${NAME}" != "Ubuntu" ]; then
-		echo "Only Ubuntu is supported for now"
-		exit 2
-	fi
-
-	echo "x${NAME}_${VERSION_ID}"
-}
- function setup_crio() { # Get the CRI-O version to be installed depending on the version of the # "k8s distro" that we are using @@ -217,35 +206,7 @@ function setup_crio() { esac - os=$(_get_os_for_crio) - - echo "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list - echo "deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/${crio_version}/${os}/ /"|sudo tee /etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:${crio_version}.list - curl -L https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:${crio_version}/${os}/Release.key | sudo apt-key add - - curl -L https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/${os}/Release.key | sudo apt-key add - - sudo apt update - sudo apt install -y cri-o cri-o-runc - - # We need to set the default capabilities to ensure our tests will pass - # See: https://github.com/kata-containers/kata-containers/issues/8034 - sudo mkdir -p /etc/crio/crio.conf.d/ - cat <