From 67c62319f29fdeb02ffa5a0e4eacb381b5021e43 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Wed, 18 Oct 2023 15:52:03 +0000 Subject: [PATCH 01/15] metrics: Add iperf udp benchmark This PR adds the iperf udp benchmark for bandwidth measurement for network metrics. Fixes #8246 Signed-off-by: Gabriela Cervantes (cherry picked from commit a58afe70b8e6e6b97c2aae9fc75053e6be1113ea) --- .../k8s-network-metrics-iperf3-udp.sh | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100755 tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh diff --git a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh new file mode 100755 index 0000000000..2067d94669 --- /dev/null +++ b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh @@ -0,0 +1,192 @@ +#!/bin/bash +# +# Copyright (c) 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +# This test measures the following UDP network essentials: +# - bandwith simplex +# +# These metrics/results will be got from the interconnection between +# a client and a server using iperf3 tool. 
+# The following cases are covered: +# +# case 1: +# container-server <----> container-client +# +# case 2" +# container-server <----> host-client + +set -o pipefail + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") + +source "${SCRIPT_PATH}/../../lib/common.bash" +iperf_file=$(mktemp iperfresults.XXXXXXXXXX) +TEST_NAME="${TEST_NAME:-network-iperf3-udp}" +COLLECT_ALL="${COLLECT_ALL:-false}" +IPERF_DEPLOYMENT="${SCRIPT_PATH}/runtimeclass_workloads/iperf3-deployment.yaml" +IPERF_DAEMONSET="${SCRIPT_PATH}/runtimeclass_workloads/iperf3-daemonset.yaml" + +function remove_tmp_file() { + rm -rf "${iperf_file}" +} + +trap remove_tmp_file EXIT + +function iperf3_udp_all_collect_results() { + metrics_json_init + metrics_json_start_array + local json="$(cat << EOF + { + "bandwidth": { + "Result" : $bandwidth_result, + "Units" : "$bandwidth_units" + } + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" +} + +function iperf3_udp_bandwidth() { + # Start server + local transmit_timeout="120" + + kubectl exec -i "$client_pod_name" -- sh -c "iperf3 -c ${server_ip_add} -u -b 1G -t $transmit_timeout" | grep receiver | cut -d' ' -f13 > "${iperf_file}" + export bandwidth_result=$(cat "${iperf_file}") + export bandwidth_units="Mbits/sec" + + if [ "$COLLECT_ALL" == "true" ]; then + iperf3_udp_all_collect_results + else + metrics_json_init + metrics_json_start_array + + local json="$(cat << EOF + { + "bandwidth": { + "Result" : $bandwidth_result, + "Units" : "$bandwidth_units" + } + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + fi +} + +function iperf3_udp_start_deployment() { + cmds=("bc") + check_cmds "${cmds[@]}" + + # Check no processes are left behind + check_processes + + wait_time=20 + sleep_time=2 + + # Create deployment + kubectl create -f "${IPERF_DEPLOYMENT}" + + # Check deployment creation + local cmd="kubectl wait --for=condition=Available deployment/iperf3-server-deployment" + waitForProcess 
"${wait_time}" "${sleep_time}" "${cmd}" + + # Create DaemonSet + kubectl create -f "${IPERF_DAEMONSET}" + + # Get the names of the server pod + export server_pod_name=$(kubectl get pods -o name | grep server | cut -d '/' -f2) + + # Verify the server pod is working + local cmd="kubectl get pod ${server_pod_name} -o yaml | grep 'phase: Running'" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" + + # Get the names of client pod + export client_pod_name=$(kubectl get pods -o name | grep client | cut -d '/' -f2) + + # Verify the client pod is working + local cmd="kubectl get pod ${client_pod_name} -o yaml | grep 'phase: Running'" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" + + # Get the ip address of the server pod + export server_ip_add=$(kubectl get pod "${server_pod_name}" -o jsonpath='{.status.podIP}') +} + +function iperf3_udp_deployment_cleanup() { + info "iperf: deleting deployments and services" + kubectl delete pod "${server_pod_name}" "${client_pod_name}" + kubectl delete -f "${IPERF_DAEMONSET}" + kubectl delete -f "${IPERF_DEPLOYMENT}" + kill_kata_components && sleep 1 + kill_kata_components + check_processes + info "End of iperf3 test" +} + +# The deployment must be removed in +# any case the script terminates. +trap iperf3_udp_deployment_cleanup EXIT + +function help() { +echo "$(cat << EOF +Usage: $0 "[options]" + Description: + This script implements a number of network metrics + using iperf3 with UDP. 
+ + Options: + -a Run all tests + -b Run bandwidth tests + -h Help +EOF +)" +} + +function main() { + init_env + iperf3_udp_start_deployment + + local OPTIND + while getopts ":abh:" opt + do + case "$opt" in + a) # all tests + test_all="1" + ;; + b) # bandwith test + test_bandwith="1" + ;; + h) + help + exit 0; + ;; + :) + echo "Missing argument for -$OPTARG"; + help + exit 1; + ;; + esac + done + shift $((OPTIND-1)) + + [[ -z "$test_bandwith" ]] && \ + [[ -z "$test_all" ]] && \ + help && die "Must choose at least one test" + + if [ "$test_bandwith" == "1" ]; then + iperf3_udp_bandwidth + fi + + if [ "$test_all" == "1" ]; then + export COLLECT_ALL=true && iperf3_udp_bandwidth + fi + + info "iperf3: saving test results" + metrics_json_save +} + +main "$@" From 44da54ffc405336919aeb6120513364a31f59647 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Fri, 20 Oct 2023 19:54:06 +0000 Subject: [PATCH 02/15] metrics: Add parallel udp iperf3 benchmark This PR adds the parallel udp iperf3 benchmark for network metrics. Fixes #8277 Signed-off-by: Gabriela Cervantes (cherry picked from commit 2d0518cbe6d4b875a1990df3ef392d64eea25c78) --- .../k8s-network-metrics-iperf3-udp.sh | 46 ++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh index 2067d94669..0238bd0537 100755 --- a/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh +++ b/tests/metrics/network/iperf3_kubernetes/k8s-network-metrics-iperf3-udp.sh @@ -6,6 +6,7 @@ # # This test measures the following UDP network essentials: # - bandwith simplex +# - parallel bandwidth # # These metrics/results will be got from the interconnection between # a client and a server using iperf3 tool. 
@@ -42,6 +43,10 @@ function iperf3_udp_all_collect_results() { "bandwidth": { "Result" : $bandwidth_result, "Units" : "$bandwidth_units" + }, + "parallel": { + "Result" : $parallel_result, + "Units" : "$parallel_units" } } EOF @@ -78,6 +83,34 @@ EOF fi } +function iperf3_udp_parallel() { + # Start server + local transmit_timeout="120" + + kubectl exec -i "$client_pod_name" -- sh -c "iperf3 -c ${server_ip_add} -u -J -P 4" | jq '.end.sum.bits_per_second' > "${iperf_file}" + export parallel_result=$(cat "${iperf_file}") + export parallel_units="bits/sec" + + if [ "$COLLECT_ALL" == "true" ]; then + iperf3_udp_all_collect_results + else + metrics_json_init + metrics_json_start_array + + local json="$(cat << EOF + { + "parallel": { + "Result" : $parallel_result, + "Units" : "$parallel_units" + } + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + fi +} + function iperf3_udp_start_deployment() { cmds=("bc") check_cmds "${cmds[@]}" @@ -141,6 +174,7 @@ Usage: $0 "[options]" Options: -a Run all tests -b Run bandwidth tests + -p Run parallel tests -h Help EOF )" @@ -151,7 +185,7 @@ function main() { iperf3_udp_start_deployment local OPTIND - while getopts ":abh:" opt + while getopts ":abph:" opt do case "$opt" in a) # all tests @@ -160,6 +194,9 @@ function main() { b) # bandwith test test_bandwith="1" ;; + p) # parallel test + test_parallel="1" + ;; h) help exit 0; @@ -174,6 +211,7 @@ function main() { shift $((OPTIND-1)) [[ -z "$test_bandwith" ]] && \ + [[ -z "$test_parallel" ]] && \ [[ -z "$test_all" ]] && \ help && die "Must choose at least one test" @@ -181,8 +219,12 @@ function main() { iperf3_udp_bandwidth fi + if [ "$test_parallel" == "1" ]; then + iperf3_udp_parallel + fi + if [ "$test_all" == "1" ]; then - export COLLECT_ALL=true && iperf3_udp_bandwidth + export COLLECT_ALL=true && iperf3_udp_bandwidth && iperf3_udp_parallel fi info "iperf3: saving test results" From 43299bcca6e93b66118030b0726dcbba5f50a99d Mon Sep 17 00:00:00 
2001 From: Archana Shinde Date: Fri, 20 Oct 2023 18:13:14 -0700 Subject: [PATCH 03/15] kata-manager: Add clh config to containerd config file kata-manager currently adds default config which currently is qemu. Add config for clh as well to containerd configuration. This should allow new users to get started with clh using kata-manager. Also add config related to enabling privileged_without_host_devices. Always good to have this config enabled when users try to run privileged containers so that devices from host are not inadvertently passed to the guest. Fixes: #8280 Signed-off-by: Archana Shinde (cherry picked from commit d3250dff34f83d2412994436d2c111bc83e6e51d) --- utils/kata-manager.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/utils/kata-manager.sh b/utils/kata-manager.sh index 249a510fa3..43bc89aeb1 100755 --- a/utils/kata-manager.sh +++ b/utils/kata-manager.sh @@ -34,6 +34,12 @@ readonly kata_install_dir="${kata_install_dir:-/opt/kata}" readonly kata_runtime_name="kata" readonly kata_runtime_type="io.containerd.${kata_runtime_name}.v2" readonly kata_shim_v2="containerd-shim-${kata_runtime_name}-v2" +readonly kata_configuration="configuration" + +readonly kata_clh_runtime_name="kata-clh" +readonly kata_clh_runtime_type="io.containerd.${kata_clh_runtime_name}.v2" +readonly kata_clh_shim_v2="containerd-shim-${kata_clh_runtime_name}-v2" +readonly kata_clh_configuration="configuration-clh" # Systemd unit name for containerd daemon readonly containerd_service_name="containerd.service" @@ -477,6 +483,14 @@ configure_containerd() [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.${kata_runtime_name}] runtime_type = "${kata_runtime_type}" + privileged_without_host_devices = true + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.${kata_runtime_name}.options] + ConfigPath = "/opt/kata/share/defaults/kata-containers/${kata_configuration}.toml" + 
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.${kata_clh_runtime_name}] + runtime_type = "${kata_clh_runtime_type}" + privileged_without_host_devices = true + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.${kata_clh_runtime_name}.options] + ConfigPath = "/opt/kata/share/defaults/kata-containers/${kata_clh_configuration}.toml" EOF modified="true" From 448db8e975f9593b0ed43f40a5657e2a3c0b3be9 Mon Sep 17 00:00:00 2001 From: Chelsea Mafrica Date: Wed, 25 Oct 2023 12:49:09 -0700 Subject: [PATCH 04/15] gha: add dependencies for spell checker In the migration from the tests repo to the kata containers repo we missed two hunspell dictionaries for static checks; add them. Fixes #8315 Signed-off-by: Chelsea Mafrica (cherry picked from commit c20aadd7a8daab92513d7665ad54ea8cc07b509e) --- .github/workflows/static-checks.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index c55adf4c7f..c0c0463124 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -187,7 +187,7 @@ jobs: echo "/usr/local/go/bin" >> $GITHUB_PATH - name: Install system dependencies run: | - sudo apt-get -y install moreutils hunspell pandoc + sudo apt-get -y install moreutils hunspell hunspell-en-gb hunspell-en-us pandoc - name: Run check run: | export PATH=${PATH}:${GOPATH}/bin From 47ff3e5655ec28ab946ae842c2929cc87146295e Mon Sep 17 00:00:00 2001 From: Wainer dos Santos Moschetta Date: Thu, 26 Oct 2023 11:50:12 -0300 Subject: [PATCH 05/15] tests/git-helper: cancel any previous rebase left halfway In bare-metal machines the git tree might get on unstable state with the previous rebase left halfway. So let's attempt to abort any rebase before. 
Fixes #8318 Signed-off-by: Wainer dos Santos Moschetta (cherry picked from commit 0ce0abffa6e354a69ee95cb24dc0c7c6f03d00d8) --- tests/git-helper.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/git-helper.sh b/tests/git-helper.sh index 81ec53cfa1..32d0384fa7 100755 --- a/tests/git-helper.sh +++ b/tests/git-helper.sh @@ -19,6 +19,8 @@ function add_kata_bot_info() { function rebase_atop_of_the_latest_target_branch() { if [ -n "${TARGET_BRANCH}" ]; then echo "Rebasing atop of the latest ${TARGET_BRANCH}" + # Recover from any previous rebase left halfway + git rebase --abort 2> /dev/null || true git rebase origin/${TARGET_BRANCH} fi } From 51284275eea2a6d3be6d96f1ad1a3f2cbfae8719 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Thu, 26 Oct 2023 15:23:31 +0100 Subject: [PATCH 06/15] utils: kata-manager: Lint fixes Improve the code by fixing some lint issues: - defining variables before using them. - Using `grep -E` rather than `egrep`. - Quoting variables. - Adding a check for invalid CLI arguments. Signed-off-by: James O. D. Hunt (cherry picked from commit 59bd5348279716b092640408b45be863374c317a) --- utils/kata-manager.sh | 77 +++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/utils/kata-manager.sh b/utils/kata-manager.sh index 43bc89aeb1..7a855742a3 100755 --- a/utils/kata-manager.sh +++ b/utils/kata-manager.sh @@ -102,7 +102,8 @@ github_get_latest_release() # - The sort(1) call; none of the standard utilities support semver # so attempt to perform a semver sort manually. # - Pre-releases are excluded via the select() call. - local latest=$(curl -sL "$url" |\ + local latest + latest=$(curl -sL "$url" |\ jq -r '.[].tag_name | select(contains("-") | not)' |\ sort -t "." 
-k1,1n -k2,2n -k3,3n |\ tail -1 || true) @@ -142,7 +143,8 @@ github_get_release_file_url() local url="${1:-}" local version="${2:-}" - local arch=$(uname -m) + local arch + arch=$(uname -m) local regex="" @@ -166,7 +168,7 @@ github_get_release_file_url() -r '.[] | select(.tag_name == $version) | .assets[].browser_download_url' |\ grep "/${regex}$") - download_url=$(echo $download_url | awk '{print $1}') + download_url=$(echo "$download_url" | awk '{print $1}') [ -z "$download_url" ] && die "Cannot determine download URL for version $version ($url)" @@ -187,7 +189,8 @@ github_download_release() pushd "$tmpdir" >/dev/null - local download_url=$(github_get_release_file_url \ + local download_url + download_url=$(github_get_release_file_url \ "$url" \ "$version" || true) @@ -198,7 +201,8 @@ github_download_release() # progress. curl -LO "$download_url" - local filename=$(echo "$download_url" | awk -F'/' '{print $NF}') + local filename + filename=$(echo "$download_url" | awk -F'/' '{print $NF}') ls -d "${PWD}/${filename}" @@ -252,7 +256,7 @@ containerd_installed() command -v containerd &>/dev/null && return 0 systemctl list-unit-files --type service |\ - egrep -q "^${containerd_service_name}\>" \ + grep -Eq "^${containerd_service_name}\>" \ && return 0 return 1 @@ -297,8 +301,11 @@ check_deps() for elem in "${elems[@]}" do - local cmd=$(echo "$elem"|cut -d: -f1) - local pkg=$(echo "$elem"|cut -d: -f2-) + local cmd + cmd=$(echo "$elem"|cut -d: -f1) + + local pkg + pkg=$(echo "$elem"|cut -d: -f2-) command -v "$cmd" &>/dev/null && continue @@ -307,7 +314,8 @@ check_deps() [ "${#pkgs_to_install[@]}" -eq 0 ] && return 0 - local packages="${pkgs_to_install[@]}" + local packages + packages="${pkgs_to_install[@]}" info "Installing packages '$packages'" @@ -358,13 +366,15 @@ github_download_package() [ -z "$releases_url" ] && die "need releases URL" [ -z "$project" ] && die "need project URL" - local version=$(github_resolve_version_to_download \ + local version + 
version=$(github_resolve_version_to_download \ "$releases_url" \ "$requested_version" || true) [ -z "$version" ] && die "Unable to determine $project version to download" - local file=$(github_download_release \ + local file + file=$(github_download_release \ "$releases_url" \ "$version") @@ -382,15 +392,19 @@ install_containerd() info "Downloading $project release ($version_desc)" - local results=$(github_download_package \ + local results + results=$(github_download_package \ "$containerd_releases_url" \ "$requested_version" \ "$project") [ -z "$results" ] && die "Cannot download $project release file" - local version=$(echo "$results"|cut -d: -f1) - local file=$(echo "$results"|cut -d: -f2-) + local version + version=$(echo "$results"|cut -d: -f1) + + local file + file=$(echo "$results"|cut -d: -f2-) [ -z "$version" ] && die "Cannot determine $project resolved version" [ -z "$file" ] && die "Cannot determine $project release file" @@ -429,7 +443,8 @@ configure_containerd() then pushd "$tmpdir" >/dev/null - local service_url=$(printf "%s/%s/%s/%s" \ + local service_url + service_url=$(printf "%s/%s/%s/%s" \ "https://raw.githubusercontent.com" \ "${containerd_slug}" \ "main" \ @@ -457,7 +472,8 @@ configure_containerd() info "Created $cfg" } - local original="${cfg}-pre-kata-$(date -I)" + local original + original="${cfg}-pre-kata-$(date -I)" sudo grep -q "$kata_runtime_type" "$cfg" || { sudo cp "$cfg" "${original}" @@ -534,15 +550,19 @@ install_kata() info "Downloading $project release ($version_desc)" - local results=$(github_download_package \ + local results + results=$(github_download_package \ "$kata_releases_url" \ "$requested_version" \ "$project") [ -z "$results" ] && die "Cannot download $project release file" - local version=$(echo "$results"|cut -d: -f1) - local file=$(echo "$results"|cut -d: -f2-) + local version + version=$(echo "$results"|cut -d: -f1) + + local file + file=$(echo "$results"|cut -d: -f2-) [ -z "$version" ] && die "Cannot determine 
$project resolved version" [ -z "$file" ] && die "Cannot determine $project release file" @@ -555,12 +575,14 @@ install_kata() create_links_for+=("kata-collect-data.sh") create_links_for+=("kata-runtime") - local from_dir=$(printf "%s/bin" "$kata_install_dir") + local from_dir + from_dir=$(printf "%s/bin" "$kata_install_dir") # Since we're unpacking to the root directory, perform a sanity check # on the archive first. - local unexpected=$(tar -tf "${file}" |\ - egrep -v "^(\./$|\./opt/$|\.${kata_install_dir}/)" || true) + local unexpected + unexpected=$(tar -tf "${file}" |\ + grep -Ev "^(\./$|\./opt/$|\.${kata_install_dir}/)" || true) [ -n "$unexpected" ] && die "File '$file' contains unexpected paths: '$unexpected'" @@ -572,7 +594,8 @@ install_kata() for file in "${create_links_for[@]}" do - local from_path=$(printf "%s/%s" "$from_dir" "$file") + local from_path + from_path=$(printf "%s/%s" "$from_dir" "$file") [ -e "$from_path" ] || die "File $from_path not found" sudo ln -sf "$from_path" "$link_dir" @@ -671,7 +694,8 @@ test_installation() # Used to prove that the kernel in the container # is different to the host kernel. - local container_kernel=$(sudo ctr run \ + local container_kernel + container_kernel=$(sudo ctr run \ --runtime "$kata_runtime_type" \ --rm \ "$image" \ @@ -680,7 +704,8 @@ test_installation() [ -z "$container_kernel" ] && die "Failed to test $kata_project" - local host_kernel=$(uname -r) + local host_kernel + host_kernel=$(uname -r) info "Test successful:\n" @@ -763,6 +788,8 @@ handle_args() r) cleanup="false" ;; t) disable_test="true" ;; T) only_run_test="true" ;; + + *) die "invalid option: '$opt'" ;; esac done From 469fa59bbf5940a66da7c82e92bd61f5c7ea8bd2 Mon Sep 17 00:00:00 2001 From: "James O. D. 
Hunt" Date: Thu, 26 Oct 2023 15:27:37 +0100 Subject: [PATCH 07/15] utils: kata-manager: Fix "Cannot determine download URL" issue The archive names for x86_64 [Kata releases](https://github.com/kata-containers/kata-containers/releases) used to include the tag `x86_64`, but that has now been changed to `amd64`, which unfortunately broke `kata-manager.sh`: ``` kata-static-3.1.3-x86_64.tar.xz ~~~~~~ expected kata-static-3.2.0-alpha3-x86_64.tar.xz ~~~~~~ expected kata-static-3.2.0-alpha4-amd64.tar.xz ~~~~~ changed ``` Fixes: #8321. Signed-off-by: James O. D. Hunt (cherry picked from commit 2ac7ac1dd2da6cc3db2e132a77ad280269bf0a42) --- utils/kata-manager.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/kata-manager.sh b/utils/kata-manager.sh index 7a855742a3..9abad5d267 100755 --- a/utils/kata-manager.sh +++ b/utils/kata-manager.sh @@ -145,6 +145,7 @@ github_get_release_file_url() local arch arch=$(uname -m) + [ "$arch" = "x86_64" ] && arch="amd64" local regex="" @@ -154,7 +155,6 @@ github_get_release_file_url() ;; *containerd*) - [ "$arch" = "x86_64" ] && arch="amd64" regex="containerd-.*-linux-${arch}.tar.gz" ;; From c42d899a6a38eb946bc2553d5404c982032beef7 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Thu, 26 Oct 2023 16:06:51 +0100 Subject: [PATCH 08/15] utils: kata-manager: Fix whitespace Use tabs consistently. Signed-off-by: James O. D. 
Hunt (cherry picked from commit 346f195532d6feb15744915b88b10d765fa86c98) --- utils/kata-manager.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/kata-manager.sh b/utils/kata-manager.sh index 9abad5d267..a55b346192 100755 --- a/utils/kata-manager.sh +++ b/utils/kata-manager.sh @@ -168,7 +168,7 @@ github_get_release_file_url() -r '.[] | select(.tag_name == $version) | .assets[].browser_download_url' |\ grep "/${regex}$") - download_url=$(echo "$download_url" | awk '{print $1}') + download_url=$(echo "$download_url" | awk '{print $1}') [ -z "$download_url" ] && die "Cannot determine download URL for version $version ($url)" From 69a23bb4e6af9908994bc86167079cc03a486cb1 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Thu, 26 Oct 2023 16:07:33 +0100 Subject: [PATCH 09/15] utils: kata-manager: Fix containerd version check Containerd release files include the version number without a "v" prefix. However, the tag for the equivalent release does include it so handle this distinction and also tighten up the Kata check by specifying an explicit version number in the regex. Signed-off-by: James O. D. 
Hunt (cherry picked from commit ae3ea1421dbfdf9c4075daa40fca072d98d045f9) --- utils/kata-manager.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/utils/kata-manager.sh b/utils/kata-manager.sh index a55b346192..d853f96cdf 100755 --- a/utils/kata-manager.sh +++ b/utils/kata-manager.sh @@ -143,6 +143,10 @@ github_get_release_file_url() local url="${1:-}" local version="${2:-}" + # The version, less any leading 'v' + local version_number + version_number=${version#v} + local arch arch=$(uname -m) [ "$arch" = "x86_64" ] && arch="amd64" @@ -151,11 +155,11 @@ github_get_release_file_url() case "$url" in *kata*) - regex="kata-static-.*-${arch}.tar.xz" + regex="kata-static-${version}-${arch}.tar.xz" ;; *containerd*) - regex="containerd-.*-linux-${arch}.tar.gz" + regex="containerd-${version_number}-linux-${arch}.tar.gz" ;; *) die "invalid url: '$url'" ;; @@ -165,8 +169,10 @@ github_get_release_file_url() download_url=$(curl -sL "$url" |\ jq --arg version "$version" \ - -r '.[] | select(.tag_name == $version) | .assets[].browser_download_url' |\ - grep "/${regex}$") + -r '.[] | + select( (.tag_name == $version) or (.tag_name == "v" + $version) ) | + .assets[].browser_download_url' |\ + grep "/${regex}$") download_url=$(echo "$download_url" | awk '{print $1}') From b5d391f18fd0911e6b97b073b39b826c9cc46458 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Tue, 10 Oct 2023 14:08:33 -0600 Subject: [PATCH 10/15] metrics: update iodepth and job size fio parameters to improve workload This PR updates the values of the fio parameters for iodepth requests and for the number of jobs, in order to increase the number of sequential operations. Additionally, it adds the list of packages needed to parse the results. 
Fixes: #8198 Signed-off-by: David Esparza (cherry picked from commit 873386a349de28e6c566e3c036f2a593d5b0d79d) --- .../storage/fio-dockerfile/workload/fio_bench.sh | 4 ++-- tests/metrics/storage/fio_test.sh | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh index 43aed0136e..bf62e40f19 100755 --- a/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh +++ b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh @@ -16,13 +16,13 @@ set -o pipefail # read, write, randread, randwrite, randrw, readwrite io_type="read" block_size="4k" -num_jobs="2" +num_jobs="4" # FIO default settings readonly ioengine="libaio" readonly rate_process="linear" readonly disable_buffered="1" -readonly iodepth="2" +readonly iodepth="8" readonly runtime="10s" # ramp time readonly rt="10s" diff --git a/tests/metrics/storage/fio_test.sh b/tests/metrics/storage/fio_test.sh index 7ea7deb8e9..91ebca5373 100755 --- a/tests/metrics/storage/fio_test.sh +++ b/tests/metrics/storage/fio_test.sh @@ -18,6 +18,7 @@ IMAGE="docker.io/library/fio-bench:latest" DOCKERFILE="${SCRIPT_PATH}/fio-dockerfile/Dockerfile" PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" TEST_NAME="fio" +REQUIRED_CMDS=("jq" "script") # Fio default number of jobs nj=4 @@ -34,7 +35,7 @@ trap release_resources EXIT function setup() { info "setup fio test" clean_env_ctr - check_cmds "${cmds[@]}" + check_cmds "${REQUIRED_CMDS[@]}" check_ctr_images "$IMAGE" "$DOCKERFILE" init_env @@ -135,21 +136,21 @@ function main() { # Collect bs=4K, num_jobs=4, io-direct, io-depth=2 info "Processing sequential type workload" sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-read-4k ${nj}" >/dev/null 2>&1 - local results_read_4K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + local 
results_read_4K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" sleep 0.5 sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-write-4k ${nj}" >/dev/null 2>&1 - local results_write_4K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + local results_write_4K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" # Collect bs=64K, num_jobs=4, io-direct, io-depth=2 info "Processing random type workload" sleep 0.5 sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-randread-64k ${nj}" >/dev/null 2>&1 - local results_rand_read_64K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + local results_rand_read_64K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" sleep 0.5 sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-randwrite-64k ${nj}" >/dev/null 2>&1 - local results_rand_write_64K="$(sudo -E "${CTR_EXE}" t exec -t --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results")" + local results_rand_write_64K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" # parse results metrics_json_init From aa8a96fe518ffca5540a0a5168d103d09f7327cd Mon Sep 17 00:00:00 2001 From: David Esparza Date: Tue, 10 Oct 2023 18:39:40 -0600 Subject: [PATCH 11/15] metrics: FIO ci test enablement This PR enables the new FIO test based on the containerd client which is used to track the I/O metrics in the kata-ci environment. Additionally this PR fixes the parsing of results. 
Fixes: #8199 Signed-off-by: David Esparza (cherry picked from commit 1626253d9e2119dc01a835fb8bee53b521c8c4b9) --- .../checkmetrics-json-clh-kata-metric8.toml | 52 ++++++++++++++++++ .../checkmetrics-json-qemu-kata-metric8.toml | 52 ++++++++++++++++++ tests/metrics/gha-run.sh | 4 +- .../fio-dockerfile/workload/fio_bench.sh | 6 +- tests/metrics/storage/fio_test.sh | 55 ++++++++++++------- 5 files changed, 143 insertions(+), 26 deletions(-) diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml index c0436b24c1..1304d79169 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-clh-kata-metric8.toml @@ -98,6 +98,58 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "fio" +type = "json" +description = "measure sequential read throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results sequential\"] | .[] | .[] | .read.bw | select( . != null )" +checktype = "mean" +midval = 312776 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure sequential write throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results sequential\"] | .[] | .[] | .write.bw | select( . != null )" +checktype = "mean" +midval = 307948 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure random read throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results random\"] | .[] | .[] | .randread.bw | select( . 
!= null )" +checktype = "mean" +midval = 1351339 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure random write throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results random\"] | .[] | .[] | .randwrite.bw | select( . != null )" +checktype = "mean" +midval = 1440540.7 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "latency" type = "json" diff --git a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml index 666a898bea..a7332918f4 100644 --- a/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml +++ b/tests/metrics/cmd/checkmetrics/ci_worker/checkmetrics-json-qemu-kata-metric8.toml @@ -98,6 +98,58 @@ midval = 98.0 minpercent = 20.0 maxpercent = 20.0 +[[metric]] +name = "fio" +type = "json" +description = "measure sequential read throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results sequential\"] | .[] | .[] | .read.bw | select( . != null )" +checktype = "mean" +midval = 327066.8 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure sequential write throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results sequential\"] | .[] | .[] | .write.bw | select( . 
!= null )" +checktype = "mean" +midval = 309023.65 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure random read throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results random\"] | .[] | .[] | .randread.bw | select( . != null )" +checktype = "mean" +midval = 1301793.45 +minpercent = 20.0 +maxpercent = 20.0 + +[[metric]] +name = "fio" +type = "json" +description = "measure random write throughput using fio" +# Min and Max values to set a 'range' that +# the median of the CSV Results data must fall +# within (inclusive) +checkvar = "[.\"fio\".\"Results random\"] | .[] | .[] | .randwrite.bw | select( . != null )" +checktype = "mean" +midval = 1457926.8 +minpercent = 20.0 +maxpercent = 20.0 + [[metric]] name = "latency" type = "json" diff --git a/tests/metrics/gha-run.sh b/tests/metrics/gha-run.sh index 3c7686e122..28180d270f 100755 --- a/tests/metrics/gha-run.sh +++ b/tests/metrics/gha-run.sh @@ -84,9 +84,9 @@ function run_test_tensorflow() { } function run_test_fio() { - info "Skipping FIO test temporarily using ${KATA_HYPERVISOR} hypervisor" + info "Running FIO test using ${KATA_HYPERVISOR} hypervisor" - # bash tests/metrics/storage/fio-k8s/fio-test-ci.sh + bash tests/metrics/storage/fio_test.sh } function run_test_iperf() { diff --git a/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh index bf62e40f19..2ddcb79624 100755 --- a/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh +++ b/tests/metrics/storage/fio-dockerfile/workload/fio_bench.sh @@ -71,7 +71,7 @@ function launch_workload() { local test_name="${io_type}_${block_size}_nj-${num_jobs}_${rate_process}_iodepth-${iodepth}_io-direct-${disable_buffered}" setup_workload - rm -f "${summary_file_local}" > /dev/null 2>&1 + rm -f "${summary_file_local}" >/dev/null 2>&1 fio \ 
--name="${test_name}" \ --output-format="json" \ @@ -88,12 +88,12 @@ function launch_workload() { --iodepth="${iodepth}" \ --gtod_reduce="1" \ --randrepeat="1" \ - | tee -a ${summary_file_local} > /dev/null 2>&1 + --output "${summary_file_local}" >/dev/null 2>&1 } function print_latest_results() { [ ! -f "${summary_file_local}" ] && echo "Error: no results to display; you must run a test before requesting results display" && exit 1 - echo "$(cat ${summary_file_local})" + cat "${summary_file_local}" } function delete_workload() { diff --git a/tests/metrics/storage/fio_test.sh b/tests/metrics/storage/fio_test.sh index 91ebca5373..367efb5238 100755 --- a/tests/metrics/storage/fio_test.sh +++ b/tests/metrics/storage/fio_test.sh @@ -19,24 +19,31 @@ DOCKERFILE="${SCRIPT_PATH}/fio-dockerfile/Dockerfile" PAYLOAD_ARGS="${PAYLOAD_ARGS:-tail -f /dev/null}" TEST_NAME="fio" REQUIRED_CMDS=("jq" "script") +TMP_DIR=$(mktemp --tmpdir -d fio.XXXXXXXXXX) +results_file="${TMP_DIR}/fio_results.json" +results_read="" +results_write="" # Fio default number of jobs nj=4 function release_resources() { - sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" ${CONTAINER_ID} sh -c "./fio_bench.sh delete-workload" + sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${CONTAINER_ID}" sh -c "./fio_bench.sh delete-workload" + sudo -E "${CTR_EXE}" t kill -a -s SIGKILL "${CONTAINER_ID}" + sudo -E "${CTR_EXE}" c rm "${CONTAINER_ID}" + rm -rf "${TMP_DIR}" sleep 0.5 clean_env_ctr - info "fio test end" + info "removing containers done" } trap release_resources EXIT function setup() { info "setup fio test" - clean_env_ctr check_cmds "${REQUIRED_CMDS[@]}" check_ctr_images "$IMAGE" "$DOCKERFILE" + clean_env_ctr init_env # drop caches @@ -108,7 +115,7 @@ function convert_results_to_json() { "bw_stddev" : "${bw_stddev}", "iops" : "${iops}", "iops_stddev" : "${iops_stddev}", - "units" : "Kb" + "units" : "KB/s" } } EOF @@ -117,46 +124,52 @@ EOF } function store_results() { - local data_r="${1}" - local 
data_w="${2}" - local title="${3}" + local title="${1}" - [ -z "${data_r}" ] || [ -z "${data_w}" ] || [ -z "${title}" ] && die "Missing data and/or title when trying storing results." + [ -z "${results_read}" ] || [ -z "${results_write}" ] || [ -z "${title}" ] && die "Missing data and/or title when trying storing results." metrics_json_start_array - extract_test_params "${data_r}" - parse_results "${data_r}" - parse_results "${data_w}" + extract_test_params "${results_read}" + parse_results "${results_read}" + parse_results "${results_write}" metrics_json_end_array "${title}" } function main() { setup - # Collect bs=4K, num_jobs=4, io-direct, io-depth=2 + # Collect bs=4K, num_jobs=4, io-direct, io-depth=8 info "Processing sequential type workload" sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-read-4k ${nj}" >/dev/null 2>&1 - local results_read_4K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" + sudo -E ${CTR_EXE} t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results" >"${results_file}" + results_read=$(<"${results_file}") sleep 0.5 sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-write-4k ${nj}" >/dev/null 2>&1 - local results_write_4K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" + sudo -E ${CTR_EXE} t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results" >"${results_file}" + results_write=$(<"${results_file}") - # Collect bs=64K, num_jobs=4, io-direct, io-depth=2 + # parse results sequential + metrics_json_init + store_results "Results sequential" + + # Collect bs=64K, num_jobs=4, io-direct, io-depth=8 info "Processing random type workload" sleep 0.5 sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh 
run-randread-64k ${nj}" >/dev/null 2>&1 - local results_rand_read_64K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" + sudo -E ${CTR_EXE} t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results" >"${results_file}" + results_read=$(<"${results_file}") sleep 0.5 sudo -E "${CTR_EXE}" t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh run-randwrite-64k ${nj}" >/dev/null 2>&1 - local results_rand_write_64K="$(script -qc "sudo -E ${CTR_EXE} t exec -t --exec-id ${RANDOM} ${CONTAINER_ID} sh -c './fio_bench.sh print-latest-results'")" + sudo -E ${CTR_EXE} t exec --exec-id "${RANDOM}" ${CONTAINER_ID} sh -c "./fio_bench.sh print-latest-results" >"${results_file}" + results_write=$(<"${results_file}") - # parse results - metrics_json_init - store_results "${results_read_4K}" "${results_write_4K}" "Results sequential" - store_results "${results_rand_read_64K}" "${results_rand_write_64K}" "Results random" + # parse results random + store_results "Results random" metrics_json_save } main "$@" +info "fio test end" + From 885fcc9aaa5556363e9b59e13f203dab22639dc6 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Tue, 24 Oct 2023 09:27:32 -0600 Subject: [PATCH 12/15] metrics: increase the number of attempts to stop kata This PR increases the number of attempts to stop kata components when it is required usually before starting a metrics test. 
Fixes: #8307 Signed-off-by: David Esparza (cherry picked from commit c42a2f2eda5fd17fd3d6cdcfeb3f23abe6b9ff5d) --- tests/common.bash | 8 ++++++-- tests/metrics/lib/common.bash | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/common.bash b/tests/common.bash index a111445749..46dc9064a1 100644 --- a/tests/common.bash +++ b/tests/common.bash @@ -190,13 +190,17 @@ function clean_env_ctr() # Kills running shim and hypervisor components function kill_kata_components() { + local ATTEMPTS=2 local TIMEOUT="30s" local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "cloud-hypervisor" ) sudo systemctl stop containerd # iterate over the list of kata components and stop them - for PID_NAME in "${PID_NAMES[@]}"; do - [[ ! -z "$(pidof ${PID_NAME})" ]] && sudo killall "${PID_NAME}" > /dev/null 2>&1 || true + for (( i=1; i<=ATTEMPTS; i++ )); do + for PID_NAME in "${PID_NAMES[@]}"; do + [[ ! -z "$(pidof ${PID_NAME})" ]] && sudo killall "${PID_NAME}" >/dev/null 2>&1 || true + done + sleep 1 done sudo timeout -s SIGKILL "${TIMEOUT}" systemctl start containerd } diff --git a/tests/metrics/lib/common.bash b/tests/metrics/lib/common.bash index e42a596ac9..9fefd39a55 100755 --- a/tests/metrics/lib/common.bash +++ b/tests/metrics/lib/common.bash @@ -201,8 +201,8 @@ function kill_processes_before_start() CTR_PROCS=$(sudo "${CTR_EXE}" t list -q) [[ -n "${CTR_PROCS}" ]] && clean_env_ctr + kill_kata_components && sleep 1 kill_kata_components - check_processes } From 48cab2bfd08e28a49f37e16a715d151ba7045660 Mon Sep 17 00:00:00 2001 From: David Esparza Date: Fri, 27 Oct 2023 11:09:08 -0600 Subject: [PATCH 13/15] metrics: removes double-quotes in checkmetrics when parsing results This PR removes double quotes in jq output to return raw strings as input of checkmetrics tool.
Fixes: #8331 Signed-off-by: David Esparza (cherry picked from commit c232869af9ed82f67cfeb6c6a1c8bd2079e8c409) --- tests/metrics/cmd/checkmetrics/json.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metrics/cmd/checkmetrics/json.go b/tests/metrics/cmd/checkmetrics/json.go index a11d957b70..f38d70cb96 100644 --- a/tests/metrics/cmd/checkmetrics/json.go +++ b/tests/metrics/cmd/checkmetrics/json.go @@ -29,7 +29,7 @@ func (c *jsonRecord) load(filepath string, metric *metrics) error { log.Debugf(" Run jq '%v' %s", metric.CheckVar, filepath) - out, err := exec.Command("jq", metric.CheckVar, filepath).Output() + out, err := exec.Command("jq", "-r", metric.CheckVar, filepath).Output() if err != nil { log.Warnf("Failed to run [jq %v %v][%v]", metric.CheckVar, filepath, err) return err From 3b9e6e7ee43eb805c54be472abfc1395fe14ba40 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Fri, 27 Oct 2023 21:42:37 -0700 Subject: [PATCH 14/15] network: Fix network attach for ipvlan and macvlan We used the approach of cold-plugging network interface for pre-shimv2 support for docker. Since the hotplug approach was not required, we never really got to implementing hotplug support for certain network endpoints, ipvlan and macvlan being among them. Since moving to shimv2 interface as the default for runtime, we switched to hotplugging the network interface for supporting docker and nerdctl. This was done for veth endpoints only. Implement the hot-attach apis for ipvlan and macvlan as well to support ipvlan and macvlan networks with docker and nerdctl.
Fixes: #8333 Signed-off-by: Archana Shinde (cherry picked from commit f53f86884fe69a04cfcd6e57a622e522ccb946fa) --- src/runtime/virtcontainers/ipvlan_endpoint.go | 36 ++++++++++++++++--- .../virtcontainers/macvlan_endpoint.go | 36 ++++++++++++++++--- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/runtime/virtcontainers/ipvlan_endpoint.go b/src/runtime/virtcontainers/ipvlan_endpoint.go index f3da40e88e..4c495ba67a 100644 --- a/src/runtime/virtcontainers/ipvlan_endpoint.go +++ b/src/runtime/virtcontainers/ipvlan_endpoint.go @@ -125,14 +125,42 @@ func (endpoint *IPVlanEndpoint) Detach(ctx context.Context, netNsCreated bool, n }) } -// HotAttach for ipvlan endpoint not supported yet func (endpoint *IPVlanEndpoint) HotAttach(ctx context.Context, h Hypervisor) error { - return fmt.Errorf("IPVlanEndpoint does not support Hot attach") + span, ctx := ipvlanTrace(ctx, "HotAttach", endpoint) + defer span.End() + + if err := xConnectVMNetwork(ctx, endpoint, h); err != nil { + networkLogger().WithError(err).Error("Error bridging ipvlan ep") + return err + } + + if _, err := h.HotplugAddDevice(ctx, endpoint, NetDev); err != nil { + networkLogger().WithError(err).Error("Error hotplugging ipvlan ep") + return err + } + + return nil } -// HotDetach for ipvlan endpoint not supported yet func (endpoint *IPVlanEndpoint) HotDetach(ctx context.Context, h Hypervisor, netNsCreated bool, netNsPath string) error { - return fmt.Errorf("IPVlanEndpoint does not support Hot detach") + if !netNsCreated { + return nil + } + + span, ctx := ipvlanTrace(ctx, "HotDetach", endpoint) + defer span.End() + + if err := doNetNS(netNsPath, func(_ ns.NetNS) error { + return xDisconnectVMNetwork(ctx, endpoint) + }); err != nil { + networkLogger().WithError(err).Warn("Error un-bridging ipvlan ep") + } + + if _, err := h.HotplugRemoveDevice(ctx, endpoint, NetDev); err != nil { + networkLogger().WithError(err).Error("Error detach ipvlan ep") + return err + } + return nil } func (endpoint 
*IPVlanEndpoint) save() persistapi.NetworkEndpoint { diff --git a/src/runtime/virtcontainers/macvlan_endpoint.go b/src/runtime/virtcontainers/macvlan_endpoint.go index 76dc911a8b..974019fbb6 100644 --- a/src/runtime/virtcontainers/macvlan_endpoint.go +++ b/src/runtime/virtcontainers/macvlan_endpoint.go @@ -122,14 +122,42 @@ func (endpoint *MacvlanEndpoint) Detach(ctx context.Context, netNsCreated bool, }) } -// HotAttach for bridged macvlan endpoint not supported yet func (endpoint *MacvlanEndpoint) HotAttach(ctx context.Context, h Hypervisor) error { - return fmt.Errorf("MacvlanEndpoint does not support Hot attach") + span, ctx := macvlanTrace(ctx, "HotAttach", endpoint) + defer span.End() + + if err := xConnectVMNetwork(ctx, endpoint, h); err != nil { + networkLogger().WithError(err).Error("Error bridging macvlan ep") + return err + } + + if _, err := h.HotplugAddDevice(ctx, endpoint, NetDev); err != nil { + networkLogger().WithError(err).Error("Error hotplugging macvlan ep") + return err + } + + return nil } -// HotDetach for bridged macvlan endpoint not supported yet func (endpoint *MacvlanEndpoint) HotDetach(ctx context.Context, h Hypervisor, netNsCreated bool, netNsPath string) error { - return fmt.Errorf("MacvlanEndpoint does not support Hot detach") + if !netNsCreated { + return nil + } + + span, ctx := macvlanTrace(ctx, "HotDetach", endpoint) + defer span.End() + + if err := doNetNS(netNsPath, func(_ ns.NetNS) error { + return xDisconnectVMNetwork(ctx, endpoint) + }); err != nil { + networkLogger().WithError(err).Warn("Error un-bridging macvlan ep") + } + + if _, err := h.HotplugRemoveDevice(ctx, endpoint, NetDev); err != nil { + networkLogger().WithError(err).Error("Error detach macvlan ep") + return err + } + return nil } func (endpoint *MacvlanEndpoint) save() persistapi.NetworkEndpoint { From d48ecf9f2228573a66f73d0b1fc29278c7fe6f9d Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 23 Oct 2023 11:22:14 +0200 Subject: [PATCH 15/15] agent: Skip 
flaky create_tmpfs on s390x This is to skip a flaky test `create_tmpfs()` on s390x until a root cause is identified and fixed. Fixes: #4248 Signed-off-by: Hyounggyu Choi (cherry picked from commit a0746c8d7b292c258375c913aab04f7bd6c7b2f0) --- src/agent/src/watcher.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/src/watcher.rs b/src/agent/src/watcher.rs index a6cf4113b4..95dfa6cba0 100644 --- a/src/agent/src/watcher.rs +++ b/src/agent/src/watcher.rs @@ -1291,7 +1291,7 @@ mod tests { #[tokio::test] #[serial] - #[cfg(not(target_arch = "aarch64"))] + #[cfg(not(any(target_arch = "aarch64", target_arch = "s390x")))] async fn create_tmpfs() { skip_if_not_root!();