mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-09-17 23:07:55 +00:00
Merge pull request #10954 from kata-containers/topic/metrics-kata-deploy
Rework and fix metrics issues
This commit is contained in:
@@ -298,23 +298,6 @@ function clean_env_ctr()
|
||||
fi
|
||||
}
|
||||
|
||||
# Kills running shim and hypervisor components.
#
# Stops containerd, then makes ATTEMPTS passes (one second apart) over the
# process names in PID_NAMES, sending SIGKILL to every name that still has a
# running process. Finally starts containerd again; the start command is
# itself SIGKILLed if it takes longer than TIMEOUT.
#
# Returns: the exit status of the final "systemctl start containerd" command.
function kill_kata_components() {
	local ATTEMPTS=2
	local TIMEOUT="30s"
	local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "qemu-system-x86_64-tdx-experimental" "cloud-hypervisor" )
	# Loop variables are local so the caller's 'i' / 'PID_NAME' are not clobbered.
	local i PID_NAME

	sudo systemctl stop containerd
	# iterate over the list of kata components and stop them
	for (( i=1; i<=ATTEMPTS; i++ )); do
		for PID_NAME in "${PID_NAMES[@]}"; do
			if [[ -n "$(pidof "${PID_NAME}")" ]]; then
				# Best effort: a failing killall must not abort the cleanup.
				sudo killall -w -s SIGKILL "${PID_NAME}" >/dev/null 2>&1 || true
			fi
		done
		sleep 1
	done
	sudo timeout -s SIGKILL "${TIMEOUT}" systemctl start containerd
}
|
||||
|
||||
# Restarts a systemd service while ensuring the start-limit-burst is set to 0.
|
||||
# Outputs warnings to stdio if something has gone wrong.
|
||||
#
|
||||
|
@@ -433,8 +433,8 @@ function cleanup() {
|
||||
return
|
||||
fi
|
||||
|
||||
# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests,
|
||||
# resulting in the CI being in an unexpected state. So we need to kill all running test scripts before cleaning up the node.
|
||||
# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests,
|
||||
# resulting in the CI being in an unexpected state. So we need to kill all running test scripts before cleaning up the node.
|
||||
# See issue https://github.com/kata-containers/kata-containers/issues/9980
|
||||
delete_test_runners || true
|
||||
# Switch back to the default namespace and delete the tests one
|
||||
@@ -594,6 +594,7 @@ function main() {
|
||||
collect-artifacts) collect_artifacts ;;
|
||||
cleanup) cleanup ;;
|
||||
cleanup-kcli) cleanup "kcli" ;;
|
||||
cleanup-kubeadm) cleanup "kubeadm" ;;
|
||||
cleanup-sev) cleanup "sev" ;;
|
||||
cleanup-snp) cleanup "snp" ;;
|
||||
cleanup-tdx) cleanup "tdx" ;;
|
||||
|
@@ -18,7 +18,7 @@ checkvar = ".\"boot-times\".Results | .[] | .\"to-workload\".Result"
|
||||
checktype = "mean"
|
||||
midval = 0.39
|
||||
minpercent = 40.0
|
||||
maxpercent = 30.0
|
||||
maxpercent = 50.0
|
||||
|
||||
[[metric]]
|
||||
name = "memory-footprint"
|
||||
@@ -121,7 +121,7 @@ description = "measure sequential write throughput using fio"
|
||||
checkvar = "[.\"fio\".\"Results sequential\"] | .[] | .[] | .write.bw | select( . != null )"
|
||||
checktype = "mean"
|
||||
midval = 307948
|
||||
minpercent = 20.0
|
||||
minpercent = 40.0
|
||||
maxpercent = 20.0
|
||||
|
||||
[[metric]]
|
||||
@@ -199,7 +199,7 @@ description = "measure container parallel bandwidth using iperf3"
|
||||
checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result"
|
||||
checktype = "mean"
|
||||
midval = 57516472021.90
|
||||
minpercent = 20.0
|
||||
minpercent = 40.0
|
||||
maxpercent = 20.0
|
||||
|
||||
[[metric]]
|
||||
@@ -211,6 +211,6 @@ description = "iperf"
|
||||
# within (inclusive)
|
||||
checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result"
|
||||
checktype = "mean"
|
||||
midval = 0.04
|
||||
midval = 0.02
|
||||
minpercent = 70.0
|
||||
maxpercent = 60.0
|
||||
|
@@ -212,5 +212,5 @@ description = "iperf"
|
||||
checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result"
|
||||
checktype = "mean"
|
||||
midval = 0.040
|
||||
minpercent = 60.0
|
||||
minpercent = 80.0
|
||||
maxpercent = 60.0
|
||||
|
@@ -54,9 +54,14 @@ function make_tarball_results() {
|
||||
}
|
||||
|
||||
# Runs the launch-time metrics test for the hypervisor named by
# KATA_HYPERVISOR.
#
# The test script is invoked exactly once; the number of repetitions passed
# via -n is 20 by default and 3 when KATA_HYPERVISOR is "qemu".
# Globals: KATA_HYPERVISOR (read)
function run_test_launchtimes() {
	# Local so the counter does not leak into the caller's scope.
	local repetitions=20
	if [[ ${KATA_HYPERVISOR} == "qemu" ]]; then
		# The qemu workload seems to fail before it can run ~5-7 repetitions of the workload
		repetitions=3
	fi

	info "Running Launch Time test using ${KATA_HYPERVISOR} hypervisor"
	bash tests/metrics/time/launch_times.sh -i public.ecr.aws/ubuntu/ubuntu:latest -n "${repetitions}"
}
|
||||
|
||||
function run_test_memory_usage() {
|
||||
@@ -114,14 +119,12 @@ function run_test_latency() {
|
||||
info "Running Latency test using ${KATA_HYPERVISOR} hypervisor"
|
||||
|
||||
bash tests/metrics/network/latency_kubernetes/latency-network.sh
|
||||
|
||||
check_metrics
|
||||
}
|
||||
|
||||
function main() {
|
||||
action="${1:-}"
|
||||
case "${action}" in
|
||||
install-kata) install_kata && install_checkmetrics ;;
|
||||
install-checkmetrics) install_checkmetrics ;;
|
||||
enabling-hypervisor) enabling_hypervisor ;;
|
||||
make-tarball-results) make_tarball_results ;;
|
||||
run-test-launchtimes) run_test_launchtimes ;;
|
||||
@@ -132,7 +135,8 @@ function main() {
|
||||
run-test-fio) run_test_fio ;;
|
||||
run-test-iperf) run_test_iperf ;;
|
||||
run-test-latency) run_test_latency ;;
|
||||
*) >&2 die "Invalid argument" ;;
|
||||
check-metrics) check_metrics;;
|
||||
*) >&2 die "Invalid argument: ${action}" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
|
@@ -224,6 +224,23 @@ function kill_processes_before_start()
|
||||
kill_kata_components
|
||||
}
|
||||
|
||||
# Kills running shim and hypervisor components.
#
# Stops containerd, then makes ATTEMPTS passes (one second apart) over the
# process names in PID_NAMES, sending SIGKILL to every name that still has a
# running process. Finally starts containerd again; the start command is
# itself SIGKILLed if it takes longer than TIMEOUT.
#
# Returns: the exit status of the final "systemctl start containerd" command.
function kill_kata_components() {
	local ATTEMPTS=2
	local TIMEOUT="300s"
	local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "qemu-system-x86_64-tdx-experimental" "cloud-hypervisor" )
	# Loop variables are local so the caller's 'i' / 'PID_NAME' are not clobbered.
	local i PID_NAME

	sudo systemctl stop containerd
	# iterate over the list of kata components and stop them
	for (( i=1; i<=ATTEMPTS; i++ )); do
		for PID_NAME in "${PID_NAMES[@]}"; do
			if [[ -n "$(pidof "${PID_NAME}")" ]]; then
				# Best effort: a failing killall must not abort the cleanup.
				sudo killall -w -s SIGKILL "${PID_NAME}" >/dev/null 2>&1 || true
			fi
		done
		sleep 1
	done
	sudo timeout -s SIGKILL "${TIMEOUT}" systemctl start containerd
}
|
||||
|
||||
# Generate a random name - generally used when creating containers, but can
|
||||
# be used for any other appropriate purpose
|
||||
function random_name()
|
||||
|
@@ -179,7 +179,7 @@ function iperf3_start_deployment() {
|
||||
# Check no processes are left behind
|
||||
check_processes
|
||||
|
||||
wait_time=20
|
||||
wait_time=180
|
||||
sleep_time=2
|
||||
|
||||
# Create deployment
|
||||
|
@@ -19,7 +19,7 @@ spec:
|
||||
app: iperf3-client
|
||||
spec:
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
|
@@ -25,12 +25,10 @@ spec:
|
||||
- weight: 1
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/role
|
||||
operator: In
|
||||
values:
|
||||
- master
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
|
@@ -33,12 +33,10 @@ function main() {
|
||||
cmds=("bc" "jq")
|
||||
check_cmds "${cmds[@]}"
|
||||
|
||||
init_env
|
||||
|
||||
# Check no processes are left behind
|
||||
check_processes
|
||||
|
||||
wait_time=20
|
||||
wait_time=180
|
||||
sleep_time=2
|
||||
|
||||
# Create server
|
||||
|
Reference in New Issue
Block a user