mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-19 18:01:01 +00:00
Merge pull request #10954 from kata-containers/topic/metrics-kata-deploy
Rework and fix metrics issues
This commit is contained in:
commit
4bb0eb4590
5
.github/workflows/ci.yaml
vendored
5
.github/workflows/ci.yaml
vendored
@ -288,8 +288,11 @@ jobs:
|
|||||||
needs: build-kata-static-tarball-amd64
|
needs: build-kata-static-tarball-amd64
|
||||||
uses: ./.github/workflows/run-metrics.yaml
|
uses: ./.github/workflows/run-metrics.yaml
|
||||||
with:
|
with:
|
||||||
tarball-suffix: -${{ inputs.tag }}
|
registry: ghcr.io
|
||||||
|
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||||
|
tag: ${{ inputs.tag }}-amd64
|
||||||
commit-hash: ${{ inputs.commit-hash }}
|
commit-hash: ${{ inputs.commit-hash }}
|
||||||
|
pr-number: ${{ inputs.pr-number }}
|
||||||
target-branch: ${{ inputs.target-branch }}
|
target-branch: ${{ inputs.target-branch }}
|
||||||
|
|
||||||
run-basic-amd64-tests:
|
run-basic-amd64-tests:
|
||||||
|
89
.github/workflows/run-metrics.yaml
vendored
89
.github/workflows/run-metrics.yaml
vendored
@ -2,8 +2,17 @@ name: CI | Run test metrics
|
|||||||
on:
|
on:
|
||||||
workflow_call:
|
workflow_call:
|
||||||
inputs:
|
inputs:
|
||||||
tarball-suffix:
|
registry:
|
||||||
required: false
|
required: true
|
||||||
|
type: string
|
||||||
|
repo:
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
tag:
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
pr-number:
|
||||||
|
required: true
|
||||||
type: string
|
type: string
|
||||||
commit-hash:
|
commit-hash:
|
||||||
required: false
|
required: false
|
||||||
@ -14,34 +23,7 @@ on:
|
|||||||
default: ""
|
default: ""
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
setup-kata:
|
|
||||||
name: Kata Setup
|
|
||||||
runs-on: metrics
|
|
||||||
env:
|
|
||||||
GOPATH: ${{ github.workspace }}
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
ref: ${{ inputs.commit-hash }}
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Rebase atop of the latest target branch
|
|
||||||
run: |
|
|
||||||
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
|
||||||
env:
|
|
||||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
|
||||||
|
|
||||||
- name: get-kata-tarball
|
|
||||||
uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
name: kata-static-tarball-amd64${{ inputs.tarball-suffix }}
|
|
||||||
path: kata-artifacts
|
|
||||||
|
|
||||||
- name: Install kata
|
|
||||||
run: bash tests/metrics/gha-run.sh install-kata kata-artifacts
|
|
||||||
|
|
||||||
run-metrics:
|
run-metrics:
|
||||||
needs: setup-kata
|
|
||||||
strategy:
|
strategy:
|
||||||
# We can set this to true whenever we're 100% sure that
|
# We can set this to true whenever we're 100% sure that
|
||||||
# the all the tests are not flaky, otherwise we'll fail
|
# the all the tests are not flaky, otherwise we'll fail
|
||||||
@ -54,34 +36,78 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
GOPATH: ${{ github.workspace }}
|
GOPATH: ${{ github.workspace }}
|
||||||
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
KATA_HYPERVISOR: ${{ matrix.vmm }}
|
||||||
|
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||||
|
DOCKER_REPO: ${{ inputs.repo }}
|
||||||
|
DOCKER_TAG: ${{ inputs.tag }}
|
||||||
|
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||||
|
K8S_TEST_HOST_TYPE: "baremetal"
|
||||||
|
USING_NFD: "false"
|
||||||
|
KUBERNETES: kubeadm
|
||||||
steps:
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ inputs.commit-hash }}
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Rebase atop of the latest target branch
|
||||||
|
run: |
|
||||||
|
./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
|
||||||
|
env:
|
||||||
|
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||||
|
|
||||||
|
- name: Deploy Kata
|
||||||
|
timeout-minutes: 10
|
||||||
|
run: bash tests/integration/kubernetes/gha-run.sh deploy-kata-kubeadm
|
||||||
|
|
||||||
|
- name: Install check metrics
|
||||||
|
run: bash tests/metrics/gha-run.sh install-checkmetrics
|
||||||
|
|
||||||
- name: enabling the hypervisor
|
- name: enabling the hypervisor
|
||||||
run: bash tests/metrics/gha-run.sh enabling-hypervisor
|
run: bash tests/metrics/gha-run.sh enabling-hypervisor
|
||||||
|
|
||||||
- name: run launch times test
|
- name: run launch times test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-launchtimes
|
run: bash tests/metrics/gha-run.sh run-test-launchtimes
|
||||||
|
|
||||||
- name: run memory foot print test
|
- name: run memory foot print test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-memory-usage
|
run: bash tests/metrics/gha-run.sh run-test-memory-usage
|
||||||
|
|
||||||
- name: run memory usage inside container test
|
- name: run memory usage inside container test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-memory-usage-inside-container
|
run: bash tests/metrics/gha-run.sh run-test-memory-usage-inside-container
|
||||||
|
|
||||||
- name: run blogbench test
|
- name: run blogbench test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-blogbench
|
run: bash tests/metrics/gha-run.sh run-test-blogbench
|
||||||
|
|
||||||
- name: run tensorflow test
|
- name: run tensorflow test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-tensorflow
|
run: bash tests/metrics/gha-run.sh run-test-tensorflow
|
||||||
|
|
||||||
- name: run fio test
|
- name: run fio test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-fio
|
run: bash tests/metrics/gha-run.sh run-test-fio
|
||||||
|
|
||||||
- name: run iperf test
|
- name: run iperf test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-iperf
|
run: bash tests/metrics/gha-run.sh run-test-iperf
|
||||||
|
|
||||||
- name: run latency test
|
- name: run latency test
|
||||||
|
timeout-minutes: 15
|
||||||
|
continue-on-error: true
|
||||||
run: bash tests/metrics/gha-run.sh run-test-latency
|
run: bash tests/metrics/gha-run.sh run-test-latency
|
||||||
|
|
||||||
|
- name: check metrics
|
||||||
|
run: bash tests/metrics/gha-run.sh check-metrics
|
||||||
|
|
||||||
- name: make metrics tarball ${{ matrix.vmm }}
|
- name: make metrics tarball ${{ matrix.vmm }}
|
||||||
run: bash tests/metrics/gha-run.sh make-tarball-results
|
run: bash tests/metrics/gha-run.sh make-tarball-results
|
||||||
|
|
||||||
@ -92,3 +118,8 @@ jobs:
|
|||||||
path: results-${{ matrix.vmm }}.tar.gz
|
path: results-${{ matrix.vmm }}.tar.gz
|
||||||
retention-days: 1
|
retention-days: 1
|
||||||
if-no-files-found: error
|
if-no-files-found: error
|
||||||
|
|
||||||
|
- name: Delete kata-deploy
|
||||||
|
timeout-minutes: 10
|
||||||
|
if: always()
|
||||||
|
run: bash tests/integration/kubernetes/gha-run.sh cleanup-kubeadm
|
||||||
|
@ -298,23 +298,6 @@ function clean_env_ctr()
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Kills running shim and hypervisor components
|
|
||||||
function kill_kata_components() {
|
|
||||||
local ATTEMPTS=2
|
|
||||||
local TIMEOUT="30s"
|
|
||||||
local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "qemu-system-x86_64-tdx-experimental" "cloud-hypervisor" )
|
|
||||||
|
|
||||||
sudo systemctl stop containerd
|
|
||||||
# iterate over the list of kata components and stop them
|
|
||||||
for (( i=1; i<=ATTEMPTS; i++ )); do
|
|
||||||
for PID_NAME in "${PID_NAMES[@]}"; do
|
|
||||||
[[ ! -z "$(pidof ${PID_NAME})" ]] && sudo killall -w -s SIGKILL "${PID_NAME}" >/dev/null 2>&1 || true
|
|
||||||
done
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
sudo timeout -s SIGKILL "${TIMEOUT}" systemctl start containerd
|
|
||||||
}
|
|
||||||
|
|
||||||
# Restarts a systemd service while ensuring the start-limit-burst is set to 0.
|
# Restarts a systemd service while ensuring the start-limit-burst is set to 0.
|
||||||
# Outputs warnings to stdio if something has gone wrong.
|
# Outputs warnings to stdio if something has gone wrong.
|
||||||
#
|
#
|
||||||
|
@ -433,8 +433,8 @@ function cleanup() {
|
|||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests,
|
# In case of canceling workflow manually, 'run_kubernetes_tests.sh' continues running and triggers new tests,
|
||||||
# resulting in the CI being in an unexpected state. So we need kill all running test scripts before cleaning up the node.
|
# resulting in the CI being in an unexpected state. So we need kill all running test scripts before cleaning up the node.
|
||||||
# See issue https://github.com/kata-containers/kata-containers/issues/9980
|
# See issue https://github.com/kata-containers/kata-containers/issues/9980
|
||||||
delete_test_runners || true
|
delete_test_runners || true
|
||||||
# Switch back to the default namespace and delete the tests one
|
# Switch back to the default namespace and delete the tests one
|
||||||
@ -594,6 +594,7 @@ function main() {
|
|||||||
collect-artifacts) collect_artifacts ;;
|
collect-artifacts) collect_artifacts ;;
|
||||||
cleanup) cleanup ;;
|
cleanup) cleanup ;;
|
||||||
cleanup-kcli) cleanup "kcli" ;;
|
cleanup-kcli) cleanup "kcli" ;;
|
||||||
|
cleanup-kubeadm) cleanup "kubeadm" ;;
|
||||||
cleanup-sev) cleanup "sev" ;;
|
cleanup-sev) cleanup "sev" ;;
|
||||||
cleanup-snp) cleanup "snp" ;;
|
cleanup-snp) cleanup "snp" ;;
|
||||||
cleanup-tdx) cleanup "tdx" ;;
|
cleanup-tdx) cleanup "tdx" ;;
|
||||||
|
@ -18,7 +18,7 @@ checkvar = ".\"boot-times\".Results | .[] | .\"to-workload\".Result"
|
|||||||
checktype = "mean"
|
checktype = "mean"
|
||||||
midval = 0.39
|
midval = 0.39
|
||||||
minpercent = 40.0
|
minpercent = 40.0
|
||||||
maxpercent = 30.0
|
maxpercent = 50.0
|
||||||
|
|
||||||
[[metric]]
|
[[metric]]
|
||||||
name = "memory-footprint"
|
name = "memory-footprint"
|
||||||
@ -121,7 +121,7 @@ description = "measure sequential write throughput using fio"
|
|||||||
checkvar = "[.\"fio\".\"Results sequential\"] | .[] | .[] | .write.bw | select( . != null )"
|
checkvar = "[.\"fio\".\"Results sequential\"] | .[] | .[] | .write.bw | select( . != null )"
|
||||||
checktype = "mean"
|
checktype = "mean"
|
||||||
midval = 307948
|
midval = 307948
|
||||||
minpercent = 20.0
|
minpercent = 40.0
|
||||||
maxpercent = 20.0
|
maxpercent = 20.0
|
||||||
|
|
||||||
[[metric]]
|
[[metric]]
|
||||||
@ -199,7 +199,7 @@ description = "measure container parallel bandwidth using iperf3"
|
|||||||
checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result"
|
checkvar = ".\"network-iperf3\".Results | .[] | .parallel.Result"
|
||||||
checktype = "mean"
|
checktype = "mean"
|
||||||
midval = 57516472021.90
|
midval = 57516472021.90
|
||||||
minpercent = 20.0
|
minpercent = 40.0
|
||||||
maxpercent = 20.0
|
maxpercent = 20.0
|
||||||
|
|
||||||
[[metric]]
|
[[metric]]
|
||||||
@ -211,6 +211,6 @@ description = "iperf"
|
|||||||
# within (inclusive)
|
# within (inclusive)
|
||||||
checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result"
|
checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result"
|
||||||
checktype = "mean"
|
checktype = "mean"
|
||||||
midval = 0.04
|
midval = 0.02
|
||||||
minpercent = 70.0
|
minpercent = 70.0
|
||||||
maxpercent = 60.0
|
maxpercent = 60.0
|
||||||
|
@ -212,5 +212,5 @@ description = "iperf"
|
|||||||
checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result"
|
checkvar = ".\"network-iperf3\".Results | .[] | .jitter.Result"
|
||||||
checktype = "mean"
|
checktype = "mean"
|
||||||
midval = 0.040
|
midval = 0.040
|
||||||
minpercent = 60.0
|
minpercent = 80.0
|
||||||
maxpercent = 60.0
|
maxpercent = 60.0
|
||||||
|
@ -54,9 +54,14 @@ function make_tarball_results() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function run_test_launchtimes() {
|
function run_test_launchtimes() {
|
||||||
info "Running Launch Time test using ${KATA_HYPERVISOR} hypervisor"
|
repetitions=20
|
||||||
|
if [[ ${KATA_HYPERVISOR} == "qemu" ]]; then
|
||||||
|
# The qemu workload seems to fail before it can run ~5-7 repetitions of the workload
|
||||||
|
repetitions=3
|
||||||
|
fi
|
||||||
|
|
||||||
bash tests/metrics/time/launch_times.sh -i public.ecr.aws/ubuntu/ubuntu:latest -n 20
|
info "Running Launch Time test using ${KATA_HYPERVISOR} hypervisor"
|
||||||
|
bash tests/metrics/time/launch_times.sh -i public.ecr.aws/ubuntu/ubuntu:latest -n "${repetitions}"
|
||||||
}
|
}
|
||||||
|
|
||||||
function run_test_memory_usage() {
|
function run_test_memory_usage() {
|
||||||
@ -114,14 +119,12 @@ function run_test_latency() {
|
|||||||
info "Running Latency test using ${KATA_HYPERVISOR} hypervisor"
|
info "Running Latency test using ${KATA_HYPERVISOR} hypervisor"
|
||||||
|
|
||||||
bash tests/metrics/network/latency_kubernetes/latency-network.sh
|
bash tests/metrics/network/latency_kubernetes/latency-network.sh
|
||||||
|
|
||||||
check_metrics
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
action="${1:-}"
|
action="${1:-}"
|
||||||
case "${action}" in
|
case "${action}" in
|
||||||
install-kata) install_kata && install_checkmetrics ;;
|
install-checkmetrics) install_checkmetrics ;;
|
||||||
enabling-hypervisor) enabling_hypervisor ;;
|
enabling-hypervisor) enabling_hypervisor ;;
|
||||||
make-tarball-results) make_tarball_results ;;
|
make-tarball-results) make_tarball_results ;;
|
||||||
run-test-launchtimes) run_test_launchtimes ;;
|
run-test-launchtimes) run_test_launchtimes ;;
|
||||||
@ -132,7 +135,8 @@ function main() {
|
|||||||
run-test-fio) run_test_fio ;;
|
run-test-fio) run_test_fio ;;
|
||||||
run-test-iperf) run_test_iperf ;;
|
run-test-iperf) run_test_iperf ;;
|
||||||
run-test-latency) run_test_latency ;;
|
run-test-latency) run_test_latency ;;
|
||||||
*) >&2 die "Invalid argument" ;;
|
check-metrics) check_metrics;;
|
||||||
|
*) >&2 die "Invalid argument: ${action}" ;;
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -224,6 +224,23 @@ function kill_processes_before_start()
|
|||||||
kill_kata_components
|
kill_kata_components
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Kills running shim and hypervisor components
|
||||||
|
function kill_kata_components() {
|
||||||
|
local ATTEMPTS=2
|
||||||
|
local TIMEOUT="300s"
|
||||||
|
local PID_NAMES=( "containerd-shim-kata-v2" "qemu-system-x86_64" "qemu-system-x86_64-tdx-experimental" "cloud-hypervisor" )
|
||||||
|
|
||||||
|
sudo systemctl stop containerd
|
||||||
|
# iterate over the list of kata components and stop them
|
||||||
|
for (( i=1; i<=ATTEMPTS; i++ )); do
|
||||||
|
for PID_NAME in "${PID_NAMES[@]}"; do
|
||||||
|
[[ ! -z "$(pidof ${PID_NAME})" ]] && sudo killall -w -s SIGKILL "${PID_NAME}" >/dev/null 2>&1 || true
|
||||||
|
done
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
sudo timeout -s SIGKILL "${TIMEOUT}" systemctl start containerd
|
||||||
|
}
|
||||||
|
|
||||||
# Generate a random name - generally used when creating containers, but can
|
# Generate a random name - generally used when creating containers, but can
|
||||||
# be used for any other appropriate purpose
|
# be used for any other appropriate purpose
|
||||||
function random_name()
|
function random_name()
|
||||||
|
@ -179,7 +179,7 @@ function iperf3_start_deployment() {
|
|||||||
# Check no processes are left behind
|
# Check no processes are left behind
|
||||||
check_processes
|
check_processes
|
||||||
|
|
||||||
wait_time=20
|
wait_time=180
|
||||||
sleep_time=2
|
sleep_time=2
|
||||||
|
|
||||||
# Create deployment
|
# Create deployment
|
||||||
|
@ -19,7 +19,7 @@ spec:
|
|||||||
app: iperf3-client
|
app: iperf3-client
|
||||||
spec:
|
spec:
|
||||||
tolerations:
|
tolerations:
|
||||||
- key: node-role.kubernetes.io/master
|
- key: node-role.kubernetes.io/control-plane
|
||||||
operator: Exists
|
operator: Exists
|
||||||
effect: NoSchedule
|
effect: NoSchedule
|
||||||
containers:
|
containers:
|
||||||
|
@ -25,12 +25,10 @@ spec:
|
|||||||
- weight: 1
|
- weight: 1
|
||||||
preference:
|
preference:
|
||||||
matchExpressions:
|
matchExpressions:
|
||||||
- key: kubernetes.io/role
|
- key: node-role.kubernetes.io/control-plane
|
||||||
operator: In
|
operator: Exists
|
||||||
values:
|
|
||||||
- master
|
|
||||||
tolerations:
|
tolerations:
|
||||||
- key: node-role.kubernetes.io/master
|
- key: node-role.kubernetes.io/control-plane
|
||||||
operator: Exists
|
operator: Exists
|
||||||
effect: NoSchedule
|
effect: NoSchedule
|
||||||
containers:
|
containers:
|
||||||
|
@ -33,12 +33,10 @@ function main() {
|
|||||||
cmds=("bc" "jq")
|
cmds=("bc" "jq")
|
||||||
check_cmds "${cmds[@]}"
|
check_cmds "${cmds[@]}"
|
||||||
|
|
||||||
init_env
|
|
||||||
|
|
||||||
# Check no processes are left behind
|
# Check no processes are left behind
|
||||||
check_processes
|
check_processes
|
||||||
|
|
||||||
wait_time=20
|
wait_time=180
|
||||||
sleep_time=2
|
sleep_time=2
|
||||||
|
|
||||||
# Create server
|
# Create server
|
||||||
|
Loading…
Reference in New Issue
Block a user