From a1ed923740cc0df67ad17f75624dab57ce098616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Thu, 20 Feb 2025 23:31:57 -0600 Subject: [PATCH 1/5] agent: Fix race condition with cgroup watchers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the CI, test containers intermittently fail to start after creation, with an error like below (see #10872 for more details): # State: Terminated # Reason: StartError # Message: failed to start containerd task "afd43e77fae0815afbc7205eac78f94859e247968a6a4e8bcbb987690fcf10a6": No such file or directory (os error 2) I've observed this error to repro with the following containers, which have in common that they're all *very short-lived* by design (more tests might be affected): * k8s-job.bats * k8s-seccomp.bats * k8s-hostname.bats * k8s-policy-job.bats * k8s-policy-logs.bats Furthermore, appending a `; sleep 1` to the command line for those containers seemed to consistently get rid of the error. Investigating further, I've uncovered a race between the end of the container process and the setting up of the cgroup watchers (to report OOMs). If the process terminates first, the agent will try to watch cgroup paths that don't exist anymore, and it will fail to start the container. The added error context in notifier.rs confirms that the error comes from the missing cgroup: https://github.com/kata-containers/kata-containers/actions/runs/13450787436/job/37585901466#step:17:6536 The fix simply consists in creating the watchers *before* we start the container but still *after* we create it -- this is non-blocking, and IIUC the cgroup is guaranteed to already be present then. Fixes: #10872 Signed-off-by: Aurélien Bombo --- src/agent/rustjail/src/cgroups/notifier.rs | 12 ++++++++++-- src/agent/src/rpc.rs | 21 +++++++++++---------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/agent/rustjail/src/cgroups/notifier.rs b/src/agent/rustjail/src/cgroups/notifier.rs index fb0a057b9e..e9819da4ae 100644 --- a/src/agent/rustjail/src/cgroups/notifier.rs +++ b/src/agent/rustjail/src/cgroups/notifier.rs @@ -77,9 +77,17 @@ async fn register_memory_event_v2( let mut inotify = Inotify::init().context("Failed to initialize inotify")?; // watching oom kill - let ev_wd = inotify.add_watch(&event_control_path, WatchMask::MODIFY)?; + let ev_wd = inotify + .add_watch(&event_control_path, WatchMask::MODIFY) + .context(format!("failed to add watch for {:?}", &event_control_path))?; + // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited - let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?; + let cg_wd = inotify + .add_watch(&cgroup_event_control_path, WatchMask::MODIFY) + .context(format!( + "failed to add watch for {:?}", + &cgroup_event_control_path + ))?; info!(sl(), "ev_wd: {:?}", ev_wd); info!(sl(), "cg_wd: {:?}", cg_wd); diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 30f83aec7c..2ab66e3a62 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -342,24 +342,25 @@ impl AgentService { async fn do_start_container(&self, req: protocols::agent::StartContainerRequest) -> Result<()> { let mut s = self.sandbox.lock().await; let sid = s.id.clone(); - let cid = req.container_id; + let cid = req.container_id.clone(); let ctr = s .get_container(&cid) .ok_or_else(|| anyhow!("Invalid container id"))?; - ctr.exec().await?; - if sid == cid { - return Ok(()); + if sid != cid { + // start oom event loop + if let Ok(cg_path) = ctr.cgroup_manager.as_ref().get_cgroup_path("memory") { + let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?; + s.run_oom_event_monitor(rx, cid.clone()).await; + } } - // start oom event loop - if let Ok(cg_path) = ctr.cgroup_manager.as_ref().get_cgroup_path("memory") { - let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?; - s.run_oom_event_monitor(rx, cid).await; - } + let ctr = s + .get_container(&cid) + .ok_or_else(|| anyhow!("Invalid container id"))?; - Ok(()) + ctr.exec().await } #[instrument] From 7542dbffb8abd9cccdc60a48d3c22234f5942cb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Fri, 21 Feb 2025 14:10:09 -0600 Subject: [PATCH 2/5] Revert "tests: disable k8s-policy-job.bats on coco-dev" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 47ce5dad9dda2ce7ac9719c33818b14adc862ace. Signed-off-by: Aurélien Bombo --- tests/integration/kubernetes/k8s-policy-job.bats | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/integration/kubernetes/k8s-policy-job.bats b/tests/integration/kubernetes/k8s-policy-job.bats index 60caaae260..e3b7070fac 100644 --- a/tests/integration/kubernetes/k8s-policy-job.bats +++ b/tests/integration/kubernetes/k8s-policy-job.bats @@ -9,10 +9,6 @@ load "${BATS_TEST_DIRNAME}/../../common.bash" load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { - if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then - skip "Test not stable on qemu-coco-dev. See issue #10616" - fi - auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled." get_pod_config_dir @@ -145,10 +141,6 @@ test_job_policy_error() { } teardown() { - if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then - skip "Test not stable on qemu-coco-dev. See issue #10616" - fi - auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled." # Debugging information From 1f8c15fa48fb04094afcdce4d01b2c7f23142d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Fri, 21 Feb 2025 17:35:33 -0600 Subject: [PATCH 3/5] Revert "tests: Skip k8s job test on qemu-coco-dev" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a8ccd9a2ac7b91841adb5e5440314194755f32f0. Signed-off-by: Aurélien Bombo --- tests/integration/kubernetes/k8s-job.bats | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/integration/kubernetes/k8s-job.bats b/tests/integration/kubernetes/k8s-job.bats index c0112e4a96..9b64d4dec3 100644 --- a/tests/integration/kubernetes/k8s-job.bats +++ b/tests/integration/kubernetes/k8s-job.bats @@ -9,10 +9,6 @@ load "${BATS_TEST_DIRNAME}/../../common.bash" load "${BATS_TEST_DIRNAME}/tests_common.sh" setup() { - if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then - skip "Test not stable on qemu-coco-dev. See issue #10616" - fi - get_pod_config_dir job_name="job-pi-test" yaml_file="${pod_config_dir}/job.yaml" @@ -42,10 +38,6 @@ setup() { } teardown() { - if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then - skip "Test not stable on qemu-coco-dev. See issue #10616" - fi - # Debugging information kubectl describe pod "$pod_name" kubectl describe jobs/"$job_name" From 111803e168f3ae601172c2915c82addb02525465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Fri, 21 Feb 2025 11:41:07 -0600 Subject: [PATCH 4/5] runtime: cgroups: Remove commented out code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doesn't seem like we're going to use this and it's confusing when inspecting code. Signed-off-by: Aurélien Bombo --- src/runtime/virtcontainers/kata_agent.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 3e94798670..25f08d63ad 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1033,20 +1033,6 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, dis grpcSpec.Linux.Resources.CPU.Mems = "" } - // We need agent systemd cgroup now. - // There are three main reasons to do not apply systemd cgroups in the VM - // - Initrd image doesn't have systemd. - // - Nobody will be able to modify the resources of a specific container by using systemctl set-property. - // - docker is not running in the VM. - // if resCtrl.IsSystemdCgroup(grpcSpec.Linux.CgroupsPath) { - // // Convert systemd cgroup to cgroupfs - // slice := strings.Split(grpcSpec.Linux.CgroupsPath, ":") - // // 0 - slice: system.slice - // // 1 - prefix: docker - // // 2 - name: abc123 - // grpcSpec.Linux.CgroupsPath = filepath.Join("/", slice[1], slice[2]) - // } - // Disable network namespace since it is already handled on the host by // virtcontainers. The network is a complex part which cannot be simply // passed to the agent. From adca339c3c861f48ea4a48cee9cc4332aaa2e1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Fri, 21 Feb 2025 08:28:59 -0600 Subject: [PATCH 5/5] ci: Fix GH throttling in run-nerdctl-tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specify a GH API token to avoid the below throttling error: https://github.com/kata-containers/kata-containers/actions/runs/13450787436/job/37585810679?pr=10911#step:4:96 Signed-off-by: Aurélien Bombo --- .github/workflows/basic-ci-amd64.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/basic-ci-amd64.yaml b/.github/workflows/basic-ci-amd64.yaml index fb07a12597..0842fba223 100644 --- a/.github/workflows/basic-ci-amd64.yaml +++ b/.github/workflows/basic-ci-amd64.yaml @@ -369,6 +369,8 @@ jobs: TARGET_BRANCH: ${{ inputs.target-branch }} - name: Install dependencies + env: + GITHUB_API_TOKEN: ${{ github.token }} run: bash tests/integration/nerdctl/gha-run.sh install-dependencies - name: get-kata-tarball