Mirror of https://github.com/kata-containers/kata-containers.git, synced 2025-09-12 12:29:11 +00:00
Merge pull request #10911 from kata-containers/sprt/fix-cgroup-race
agent: Fix race condition with cgroup watchers
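Judging from the diffs below, this merge does four things: it reorders the agent's do_start_container so the OOM event monitor is registered before the container workload is started, closing the window in which a quickly OOM-killed container could exit before its cgroup watchers existed; it adds descriptive error context to the inotify watch registrations in the cgroup v2 memory-event notifier; it deletes a stale commented-out systemd-cgroup conversion block from the runtime's constrainGRPCSpec; and it removes the qemu-coco-dev skips (issue #10616) from the job tests that the race had destabilized, alongside a small CI workflow change.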
.github/workflows/basic-ci-amd64.yaml:
@@ -369,6 +369,8 @@ jobs:
           TARGET_BRANCH: ${{ inputs.target-branch }}
 
       - name: Install dependencies
+        env:
+          GITHUB_API_TOKEN: ${{ github.token }}
         run: bash tests/integration/nerdctl/gha-run.sh install-dependencies
 
       - name: get-kata-tarball

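The added env block passes the runner's token through GITHUB_API_TOKEN to the dependency-install script, presumably so its GitHub API requests are authenticated and not rate-limited; the variable name and value come from the diff, the motivation is an assumption.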
@@ -77,9 +77,17 @@ async fn register_memory_event_v2(
     let mut inotify = Inotify::init().context("Failed to initialize inotify")?;
 
     // watching oom kill
-    let ev_wd = inotify.add_watch(&event_control_path, WatchMask::MODIFY)?;
+    let ev_wd = inotify
+        .add_watch(&event_control_path, WatchMask::MODIFY)
+        .context(format!("failed to add watch for {:?}", &event_control_path))?;
 
     // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
-    let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?;
+    let cg_wd = inotify
+        .add_watch(&cgroup_event_control_path, WatchMask::MODIFY)
+        .context(format!(
+            "failed to add watch for {:?}",
+            &cgroup_event_control_path
+        ))?;
 
     info!(sl(), "ev_wd: {:?}", ev_wd);
     info!(sl(), "cg_wd: {:?}", cg_wd);

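The notifier change above swaps bare `?` propagation for anyhow's .context(...), so a failed watch registration names the path it could not watch. Below is a minimal sketch of the same pattern, assuming the inotify and anyhow crates; the helper name and path argument are illustrative, not the agent's own API:

use std::path::Path;

use anyhow::{Context, Result};
use inotify::{Inotify, WatchMask};

// Hypothetical helper: watch one cgroup control file for modifications.
fn watch_control_file(event_control_path: &Path) -> Result<()> {
    let mut inotify = Inotify::init().context("Failed to initialize inotify")?;

    // With .context(...), a failure is reported as
    // `failed to add watch for "<path>": <io error>` rather than as a bare
    // errno, which is what makes a racy add_watch failure diagnosable.
    let _wd = inotify
        .add_watch(event_control_path, WatchMask::MODIFY)
        .context(format!("failed to add watch for {:?}", event_control_path))?;

    Ok(())
}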
@@ -342,24 +342,25 @@ impl AgentService {
     async fn do_start_container(&self, req: protocols::agent::StartContainerRequest) -> Result<()> {
         let mut s = self.sandbox.lock().await;
         let sid = s.id.clone();
-        let cid = req.container_id;
+        let cid = req.container_id.clone();
 
-        let ctr = s
-            .get_container(&cid)
-            .ok_or_else(|| anyhow!("Invalid container id"))?;
-        ctr.exec().await?;
-
-        if sid == cid {
-            return Ok(());
+        if sid != cid {
+            let ctr = s
+                .get_container(&cid)
+                .ok_or_else(|| anyhow!("Invalid container id"))?;
+
+            // start oom event loop
+            if let Ok(cg_path) = ctr.cgroup_manager.as_ref().get_cgroup_path("memory") {
+                let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
+                s.run_oom_event_monitor(rx, cid.clone()).await;
+            }
         }
 
-        // start oom event loop
-        if let Ok(cg_path) = ctr.cgroup_manager.as_ref().get_cgroup_path("memory") {
-            let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
-            s.run_oom_event_monitor(rx, cid).await;
-        }
+        let ctr = s
+            .get_container(&cid)
+            .ok_or_else(|| anyhow!("Invalid container id"))?;
 
-        Ok(())
+        ctr.exec().await
     }
 
     #[instrument]

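The core of the race fix above is ordering: the OOM watcher is now registered before exec() starts the workload, whereas previously a container that was OOM-killed immediately could exit, and have its cgroup torn down, before the watch was ever added. A minimal sketch of that inversion, with hypothetical register_oom_watcher and start_workload helpers standing in for the agent's real machinery:

use anyhow::Result;

// Hypothetical stand-in: in the agent this adds inotify watches on the
// container's memory cgroup (see register_memory_event_v2 above).
fn register_oom_watcher(cid: &str) -> Result<()> {
    println!("watching OOM events for container {cid}");
    Ok(())
}

// Hypothetical stand-in for ctr.exec(): starts the container workload.
fn start_workload(cid: &str) -> Result<()> {
    println!("starting container {cid}");
    Ok(())
}

fn do_start(cid: &str) -> Result<()> {
    // Watch first, start second. With the old order (start, then watch),
    // a workload that OOMs instantly could exit before the watch existed
    // and the kill event would be lost.
    register_oom_watcher(cid)?;
    start_workload(cid)
}

fn main() -> Result<()> {
    do_start("abc123")
}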
@@ -1033,20 +1033,6 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, dis
 		grpcSpec.Linux.Resources.CPU.Mems = ""
 	}
 
-	// We need agent systemd cgroup now.
-	// There are three main reasons to do not apply systemd cgroups in the VM
-	// - Initrd image doesn't have systemd.
-	// - Nobody will be able to modify the resources of a specific container by using systemctl set-property.
-	// - docker is not running in the VM.
-	// if resCtrl.IsSystemdCgroup(grpcSpec.Linux.CgroupsPath) {
-	// 	// Convert systemd cgroup to cgroupfs
-	// 	slice := strings.Split(grpcSpec.Linux.CgroupsPath, ":")
-	// 	// 0 - slice: system.slice
-	// 	// 1 - prefix: docker
-	// 	// 2 - name: abc123
-	// 	grpcSpec.Linux.CgroupsPath = filepath.Join("/", slice[1], slice[2])
-	// }
-
 	// Disable network namespace since it is already handled on the host by
 	// virtcontainers. The network is a complex part which cannot be simply
 	// passed to the agent.

@@ -9,10 +9,6 @@ load "${BATS_TEST_DIRNAME}/../../common.bash"
 load "${BATS_TEST_DIRNAME}/tests_common.sh"
 
 setup() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	get_pod_config_dir
 	job_name="job-pi-test"
 	yaml_file="${pod_config_dir}/job.yaml"

@@ -42,10 +38,6 @@ setup() {
 }
 
 teardown() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	# Debugging information
 	kubectl describe pod "$pod_name"
 	kubectl describe jobs/"$job_name"

@@ -9,10 +9,6 @@ load "${BATS_TEST_DIRNAME}/../../common.bash"
 load "${BATS_TEST_DIRNAME}/tests_common.sh"
 
 setup() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled."
 
 	get_pod_config_dir

@@ -145,10 +141,6 @@ test_job_policy_error() {
 }
 
 teardown() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled."
 
 	# Debugging information