Merge pull request #10911 from kata-containers/sprt/fix-cgroup-race
agent: Fix race condition with cgroup watchers
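In short, as reconstructed from the hunks below: the agent's do_start_container now registers the OOM notifier before exec'ing the container instead of after, so a container that exits quickly can no longer remove its cgroup before the watcher is attached; the inotify add_watch errors gain context naming the path that failed; a long-dead commented-out block is dropped from the Go runtime; the job BATS tests lose their qemu-coco-dev skips; and one CI workflow step gains a GitHub API token.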
.github/workflows/basic-ci-amd64.yaml (+2)
@@ -369,6 +369,8 @@ jobs:
           TARGET_BRANCH: ${{ inputs.target-branch }}
 
       - name: Install dependencies
+        env:
+          GITHUB_API_TOKEN: ${{ github.token }}
         run: bash tests/integration/nerdctl/gha-run.sh install-dependencies
 
       - name: get-kata-tarball
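Note: this workflow hunk is independent of the cgroup fix itself. Exporting GITHUB_API_TOKEN to the install-dependencies step presumably lets gha-run.sh make authenticated GitHub API calls and avoid anonymous rate limits; the script's internals are not shown here, so that purpose is an inference.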
@@ -77,9 +77,17 @@ async fn register_memory_event_v2(
     let mut inotify = Inotify::init().context("Failed to initialize inotify")?;
 
     // watching oom kill
-    let ev_wd = inotify.add_watch(&event_control_path, WatchMask::MODIFY)?;
+    let ev_wd = inotify
+        .add_watch(&event_control_path, WatchMask::MODIFY)
+        .context(format!("failed to add watch for {:?}", &event_control_path))?;
 
     // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
-    let cg_wd = inotify.add_watch(&cgroup_event_control_path, WatchMask::MODIFY)?;
+    let cg_wd = inotify
+        .add_watch(&cgroup_event_control_path, WatchMask::MODIFY)
+        .context(format!(
+            "failed to add watch for {:?}",
+            &cgroup_event_control_path
+        ))?;
 
     info!(sl(), "ev_wd: {:?}", ev_wd);
     info!(sl(), "cg_wd: {:?}", cg_wd);
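These `.context(...)` calls only enrich errors: a bare io::Error from add_watch says "No such file or directory" without naming the path, which is exactly the detail you need when the watched cgroup has already vanished. A minimal sketch of the same pattern, assuming only the `anyhow` crate; the path and the stand-in I/O call are hypothetical:

```rust
use anyhow::{Context, Result};
use std::fs;
use std::path::Path;

// Stand-in for `inotify.add_watch(...)`: any fallible call whose bare
// io::Error does not say *which* path failed.
fn watch(path: &Path) -> Result<String> {
    fs::read_to_string(path).context(format!("failed to add watch for {:?}", path))
}

fn main() {
    if let Err(e) = watch(Path::new("/sys/fs/cgroup/missing/cgroup.event_control")) {
        // `{:#}` prints the context followed by the root cause on one line.
        eprintln!("{:#}", e);
    }
}
```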
@@ -342,24 +342,25 @@ impl AgentService {
     async fn do_start_container(&self, req: protocols::agent::StartContainerRequest) -> Result<()> {
         let mut s = self.sandbox.lock().await;
         let sid = s.id.clone();
-        let cid = req.container_id;
+        let cid = req.container_id.clone();
 
         let ctr = s
             .get_container(&cid)
             .ok_or_else(|| anyhow!("Invalid container id"))?;
-        ctr.exec().await?;
 
-        if sid == cid {
-            return Ok(());
+        if sid != cid {
+            // start oom event loop
+            if let Ok(cg_path) = ctr.cgroup_manager.as_ref().get_cgroup_path("memory") {
+                let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
+                s.run_oom_event_monitor(rx, cid.clone()).await;
+            }
         }
 
-        // start oom event loop
-        if let Ok(cg_path) = ctr.cgroup_manager.as_ref().get_cgroup_path("memory") {
-            let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
-            s.run_oom_event_monitor(rx, cid).await;
-        }
+        let ctr = s
+            .get_container(&cid)
+            .ok_or_else(|| anyhow!("Invalid container id"))?;
 
-        Ok(())
+        ctr.exec().await
     }
 
     #[instrument]
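This is the substance of the race fix, and it is purely an ordering change: previously the container was exec'd first and the OOM watcher registered after, so a fast-exiting container could have its cgroup torn down before notify_oom attached its inotify watch. The new order registers the watcher first, then looks the container up again and execs it; `cid` is cloned where it is consumed because it is still needed for that second lookup. A std-only Rust sketch of the register-before-start pattern; all names here are illustrative, not the agent's API:

```rust
use std::sync::mpsc;
use std::thread;

fn main() {
    let (tx, rx) = mpsc::channel::<String>();

    // 1. Attach the event monitor first, while the "cgroup" still exists.
    let monitor = thread::spawn(move || {
        for event in rx {
            println!("oom event: {event}");
        }
    });

    // 2. Only then start the (simulated) workload; even if it exits at once,
    //    the monitor was already listening and misses nothing.
    let workload = thread::spawn(move || {
        tx.send(String::from("memory.events modified")).unwrap();
        // sender dropped here => channel closes => monitor loop ends
    });

    workload.join().unwrap();
    monitor.join().unwrap();
}
```

The design point is the same in the sketch and in the agent: whatever produces events must not be started until the consumer is wired up.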
@@ -1033,20 +1033,6 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, dis
 		grpcSpec.Linux.Resources.CPU.Mems = ""
 	}
 
-	// We need agent systemd cgroup now.
-	// There are three main reasons to do not apply systemd cgroups in the VM
-	// - Initrd image doesn't have systemd.
-	// - Nobody will be able to modify the resources of a specific container by using systemctl set-property.
-	// - docker is not running in the VM.
-	// if resCtrl.IsSystemdCgroup(grpcSpec.Linux.CgroupsPath) {
-	// 	// Convert systemd cgroup to cgroupfs
-	// 	slice := strings.Split(grpcSpec.Linux.CgroupsPath, ":")
-	// 	// 0 - slice: system.slice
-	// 	// 1 - prefix: docker
-	// 	// 2 - name: abc123
-	// 	grpcSpec.Linux.CgroupsPath = filepath.Join("/", slice[1], slice[2])
-	// }
-
 	// Disable network namespace since it is already handled on the host by
 	// virtcontainers. The network is a complex part which cannot be simply
 	// passed to the agent.
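The runtime-side hunk is pure cleanup: it deletes a block that was already fully commented out (the old systemd-to-cgroupfs cgroups-path conversion) along with its rationale comments. Nothing executable changes in kata-agent's Go runtime here.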
@@ -9,10 +9,6 @@ load "${BATS_TEST_DIRNAME}/../../common.bash"
 load "${BATS_TEST_DIRNAME}/tests_common.sh"
 
 setup() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	get_pod_config_dir
 	job_name="job-pi-test"
 	yaml_file="${pod_config_dir}/job.yaml"
@@ -42,10 +38,6 @@ setup() {
 }
 
 teardown() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	# Debugging information
 	kubectl describe pod "$pod_name"
 	kubectl describe jobs/"$job_name"
@@ -9,10 +9,6 @@ load "${BATS_TEST_DIRNAME}/../../common.bash"
 load "${BATS_TEST_DIRNAME}/tests_common.sh"
 
 setup() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled."
 
 	get_pod_config_dir
@@ -145,10 +141,6 @@ test_job_policy_error() {
 }
 
 teardown() {
-	if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
-		skip "Test not stable on qemu-coco-dev. See issue #10616"
-	fi
-
 	auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled."
 
 	# Debugging information
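Both job test files drop the same guard: the setup() and teardown() skips for qemu-coco-dev that pointed at issue #10616 ("Test not stable on qemu-coco-dev"). Removing them in the same PR as the watcher fix suggests that instability was this cgroup-watcher race, so the tests are expected to run reliably on qemu-coco-dev again.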