From 0e9d73fe30339bdcb622df9f0fafdaf0dd00f9a0 Mon Sep 17 00:00:00 2001 From: Xuewei Niu Date: Thu, 4 Jan 2024 15:35:25 +0800 Subject: [PATCH] agent: Fix an issue reporting OOM events by mistake The agent registers an event fd in `memory.oom_control`. An OOM event is forwarded to containerd when the event is emitted, regardless of the content in that file. I observed content indicating that events should not be forwarded, as shown below. When `oom_kill` is set to 0, it means no OOM has occurred. Therefore, it is important to check the content to avoid mistakenly forwarding OOM events. ``` oom_kill_disable 0 under_oom 0 oom_kill 0 ``` Fixes: #8715 Signed-off-by: Xuewei Niu --- src/agent/rustjail/src/cgroups/notifier.rs | 27 +++++++++++----------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/agent/rustjail/src/cgroups/notifier.rs b/src/agent/rustjail/src/cgroups/notifier.rs index 5260a3d3f2..fb0a057b9e 100644 --- a/src/agent/rustjail/src/cgroups/notifier.rs +++ b/src/agent/rustjail/src/cgroups/notifier.rs @@ -3,19 +3,20 @@ // SPDX-License-Identifier: Apache-2.0 // -use anyhow::{anyhow, Context, Result}; -use eventfd::{eventfd, EfdFlags}; -use nix::sys::eventfd; use std::fs::{self, File}; use std::os::unix::io::{AsRawFd, FromRawFd}; use std::path::Path; -use crate::pipestream::PipeStream; +use anyhow::{anyhow, Context, Result}; +use eventfd::{eventfd, EfdFlags}; use futures::StreamExt as _; use inotify::{Inotify, WatchMask}; +use nix::sys::eventfd; use tokio::io::AsyncReadExt; use tokio::sync::mpsc::{channel, Receiver}; +use crate::pipestream::PipeStream; + // Convenience function to obtain the scope logger. 
fn sl() -> slog::Logger { slog_scope::logger().new(o!("subsystem" => "cgroups_notifier")) @@ -165,7 +166,6 @@ async fn register_memory_event( tokio::spawn(async move { loop { - let sender = sender.clone(); let mut buf = [0u8; 8]; match eventfd_stream.read(&mut buf).await { Err(err) => { @@ -173,14 +173,15 @@ async fn register_memory_event( return; } Ok(_) => { - let content = fs::read_to_string(path.clone()); - info!( - sl(), - "cgroup event for container: {}, path: {:?}, content: {:?}", - &containere_id, - &path, - content - ); + if let Ok(times) = get_value_from_cgroup(&path, "oom_kill") { + if times < 1 { + // Do not send an OOM event in the case where no OOM has occurred + continue; + } + } + // Send an OOM event in two cases: + // 1. The value is not empty && times > 0: OOM kill has occurred. + // 2. The value is empty: Do what the previous implementation did. } }