diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 1f4729b7d0..24dd4691a4 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -19,6 +19,7 @@ use ttrpc::{ }; use anyhow::{anyhow, Context, Result}; +use cgroups::freezer::FreezerState; use oci::{LinuxNamespace, Root, Spec}; use protobuf::{Message, RepeatedField, SingularPtrField}; use protocols::agent::{ @@ -40,8 +41,9 @@ use rustjail::specconv::CreateOpts; use nix::errno::Errno; use nix::mount::MsFlags; use nix::sys::signal::Signal; -use nix::sys::stat; +use nix::sys::{signal, stat}; use nix::unistd::{self, Pid}; +use rustjail::cgroups::Manager; use rustjail::process::ProcessOperations; use sysinfo::{DiskExt, System, SystemExt}; @@ -389,7 +391,6 @@ impl AgentService { let cid = req.container_id.clone(); let eid = req.exec_id.clone(); let s = self.sandbox.clone(); - let mut sandbox = s.lock().await; info!( sl!(), @@ -398,27 +399,97 @@ impl AgentService { "exec-id" => eid.clone(), ); - let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?; - - let mut signal = Signal::try_from(req.signal as i32).map_err(|e| { + let mut sig = Signal::try_from(req.signal as i32).map_err(|e| { anyhow!(e).context(format!( "failed to convert {:?} to signal (container-id: {}, exec-id: {})", req.signal, cid, eid )) })?; - - // For container initProcess, if it hasn't installed handler for "SIGTERM" signal, - // it will ignore the "SIGTERM" signal sent to it, thus send it "SIGKILL" signal - // instead of "SIGTERM" to terminate it. - if p.init && signal == Signal::SIGTERM && !is_signal_handled(p.pid, req.signal) { - signal = Signal::SIGKILL; + { + let mut sandbox = s.lock().await; + let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?; + // For container initProcess, if it hasn't installed handler for "SIGTERM" signal, + // it will ignore the "SIGTERM" signal sent to it, thus send it "SIGKILL" signal + // instead of "SIGTERM" to terminate it. 
+ if p.init && sig == Signal::SIGTERM && !is_signal_handled(p.pid, sig as u32) { + sig = Signal::SIGKILL; + } + p.signal(sig)?; } - p.signal(signal)?; + if eid.is_empty() { + // eid is empty, signal all the remaining processes in the container cgroup + info!( + sl!(), + "signal all the remaining processes"; + "container-id" => cid.clone(), + "exec-id" => eid.clone(), + ); + if let Err(err) = self.freeze_cgroup(&cid, FreezerState::Frozen).await { + warn!( + sl!(), + "freeze cgroup failed"; + "container-id" => cid.clone(), + "exec-id" => eid.clone(), + "error" => format!("{:?}", err), + ); + } + + let pids = self.get_pids(&cid).await?; + for pid in pids.iter() { + if let Err(err) = signal::kill(Pid::from_raw(*pid), Some(sig)) { + warn!( + sl!(), + "signal failed"; + "container-id" => cid.clone(), + "exec-id" => eid.clone(), + "pid" => pid, + "error" => format!("{:?}", err), + ); + } + } + if let Err(err) = self.freeze_cgroup(&cid, FreezerState::Thawed).await { + warn!( + sl!(), + "unfreeze cgroup failed"; + "container-id" => cid.clone(), + "exec-id" => eid.clone(), + "error" => format!("{:?}", err), + ); + } + } Ok(()) } + async fn freeze_cgroup(&self, cid: &str, state: FreezerState) -> Result<()> { + let s = self.sandbox.clone(); + let mut sandbox = s.lock().await; + let ctr = sandbox + .get_container(cid) + .ok_or_else(|| anyhow!("Invalid container id {}", cid))?; + let cm = ctr + .cgroup_manager + .as_ref() + .ok_or_else(|| anyhow!("cgroup manager not exist"))?; + cm.freeze(state)?; + Ok(()) + } + + async fn get_pids(&self, cid: &str) -> Result<Vec<i32>> { + let s = self.sandbox.clone(); + let mut sandbox = s.lock().await; + let ctr = sandbox + .get_container(cid) + .ok_or_else(|| anyhow!("Invalid container id {}", cid))?; + let cm = ctr + .cgroup_manager + .as_ref() + .ok_or_else(|| anyhow!("cgroup manager not exist"))?; + let pids = cm.get_pids()?; + Ok(pids) + } + #[instrument] async fn do_wait_process( &self, diff --git a/src/runtime/pkg/containerd-shim-v2/service.go 
b/src/runtime/pkg/containerd-shim-v2/service.go index 8e20ae82fb..72f3f14a04 100644 --- a/src/runtime/pkg/containerd-shim-v2/service.go +++ b/src/runtime/pkg/containerd-shim-v2/service.go @@ -776,6 +776,8 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E return empty, errors.New("The exec process does not exist") } processStatus = execs.status + } else { + r.All = true } // According to CRI specs, kubelet will call StopPodSandbox()