mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-05-01 05:04:26 +00:00
agent: Kill the all the container processes of the same cgroup
Otherwise the container process might leak and cause an unclean exit Fixes: #3913 Signed-off-by: Feng Wang <feng.wang@databricks.com>
This commit is contained in:
parent
19f372b5f5
commit
0928eb9f4e
@ -19,6 +19,7 @@ use ttrpc::{
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use cgroups::freezer::FreezerState;
|
||||
use oci::{LinuxNamespace, Root, Spec};
|
||||
use protobuf::{Message, RepeatedField, SingularPtrField};
|
||||
use protocols::agent::{
|
||||
@ -40,8 +41,9 @@ use rustjail::specconv::CreateOpts;
|
||||
use nix::errno::Errno;
|
||||
use nix::mount::MsFlags;
|
||||
use nix::sys::signal::Signal;
|
||||
use nix::sys::stat;
|
||||
use nix::sys::{signal, stat};
|
||||
use nix::unistd::{self, Pid};
|
||||
use rustjail::cgroups::Manager;
|
||||
use rustjail::process::ProcessOperations;
|
||||
|
||||
use sysinfo::{DiskExt, System, SystemExt};
|
||||
@ -391,7 +393,6 @@ impl AgentService {
|
||||
let cid = req.container_id.clone();
|
||||
let eid = req.exec_id.clone();
|
||||
let s = self.sandbox.clone();
|
||||
let mut sandbox = s.lock().await;
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
@ -400,27 +401,97 @@ impl AgentService {
|
||||
"exec-id" => eid.clone(),
|
||||
);
|
||||
|
||||
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
|
||||
|
||||
let mut signal = Signal::try_from(req.signal as i32).map_err(|e| {
|
||||
let mut sig = Signal::try_from(req.signal as i32).map_err(|e| {
|
||||
anyhow!(e).context(format!(
|
||||
"failed to convert {:?} to signal (container-id: {}, exec-id: {})",
|
||||
req.signal, cid, eid
|
||||
))
|
||||
})?;
|
||||
|
||||
// For container initProcess, if it hasn't installed handler for "SIGTERM" signal,
|
||||
// it will ignore the "SIGTERM" signal sent to it, thus send it "SIGKILL" signal
|
||||
// instead of "SIGTERM" to terminate it.
|
||||
if p.init && signal == Signal::SIGTERM && !is_signal_handled(p.pid, req.signal) {
|
||||
signal = Signal::SIGKILL;
|
||||
{
|
||||
let mut sandbox = s.lock().await;
|
||||
let p = sandbox.find_container_process(cid.as_str(), eid.as_str())?;
|
||||
// For container initProcess, if it hasn't installed handler for "SIGTERM" signal,
|
||||
// it will ignore the "SIGTERM" signal sent to it, thus send it "SIGKILL" signal
|
||||
// instead of "SIGTERM" to terminate it.
|
||||
if p.init && sig == Signal::SIGTERM && !is_signal_handled(p.pid, sig as u32) {
|
||||
sig = Signal::SIGKILL;
|
||||
}
|
||||
p.signal(sig)?;
|
||||
}
|
||||
|
||||
p.signal(signal)?;
|
||||
if eid.is_empty() {
|
||||
// eid is empty, signal all the remaining processes in the container cgroup
|
||||
info!(
|
||||
sl!(),
|
||||
"signal all the remaining processes";
|
||||
"container-id" => cid.clone(),
|
||||
"exec-id" => eid.clone(),
|
||||
);
|
||||
|
||||
if let Err(err) = self.freeze_cgroup(&cid, FreezerState::Frozen).await {
|
||||
warn!(
|
||||
sl!(),
|
||||
"freeze cgroup failed";
|
||||
"container-id" => cid.clone(),
|
||||
"exec-id" => eid.clone(),
|
||||
"error" => format!("{:?}", err),
|
||||
);
|
||||
}
|
||||
|
||||
let pids = self.get_pids(&cid).await?;
|
||||
for pid in pids.iter() {
|
||||
if let Err(err) = signal::kill(Pid::from_raw(*pid), Some(sig)) {
|
||||
warn!(
|
||||
sl!(),
|
||||
"signal failed";
|
||||
"container-id" => cid.clone(),
|
||||
"exec-id" => eid.clone(),
|
||||
"pid" => pid,
|
||||
"error" => format!("{:?}", err),
|
||||
);
|
||||
}
|
||||
}
|
||||
if let Err(err) = self.freeze_cgroup(&cid, FreezerState::Thawed).await {
|
||||
warn!(
|
||||
sl!(),
|
||||
"unfreeze cgroup failed";
|
||||
"container-id" => cid.clone(),
|
||||
"exec-id" => eid.clone(),
|
||||
"error" => format!("{:?}", err),
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn freeze_cgroup(&self, cid: &str, state: FreezerState) -> Result<()> {
|
||||
let s = self.sandbox.clone();
|
||||
let mut sandbox = s.lock().await;
|
||||
let ctr = sandbox
|
||||
.get_container(cid)
|
||||
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
||||
let cm = ctr
|
||||
.cgroup_manager
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("cgroup manager not exist"))?;
|
||||
cm.freeze(state)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_pids(&self, cid: &str) -> Result<Vec<i32>> {
|
||||
let s = self.sandbox.clone();
|
||||
let mut sandbox = s.lock().await;
|
||||
let ctr = sandbox
|
||||
.get_container(cid)
|
||||
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
||||
let cm = ctr
|
||||
.cgroup_manager
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("cgroup manager not exist"))?;
|
||||
let pids = cm.get_pids()?;
|
||||
Ok(pids)
|
||||
}
|
||||
|
||||
#[instrument]
|
||||
async fn do_wait_process(
|
||||
&self,
|
||||
|
@ -776,6 +776,8 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E
|
||||
return empty, errors.New("The exec process does not exist")
|
||||
}
|
||||
processStatus = execs.status
|
||||
} else {
|
||||
r.All = true
|
||||
}
|
||||
|
||||
// According to CRI specs, kubelet will call StopPodSandbox()
|
||||
|
Loading…
Reference in New Issue
Block a user