From 86ad832e37c87bf62aa08f34191a00997a00ffac Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 2 Sep 2022 17:36:28 +0800 Subject: [PATCH] runtime-rs: force shutdown shim process in it can't exit In some case the call of cleanup from shim to service manager will fail, and the shim process will continue to running, that will make process leak. This commit will force shutdown the shim process in case of any errors in service crate. Fixes: #5087 Signed-off-by: Bin Liu --- .../share_fs/share_virtio_fs_standalone.rs | 5 +++- src/runtime-rs/crates/service/src/manager.rs | 2 +- src/runtime-rs/crates/shim/src/shim_delete.rs | 30 ++++++++++++++++--- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs index 9c798d7467..b0c9149735 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs @@ -67,7 +67,10 @@ impl ShareVirtioFsStandalone { fn virtiofsd_args(&self, sock_path: &str) -> Result> { let source_path = get_host_ro_shared_path(&self.config.id); if !source_path.exists() { - return Err(anyhow!("The virtiofs shared path didn't exist")); + return Err(anyhow!( + "The virtiofs shared path({:?}) didn't exist", + source_path + )); } let mut args: Vec = vec![ diff --git a/src/runtime-rs/crates/service/src/manager.rs b/src/runtime-rs/crates/service/src/manager.rs index 214db1e47d..1019aa220a 100644 --- a/src/runtime-rs/crates/service/src/manager.rs +++ b/src/runtime-rs/crates/service/src/manager.rs @@ -155,7 +155,7 @@ impl ServiceManager { let handler = RuntimeHandlerManager::new(sid, sender) .await .context("new runtime handler")?; - handler.cleanup().await?; + handler.cleanup().await.context("runtime handler cleanup")?; let temp_dir = [KATA_PATH, sid].join("/"); if std::fs::metadata(temp_dir.as_str()).is_ok() { // try to remove dir and skip the result diff --git a/src/runtime-rs/crates/shim/src/shim_delete.rs b/src/runtime-rs/crates/shim/src/shim_delete.rs index 8429d34aeb..acaa0d83e2 100644 --- a/src/runtime-rs/crates/shim/src/shim_delete.rs +++ b/src/runtime-rs/crates/shim/src/shim_delete.rs @@ -6,6 +6,9 @@ use anyhow::{Context, Result}; use containerd_shim_protos::api; +use kata_sys_util::spec::{get_bundle_path, get_contaier_type, load_oci_spec}; +use kata_types::container::ContainerType; +use nix::{sys::signal::kill, sys::signal::SIGKILL, unistd::Pid}; use protobuf::Message; use std::{fs, path::Path}; @@ -14,7 +17,7 @@ use crate::{shim::ShimExecutor, Error}; impl ShimExecutor { pub async fn delete(&mut self) -> Result<()> { self.args.validate(true).context("validate")?; - let rsp = self.do_cleanup().await.context("do cleanup")?; + let rsp = self.do_cleanup().await.context("shim do cleanup")?; rsp.write_to_writer(&mut std::io::stdout()) .context(Error::FileWrite(format!("write {:?} to stdout", rsp)))?; Ok(()) @@ -41,9 +44,28 @@ impl ShimExecutor { info!(sl!(), "remote socket path: {:?}", &file_path); fs::remove_file(file_path).ok(); } - service::ServiceManager::cleanup(&self.args.id) - .await - .context("cleanup")?; + + if let Err(e) = service::ServiceManager::cleanup(&self.args.id).await { + error!( + sl!(), + "failed to cleanup in service manager: {:?}. force shutdown shim process", e + ); + + let bundle_path = get_bundle_path().context("get bundle path")?; + if let Ok(spec) = load_oci_spec() { + if let Ok(ContainerType::PodSandbox) = get_contaier_type(&spec) { + // only force shutdown for sandbox container + if let Ok(shim_pid) = self.read_pid_file(&bundle_path) { + info!(sl!(), "force to shutdown shim process {}", shim_pid); + let pid = Pid::from_raw(shim_pid as i32); + if let Err(_e) = kill(pid, SIGKILL) { + // ignore kill errors + } + } + } + } + } + Ok(rsp) } }