From 80e2473440e4f070e1f7e970bb8fc66cc573a086 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?=
Date: Tue, 2 Jun 2026 21:38:47 +0200
Subject: [PATCH] runtime-rs: shut down shim daemon on a failed create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CreateContainer fails before the runtime instance is registered
(e.g. a hypervisor/cgroup error), no sandbox exists to drive the normal
teardown. containerd's follow-up Shutdown RPC then reaches
get_runtime_instance(), fails with "runtime not ready", and returns
before the service loop is ever told to stop. Because the shim ignores
SIGTERM, the containerd-shim-kata-v2 daemon is left running and
orphaned.

Make the Shutdown RPC force the daemon to exit when there is no runtime
instance, emitting the same Action::Shutdown that sandbox.shutdown()
sends on the normal path. This guarantees the shim process is reaped
after a failed create instead of leaking.

Signed-off-by: Fabiano Fidêncio
Assisted-by: Cursor
---
 src/runtime-rs/crates/runtimes/src/manager.rs | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs b/src/runtime-rs/crates/runtimes/src/manager.rs
index 458cdbe3eb..a4ec1b8c45 100644
--- a/src/runtime-rs/crates/runtimes/src/manager.rs
+++ b/src/runtime-rs/crates/runtimes/src/manager.rs
@@ -535,6 +535,29 @@ impl RuntimeHandlerManager {
 
             Ok(TaskResponse::CreateContainer(shim_pid))
         } else {
+            // A teardown RPC must still make the shim daemon exit even when
+            // the runtime instance was never (fully) created -- e.g. after a
+            // failed CreateContainer. In that case containerd's follow-up
+            // Shutdown would otherwise hit `get_runtime_instance()`, fail with
+            // "runtime not ready", and the service loop would never receive
+            // `Action::Shutdown`. Because the shim ignores SIGTERM the daemon
+            // would then be left running and orphaned by containerd.
+            if let TaskRequest::ShutdownContainer(_) = &req {
+                if self.get_runtime_instance().await.is_err() {
+                    warn!(
+                        sl!(),
+                        "shutdown requested but runtime instance is not ready; \
+                         forcing shim exit to avoid an orphaned shim process"
+                    );
+                    let sender = self.inner.read().await.msg_sender.clone();
+                    sender
+                        .send(Message::new(Action::Shutdown))
+                        .await
+                        .context("send shutdown message")?;
+                    return Ok(TaskResponse::ShutdownContainer);
+                }
+            }
+
             self.handler_task_request(req)
                 .await
                 .context("handler TaskRequest")
@@ -950,3 +973,35 @@ fn configure_non_root_hypervisor(config: &mut Hypervisor) -> Result<()> {
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use common::types::ShutdownRequest;
+    use tokio::sync::mpsc::channel;
+
+    // A ShutdownContainer RPC that arrives before any runtime instance was
+    // created (e.g. after a failed CreateContainer) must still drive the shim
+    // daemon to exit, otherwise the process is orphaned. Verify it returns
+    // ShutdownContainer and emits Action::Shutdown on the service channel.
+    #[tokio::test]
+    async fn test_shutdown_without_runtime_instance_forces_exit() {
+        let (sender, mut receiver) = channel::<Message>(8);
+        let manager = RuntimeHandlerManager::new("test-sid", sender).unwrap();
+
+        let resp = manager
+            .handler_task_message(TaskRequest::ShutdownContainer(ShutdownRequest {
+                container_id: "test-sid".to_string(),
+                is_now: true,
+            }))
+            .await
+            .expect("shutdown should succeed even without a runtime instance");
+
+        assert!(matches!(resp, TaskResponse::ShutdownContainer));
+
+        let msg = receiver
+            .try_recv()
+            .expect("an Action::Shutdown message must be sent to stop the daemon");
+        assert!(matches!(msg.action, Action::Shutdown));
+    }
+}