From 33908867b338da35c058a2d5f818df6a8d2223c6 Mon Sep 17 00:00:00 2001 From: Hui Zhu Date: Thu, 20 Feb 2025 11:17:55 +0800 Subject: [PATCH] runtime-rs: Fix stop after cloud_hypervisor exit issue When a wrong firmware is set for cloud_hypervisor, the kata shim does not exit after the resulting failure. This commit fixes the issue by checking whether cloud hypervisor is running before calling cloud_hypervisor_vmm_shutdown, and by setting a timeout on the call to cloud_hypervisor_vmm_shutdown. Fixes: #10907 Signed-off-by: Hui Zhu --- .../hypervisor/src/ch/inner_hypervisor.rs | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs index 841eb97f0c..68c8f335bc 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs @@ -41,7 +41,7 @@ use tokio::process::{Child, Command}; use tokio::sync::watch::Receiver; use tokio::task; use tokio::task::JoinHandle; -use tokio::time::Duration; +use tokio::time::{self, Duration}; use tokio::{io::AsyncBufReadExt, sync::mpsc}; const CH_NAME: &str = "cloud-hypervisor"; @@ -405,18 +405,37 @@ impl CloudHypervisorInner { Ok(()) } - async fn cloud_hypervisor_shutdown(&mut self) -> Result<()> { - let socket = self - .api_socket - .as_ref() - .ok_or("missing socket") + fn cloud_hypervisor_is_running(&self) -> Result<bool> { + let pid = self + .pid + .ok_or(format!("{} missing PID", CH_NAME)) .map_err(|e| anyhow!(e))?; + let proc_path = format!("/proc/{}", pid); + let path = Path::new(&proc_path); + Ok(fs::metadata(path).is_ok()) + } - let response = - cloud_hypervisor_vmm_shutdown(socket.try_clone().context("shutdown failed")?).await?; + async fn cloud_hypervisor_shutdown(&mut self) -> Result<()> { + if self.cloud_hypervisor_is_running()?
{ + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; - if let Some(detail) = response { - debug!(sl!(), "shutdown response: {:?}", detail); + let response = time::timeout( + Duration::from_secs(5), + cloud_hypervisor_vmm_shutdown(socket.try_clone().context("shutdown failed")?), + ) + .await + .context("cloud_hypervisor_vmm_shutdown timeout")? + .context("cloud_hypervisor_vmm_shutdown")?; + + if let Some(detail) = response { + debug!(sl!(), "shutdown response: {:?}", detail); + } + } else { + info!(sl!(), "{} not running", CH_NAME); } // Trigger a controlled shutdown