From 2499d013bd60818e371fc2778641e288b984faa8 Mon Sep 17 00:00:00 2001 From: Zvonko Kaiser Date: Thu, 13 Feb 2025 19:38:19 +0000 Subject: [PATCH] gpu: Update handle_cdi_devices AgentConfig now has the cdi_timeout from the kernel cmdline, update the proper function signature and use it in the for loop. Signed-off-by: Zvonko Kaiser --- src/agent/src/config.rs | 5 ++++- src/agent/src/device/mod.rs | 12 +++++++----- src/agent/src/rpc.rs | 4 +--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/agent/src/config.rs b/src/agent/src/config.rs index f12fbcdadf..8995abd283 100644 --- a/src/agent/src/config.rs +++ b/src/agent/src/config.rs @@ -780,7 +780,10 @@ fn get_timeout(param: &str) -> Result { let fields: Vec<&str> = param.split('=').collect(); ensure!(fields.len() == 2, ERR_INVALID_TIMEOUT); ensure!( - matches!(fields[0], HOTPLUG_TIMOUT_OPTION | CDH_API_TIMOUT_OPTION | CDI_TIMEOUT_OPTION), + matches!( + fields[0], + HOTPLUG_TIMOUT_OPTION | CDH_API_TIMOUT_OPTION | CDI_TIMEOUT_OPTION + ), ERR_INVALID_TIMEOUT_KEY ); diff --git a/src/agent/src/device/mod.rs b/src/agent/src/device/mod.rs index 400b6f1386..3503a34cf5 100644 --- a/src/agent/src/device/mod.rs +++ b/src/agent/src/device/mod.rs @@ -248,7 +248,7 @@ pub async fn handle_cdi_devices( logger: &Logger, spec: &mut Spec, spec_dir: &str, - cdi_timeout: u64, + cdi_timeout: time::Duration, ) -> Result<()> { if let Some(container_type) = spec .annotations() @@ -271,7 +271,7 @@ pub async fn handle_cdi_devices( let options: Vec = vec![with_auto_refresh(false), with_spec_dirs(&[spec_dir])]; let cache: Arc> = new_cache(options); - for _ in 0..=cdi_timeout { + for _ in 0..=cdi_timeout.as_secs() { let inject_result = { // Lock cache within this scope, std::sync::Mutex has no Send // and await will not work with time::sleep @@ -298,11 +298,11 @@ pub async fn handle_cdi_devices( println!("error injecting devices: {:?}", e); } } - time::sleep(Duration::from_millis(1000)).await; + time::sleep(Duration::from_secs(1)).await; } Err(anyhow!( "failed to inject devices after CDI timeout of {} seconds", - cdi_timeout + cdi_timeout.as_secs() )) } @@ -1243,8 +1243,10 @@ mod tests { fs::write(&cdi_file, cdi_content).expect("Failed to write CDI file"); + let cdi_timeout = Duration::from_secs(0); + let res = - handle_cdi_devices(&logger, &mut spec, temp_dir.path().to_str().unwrap(), 0).await; + handle_cdi_devices(&logger, &mut spec, temp_dir.path().to_str().unwrap(), cdi_timeout).await; println!("modfied spec {:?}", spec); assert!(res.is_ok(), "{}", res.err().unwrap()); diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 5f2a3eb955..9303060ee5 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -130,8 +130,6 @@ const ERR_NO_SANDBOX_PIDNS: &str = "Sandbox does not have sandbox_pidns"; // not available. const IPTABLES_RESTORE_WAIT_SEC: u64 = 5; -const CDI_TIMEOUT_LIMIT: u64 = 100; - // Convenience function to obtain the scope logger. fn sl() -> slog::Logger { slog_scope::logger() @@ -234,7 +232,7 @@ impl AgentService { // or other entities for a specifc device. // In Kata we only consider the directory "/var/run/cdi", "/etc" may be // readonly - handle_cdi_devices(&sl(), &mut oci, "/var/run/cdi", CDI_TIMEOUT_LIMIT).await?; + handle_cdi_devices(&sl(), &mut oci, "/var/run/cdi", AGENT_CONFIG.cdi_timeout).await?; cdh_handler(&mut oci).await?;