mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-05 10:10:50 +00:00
Merge pull request #10878 from zvonkok/agent_cdi_timeout
gpu: agent cdi timeout
This commit is contained in:
commit
72833cb00b
@ -23,6 +23,7 @@ const SERVER_ADDR_OPTION: &str = "agent.server_addr";
|
|||||||
const PASSFD_LISTENER_PORT: &str = "agent.passfd_listener_port";
|
const PASSFD_LISTENER_PORT: &str = "agent.passfd_listener_port";
|
||||||
const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
|
const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout";
|
||||||
const CDH_API_TIMOUT_OPTION: &str = "agent.cdh_api_timeout";
|
const CDH_API_TIMOUT_OPTION: &str = "agent.cdh_api_timeout";
|
||||||
|
const CDI_TIMEOUT_OPTION: &str = "agent.cdi_timeout";
|
||||||
const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
|
const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
|
||||||
const LOG_VPORT_OPTION: &str = "agent.log_vport";
|
const LOG_VPORT_OPTION: &str = "agent.log_vport";
|
||||||
const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size";
|
const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size";
|
||||||
@ -70,6 +71,7 @@ const MEM_AGENT_COMPACT_FORCE_TIMES: &str = "agent.mem_agent_compact_force_times
|
|||||||
const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info;
|
const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info;
|
||||||
const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3);
|
const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3);
|
||||||
const DEFAULT_CDH_API_TIMEOUT: time::Duration = time::Duration::from_secs(50);
|
const DEFAULT_CDH_API_TIMEOUT: time::Duration = time::Duration::from_secs(50);
|
||||||
|
const DEFAULT_CDI_TIMEOUT: time::Duration = time::Duration::from_secs(100);
|
||||||
const DEFAULT_CONTAINER_PIPE_SIZE: i32 = 0;
|
const DEFAULT_CONTAINER_PIPE_SIZE: i32 = 0;
|
||||||
const VSOCK_ADDR: &str = "vsock://-1";
|
const VSOCK_ADDR: &str = "vsock://-1";
|
||||||
|
|
||||||
@ -132,6 +134,7 @@ pub struct AgentConfig {
|
|||||||
pub log_level: slog::Level,
|
pub log_level: slog::Level,
|
||||||
pub hotplug_timeout: time::Duration,
|
pub hotplug_timeout: time::Duration,
|
||||||
pub cdh_api_timeout: time::Duration,
|
pub cdh_api_timeout: time::Duration,
|
||||||
|
pub cdi_timeout: time::Duration,
|
||||||
pub debug_console_vport: i32,
|
pub debug_console_vport: i32,
|
||||||
pub log_vport: i32,
|
pub log_vport: i32,
|
||||||
pub container_pipe_size: i32,
|
pub container_pipe_size: i32,
|
||||||
@ -169,6 +172,7 @@ pub struct AgentConfigBuilder {
|
|||||||
pub log_level: Option<String>,
|
pub log_level: Option<String>,
|
||||||
pub hotplug_timeout: Option<time::Duration>,
|
pub hotplug_timeout: Option<time::Duration>,
|
||||||
pub cdh_api_timeout: Option<time::Duration>,
|
pub cdh_api_timeout: Option<time::Duration>,
|
||||||
|
pub cdi_timeout: Option<time::Duration>,
|
||||||
pub debug_console_vport: Option<i32>,
|
pub debug_console_vport: Option<i32>,
|
||||||
pub log_vport: Option<i32>,
|
pub log_vport: Option<i32>,
|
||||||
pub container_pipe_size: Option<i32>,
|
pub container_pipe_size: Option<i32>,
|
||||||
@ -267,6 +271,7 @@ impl Default for AgentConfig {
|
|||||||
log_level: DEFAULT_LOG_LEVEL,
|
log_level: DEFAULT_LOG_LEVEL,
|
||||||
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
|
hotplug_timeout: DEFAULT_HOTPLUG_TIMEOUT,
|
||||||
cdh_api_timeout: DEFAULT_CDH_API_TIMEOUT,
|
cdh_api_timeout: DEFAULT_CDH_API_TIMEOUT,
|
||||||
|
cdi_timeout: DEFAULT_CDI_TIMEOUT,
|
||||||
debug_console_vport: 0,
|
debug_console_vport: 0,
|
||||||
log_vport: 0,
|
log_vport: 0,
|
||||||
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
|
container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE,
|
||||||
@ -312,6 +317,7 @@ impl FromStr for AgentConfig {
|
|||||||
);
|
);
|
||||||
config_override!(agent_config_builder, agent_config, hotplug_timeout);
|
config_override!(agent_config_builder, agent_config, hotplug_timeout);
|
||||||
config_override!(agent_config_builder, agent_config, cdh_api_timeout);
|
config_override!(agent_config_builder, agent_config, cdh_api_timeout);
|
||||||
|
config_override!(agent_config_builder, agent_config, cdi_timeout);
|
||||||
config_override!(agent_config_builder, agent_config, debug_console_vport);
|
config_override!(agent_config_builder, agent_config, debug_console_vport);
|
||||||
config_override!(agent_config_builder, agent_config, log_vport);
|
config_override!(agent_config_builder, agent_config, log_vport);
|
||||||
config_override!(agent_config_builder, agent_config, container_pipe_size);
|
config_override!(agent_config_builder, agent_config, container_pipe_size);
|
||||||
@ -487,6 +493,15 @@ impl AgentConfig {
|
|||||||
|cdh_api_timeout: &time::Duration| cdh_api_timeout.as_secs() > 0
|
|cdh_api_timeout: &time::Duration| cdh_api_timeout.as_secs() > 0
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// ensure the timeout is a positive value
|
||||||
|
parse_cmdline_param!(
|
||||||
|
param,
|
||||||
|
CDI_TIMEOUT_OPTION,
|
||||||
|
config.cdi_timeout,
|
||||||
|
get_timeout,
|
||||||
|
|cdi_timeout: &time::Duration| cdi_timeout.as_secs() > 0
|
||||||
|
);
|
||||||
|
|
||||||
// vsock port should be positive values
|
// vsock port should be positive values
|
||||||
parse_cmdline_param!(
|
parse_cmdline_param!(
|
||||||
param,
|
param,
|
||||||
@ -763,7 +778,10 @@ fn get_timeout(param: &str) -> Result<time::Duration> {
|
|||||||
let fields: Vec<&str> = param.split('=').collect();
|
let fields: Vec<&str> = param.split('=').collect();
|
||||||
ensure!(fields.len() == 2, ERR_INVALID_TIMEOUT);
|
ensure!(fields.len() == 2, ERR_INVALID_TIMEOUT);
|
||||||
ensure!(
|
ensure!(
|
||||||
matches!(fields[0], HOTPLUG_TIMOUT_OPTION | CDH_API_TIMOUT_OPTION),
|
matches!(
|
||||||
|
fields[0],
|
||||||
|
HOTPLUG_TIMOUT_OPTION | CDH_API_TIMOUT_OPTION | CDI_TIMEOUT_OPTION
|
||||||
|
),
|
||||||
ERR_INVALID_TIMEOUT_KEY
|
ERR_INVALID_TIMEOUT_KEY
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -1704,6 +1722,7 @@ Caused by:
|
|||||||
)))]
|
)))]
|
||||||
#[case("agent.chd_api_timeout=1", Err(anyhow!(ERR_INVALID_TIMEOUT_KEY)))]
|
#[case("agent.chd_api_timeout=1", Err(anyhow!(ERR_INVALID_TIMEOUT_KEY)))]
|
||||||
#[case("agent.cdh_api_timeout=600", Ok(time::Duration::from_secs(600)))]
|
#[case("agent.cdh_api_timeout=600", Ok(time::Duration::from_secs(600)))]
|
||||||
|
#[case("agent.cdi_timeout=320", Ok(time::Duration::from_secs(320)))]
|
||||||
fn test_timeout(#[case] param: &str, #[case] expected: Result<time::Duration>) {
|
fn test_timeout(#[case] param: &str, #[case] expected: Result<time::Duration>) {
|
||||||
let result = get_timeout(param);
|
let result = get_timeout(param);
|
||||||
let msg = format!("expected: {:?}, result: {:?}", expected, result);
|
let msg = format!("expected: {:?}, result: {:?}", expected, result);
|
||||||
|
@ -248,7 +248,7 @@ pub async fn handle_cdi_devices(
|
|||||||
logger: &Logger,
|
logger: &Logger,
|
||||||
spec: &mut Spec,
|
spec: &mut Spec,
|
||||||
spec_dir: &str,
|
spec_dir: &str,
|
||||||
cdi_timeout: u64,
|
cdi_timeout: time::Duration,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
if let Some(container_type) = spec
|
if let Some(container_type) = spec
|
||||||
.annotations()
|
.annotations()
|
||||||
@ -271,7 +271,7 @@ pub async fn handle_cdi_devices(
|
|||||||
let options: Vec<CdiOption> = vec![with_auto_refresh(false), with_spec_dirs(&[spec_dir])];
|
let options: Vec<CdiOption> = vec![with_auto_refresh(false), with_spec_dirs(&[spec_dir])];
|
||||||
let cache: Arc<std::sync::Mutex<cdi::cache::Cache>> = new_cache(options);
|
let cache: Arc<std::sync::Mutex<cdi::cache::Cache>> = new_cache(options);
|
||||||
|
|
||||||
for _ in 0..=cdi_timeout {
|
for i in 0..=cdi_timeout.as_secs() {
|
||||||
let inject_result = {
|
let inject_result = {
|
||||||
// Lock the cache only within this scope: a std::sync::MutexGuard is not Send,
|
// Lock the cache only within this scope: a std::sync::MutexGuard is not Send,
|
||||||
// so it must be dropped before awaiting time::sleep below
|
// so it must be dropped before awaiting time::sleep below
|
||||||
@ -294,15 +294,20 @@ pub async fn handle_cdi_devices(
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
info!(logger, "error injecting devices: {:?}", e);
|
info!(
|
||||||
println!("error injecting devices: {:?}", e);
|
logger,
|
||||||
|
"waiting for CDI spec(s) to be generated ({} of {} max tries) {:?}",
|
||||||
|
i,
|
||||||
|
cdi_timeout.as_secs(),
|
||||||
|
e
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
time::sleep(Duration::from_millis(1000)).await;
|
time::sleep(Duration::from_secs(1)).await;
|
||||||
}
|
}
|
||||||
Err(anyhow!(
|
Err(anyhow!(
|
||||||
"failed to inject devices after CDI timeout of {} seconds",
|
"failed to inject devices after CDI timeout of {} seconds",
|
||||||
cdi_timeout
|
cdi_timeout.as_secs()
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1243,8 +1248,15 @@ mod tests {
|
|||||||
|
|
||||||
fs::write(&cdi_file, cdi_content).expect("Failed to write CDI file");
|
fs::write(&cdi_file, cdi_content).expect("Failed to write CDI file");
|
||||||
|
|
||||||
let res =
|
let cdi_timeout = Duration::from_secs(0);
|
||||||
handle_cdi_devices(&logger, &mut spec, temp_dir.path().to_str().unwrap(), 0).await;
|
|
||||||
|
let res = handle_cdi_devices(
|
||||||
|
&logger,
|
||||||
|
&mut spec,
|
||||||
|
temp_dir.path().to_str().unwrap(),
|
||||||
|
cdi_timeout,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
println!("modfied spec {:?}", spec);
|
println!("modfied spec {:?}", spec);
|
||||||
assert!(res.is_ok(), "{}", res.err().unwrap());
|
assert!(res.is_ok(), "{}", res.err().unwrap());
|
||||||
|
|
||||||
|
@ -130,8 +130,6 @@ const ERR_NO_SANDBOX_PIDNS: &str = "Sandbox does not have sandbox_pidns";
|
|||||||
// not available.
|
// not available.
|
||||||
const IPTABLES_RESTORE_WAIT_SEC: u64 = 5;
|
const IPTABLES_RESTORE_WAIT_SEC: u64 = 5;
|
||||||
|
|
||||||
const CDI_TIMEOUT_LIMIT: u64 = 100;
|
|
||||||
|
|
||||||
// Convenience function to obtain the scope logger.
|
// Convenience function to obtain the scope logger.
|
||||||
fn sl() -> slog::Logger {
|
fn sl() -> slog::Logger {
|
||||||
slog_scope::logger()
|
slog_scope::logger()
|
||||||
@ -234,7 +232,7 @@ impl AgentService {
|
|||||||
// or other entities for a specific device.
|
// or other entities for a specific device.
|
||||||
// In Kata we only consider the directory "/var/run/cdi", "/etc" may be
|
// In Kata we only consider the directory "/var/run/cdi", "/etc" may be
|
||||||
// readonly
|
// readonly
|
||||||
handle_cdi_devices(&sl(), &mut oci, "/var/run/cdi", CDI_TIMEOUT_LIMIT).await?;
|
handle_cdi_devices(&sl(), &mut oci, "/var/run/cdi", AGENT_CONFIG.cdi_timeout).await?;
|
||||||
|
|
||||||
cdh_handler(&mut oci).await?;
|
cdh_handler(&mut oci).await?;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user