From 36a2d8e7f2943c7fa35e9b96deb5872edf74b28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 9 Apr 2026 18:52:33 +0200 Subject: [PATCH] agent: Make launch_process_timeout configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardcoded DEFAULT_LAUNCH_PROCESS_TIMEOUT of 6 seconds in the kata agent is insufficient for environments with NVIDIA GPUs and NVSwitches, where the attestation-agent needs significantly more time to collect evidence during initialization (e.g. ~2 seconds per NVSwitch). When the timeout expires, the agent (PID 1) exits with an error, causing the guest kernel to perform an orderly shutdown before the attestation-agent has finished starting. Make this timeout configurable via the kernel parameter agent.launch_process_timeout (in seconds), preserving the 6-second default for backward compatibility. The Go runtime is wired up to pass this value from the TOML config's [agent.kata] section through to the kernel command line. The NVIDIA GPU configs set the new default to 15 seconds. 
Signed-off-by: Fabiano Fidêncio Made-with: Cursor --- src/agent/src/config.rs | 16 ++++++++ src/agent/src/main.rs | 10 ++--- src/libs/kata-types/src/config/agent.rs | 6 +++ src/libs/kata-types/src/config/mod.rs | 11 ++++++ ...iguration-qemu-coco-dev-runtime-rs.toml.in | 5 +++ .../configuration-qemu-se-runtime-rs.toml.in | 5 +++ .../configuration-qemu-snp-runtime-rs.toml.in | 5 +++ .../configuration-qemu-tdx-runtime-rs.toml.in | 5 +++ src/runtime/Makefile | 2 + .../config/configuration-qemu-cca.toml.in | 5 +++ .../configuration-qemu-coco-dev.toml.in | 5 +++ .../configuration-qemu-nvidia-gpu-snp.toml.in | 7 ++++ .../configuration-qemu-nvidia-gpu-tdx.toml.in | 7 ++++ .../configuration-qemu-nvidia-gpu.toml.in | 7 ++++ .../config/configuration-qemu-se.toml.in | 5 +++ .../config/configuration-qemu-snp.toml.in | 5 +++ .../config/configuration-qemu-tdx.toml.in | 5 +++ src/runtime/pkg/katautils/config.go | 32 ++++++++------- src/runtime/virtcontainers/kata_agent.go | 24 +++++++----- src/runtime/virtcontainers/kata_agent_test.go | 39 ++++++++++++------- .../pkg/annotations/annotations.go | 12 +++--- 21 files changed, 170 insertions(+), 48 deletions(-) diff --git a/src/agent/src/config.rs b/src/agent/src/config.rs index 21539b8996..a9bc041ca8 100644 --- a/src/agent/src/config.rs +++ b/src/agent/src/config.rs @@ -25,6 +25,7 @@ const HOTPLUG_TIMOUT_OPTION: &str = "agent.hotplug_timeout"; const CDH_API_TIMOUT_OPTION: &str = "agent.cdh_api_timeout"; const CDH_IMAGE_PULL_TIMEOUT_OPTION: &str = "agent.image_pull_timeout"; const CDI_TIMEOUT_OPTION: &str = "agent.cdi_timeout"; +const LAUNCH_PROCESS_TIMEOUT_OPTION: &str = "agent.launch_process_timeout"; const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport"; const LOG_VPORT_OPTION: &str = "agent.log_vport"; const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size"; @@ -66,6 +67,7 @@ const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3); const DEFAULT_CDH_API_TIMEOUT: time::Duration = 
time::Duration::from_secs(50); const DEFAULT_IMAGE_PULL_TIMEOUT: time::Duration = time::Duration::from_secs(1200); const DEFAULT_CDI_TIMEOUT: time::Duration = time::Duration::from_secs(100); +const DEFAULT_LAUNCH_PROCESS_TIMEOUT: time::Duration = time::Duration::from_secs(6); const DEFAULT_CONTAINER_PIPE_SIZE: i32 = 0; const VSOCK_ADDR: &str = "vsock://-1"; @@ -130,6 +132,7 @@ pub struct AgentConfig { pub cdh_api_timeout: time::Duration, pub image_pull_timeout: time::Duration, pub cdi_timeout: time::Duration, + pub launch_process_timeout: time::Duration, pub debug_console_vport: i32, pub log_vport: i32, pub container_pipe_size: i32, @@ -163,6 +166,7 @@ pub struct AgentConfigBuilder { pub cdh_api_timeout: Option, pub image_pull_timeout: Option, pub cdi_timeout: Option, + pub launch_process_timeout: Option, pub debug_console_vport: Option, pub log_vport: Option, pub container_pipe_size: Option, @@ -257,6 +261,7 @@ impl Default for AgentConfig { cdh_api_timeout: DEFAULT_CDH_API_TIMEOUT, image_pull_timeout: DEFAULT_IMAGE_PULL_TIMEOUT, cdi_timeout: DEFAULT_CDI_TIMEOUT, + launch_process_timeout: DEFAULT_LAUNCH_PROCESS_TIMEOUT, debug_console_vport: 0, log_vport: 0, container_pipe_size: DEFAULT_CONTAINER_PIPE_SIZE, @@ -298,6 +303,7 @@ impl FromStr for AgentConfig { config_override!(agent_config_builder, agent_config, cdh_api_timeout); config_override!(agent_config_builder, agent_config, image_pull_timeout); config_override!(agent_config_builder, agent_config, cdi_timeout); + config_override!(agent_config_builder, agent_config, launch_process_timeout); config_override!(agent_config_builder, agent_config, debug_console_vport); config_override!(agent_config_builder, agent_config, log_vport); config_override!(agent_config_builder, agent_config, container_pipe_size); @@ -481,6 +487,14 @@ impl AgentConfig { |cdi_timeout: &time::Duration| cdi_timeout.as_secs() > 0 ); + parse_cmdline_param!( + param, + LAUNCH_PROCESS_TIMEOUT_OPTION, + config.launch_process_timeout, + get_timeout, 
+ |launch_process_timeout: &time::Duration| launch_process_timeout.as_secs() > 0 + ); + // vsock port should be positive values parse_cmdline_param!( param, @@ -742,6 +756,7 @@ fn get_timeout(param: &str) -> Result { | CDH_API_TIMOUT_OPTION | CDH_IMAGE_PULL_TIMEOUT_OPTION | CDI_TIMEOUT_OPTION + | LAUNCH_PROCESS_TIMEOUT_OPTION ), ERR_INVALID_TIMEOUT_KEY ); @@ -1630,6 +1645,7 @@ Caused by: #[case("agent.cdh_api_timeout=600", Ok(time::Duration::from_secs(600)))] #[case("agent.image_pull_timeout=1200", Ok(time::Duration::from_secs(1200)))] #[case("agent.cdi_timeout=320", Ok(time::Duration::from_secs(320)))] + #[case("agent.launch_process_timeout=60", Ok(time::Duration::from_secs(60)))] fn test_timeout(#[case] param: &str, #[case] expected: Result) { let result = get_timeout(param); let msg = format!("expected: {expected:?}, result: {result:?}"); diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index c715ec128d..c5328f6b38 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -111,8 +111,6 @@ const API_SERVER_PATH: &str = "/usr/local/bin/api-server-rest"; /// TODO: remove this when we move the launch of CDH out of the kata-agent. const OCICRYPT_CONFIG_PATH: &str = "/etc/ocicrypt_config.json"; -const DEFAULT_LAUNCH_PROCESS_TIMEOUT: i32 = 6; - lazy_static! 
{ static ref AGENT_CONFIG: AgentConfig = // Note: We can't do AgentOpts.parse() here to send through the processed arguments to AgentConfig @@ -505,7 +503,7 @@ async fn launch_guest_component_procs( aa_args, Some(AA_CONFIG_PATH), AA_ATTESTATION_SOCKET, - DEFAULT_LAUNCH_PROCESS_TIMEOUT, + config.launch_process_timeout.as_secs(), &[], ) .await @@ -527,7 +525,7 @@ async fn launch_guest_component_procs( vec![], Some(CDH_CONFIG_PATH), CDH_SOCKET, - DEFAULT_LAUNCH_PROCESS_TIMEOUT, + config.launch_process_timeout.as_secs(), &[("OCICRYPT_KEYPROVIDER_CONFIG", OCICRYPT_CONFIG_PATH)], ) .await @@ -587,7 +585,7 @@ async fn init_attestation_components( Ok(()) } -async fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: i32) -> Result<()> { +async fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: u64) -> Result<()> { let p = Path::new(path); let mut attempts = 0; loop { @@ -614,7 +612,7 @@ async fn launch_process( mut args: Vec<&str>, config: Option<&str>, unix_socket_path: &str, - timeout_secs: i32, + timeout_secs: u64, envs: &[(&str, &str)], ) -> Result<()> { if !Path::new(path).exists() { diff --git a/src/libs/kata-types/src/config/agent.rs b/src/libs/kata-types/src/config/agent.rs index dab97f1096..8dda214585 100644 --- a/src/libs/kata-types/src/config/agent.rs +++ b/src/libs/kata-types/src/config/agent.rs @@ -146,6 +146,11 @@ pub struct Agent { #[serde(default)] pub container_pipe_size: u32, + /// Timeout in seconds for guest components (attestation-agent, confidential-data-hub) + /// to create their Unix sockets after being spawned by the agent. 
+ #[serde(default)] + pub launch_process_timeout: u32, + /// Memory agent configuration #[serde(default)] pub mem_agent: MemAgent, @@ -180,6 +185,7 @@ impl std::default::Default for Agent { health_check_request_timeout_ms: 90_000, kernel_modules: Default::default(), container_pipe_size: 0, + launch_process_timeout: 0, mem_agent: MemAgent::default(), policy: Default::default(), } diff --git a/src/libs/kata-types/src/config/mod.rs b/src/libs/kata-types/src/config/mod.rs index 9dac8144e3..f1d99e543c 100644 --- a/src/libs/kata-types/src/config/mod.rs +++ b/src/libs/kata-types/src/config/mod.rs @@ -54,6 +54,8 @@ pub const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport"; pub const LOG_VPORT_OPTION: &str = "agent.log_vport"; /// Option of setting the container's pipe size pub const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size"; +/// Option of setting the guest component launch process timeout +pub const LAUNCH_PROCESS_TIMEOUT_OPTION: &str = "agent.launch_process_timeout"; /// Option of setting the fd passthrough io listener port pub const PASSFD_LISTENER_PORT: &str = "agent.passfd_listener_port"; @@ -219,6 +221,13 @@ impl TomlConfig { let container_pipe_size = cfg.container_pipe_size.to_string(); kv.insert(CONTAINER_PIPE_SIZE_OPTION.to_string(), container_pipe_size); } + if cfg.launch_process_timeout > 0 { + let launch_process_timeout = cfg.launch_process_timeout.to_string(); + kv.insert( + LAUNCH_PROCESS_TIMEOUT_OPTION.to_string(), + launch_process_timeout, + ); + } if cfg.debug_console_enabled { kv.insert(DEBUG_CONSOLE_FLAG.to_string(), "".to_string()); kv.insert( @@ -479,6 +488,7 @@ mod tests { enable_tracing: true, container_pipe_size: 20, debug_console_enabled: true, + launch_process_timeout: 60, ..Default::default() }; let agent_name = "test_agent"; @@ -491,5 +501,6 @@ mod tests { assert_eq!(kv.get("agent.container_pipe_size").unwrap(), "20"); kv.get("agent.debug_console").unwrap(); 
assert_eq!(kv.get("agent.debug_console_vport").unwrap(), "1026"); // 1026 is the default port + assert_eq!(kv.get("agent.launch_process_timeout").unwrap(), "60"); } } diff --git a/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in index 302a4c2386..b7dc60c57d 100644 --- a/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-coco-dev-runtime-rs.toml.in @@ -541,6 +541,11 @@ dial_timeout_ms = 10 # (default: 3000) reconnect_timeout_ms = 3000 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# (agent default when unset: 6) +launch_process_timeout = 6 + # Create Container Request Timeout # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest. # It's also used to ensure that workloads, especially those involving large image pulls within the guest, diff --git a/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in index d67e1f424b..5e7b4d90f8 100644 --- a/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-se-runtime-rs.toml.in @@ -523,6 +523,11 @@ dial_timeout_ms = 90 # (default: 3000) reconnect_timeout_ms = 5000 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# (agent default when unset: 6) +launch_process_timeout = 6 + # Create Container Request Timeout # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest. 
# It's also used to ensure that workloads, especially those involving large image pulls within the guest, diff --git a/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in index 924bd622db..2fae996c16 100644 --- a/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-snp-runtime-rs.toml.in @@ -565,6 +565,11 @@ dial_timeout_ms = 10 # (default: 3000) reconnect_timeout_ms = 3000 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# (agent default when unset: 6) +launch_process_timeout = 6 + # Create Container Request Timeout # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest. # It's also used to ensure that workloads, especially those involving large image pulls within the guest, diff --git a/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in index 222fa62ab8..3fdec3dbab 100644 --- a/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-tdx-runtime-rs.toml.in @@ -541,6 +541,11 @@ dial_timeout_ms = 10 # (default: 3000) reconnect_timeout_ms = 3000 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# (agent default when unset: 6) +launch_process_timeout = 6 + # Create Container Request Timeout # This timeout value is used to set the maximum duration for the agent to process a CreateContainerRequest. 
# It's also used to ensure that workloads, especially those involving large image pulls within the guest, diff --git a/src/runtime/Makefile b/src/runtime/Makefile index f4d9ccf03a..cf0b69e4f2 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -491,6 +491,7 @@ ifneq (,$(QEMUCMD)) DEFAULTVCPUS_NV = 1 DEFAULTMEMORY_NV = 8192 DEFAULTTIMEOUT_NV = 1200 + DEFAULTLAUNCHPROCESSTIMEOUT_NV = 15 DEFAULTVFIOPORT_NV = root-port DEFAULTPCIEROOTPORT_NV = 8 @@ -678,6 +679,7 @@ USER_VARS += KERNELPARAMS_CONFIDENTIAL_NV USER_VARS += KERNELVERITYPARAMS_NV USER_VARS += KERNELVERITYPARAMS_CONFIDENTIAL_NV USER_VARS += DEFAULTTIMEOUT_NV +USER_VARS += DEFAULTLAUNCHPROCESSTIMEOUT_NV USER_VARS += DEFSANDBOXCGROUPONLY_NV USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE diff --git a/src/runtime/config/configuration-qemu-cca.toml.in b/src/runtime/config/configuration-qemu-cca.toml.in index 6f01d5f340..ae5e1956ee 100644 --- a/src/runtime/config/configuration-qemu-cca.toml.in +++ b/src/runtime/config/configuration-qemu-cca.toml.in @@ -537,6 +537,11 @@ debug_console_enabled = false # (default: 90) dial_timeout = 90 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# (agent default when unset: 6) +launch_process_timeout = 6 + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/config/configuration-qemu-coco-dev.toml.in b/src/runtime/config/configuration-qemu-coco-dev.toml.in index 851ceeec55..c8639850ff 100644 --- a/src/runtime/config/configuration-qemu-coco-dev.toml.in +++ b/src/runtime/config/configuration-qemu-coco-dev.toml.in @@ -595,6 +595,11 @@ dial_timeout = 45 # (default: 50) cdh_api_timeout = 50 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. 
+# (agent default when unset: 6) +launch_process_timeout = 6 + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in index 633d85bef4..9a069354d7 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-snp.toml.in @@ -611,6 +611,13 @@ debug_console_enabled = false # (default: 90) dial_timeout = @DEFAULTTIMEOUT_NV@ +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# With NVIDIA GPUs and NVSwitches, the attestation-agent needs extra time +# to collect evidence during initialization. +# (agent default when unset: 6) +launch_process_timeout = @DEFAULTLAUNCHPROCESSTIMEOUT_NV@ + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in index 7f8b7456b0..3a175db6ee 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu-tdx.toml.in @@ -588,6 +588,13 @@ debug_console_enabled = false # (default: 90) dial_timeout = @DEFAULTTIMEOUT_NV@ +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# With NVIDIA GPUs and NVSwitches, the attestation-agent needs extra time +# to collect evidence during initialization. 
+# (agent default when unset: 6) +launch_process_timeout = @DEFAULTLAUNCHPROCESSTIMEOUT_NV@ + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in index bd44a2f099..98fec12a80 100644 --- a/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in +++ b/src/runtime/config/configuration-qemu-nvidia-gpu.toml.in @@ -590,6 +590,13 @@ debug_console_enabled = false # (default: 90) dial_timeout = @DEFAULTTIMEOUT_NV@ +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# With NVIDIA GPUs and NVSwitches, the attestation-agent needs extra time +# to collect evidence during initialization. +# (agent default when unset: 6) +launch_process_timeout = @DEFAULTLAUNCHPROCESSTIMEOUT_NV@ + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/config/configuration-qemu-se.toml.in b/src/runtime/config/configuration-qemu-se.toml.in index 519ec2d20d..09372b93c7 100644 --- a/src/runtime/config/configuration-qemu-se.toml.in +++ b/src/runtime/config/configuration-qemu-se.toml.in @@ -573,6 +573,11 @@ debug_console_enabled = false # (default: 30) dial_timeout = 90 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. 
+# (agent default when unset: 6) +launch_process_timeout = 6 + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/config/configuration-qemu-snp.toml.in b/src/runtime/config/configuration-qemu-snp.toml.in index 68cdad0fd8..055b436f3a 100644 --- a/src/runtime/config/configuration-qemu-snp.toml.in +++ b/src/runtime/config/configuration-qemu-snp.toml.in @@ -603,6 +603,11 @@ debug_console_enabled = false # (default: 90) dial_timeout = 90 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. +# (agent default when unset: 6) +launch_process_timeout = 6 + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/config/configuration-qemu-tdx.toml.in b/src/runtime/config/configuration-qemu-tdx.toml.in index a4b3bb41c1..e35371d5b6 100644 --- a/src/runtime/config/configuration-qemu-tdx.toml.in +++ b/src/runtime/config/configuration-qemu-tdx.toml.in @@ -580,6 +580,11 @@ debug_console_enabled = false # (default: 60) dial_timeout = 60 +# Timeout in seconds for guest components (attestation-agent, confidential-data-hub) +# to create their Unix sockets after being spawned by the agent. 
+# (agent default when unset: 6) +launch_process_timeout = 6 + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 2ea52c6bed..061bf8b2ed 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -224,12 +224,13 @@ func (r runtime) emptyDirMode() (string, error) { } type agent struct { - KernelModules []string `toml:"kernel_modules"` - Debug bool `toml:"enable_debug"` - Tracing bool `toml:"enable_tracing"` - DebugConsoleEnabled bool `toml:"debug_console_enabled"` - DialTimeout uint32 `toml:"dial_timeout"` - CdhApiTimeout uint32 `toml:"cdh_api_timeout"` + KernelModules []string `toml:"kernel_modules"` + Debug bool `toml:"enable_debug"` + Tracing bool `toml:"enable_tracing"` + DebugConsoleEnabled bool `toml:"debug_console_enabled"` + DialTimeout uint32 `toml:"dial_timeout"` + CdhApiTimeout uint32 `toml:"cdh_api_timeout"` + LaunchProcessTimeout uint32 `toml:"launch_process_timeout"` } func (orig *tomlConfig) Clone() tomlConfig { @@ -798,6 +799,10 @@ func (a agent) cdhApiTimout() uint32 { return a.CdhApiTimeout } +func (a agent) launchProcessTimeout() uint32 { + return a.LaunchProcessTimeout +} + func (a agent) debug() bool { return a.Debug } @@ -1464,13 +1469,14 @@ func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, confi func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error { for _, agent := range tomlConf.Agent { config.AgentConfig = vc.KataAgentConfig{ - LongLiveConn: true, - Debug: agent.debug(), - Trace: agent.trace(), - KernelModules: agent.kernelModules(), - EnableDebugConsole: agent.debugConsoleEnabled(), - DialTimeout: agent.dialTimout(), - CdhApiTimeout: agent.cdhApiTimout(), + LongLiveConn: true, + Debug: agent.debug(), + Trace: agent.trace(), + KernelModules: agent.kernelModules(), + EnableDebugConsole: 
agent.debugConsoleEnabled(), + DialTimeout: agent.dialTimout(), + CdhApiTimeout: agent.cdhApiTimout(), + LaunchProcessTimeout: agent.launchProcessTimeout(), } } diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index a138d804da..77bbe4e5b7 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -292,15 +292,16 @@ func ephemeralPath() string { // KataAgentConfig is a structure storing information needed // to reach the Kata Containers agent. type KataAgentConfig struct { - KernelModules []string - ContainerPipeSize uint32 - DialTimeout uint32 - CdhApiTimeout uint32 - LongLiveConn bool - Debug bool - Trace bool - EnableDebugConsole bool - Policy string + KernelModules []string + ContainerPipeSize uint32 + DialTimeout uint32 + CdhApiTimeout uint32 + LaunchProcessTimeout uint32 + LongLiveConn bool + Debug bool + Trace bool + EnableDebugConsole bool + Policy string } // KataAgentState is the structure describing the data stored from this @@ -366,6 +367,11 @@ func KataAgentKernelParams(config KataAgentConfig) []Param { params = append(params, Param{Key: vcAnnotations.CdhApiTimeoutKernelParam, Value: cdhApiTimeout}) } + if config.LaunchProcessTimeout > 0 { + launchProcessTimeout := strconv.FormatUint(uint64(config.LaunchProcessTimeout), 10) + params = append(params, Param{Key: vcAnnotations.LaunchProcessTimeoutKernelParam, Value: launchProcessTimeout}) + } + return params } diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 876c9f703b..c5389b8ef8 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -1083,47 +1083,56 @@ func TestKataAgentKernelParams(t *testing.T) { // nolint: govet type testData struct { - debug bool - trace bool - containerPipeSize uint32 - expectedParams []Param + debug bool + trace bool + containerPipeSize uint32 + launchProcessTimeout 
uint32 + expectedParams []Param } debugParam := Param{Key: "agent.log", Value: "debug"} traceParam := Param{Key: "agent.trace", Value: "true"} containerPipeSizeParam := Param{Key: vcAnnotations.ContainerPipeSizeKernelParam, Value: "2097152"} + launchProcessTimeoutParam := Param{Key: vcAnnotations.LaunchProcessTimeoutKernelParam, Value: "60"} data := []testData{ - {false, false, 0, []Param{}}, + {false, false, 0, 0, []Param{}}, // Debug - {true, false, 0, []Param{debugParam}}, + {true, false, 0, 0, []Param{debugParam}}, // Tracing - {false, true, 0, []Param{traceParam}}, + {false, true, 0, 0, []Param{traceParam}}, // Debug + Tracing - {true, true, 0, []Param{debugParam, traceParam}}, + {true, true, 0, 0, []Param{debugParam, traceParam}}, // pipesize - {false, false, 2097152, []Param{containerPipeSizeParam}}, + {false, false, 2097152, 0, []Param{containerPipeSizeParam}}, // Debug + pipesize - {true, false, 2097152, []Param{debugParam, containerPipeSizeParam}}, + {true, false, 2097152, 0, []Param{debugParam, containerPipeSizeParam}}, // Tracing + pipesize - {false, true, 2097152, []Param{traceParam, containerPipeSizeParam}}, + {false, true, 2097152, 0, []Param{traceParam, containerPipeSizeParam}}, // Debug + Tracing + pipesize - {true, true, 2097152, []Param{debugParam, traceParam, containerPipeSizeParam}}, + {true, true, 2097152, 0, []Param{debugParam, traceParam, containerPipeSizeParam}}, + + // LaunchProcessTimeout + {false, false, 0, 60, []Param{launchProcessTimeoutParam}}, + + // Debug + LaunchProcessTimeout + {true, false, 0, 60, []Param{debugParam, launchProcessTimeoutParam}}, } for i, d := range data { config := KataAgentConfig{ - Debug: d.debug, - Trace: d.trace, - ContainerPipeSize: d.containerPipeSize, + Debug: d.debug, + Trace: d.trace, + ContainerPipeSize: d.containerPipeSize, + LaunchProcessTimeout: d.launchProcessTimeout, } count := len(d.expectedParams) diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go 
b/src/runtime/virtcontainers/pkg/annotations/annotations.go index fe34c47acb..06b0aadffb 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -334,11 +334,13 @@ const ( AgentTrace = kataAnnotAgentPrefix + "enable_tracing" // AgentContainerPipeSize is an annotation to specify the size of the pipes created for containers - AgentContainerPipeSize = kataAnnotAgentPrefix + ContainerPipeSizeOption - ContainerPipeSizeOption = "container_pipe_size" - ContainerPipeSizeKernelParam = "agent." + ContainerPipeSizeOption - CdhApiTimeoutOption = "cdh_api_timeout" - CdhApiTimeoutKernelParam = "agent." + CdhApiTimeoutOption + AgentContainerPipeSize = kataAnnotAgentPrefix + ContainerPipeSizeOption + ContainerPipeSizeOption = "container_pipe_size" + ContainerPipeSizeKernelParam = "agent." + ContainerPipeSizeOption + CdhApiTimeoutOption = "cdh_api_timeout" + CdhApiTimeoutKernelParam = "agent." + CdhApiTimeoutOption + LaunchProcessTimeoutOption = "launch_process_timeout" + LaunchProcessTimeoutKernelParam = "agent." + LaunchProcessTimeoutOption // Policy is an annotation containing the contents of an agent policy file, base64 encoded. Policy = kataAnnotAgentPrefix + "policy"