diff --git a/src/libs/kata-types/src/config/agent.rs b/src/libs/kata-types/src/config/agent.rs index ded2523171..86bd4f6888 100644 --- a/src/libs/kata-types/src/config/agent.rs +++ b/src/libs/kata-types/src/config/agent.rs @@ -93,6 +93,12 @@ pub struct Agent { #[serde(default)] pub debug_console_enabled: bool, + /// When enabled, the agent translates a container's VISIBLE_CDI_DEVICES + /// environment variable into CDI GPU device requests (nvidia.com/gpu) so + /// that the container sees the matching GPUs present in the VM. + #[serde(default)] + pub visible_cdi_devices: bool, + /// Agent server port #[serde(default = "default_server_port")] pub server_port: u32, @@ -180,6 +186,7 @@ impl std::default::Default for Agent { log_level: "info".to_string(), enable_tracing: false, debug_console_enabled: false, + visible_cdi_devices: false, server_port: DEFAULT_AGENT_VSOCK_PORT, log_port: DEFAULT_AGENT_LOG_PORT, passfd_listener_port: DEFAULT_PASSFD_LISTENER_PORT, diff --git a/src/libs/kata-types/src/config/mod.rs b/src/libs/kata-types/src/config/mod.rs index c638be75e9..0eef252d20 100644 --- a/src/libs/kata-types/src/config/mod.rs +++ b/src/libs/kata-types/src/config/mod.rs @@ -61,6 +61,8 @@ pub const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size"; pub const LAUNCH_PROCESS_TIMEOUT_OPTION: &str = "agent.launch_process_timeout"; /// Option of setting the fd passthrough io listener port pub const PASSFD_LISTENER_PORT: &str = "agent.passfd_listener_port"; +/// Option enabling translation of VISIBLE_CDI_DEVICES into CDI GPU requests +pub const VISIBLE_CDI_DEVICES_OPTION: &str = "agent.visible_cdi_devices"; /// Trait to manipulate global Kata configuration information. 
pub trait ConfigPlugin: Send + Sync { @@ -246,6 +248,9 @@ impl TomlConfig { DEFAULT_AGENT_DBG_CONSOLE_PORT.to_string(), ); } + if cfg.visible_cdi_devices { + kv.insert(VISIBLE_CDI_DEVICES_OPTION.to_string(), "true".to_string()); + } if cfg.mem_agent.enable { kv.insert("psi".to_string(), "1".to_string()); kv.insert("agent.mem_agent_enable".to_string(), "1".to_string()); @@ -500,6 +505,7 @@ mod tests { container_pipe_size: 20, debug_console_enabled: true, launch_process_timeout: 60, + visible_cdi_devices: true, ..Default::default() }; let agent_name = "test_agent"; @@ -513,5 +519,6 @@ mod tests { kv.get("agent.debug_console").unwrap(); assert_eq!(kv.get("agent.debug_console_vport").unwrap(), "1026"); // 1026 is the default port assert_eq!(kv.get("agent.launch_process_timeout").unwrap(), "60"); + assert_eq!(kv.get("agent.visible_cdi_devices").unwrap(), "true"); } } diff --git a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in index 440803cf33..cb9f4e9b76 100644 --- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in @@ -565,6 +565,29 @@ reconnect_timeout_ms = @DEFRECONNECTTIMEOUTMS_NV@ # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s) create_container_timeout = @DEFAULTTIMEOUT_NV@ +# If enabled, the agent translates a container's VISIBLE_CDI_DEVICES +# environment variable into CDI GPU device requests (nvidia.com/gpu), resolved +# against the GPUs present in the VM via the CDI spec generated in the guest at +# /var/run/cdi/. The format of VISIBLE_CDI_DEVICES is: +# +# <kind>=<devices>[:<kind>=<devices>[...]] +# +# For example, you may set something like: +# +# VISIBLE_CDI_DEVICES="nvidia.com/gpu=all:nvidia.com/ib=0,1" +# +# The devices can be referenced by explicit CDI index or through the "all" +# keyword. 
+# +# This parameter is useful in the case where multiple containers in a pod need +# access to the same GPU and do not want to request additional GPUs from the +# outer runtime. This is especially useful with GPU observability where one +# workload container performs the CDI request to the outer runtime, and the +# sidecar observability containers would get access to the same resources by +# setting VISIBLE_CDI_DEVICES="nvidia.com/gpu=all". +# (default: false) +visible_cdi_devices = false + [agent.@PROJECT_TYPE@.mem_agent] # Control the mem-agent function enable or disable. # Default to false diff --git a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in index d9807edc93..73977a9bf6 100644 --- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in @@ -595,6 +595,29 @@ reconnect_timeout_ms = @DEFRECONNECTTIMEOUTMS_NV@ # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s) create_container_timeout = @DEFAULTTIMEOUT_NV@ +# If enabled, the agent translates a container's VISIBLE_CDI_DEVICES +# environment variable into CDI GPU device requests (nvidia.com/gpu), resolved +# against the GPUs present in the VM via the CDI spec generated in the guest at +# /var/run/cdi/. The format of VISIBLE_CDI_DEVICES is: +# +# <kind>=<devices>[:<kind>=<devices>[...]] +# +# For example, you may set something like: +# +# VISIBLE_CDI_DEVICES="nvidia.com/gpu=all:nvidia.com/ib=0,1" +# +# The devices can be referenced by explicit CDI index or through the "all" +# keyword. +# +# This parameter is useful in the case where multiple containers in a pod need +# access to the same GPU and do not want to request additional GPUs from the +# outer runtime. 
This is especially useful with GPU observability where one +# workload container performs the CDI request to the outer runtime, and the +# sidecar observability containers would get access to the same resources by +# setting VISIBLE_CDI_DEVICES="nvidia.com/gpu=all". +# (default: false) +visible_cdi_devices = false + [runtime] # If enabled, the runtime will log additional debug messages to the # system log diff --git a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in index b2fa39fa6e..5af5b74c51 100644 --- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in +++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in @@ -571,6 +571,29 @@ reconnect_timeout_ms = @DEFRECONNECTTIMEOUTMS_NV@ # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s) create_container_timeout = @DEFAULTTIMEOUT_NV@ +# If enabled, the agent translates a container's VISIBLE_CDI_DEVICES +# environment variable into CDI GPU device requests (nvidia.com/gpu), resolved +# against the GPUs present in the VM via the CDI spec generated in the guest at +# /var/run/cdi/. The format of VISIBLE_CDI_DEVICES is: +# +# <kind>=<devices>[:<kind>=<devices>[...]] +# +# For example, you may set something like: +# +# VISIBLE_CDI_DEVICES="nvidia.com/gpu=all:nvidia.com/ib=0,1" +# +# The devices can be referenced by explicit CDI index or through the "all" +# keyword. +# +# This parameter is useful in the case where multiple containers in a pod need +# access to the same GPU and do not want to request additional GPUs from the +# outer runtime. This is especially useful with GPU observability where one +# workload container performs the CDI request to the outer runtime, and the +# sidecar observability containers would get access to the same resources by +# setting VISIBLE_CDI_DEVICES="nvidia.com/gpu=all". 
+# (default: false) +visible_cdi_devices = false + [runtime] # If enabled, the runtime will log additional debug messages to the # system log