feat(runtime-rs): expose visible_cdi_devices in config

Declare the `visible_cdi_devices` agent option (kernel param agent.visible_cdi_devices) in kata-types so runtime-rs can opt into emitting it to the guest, and expose it in the three NVIDIA GPU configuration templates (qemu, qemu-snp, qemu-tdx) at runtime-rs/config/. The agent consumes the corresponding VISIBLE_CDI_DEVICES env var to drive CDI device requests. Signed-off-by: LandonTClipp <lclipp@coreweave.com>
2026-07-01 06:28:11 +00:00 · 2026-06-15 16:34:21 +00:00
parent 676fc90d0b
commit b49eb577b2
5 changed files with 83 additions and 0 deletions
--- a/src/libs/kata-types/src/config/agent.rs
+++ b/src/libs/kata-types/src/config/agent.rs
@@ -93,6 +93,12 @@ pub struct Agent {
    #[serde(default)]
    pub debug_console_enabled: bool,

+    /// When enabled, the agent translates a container's VISIBLE_CDI_DEVICES
+    /// environment variable into CDI GPU device requests (nvidia.com/gpu) so
+    /// that the container sees the matching GPUs present in the VM.
+    #[serde(default)]
+    pub visible_cdi_devices: bool,
+
    /// Agent server port
    #[serde(default = "default_server_port")]
    pub server_port: u32,
@@ -180,6 +186,7 @@ impl std::default::Default for Agent {
            log_level: "info".to_string(),
            enable_tracing: false,
            debug_console_enabled: false,
+            visible_cdi_devices: false,
            server_port: DEFAULT_AGENT_VSOCK_PORT,
            log_port: DEFAULT_AGENT_LOG_PORT,
            passfd_listener_port: DEFAULT_PASSFD_LISTENER_PORT,
--- a/src/libs/kata-types/src/config/mod.rs
+++ b/src/libs/kata-types/src/config/mod.rs
@@ -61,6 +61,8 @@ pub const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size";
 pub const LAUNCH_PROCESS_TIMEOUT_OPTION: &str = "agent.launch_process_timeout";
 /// Option of setting the fd passthrough io listener port
 pub const PASSFD_LISTENER_PORT: &str = "agent.passfd_listener_port";
+/// Option enabling translation of VISIBLE_CDI_DEVICES into CDI GPU requests
+pub const VISIBLE_CDI_DEVICES_OPTION: &str = "agent.visible_cdi_devices";

 /// Trait to manipulate global Kata configuration information.
 pub trait ConfigPlugin: Send + Sync {
@@ -246,6 +248,9 @@ impl TomlConfig {
                    DEFAULT_AGENT_DBG_CONSOLE_PORT.to_string(),
                );
            }
+            if cfg.visible_cdi_devices {
+                kv.insert(VISIBLE_CDI_DEVICES_OPTION.to_string(), "true".to_string());
+            }
            if cfg.mem_agent.enable {
                kv.insert("psi".to_string(), "1".to_string());
                kv.insert("agent.mem_agent_enable".to_string(), "1".to_string());
@@ -500,6 +505,7 @@ mod tests {
            container_pipe_size: 20,
            debug_console_enabled: true,
            launch_process_timeout: 60,
+            visible_cdi_devices: true,
            ..Default::default()
        };
        let agent_name = "test_agent";
@@ -513,5 +519,6 @@ mod tests {
        kv.get("agent.debug_console").unwrap();
        assert_eq!(kv.get("agent.debug_console_vport").unwrap(), "1026"); // 1026 is the default port
        assert_eq!(kv.get("agent.launch_process_timeout").unwrap(), "60");
+        assert_eq!(kv.get("agent.visible_cdi_devices").unwrap(), "true");
    }
 }
--- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in
+++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-runtime-rs.toml.in
@@ -565,6 +565,29 @@ reconnect_timeout_ms = @DEFRECONNECTTIMEOUTMS_NV@
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
 create_container_timeout = @DEFAULTTIMEOUT_NV@

+# If enabled, the agent translates a container's VISIBLE_CDI_DEVICES
+# environment variable into CDI GPU device requests (nvidia.com/gpu), resolved
+# against the GPUs present in the VM via the CDI spec generated in the guest at
+# /var/run/cdi/. The format of VISIBLE_CDI_DEVICES is:
+#
+#   <cdi-kind>=<devices>[:<cdi-kind>=<devices>[...]]
+#
+# For example, you may set something like:
+#
+#   VISIBLE_CDI_DEVICES="nvidia.com/gpu=all:nvidia.com/ib=0,1"
+#
+# Devices are referenced either by explicit CDI index (comma-separated, as in
+# "0,1" above) or with the "all" keyword.
+#
+# This option is useful when multiple containers in a pod need access to the
+# same GPU without requesting additional GPUs from the outer runtime. A
+# typical case is GPU observability: one workload container performs the CDI
+# request to the outer runtime, and the sidecar observability containers get
+# access to the same resources by setting
+# VISIBLE_CDI_DEVICES="nvidia.com/gpu=all".
+# (default: false)
+visible_cdi_devices = false
+
 [agent.@PROJECT_TYPE@.mem_agent]
 # Control the mem-agent function enable or disable.
 # Default to false
--- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in
+++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-snp-runtime-rs.toml.in
@@ -595,6 +595,29 @@ reconnect_timeout_ms = @DEFRECONNECTTIMEOUTMS_NV@
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
 create_container_timeout = @DEFAULTTIMEOUT_NV@

+# If enabled, the agent translates a container's VISIBLE_CDI_DEVICES
+# environment variable into CDI GPU device requests (nvidia.com/gpu), resolved
+# against the GPUs present in the VM via the CDI spec generated in the guest at
+# /var/run/cdi/. The format of VISIBLE_CDI_DEVICES is:
+#
+#   <cdi-kind>=<devices>[:<cdi-kind>=<devices>[...]]
+#
+# For example, you may set something like:
+#
+#   VISIBLE_CDI_DEVICES="nvidia.com/gpu=all:nvidia.com/ib=0,1"
+#
+# Devices are referenced either by explicit CDI index (comma-separated, as in
+# "0,1" above) or with the "all" keyword.
+#
+# This option is useful when multiple containers in a pod need access to the
+# same GPU without requesting additional GPUs from the outer runtime. A
+# typical case is GPU observability: one workload container performs the CDI
+# request to the outer runtime, and the sidecar observability containers get
+# access to the same resources by setting
+# VISIBLE_CDI_DEVICES="nvidia.com/gpu=all".
+# (default: false)
+visible_cdi_devices = false
+
 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log
--- a/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in
+++ b/src/runtime-rs/config/configuration-qemu-nvidia-gpu-tdx-runtime-rs.toml.in
@@ -571,6 +571,29 @@ reconnect_timeout_ms = @DEFRECONNECTTIMEOUTMS_NV@
 # Defaults to @DEFCREATECONTAINERTIMEOUT@ second(s)
 create_container_timeout = @DEFAULTTIMEOUT_NV@

+# If enabled, the agent translates a container's VISIBLE_CDI_DEVICES
+# environment variable into CDI GPU device requests (nvidia.com/gpu), resolved
+# against the GPUs present in the VM via the CDI spec generated in the guest at
+# /var/run/cdi/. The format of VISIBLE_CDI_DEVICES is:
+#
+#   <cdi-kind>=<devices>[:<cdi-kind>=<devices>[...]]
+#
+# For example, you may set something like:
+#
+#   VISIBLE_CDI_DEVICES="nvidia.com/gpu=all:nvidia.com/ib=0,1"
+#
+# Devices are referenced either by explicit CDI index (comma-separated, as in
+# "0,1" above) or with the "all" keyword.
+#
+# This option is useful when multiple containers in a pod need access to the
+# same GPU without requesting additional GPUs from the outer runtime. A
+# typical case is GPU observability: one workload container performs the CDI
+# request to the outer runtime, and the sidecar observability containers get
+# access to the same resources by setting
+# VISIBLE_CDI_DEVICES="nvidia.com/gpu=all".
+# (default: false)
+visible_cdi_devices = false
+
 [runtime]
 # If enabled, the runtime will log additional debug messages to the
 # system log