From 4fbfba2f790326bc2953fb0a24d8cc6dabdfe020 Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Fri, 8 May 2026 12:39:05 +0000 Subject: [PATCH 1/5] agent: support run-backed EROFS upper Support multi-layer EROFS storage without an explicit ext4 upper layer. When runtime-rs sends only EROFS lower storage and overlay metadata, create the overlay upper/work directories under the container bundle in /run/kata-containers. Keep the explicit ext4 rwlayer path for disk-backed snapshots, and only track real temporary mount points for cleanup. The implicit /run-backed upper is bundle-scoped state and is removed with the container bundle. Assisted-by: OpenAI Codex Signed-off-by: Manuel Huber --- src/agent/src/storage/multi_layer_erofs.rs | 113 ++++++++++++++------- 1 file changed, 78 insertions(+), 35 deletions(-) diff --git a/src/agent/src/storage/multi_layer_erofs.rs b/src/agent/src/storage/multi_layer_erofs.rs index 60d078ac8a..633473e779 100644 --- a/src/agent/src/storage/multi_layer_erofs.rs +++ b/src/agent/src/storage/multi_layer_erofs.rs @@ -6,7 +6,8 @@ //! Multi-layer EROFS storage handler //! //! This handler implements the guest-side processing of multi-layer EROFS rootfs: -//! - Storage with X-kata.overlay-upper: ext4 rw layer (upperdir) +//! - Optional Storage with X-kata.overlay-upper: ext4 rw layer (upperdir) +//! - If no upper storage is provided, a directory under /run/kata-containers is used //! - Storage with X-kata.overlay-lower: erofs layers (lowerdir) //! - Creates overlay to combine them //! 
- Supports X-kata.mkdir.path options to create directories in upper layer before overlay mount @@ -23,6 +24,7 @@ use crate::device::block_device_handler::get_virtio_blk_pci_device_name; use crate::device::scsi_device_handler::get_scsi_device_name; use crate::linux_abi::pcipath_from_dev_tree_path; use crate::mount::baremount; +use crate::rpc::CONTAINER_BASE; use crate::sandbox::Sandbox; use crate::storage::{StorageContext, StorageHandler}; use anyhow::{anyhow, Context, Result}; @@ -36,7 +38,7 @@ use tokio::sync::Mutex; /// EROFS Type const EROFS_TYPE: &str = "erofs"; -/// ext4 Type +/// ext4 Type (upper virtio disk based rw layer) const EXT4_TYPE: &str = "ext4"; /// Overlay Type const OVERLAY_TYPE: &str = "overlay"; @@ -59,8 +61,8 @@ pub struct MultiLayerErofsHandler {} pub struct MultiLayerErofsResult { pub mount_point: String, pub processed_mount_points: Vec, - /// Temporary mount points (upper, lower-0, lower-1, …) that back the - /// overlay. These must be tracked so they are unmounted *after* the + /// Temporary mount points (explicit upper, lower-0, lower-1, …) that back + /// the overlay. These must be tracked so they are unmounted *after* the /// overlay target during container teardown. pub temp_mount_points: Vec, /// dm-verity device paths that need to be destroyed during cleanup @@ -131,26 +133,33 @@ pub async fn handle_multi_layer_erofs_group( return Err(anyhow!("no multi-layer storages found")); } - let mut ext4_storage: Option<&Storage> = None; + let mut upper_storage: Option<&Storage> = None; let mut erofs_storages: Vec<&Storage> = Vec::new(); let mut mkdir_dirs: Vec = Vec::new(); let mut has_gpt_partition: bool = false; for storage in &multi_layer_storages { + // Collect all X-kata.mkdir.path directives from this multi-layer EROFS group. 
+ for opt in &storage.options { + if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) { + mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?); + } + } + + if storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) && storage.fstype != EXT4_TYPE { + return Err(anyhow!( + "multi-layer erofs explicit upper layer must be ext4, got '{}'; omit the upper storage for the implicit /run-backed upper", + storage.fstype + )); + } + if is_upper_storage(storage) { - if ext4_storage.is_some() { + if upper_storage.is_some() { return Err(anyhow!( - "multi-layer erofs currently supports exactly one ext4 upper layer" + "multi-layer erofs currently supports exactly one explicit ext4 upper layer" )); } - ext4_storage = Some(*storage); - - // Extract mkdir directories from X-kata.mkdir.path options - for opt in &storage.options { - if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) { - mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?); - } - } + upper_storage = Some(*storage); } else if is_lower_storage(storage) { // Each GPT partition is provided as a separate storage entry by the host if !has_gpt_partition && is_gpt_partitioned(storage) { @@ -176,36 +185,57 @@ pub async fn handle_multi_layer_erofs_group( erofs_storages.sort_by_key(|storage| get_partition_number(storage).unwrap_or(u32::MAX)); } - let ext4 = ext4_storage - .ok_or_else(|| anyhow!("multi-layer erofs missing ext4 upper layer storage"))?; + // With an explicit upper layer, the upper Storage carries the final overlay + // target. With an implicit /run-backed upper, the runtime puts that target + // on the first EROFS lower Storage. + let target_mount_point = upper_storage + .map(|upper| upper.mount_point.clone()) + .unwrap_or_else(|| erofs_storages[0].mount_point.clone()); + // Explicit uppers have a device source, while the implicit + // layout uses a directory under /run rather than a block device. 
+ let upper_source = upper_storage + .map(|upper| upper.source.as_str()) + .unwrap_or("run-backed directory"); info!( logger, "Handling multi-layer erofs group"; - "ext4-device" => &ext4.source, + "upper-source" => upper_source, "erofs-devices" => erofs_storages .iter() .map(|s| s.source.as_str()) .collect::>() .join(","), - "mount-point" => &ext4.mount_point, + "mount-point" => &target_mount_point, "mkdir-dirs-count" => mkdir_dirs.len(), ); - // Create temporary mount points for upper and lower layers + // Create temporary backing paths for upper and lower layers let cid_str = cid.as_deref().unwrap_or("sandbox"); // Validate container ID to prevent path traversal via crafted cid values validate_container_id(cid_str)?; - let temp_base = PathBuf::from(format!("/run/kata-containers/{}/multi-layer", cid_str)); + let container_base = + scoped_join(CONTAINER_BASE, cid_str).context("failed to build container temporary path")?; + fs::create_dir_all(&container_base).context("failed to create container temporary path")?; + let temp_base = + scoped_join(&container_base, "multi-layer").context("failed to build multi-layer path")?; fs::create_dir_all(&temp_base).context("failed to create temp mount base")?; // Validate mount point to prevent path traversal via crafted mount_point values - validate_mount_point(&ext4.mount_point)?; + validate_mount_point(&target_mount_point)?; let upper_mount = temp_base.join("upper"); fs::create_dir_all(&upper_mount).context("failed to create upper mount dir")?; - wait_and_mount_layer(ext4, &upper_mount, sandbox, &logger, None).await?; + if let Some(upper) = upper_storage { + wait_and_mount_layer(upper, &upper_mount, sandbox, &logger, None).await?; + } else { + info!( + logger, + "Using /run-backed upper directory"; + "mount-point" => upper_mount.display(), + ); + } for mkdir_dir in &mkdir_dirs { // As {{ mount 1 }} refers to the first lower layer, which is not available until we mount it. 
@@ -302,12 +332,12 @@ pub async fn handle_multi_layer_erofs_group( "upperdir" => upperdir.display(), "lowerdir" => &lowerdir, "workdir" => workdir.display(), - "target" => &ext4.mount_point, + "target" => &target_mount_point, ); create_mount_destination( Path::new(OVERLAY_TYPE), - Path::new(&ext4.mount_point), + Path::new(&target_mount_point), "", OVERLAY_TYPE, ) @@ -315,7 +345,7 @@ pub async fn handle_multi_layer_erofs_group( let overlay_mount = kata_types::mount::Mount { source: OVERLAY_TYPE.to_string(), - destination: PathBuf::from(&ext4.mount_point), + destination: PathBuf::from(&target_mount_point), fs_type: OVERLAY_TYPE.to_string(), options: vec![ format!("upperdir={}", upperdir.display()), @@ -326,13 +356,13 @@ pub async fn handle_multi_layer_erofs_group( }; overlay_mount - .mount(Path::new(&ext4.mount_point)) + .mount(Path::new(&target_mount_point)) .context("failed to mount overlay")?; info!( logger, "Multi-layer EROFS overlay mounted successfully"; - "mount-point" => &ext4.mount_point, + "mount-point" => &target_mount_point, ); // Collect all unique mount points to maintain a clean resource state. @@ -352,16 +382,19 @@ pub async fn handle_multi_layer_erofs_group( acc }); - // Collect the temporary mount points (upper first, then lowers) so the - // caller can register them in container_mounts for proper cleanup. - let mut temp_mount_points = Vec::with_capacity(1 + lower_mounts.len()); - temp_mount_points.push(upper_mount.display().to_string()); + // Collect temporary backing mounts. The implicit /run-backed upper is just + // a directory under the container bundle and is removed with that bundle. 
+ let mut temp_mount_points = + Vec::with_capacity(usize::from(upper_storage.is_some()) + lower_mounts.len()); + if upper_storage.is_some() { + temp_mount_points.push(upper_mount.display().to_string()); + } for lm in &lower_mounts { temp_mount_points.push(lm.display().to_string()); } Ok(MultiLayerErofsResult { - mount_point: ext4.mount_point.clone(), + mount_point: target_mount_point, processed_mount_points, temp_mount_points, verity_devices, @@ -393,8 +426,9 @@ async fn track_temporary_mount_for_cleanup( } fn is_upper_storage(storage: &Storage) -> bool { - storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) - || (storage.fstype == EXT4_TYPE && storage.options.iter().any(|o| o == OPT_MULTI_LAYER)) + storage.fstype == EXT4_TYPE + && (storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) + || storage.options.iter().any(|o| o == OPT_MULTI_LAYER)) } fn is_lower_storage(storage: &Storage) -> bool { @@ -508,6 +542,7 @@ fn resolve_mkdir_path( Ok(safe) } +/// Wait for a block-backed layer device, then mount it at `layer_mount`. async fn wait_and_mount_layer( layer: &Storage, layer_mount: &Path, @@ -821,6 +856,7 @@ mod tests { let mut s = Storage::default(); assert!(!is_upper_storage(&s)); + s.fstype = EXT4_TYPE.to_string(); s.options.push(OPT_OVERLAY_UPPER.to_string()); assert!(is_upper_storage(&s)); @@ -830,6 +866,13 @@ mod tests { ..Default::default() }; assert!(is_upper_storage(&s2)); + + let s3 = Storage { + fstype: "tmpfs".to_string(), + options: vec![OPT_OVERLAY_UPPER.to_string(), OPT_MULTI_LAYER.to_string()], + ..Default::default() + }; + assert!(!is_upper_storage(&s3)); } #[test] From ebf2c99df3ec4fe9306c3f133df74b0531e67da5 Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Fri, 8 May 2026 12:39:12 +0000 Subject: [PATCH 2/5] runtime-rs: allow EROFS rootfs without rwlayer Treat the containerd erofs snapshotter active snapshot as an EROFS lower plus overlay metadata, with an optional ext4 rwlayer when host rw backing is enabled. 
This also covers default_size=0, where containerd sends no rwlayer and the agent provides the writable upper inside the guest. Forward overlay mkdir hints on the EROFS storage so the guest agent sees them in both layouts, and add unit coverage for the dispatcher patterns. Assisted-by: OpenAI Codex Signed-off-by: Manuel Huber --- .../resource/src/rootfs/erofs_rootfs.rs | 142 ++++++++++++++---- 1 file changed, 115 insertions(+), 27 deletions(-) diff --git a/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs index 36e03b2802..0b2dad2df3 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs @@ -2,10 +2,13 @@ // // SPDX-License-Identifier: Apache-2.0 // -// Handle multi-layer EROFS rootfs: -// Mount[0]: ext4 rw layer -> virtio-blk device (writable) -// Mount[1]: erofs with device= -> virtio-blk via VMDK (read-only) -// Mount[2]: overlay (format/mkdir/overlay) -> host mount OR guest agent +// Handle multi-layer EROFS rootfs. +// +// The containerd erofs snapshotter sends the active snapshot as either: +// - ext4 rwlayer.img + erofs lower + overlay when host rw backing is enabled. +// - erofs lower + overlay when default_size="0"; the agent then uses a +// guest-memory upper directory under /run. +// // The overlay mount may be handled by the guest agent if it contains "{{" // templates in upperdir/workdir. @@ -504,13 +507,14 @@ fn extract_block_device_info( /// EROFS Multi-Layer Rootfs with overlay support /// /// Handles the EROFS Multi-Layer where rootfs consists of: -/// - Mount[0]: ext4 rw layer (writable container layer) -> virtio-blk device -/// - Mount[1]: erofs layers (fsmeta + flattened layers) -> virtio-blk via VMDK -/// - Mount[2]: overlay (to combine ext4 upper + erofs lower) +/// - Optional ext4 rw disk -> virtio-blk when host rw backing exists. 
+/// - EROFS layers (fsmeta + flattened layers) -> virtio-blk via VMDK. +/// - Overlay metadata that combines the writable upper with the EROFS lower. pub(crate) struct ErofsMultiLayerRootfs { guest_path: String, device_ids: Vec, - // Writable layer storage (upper layer), typically ext4 + // Writable layer storage (upper layer), typically ext4 and optional when + // the agent creates a /run-backed upper. rwlayer_storage: Option, // Read-only EROFS layer storages (lower layers), one per partition in GPT mode erofs_storages: Vec, @@ -553,7 +557,10 @@ impl ErofsMultiLayerRootfs { // Check block device count limit let expected_device_count = rootfs_mounts .iter() - .filter(|m| matches!(m.fs_type.as_str(), RW_LAYER_ROOTFS_TYPE | EROFS_ROOTFS_TYPE)) + .filter(|m| { + m.fs_type.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE) + || m.fs_type.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE) + }) .count(); // TODO(Alex Lyn): fsmerge mode with single erofs mount and multiple device= options @@ -900,13 +907,15 @@ impl ErofsMultiLayerRootfs { return Err(anyhow!("no devices attached for multi-layer erofs rootfs")); } - // Add mkdir directives to rwlayer storage options for guest agent - if let Some(ref mut rwlayer) = rwlayer_storage { - rwlayer.options.extend( - mkdir_dirs - .iter() - .map(|dir| format!("{}{}", X_KATA_MKDIR_PATH, dir)), - ); + // Forward overlay mkdir hints on the EROFS Storage only. The guest agent scans + // every multi-layer storage for X-kata.mkdir.path; attaching here avoids splitting + // the same metadata across rwlayer vs erofs when an ext4 upper exists. + let mkdir_options = mkdir_dirs + .iter() + .map(|dir| format!("{}{}", X_KATA_MKDIR_PATH, dir)) + .collect::>(); + if let Some(erofs) = erofs_storages.first_mut() { + erofs.options.extend(mkdir_options); } Ok(Self { @@ -936,9 +945,9 @@ impl Rootfs for ErofsMultiLayerRootfs { } async fn get_storage(&self) -> Option> { - // Return all storages for multi-layer EROFS (rw layer + erofs layers) to guest agent. 
- // Guest agent needs all of them to create overlay mount. - // In GPT mode, each partition has its own storage entry. + // Return all storages for multi-layer EROFS. The rw layer is optional; + // when absent, the agent creates a /run-backed upper dir. In GPT mode, + // each partition has its own EROFS storage entry. let mut storages = Vec::new(); if let Some(rwlayer) = self.rwlayer_storage.clone() { @@ -989,23 +998,102 @@ impl Rootfs for ErofsMultiLayerRootfs { } } +fn overlay_like(fs_type: &str) -> bool { + matches!( + fs_type.to_ascii_lowercase().as_str(), + "overlay" | "format/overlay" | "format/mkdir/overlay" + ) +} + /// Check if mounts represent a multi-layer EROFS rootfs. /// -/// Returns `true` when `rootfs_mounts` contains at least two entries: -/// an ext4 rw layer (upper) and an erofs layer (lower). +/// Matches what the containerd erofs snapshotter sends for an active snapshot: +/// an EROFS lower layer plus an overlay mount. With host rw backing enabled, +/// the mount list also includes an ext4 `rwlayer.img`; with `default_size="0"` +/// it does not, and the agent creates the writable upper under `/run`. +/// +/// This is only the coarse dispatcher check; `ErofsMultiLayerRootfs::new` +/// parses the optional rwlayer and overlay metadata. 
pub fn is_erofs_multi_layer(rootfs_mounts: &[Mount]) -> bool { if rootfs_mounts.len() < 2 { return false; } - let has_rwlayer = rootfs_mounts.iter().any(|m| { - m.fs_type.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE) && m.options.iter().any(|o| o == "rw") - }); - let has_erofs = rootfs_mounts .iter() .any(|m| m.fs_type.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE)); - // Must have rwlayer + erofs (multi-layer or single-layer) - has_rwlayer && has_erofs + if !has_erofs { + return false; + } + + rootfs_mounts.iter().any(|m| overlay_like(&m.fs_type)) +} + +#[cfg(test)] +mod tests { + use super::{is_erofs_multi_layer, EROFS_ROOTFS_TYPE, RW_LAYER_ROOTFS_TYPE}; + use kata_types::mount::Mount; + use std::path::PathBuf; + + fn mount(fs_type: &str, options: &[&str]) -> Mount { + Mount { + fs_type: fs_type.to_string(), + options: options.iter().map(|s| (*s).to_string()).collect(), + destination: PathBuf::from("/"), + ..Default::default() + } + } + + #[test] + fn is_erofs_multi_layer_rejects_short_list() { + assert!(!is_erofs_multi_layer(&[])); + assert!(!is_erofs_multi_layer(&[mount(EROFS_ROOTFS_TYPE, &[])])); + } + + #[test] + fn is_erofs_multi_layer_requires_erofs() { + let mounts = vec![mount(RW_LAYER_ROOTFS_TYPE, &["rw"]), mount("overlay", &[])]; + assert!(!is_erofs_multi_layer(&mounts)); + } + + #[test] + fn is_erofs_multi_layer_ext4_rw_erofs_and_overlay() { + let mounts = vec![ + mount(RW_LAYER_ROOTFS_TYPE, &["rw"]), + mount(EROFS_ROOTFS_TYPE, &[]), + mount("overlay", &[]), + ]; + assert!(is_erofs_multi_layer(&mounts)); + } + + #[test] + fn is_erofs_multi_layer_implicit_upper_erofs_and_overlay_variants() { + for overlay_type in ["overlay", "format/overlay", "format/mkdir/overlay"] { + let mounts = vec![mount(EROFS_ROOTFS_TYPE, &[]), mount(overlay_type, &[])]; + assert!( + is_erofs_multi_layer(&mounts), + "expected multi-layer for overlay type {}", + overlay_type + ); + } + } + + #[test] + fn is_erofs_multi_layer_erofs_without_overlay_or_rw_is_false() { + let mounts = 
vec![mount(EROFS_ROOTFS_TYPE, &[]), mount("btrfs", &[])]; + assert!(!is_erofs_multi_layer(&mounts)); + } + + #[test] + fn is_erofs_multi_layer_does_not_validate_optional_rwlayer_options() { + // The dispatcher only requires EROFS + overlay. Detailed rwlayer + // interpretation is handled by ErofsMultiLayerRootfs::new. + let mounts = vec![ + mount(RW_LAYER_ROOTFS_TYPE, &["ro"]), + mount(EROFS_ROOTFS_TYPE, &[]), + mount("overlay", &[]), + ]; + assert!(is_erofs_multi_layer(&mounts)); + } } From 3e874d0eaf4b03fe12631190fed225398c607dcd Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Fri, 8 May 2026 17:29:06 +0000 Subject: [PATCH 3/5] tests: accept EROFS empty-image rootfs rejection The empty-image test expects pod creation to fail. With an EROFS snapshot that has a disk-backed rwlayer, runtime-rs can still reject that pod with the existing unsupported mount-count error. With default_size=0, there is no rwlayer mount. The same negative test can instead reach the bind rootfs shape produced for the empty active snapshot, which runtime-rs rejects as an unsupported rootfs mount. Accept both messages so the test covers the expected failure for both EROFS rwlayer modes. 
Assisted-by: OpenAI Codex Signed-off-by: Manuel Huber --- tests/integration/kubernetes/k8s-empty-image.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/kubernetes/k8s-empty-image.bats b/tests/integration/kubernetes/k8s-empty-image.bats index 1d451b31cf..02b0a4f96e 100644 --- a/tests/integration/kubernetes/k8s-empty-image.bats +++ b/tests/integration/kubernetes/k8s-empty-image.bats @@ -43,7 +43,7 @@ setup() { kubectl create -f "${yaml_file}" local -r command="kubectl describe pod/${pod_name} | grep -E \ - 'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT|unsupported rootfs mounts count 2'" + 'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT|unsupported rootfs mounts count 2|unsupported rootfs Mount'" info "Waiting ${wait_time} seconds for: ${command}" waitForProcess "${wait_time}" "${sleep_time}" "${command}" >/dev/null 2>/dev/null } From 76212b9e0c0e98ede20d572768b986c895d63fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Tue, 26 May 2026 23:21:45 +0200 Subject: [PATCH 4/5] kata-deploy: allow containerd user drop-in overrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an optional user-provided containerd drop-in that is loaded after kata-deploy's generated drop-in so operators can override snapshotter and other runtime settings without patching kata-deploy. 
Signed-off-by: Fabiano Fidêncio --- .../kata-deploy/binary/src/config.rs | 10 ++ .../binary/src/runtime/containerd.rs | 92 +++++++++++++++++++ .../containerd-user-dropin-config.yaml | 16 ++++ .../kata-deploy/templates/kata-deploy.yaml | 18 ++++ .../helm-chart/kata-deploy/values.yaml | 8 ++ 5 files changed, 144 insertions(+) create mode 100644 tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/containerd-user-dropin-config.yaml diff --git a/tools/packaging/kata-deploy/binary/src/config.rs b/tools/packaging/kata-deploy/binary/src/config.rs index 51659e3927..d7c372b426 100644 --- a/tools/packaging/kata-deploy/binary/src/config.rs +++ b/tools/packaging/kata-deploy/binary/src/config.rs @@ -159,6 +159,7 @@ pub struct Config { pub containerd_conf_file: String, pub containerd_conf_file_backup: String, pub containerd_drop_in_conf_file: String, + pub containerd_user_drop_in_source_file: Option, pub daemonset_name: String, pub custom_runtimes_enabled: bool, pub custom_runtimes: Vec, @@ -265,6 +266,10 @@ impl Config { let containerd_conf_file_backup = format!("{containerd_conf_file}.bak"); let containerd_drop_in_conf_file = format!("{dest_dir}/containerd/config.d/kata-deploy.toml"); + let containerd_user_drop_in_source_file = env::var("CONTAINERD_USER_DROP_IN_SOURCE_FILE") + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()); let helm_post_delete_hook = env::var("HELM_POST_DELETE_HOOK").unwrap_or_else(|_| "false".to_string()) == "true"; @@ -314,6 +319,7 @@ impl Config { containerd_conf_file, containerd_conf_file_backup, containerd_drop_in_conf_file, + containerd_user_drop_in_source_file, daemonset_name, custom_runtimes_enabled, custom_runtimes, @@ -522,6 +528,10 @@ impl Config { self.experimental_force_guest_pull_for_arch.join(",") ); info!("* CONTAINERD_CONF_FILE: {}", self.containerd_conf_file); + info!( + "* CONTAINERD_USER_DROP_IN_SOURCE_FILE: {:?}", + self.containerd_user_drop_in_source_file + ); info!( "* CUSTOM_RUNTIMES_ENABLED: {}", 
self.custom_runtimes_enabled diff --git a/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs b/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs index 9bfa3ff1c7..095107b30e 100644 --- a/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs +++ b/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs @@ -143,6 +143,81 @@ fn get_containerd_output_path(paths: &ContainerdPaths) -> PathBuf { } } +fn get_user_containerd_drop_in_output_path(paths: &ContainerdPaths) -> Result<(PathBuf, String)> { + if !paths.use_drop_in { + anyhow::bail!( + "Containerd user drop-in requires drop-in support, but runtime config is in non-drop-in mode" + ); + } + + let (base_drop_in, base_import_path) = if paths.drop_in_file.starts_with("/etc/containerd/") { + ( + Path::new(&paths.drop_in_file).to_path_buf(), + paths.drop_in_file.clone(), + ) + } else { + ( + Path::new("/host").join(paths.drop_in_file.trim_start_matches('/')), + paths.drop_in_file.clone(), + ) + }; + + let parent = base_drop_in.parent().ok_or_else(|| { + anyhow::anyhow!("Failed to resolve parent directory for {:?}", base_drop_in) + })?; + let user_file_name = "zz-kata-deploy-user.toml"; + let host_path = parent.join(user_file_name); + + let import_parent = Path::new(&base_import_path) + .parent() + .ok_or_else(|| anyhow::anyhow!("Failed to resolve import parent for {base_import_path}"))?; + let import_path = import_parent + .join(user_file_name) + .to_string_lossy() + .to_string(); + + Ok((host_path, import_path)) +} + +fn configure_user_containerd_drop_in(config: &Config, paths: &ContainerdPaths) -> Result<()> { + let Some(source_file) = config.containerd_user_drop_in_source_file.as_ref() else { + return Ok(()); + }; + + let source_path = Path::new(source_file); + if !source_path.exists() { + anyhow::bail!( + "Configured CONTAINERD_USER_DROP_IN_SOURCE_FILE does not exist: {}", + source_file + ); + } + + let (user_drop_in_path, user_drop_in_import_path) = + 
get_user_containerd_drop_in_output_path(paths)?; + if let Some(parent) = user_drop_in_path.parent() { + fs::create_dir_all(parent).with_context(|| { + format!("Failed to create user containerd drop-in directory: {parent:?}") + })?; + } + + fs::copy(source_path, &user_drop_in_path).with_context(|| { + format!( + "Failed to copy user containerd drop-in from {:?} to {:?}", + source_path, user_drop_in_path + ) + })?; + + if let Some(imports_file) = &paths.imports_file { + toml_utils::append_to_toml_array( + Path::new(imports_file), + ".imports", + &format!("\"{}\"", user_drop_in_import_path), + )?; + } + + Ok(()) +} + fn write_containerd_runtime_config( config_file: &Path, pluginid: &str, @@ -445,6 +520,8 @@ pub async fn configure_containerd(config: &Config, runtime: &str) -> Result<()> } } + configure_user_containerd_drop_in(config, &paths)?; + log::info!("Successfully configured all containerd runtimes"); Ok(()) } @@ -454,6 +531,21 @@ pub async fn cleanup_containerd(config: &Config, runtime: &str) -> Result<()> { let paths = config.get_containerd_paths(runtime).await?; if paths.use_drop_in { + if config.containerd_user_drop_in_source_file.is_some() { + let (user_drop_in_path, user_drop_in_import_path) = + get_user_containerd_drop_in_output_path(&paths)?; + if let Some(imports_file) = &paths.imports_file { + toml_utils::remove_from_toml_array( + Path::new(imports_file), + ".imports", + &format!("\"{}\"", user_drop_in_import_path), + )?; + } + if user_drop_in_path.exists() { + fs::remove_file(&user_drop_in_path)?; + } + } + // Remove drop-in from imports array (if we added it; K3s/RKE2 have imports_file = None) if let Some(imports_file) = &paths.imports_file { toml_utils::remove_from_toml_array( diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/containerd-user-dropin-config.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/containerd-user-dropin-config.yaml new file mode 100644 index 0000000000..b9c0705627 --- /dev/null +++ 
b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/containerd-user-dropin-config.yaml @@ -0,0 +1,16 @@ +{{- if .Values.containerd.userDropIn | trim }} +apiVersion: v1 +kind: ConfigMap +metadata: +{{- if .Values.env.multiInstallSuffix }} + name: {{ .Chart.Name }}-containerd-user-dropin-{{ .Values.env.multiInstallSuffix }} +{{- else }} + name: {{ .Chart.Name }}-containerd-user-dropin +{{- end }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "kata-deploy.labels" . | nindent 4 }} +data: + containerd-user-dropin.toml: | +{{ .Values.containerd.userDropIn | indent 4 }} +{{- end }} diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml index e9d254d0ea..89405f6747 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml @@ -298,6 +298,10 @@ spec: - name: CONTAINERD_CONFIG_FILE_NAME value: {{ .Values.containerd.configFileName | trim | quote }} {{- end }} +{{- if .Values.containerd.userDropIn | trim }} + - name: CONTAINERD_USER_DROP_IN_SOURCE_FILE + value: "/custom-containerd-config/containerd-user-dropin.toml" +{{- end }} {{- with .Values.env.hostOS }} - name: HOST_OS value: {{ . 
| quote }} @@ -356,6 +360,11 @@ spec: mountPath: /etc/containerd/ - name: host mountPath: /host/ +{{- if .Values.containerd.userDropIn | trim }} + - name: custom-containerd-config + mountPath: /custom-containerd-config/ + readOnly: true +{{- end }} {{- if and .Values.customRuntimes.enabled .Values.customRuntimes.runtimes }} - name: custom-configs mountPath: /custom-configs/ @@ -371,6 +380,15 @@ spec: - name: host hostPath: path: / +{{- if .Values.containerd.userDropIn | trim }} + - name: custom-containerd-config + configMap: +{{- if .Values.env.multiInstallSuffix }} + name: {{ .Chart.Name }}-containerd-user-dropin-{{ .Values.env.multiInstallSuffix }} +{{- else }} + name: {{ .Chart.Name }}-containerd-user-dropin +{{- end }} +{{- end }} {{- if and .Values.customRuntimes.enabled .Values.customRuntimes.runtimes }} - name: custom-configs configMap: diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml index 6469ebc4a9..bb3adb3368 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml @@ -25,6 +25,14 @@ containerd: # or auto-detects based on the runtime (k0s, microk8s, k3s/rke2). # Example: "my-config.toml" configFileName: "" + # Optional user-provided containerd drop-in TOML content. + # This is written as an extra drop-in loaded after kata-deploy's generated drop-in, + # so user keys can override kata-deploy defaults. 
+ # Example: set erofs snapshotter default size to zero: + # userDropIn: | + # [plugins.'io.containerd.snapshotter.v1.erofs'] + # default_size = "0" + userDropIn: "" # Node selector and tolerations to control which nodes the kata-deploy daemonset runs on # Examples: From 7d9a1437476bdfecedf8d483707ac3f5c04a4749 Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Mon, 11 May 2026 11:25:07 +0000 Subject: [PATCH 5/5] ci: cover EROFS snapshotter default_size=0 path kata-deploy currently hard-codes the EROFS snapshotter default_size to "10G", so the CoCo EROFS CI lane only exercises the path where the snapshotter provides an rwlayer. Use the generic containerd.userDropIn support for the EROFS default_size and thread it through the Kubernetes CI helpers. Keep the kata-deploy default at "10G" to preserve current behavior, but allow the workflow to set "0" for the runtime-rs no-rwlayer path. Expand the existing EROFS snapshotter job to run both values. The override is written to containerd as a TOML string so "0" is not parsed as an integer. 
Assisted-by: OpenAI Codex Signed-off-by: Manuel Huber --- .github/workflows/run-kata-coco-tests.yaml | 8 +++++-- tests/gha-run-k8s-common.sh | 26 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-kata-coco-tests.yaml b/.github/workflows/run-kata-coco-tests.yaml index 3d730520b2..1b009f23a3 100644 --- a/.github/workflows/run-kata-coco-tests.yaml +++ b/.github/workflows/run-kata-coco-tests.yaml @@ -399,7 +399,7 @@ jobs: # Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter run-k8s-tests-coco-nontee-with-erofs-snapshotter: - name: run-k8s-tests-coco-nontee-with-erofs-snapshotter + name: run-k8s-tests-coco-nontee-with-erofs-snapshotter-${{ matrix.erofs-mode }} strategy: fail-fast: false matrix: @@ -409,8 +409,11 @@ jobs: - erofs pull-type: - default + erofs-mode: + - disk + - memory concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.job }}-${{ github.event.pull_request.number || github.ref }}-${{ toJSON(matrix) }} cancel-in-progress: true runs-on: ubuntu-24.04 environment: @@ -431,6 +434,7 @@ jobs: CONTAINER_ENGINE_VERSION: "v2.3" PULL_TYPE: ${{ matrix.pull-type }} SNAPSHOTTER: ${{ matrix.snapshotter }} + EROFS_SNAPSHOTTER_MODE: ${{ matrix.erofs-mode }} USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true" K8S_TEST_HOST_TYPE: "all" # We are skipping the auto generated policy tests for now, diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 9f8c42eb17..ad6ad54e50 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -43,6 +43,7 @@ K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" TEST_CLUSTER_NAMESPACE="${TEST_CLUSTER_NAMESPACE:-}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-containerd}" SNAPSHOTTER="${SNAPSHOTTER:-}" +EROFS_SNAPSHOTTER_MODE="${EROFS_SNAPSHOTTER_MODE:-}" # Wait for the Kubernetes API to recover after kata-deploy uninstall, then # 
retry the uninstall to purge any stale helm release state. On k3s/rke2, @@ -814,6 +815,31 @@ function helm_helper() { done fi + if [[ -n "${EROFS_SNAPSHOTTER_MODE}" ]]; then + if [[ "${SNAPSHOTTER}" != "erofs" ]]; then + die "EROFS_SNAPSHOTTER_MODE is only supported with SNAPSHOTTER=erofs" + fi + + local erofs_default_size + case "${EROFS_SNAPSHOTTER_MODE}" in + disk) + erofs_default_size="10G" + ;; + memory) + erofs_default_size="0" + ;; + *) + die "Unsupported EROFS_SNAPSHOTTER_MODE: ${EROFS_SNAPSHOTTER_MODE}" + ;; + esac + + HELM_CONTAINERD_USER_DROP_IN="[plugins.'io.containerd.snapshotter.v1.erofs']"$'\n' + HELM_CONTAINERD_USER_DROP_IN+=" default_size = \"${erofs_default_size}\"" + + HELM_CONTAINERD_USER_DROP_IN="${HELM_CONTAINERD_USER_DROP_IN}" \ + yq -i '.containerd.userDropIn = strenv(HELM_CONTAINERD_USER_DROP_IN)' "${values_yaml}" + fi + if [[ -z "${HELM_SHIMS}" ]]; then die "A list of shims is expected but none was provided" fi