diff --git a/.github/workflows/run-kata-coco-tests.yaml b/.github/workflows/run-kata-coco-tests.yaml index 3d730520b2..1b009f23a3 100644 --- a/.github/workflows/run-kata-coco-tests.yaml +++ b/.github/workflows/run-kata-coco-tests.yaml @@ -399,7 +399,7 @@ jobs: # Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter run-k8s-tests-coco-nontee-with-erofs-snapshotter: - name: run-k8s-tests-coco-nontee-with-erofs-snapshotter + name: run-k8s-tests-coco-nontee-with-erofs-snapshotter-${{ matrix.erofs-mode }} strategy: fail-fast: false matrix: @@ -409,8 +409,11 @@ jobs: - erofs pull-type: - default + erofs-mode: + - disk + - memory concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.job }}-${{ github.event.pull_request.number || github.ref }}-${{ toJSON(matrix) }} cancel-in-progress: true runs-on: ubuntu-24.04 environment: @@ -431,6 +434,7 @@ jobs: CONTAINER_ENGINE_VERSION: "v2.3" PULL_TYPE: ${{ matrix.pull-type }} SNAPSHOTTER: ${{ matrix.snapshotter }} + EROFS_SNAPSHOTTER_MODE: ${{ matrix.erofs-mode }} USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true" K8S_TEST_HOST_TYPE: "all" # We are skipping the auto generated policy tests for now, diff --git a/src/agent/src/storage/multi_layer_erofs.rs b/src/agent/src/storage/multi_layer_erofs.rs index 60d078ac8a..633473e779 100644 --- a/src/agent/src/storage/multi_layer_erofs.rs +++ b/src/agent/src/storage/multi_layer_erofs.rs @@ -6,7 +6,8 @@ //! Multi-layer EROFS storage handler //! //! This handler implements the guest-side processing of multi-layer EROFS rootfs: -//! - Storage with X-kata.overlay-upper: ext4 rw layer (upperdir) +//! - Optional Storage with X-kata.overlay-upper: ext4 rw layer (upperdir) +//! - If no upper storage is provided, a directory under /run/kata-containers is used //! - Storage with X-kata.overlay-lower: erofs layers (lowerdir) //! - Creates overlay to combine them //! 
- Supports X-kata.mkdir.path options to create directories in upper layer before overlay mount @@ -23,6 +24,7 @@ use crate::device::block_device_handler::get_virtio_blk_pci_device_name; use crate::device::scsi_device_handler::get_scsi_device_name; use crate::linux_abi::pcipath_from_dev_tree_path; use crate::mount::baremount; +use crate::rpc::CONTAINER_BASE; use crate::sandbox::Sandbox; use crate::storage::{StorageContext, StorageHandler}; use anyhow::{anyhow, Context, Result}; @@ -36,7 +38,7 @@ use tokio::sync::Mutex; /// EROFS Type const EROFS_TYPE: &str = "erofs"; -/// ext4 Type +/// ext4 Type (upper virtio disk based rw layer) const EXT4_TYPE: &str = "ext4"; /// Overlay Type const OVERLAY_TYPE: &str = "overlay"; @@ -59,8 +61,8 @@ pub struct MultiLayerErofsHandler {} pub struct MultiLayerErofsResult { pub mount_point: String, pub processed_mount_points: Vec, - /// Temporary mount points (upper, lower-0, lower-1, …) that back the - /// overlay. These must be tracked so they are unmounted *after* the + /// Temporary mount points (explicit upper, lower-0, lower-1, …) that back + /// the overlay. These must be tracked so they are unmounted *after* the /// overlay target during container teardown. pub temp_mount_points: Vec, /// dm-verity device paths that need to be destroyed during cleanup @@ -131,26 +133,33 @@ pub async fn handle_multi_layer_erofs_group( return Err(anyhow!("no multi-layer storages found")); } - let mut ext4_storage: Option<&Storage> = None; + let mut upper_storage: Option<&Storage> = None; let mut erofs_storages: Vec<&Storage> = Vec::new(); let mut mkdir_dirs: Vec = Vec::new(); let mut has_gpt_partition: bool = false; for storage in &multi_layer_storages { + // Collect all X-kata.mkdir.path directives from this multi-layer EROFS group. 
+ for opt in &storage.options { + if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) { + mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?); + } + } + + if storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) && storage.fstype != EXT4_TYPE { + return Err(anyhow!( + "multi-layer erofs explicit upper layer must be ext4, got '{}'; omit the upper storage for the implicit /run-backed upper", + storage.fstype + )); + } + if is_upper_storage(storage) { - if ext4_storage.is_some() { + if upper_storage.is_some() { return Err(anyhow!( - "multi-layer erofs currently supports exactly one ext4 upper layer" + "multi-layer erofs currently supports exactly one explicit ext4 upper layer" )); } - ext4_storage = Some(*storage); - - // Extract mkdir directories from X-kata.mkdir.path options - for opt in &storage.options { - if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) { - mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?); - } - } + upper_storage = Some(*storage); } else if is_lower_storage(storage) { // Each GPT partition is provided as a separate storage entry by the host if !has_gpt_partition && is_gpt_partitioned(storage) { @@ -176,36 +185,57 @@ pub async fn handle_multi_layer_erofs_group( erofs_storages.sort_by_key(|storage| get_partition_number(storage).unwrap_or(u32::MAX)); } - let ext4 = ext4_storage - .ok_or_else(|| anyhow!("multi-layer erofs missing ext4 upper layer storage"))?; + // With an explicit upper layer, the upper Storage carries the final overlay + // target. With an implicit /run-backed upper, the runtime puts that target + // on the first EROFS lower Storage. + let target_mount_point = upper_storage + .map(|upper| upper.mount_point.clone()) + .unwrap_or_else(|| erofs_storages[0].mount_point.clone()); + // Explicit uppers have a device source, while the implicit + // layout uses a directory under /run rather than a block device. 
+ let upper_source = upper_storage + .map(|upper| upper.source.as_str()) + .unwrap_or("run-backed directory"); info!( logger, "Handling multi-layer erofs group"; - "ext4-device" => &ext4.source, + "upper-source" => upper_source, "erofs-devices" => erofs_storages .iter() .map(|s| s.source.as_str()) .collect::>() .join(","), - "mount-point" => &ext4.mount_point, + "mount-point" => &target_mount_point, "mkdir-dirs-count" => mkdir_dirs.len(), ); - // Create temporary mount points for upper and lower layers + // Create temporary backing paths for upper and lower layers let cid_str = cid.as_deref().unwrap_or("sandbox"); // Validate container ID to prevent path traversal via crafted cid values validate_container_id(cid_str)?; - let temp_base = PathBuf::from(format!("/run/kata-containers/{}/multi-layer", cid_str)); + let container_base = + scoped_join(CONTAINER_BASE, cid_str).context("failed to build container temporary path")?; + fs::create_dir_all(&container_base).context("failed to create container temporary path")?; + let temp_base = + scoped_join(&container_base, "multi-layer").context("failed to build multi-layer path")?; fs::create_dir_all(&temp_base).context("failed to create temp mount base")?; // Validate mount point to prevent path traversal via crafted mount_point values - validate_mount_point(&ext4.mount_point)?; + validate_mount_point(&target_mount_point)?; let upper_mount = temp_base.join("upper"); fs::create_dir_all(&upper_mount).context("failed to create upper mount dir")?; - wait_and_mount_layer(ext4, &upper_mount, sandbox, &logger, None).await?; + if let Some(upper) = upper_storage { + wait_and_mount_layer(upper, &upper_mount, sandbox, &logger, None).await?; + } else { + info!( + logger, + "Using /run-backed upper directory"; + "mount-point" => upper_mount.display(), + ); + } for mkdir_dir in &mkdir_dirs { // As {{ mount 1 }} refers to the first lower layer, which is not available until we mount it. 
@@ -302,12 +332,12 @@ pub async fn handle_multi_layer_erofs_group( "upperdir" => upperdir.display(), "lowerdir" => &lowerdir, "workdir" => workdir.display(), - "target" => &ext4.mount_point, + "target" => &target_mount_point, ); create_mount_destination( Path::new(OVERLAY_TYPE), - Path::new(&ext4.mount_point), + Path::new(&target_mount_point), "", OVERLAY_TYPE, ) @@ -315,7 +345,7 @@ pub async fn handle_multi_layer_erofs_group( let overlay_mount = kata_types::mount::Mount { source: OVERLAY_TYPE.to_string(), - destination: PathBuf::from(&ext4.mount_point), + destination: PathBuf::from(&target_mount_point), fs_type: OVERLAY_TYPE.to_string(), options: vec![ format!("upperdir={}", upperdir.display()), @@ -326,13 +356,13 @@ pub async fn handle_multi_layer_erofs_group( }; overlay_mount - .mount(Path::new(&ext4.mount_point)) + .mount(Path::new(&target_mount_point)) .context("failed to mount overlay")?; info!( logger, "Multi-layer EROFS overlay mounted successfully"; - "mount-point" => &ext4.mount_point, + "mount-point" => &target_mount_point, ); // Collect all unique mount points to maintain a clean resource state. @@ -352,16 +382,19 @@ pub async fn handle_multi_layer_erofs_group( acc }); - // Collect the temporary mount points (upper first, then lowers) so the - // caller can register them in container_mounts for proper cleanup. - let mut temp_mount_points = Vec::with_capacity(1 + lower_mounts.len()); - temp_mount_points.push(upper_mount.display().to_string()); + // Collect temporary backing mounts. The implicit /run-backed upper is just + // a directory under the container bundle and is removed with that bundle. 
+ let mut temp_mount_points = + Vec::with_capacity(usize::from(upper_storage.is_some()) + lower_mounts.len()); + if upper_storage.is_some() { + temp_mount_points.push(upper_mount.display().to_string()); + } for lm in &lower_mounts { temp_mount_points.push(lm.display().to_string()); } Ok(MultiLayerErofsResult { - mount_point: ext4.mount_point.clone(), + mount_point: target_mount_point, processed_mount_points, temp_mount_points, verity_devices, @@ -393,8 +426,9 @@ async fn track_temporary_mount_for_cleanup( } fn is_upper_storage(storage: &Storage) -> bool { - storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) - || (storage.fstype == EXT4_TYPE && storage.options.iter().any(|o| o == OPT_MULTI_LAYER)) + storage.fstype == EXT4_TYPE + && (storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) + || storage.options.iter().any(|o| o == OPT_MULTI_LAYER)) } fn is_lower_storage(storage: &Storage) -> bool { @@ -508,6 +542,7 @@ fn resolve_mkdir_path( Ok(safe) } +/// Wait for a block-backed layer device, then mount it at `layer_mount`. 
async fn wait_and_mount_layer( layer: &Storage, layer_mount: &Path, @@ -821,6 +856,7 @@ mod tests { let mut s = Storage::default(); assert!(!is_upper_storage(&s)); + s.fstype = EXT4_TYPE.to_string(); s.options.push(OPT_OVERLAY_UPPER.to_string()); assert!(is_upper_storage(&s)); @@ -830,6 +866,13 @@ mod tests { ..Default::default() }; assert!(is_upper_storage(&s2)); + + let s3 = Storage { + fstype: "tmpfs".to_string(), + options: vec![OPT_OVERLAY_UPPER.to_string(), OPT_MULTI_LAYER.to_string()], + ..Default::default() + }; + assert!(!is_upper_storage(&s3)); } #[test] diff --git a/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs b/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs index 36e03b2802..0b2dad2df3 100644 --- a/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs +++ b/src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs @@ -2,10 +2,13 @@ // // SPDX-License-Identifier: Apache-2.0 // -// Handle multi-layer EROFS rootfs: -// Mount[0]: ext4 rw layer -> virtio-blk device (writable) -// Mount[1]: erofs with device= -> virtio-blk via VMDK (read-only) -// Mount[2]: overlay (format/mkdir/overlay) -> host mount OR guest agent +// Handle multi-layer EROFS rootfs. +// +// The containerd erofs snapshotter sends the active snapshot as either: +// - ext4 rwlayer.img + erofs lower + overlay when host rw backing is enabled. +// - erofs lower + overlay when default_size="0"; the agent then uses a +// guest-memory upper directory under /run. +// // The overlay mount may be handled by the guest agent if it contains "{{" // templates in upperdir/workdir. 
@@ -504,13 +507,14 @@ fn extract_block_device_info( /// EROFS Multi-Layer Rootfs with overlay support /// /// Handles the EROFS Multi-Layer where rootfs consists of: -/// - Mount[0]: ext4 rw layer (writable container layer) -> virtio-blk device -/// - Mount[1]: erofs layers (fsmeta + flattened layers) -> virtio-blk via VMDK -/// - Mount[2]: overlay (to combine ext4 upper + erofs lower) +/// - Optional ext4 rw disk -> virtio-blk when host rw backing exists. +/// - EROFS layers (fsmeta + flattened layers) -> virtio-blk via VMDK. +/// - Overlay metadata that combines the writable upper with the EROFS lower. pub(crate) struct ErofsMultiLayerRootfs { guest_path: String, device_ids: Vec, - // Writable layer storage (upper layer), typically ext4 + // Writable layer storage (upper layer), typically ext4 and optional when + // the agent creates a /run-backed upper. rwlayer_storage: Option, // Read-only EROFS layer storages (lower layers), one per partition in GPT mode erofs_storages: Vec, @@ -553,7 +557,10 @@ impl ErofsMultiLayerRootfs { // Check block device count limit let expected_device_count = rootfs_mounts .iter() - .filter(|m| matches!(m.fs_type.as_str(), RW_LAYER_ROOTFS_TYPE | EROFS_ROOTFS_TYPE)) + .filter(|m| { + m.fs_type.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE) + || m.fs_type.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE) + }) .count(); // TODO(Alex Lyn): fsmerge mode with single erofs mount and multiple device= options @@ -900,13 +907,15 @@ impl ErofsMultiLayerRootfs { return Err(anyhow!("no devices attached for multi-layer erofs rootfs")); } - // Add mkdir directives to rwlayer storage options for guest agent - if let Some(ref mut rwlayer) = rwlayer_storage { - rwlayer.options.extend( - mkdir_dirs - .iter() - .map(|dir| format!("{}{}", X_KATA_MKDIR_PATH, dir)), - ); + // Forward overlay mkdir hints on the EROFS Storage only. 
The guest agent scans + // every multi-layer storage for X-kata.mkdir.path; attaching here avoids splitting + // the same metadata across rwlayer vs erofs when an ext4 upper exists. + let mkdir_options = mkdir_dirs + .iter() + .map(|dir| format!("{}{}", X_KATA_MKDIR_PATH, dir)) + .collect::>(); + if let Some(erofs) = erofs_storages.first_mut() { + erofs.options.extend(mkdir_options); } Ok(Self { @@ -936,9 +945,9 @@ impl Rootfs for ErofsMultiLayerRootfs { } async fn get_storage(&self) -> Option> { - // Return all storages for multi-layer EROFS (rw layer + erofs layers) to guest agent. - // Guest agent needs all of them to create overlay mount. - // In GPT mode, each partition has its own storage entry. + // Return all storages for multi-layer EROFS. The rw layer is optional; + // when absent, the agent creates a /run-backed upper dir. In GPT mode, + // each partition has its own EROFS storage entry. let mut storages = Vec::new(); if let Some(rwlayer) = self.rwlayer_storage.clone() { @@ -989,23 +998,102 @@ impl Rootfs for ErofsMultiLayerRootfs { } } +fn overlay_like(fs_type: &str) -> bool { + matches!( + fs_type.to_ascii_lowercase().as_str(), + "overlay" | "format/overlay" | "format/mkdir/overlay" + ) +} + /// Check if mounts represent a multi-layer EROFS rootfs. /// -/// Returns `true` when `rootfs_mounts` contains at least two entries: -/// an ext4 rw layer (upper) and an erofs layer (lower). +/// Matches what the containerd erofs snapshotter sends for an active snapshot: +/// an EROFS lower layer plus an overlay mount. With host rw backing enabled, +/// the mount list also includes an ext4 `rwlayer.img`; with `default_size="0"` +/// it does not, and the agent creates the writable upper under `/run`. +/// +/// This is only the coarse dispatcher check; `ErofsMultiLayerRootfs::new` +/// parses the optional rwlayer and overlay metadata. 
pub fn is_erofs_multi_layer(rootfs_mounts: &[Mount]) -> bool { if rootfs_mounts.len() < 2 { return false; } - let has_rwlayer = rootfs_mounts.iter().any(|m| { - m.fs_type.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE) && m.options.iter().any(|o| o == "rw") - }); - let has_erofs = rootfs_mounts .iter() .any(|m| m.fs_type.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE)); - // Must have rwlayer + erofs (multi-layer or single-layer) - has_rwlayer && has_erofs + if !has_erofs { + return false; + } + + rootfs_mounts.iter().any(|m| overlay_like(&m.fs_type)) +} + +#[cfg(test)] +mod tests { + use super::{is_erofs_multi_layer, EROFS_ROOTFS_TYPE, RW_LAYER_ROOTFS_TYPE}; + use kata_types::mount::Mount; + use std::path::PathBuf; + + fn mount(fs_type: &str, options: &[&str]) -> Mount { + Mount { + fs_type: fs_type.to_string(), + options: options.iter().map(|s| (*s).to_string()).collect(), + destination: PathBuf::from("/"), + ..Default::default() + } + } + + #[test] + fn is_erofs_multi_layer_rejects_short_list() { + assert!(!is_erofs_multi_layer(&[])); + assert!(!is_erofs_multi_layer(&[mount(EROFS_ROOTFS_TYPE, &[])])); + } + + #[test] + fn is_erofs_multi_layer_requires_erofs() { + let mounts = vec![mount(RW_LAYER_ROOTFS_TYPE, &["rw"]), mount("overlay", &[])]; + assert!(!is_erofs_multi_layer(&mounts)); + } + + #[test] + fn is_erofs_multi_layer_ext4_rw_erofs_and_overlay() { + let mounts = vec![ + mount(RW_LAYER_ROOTFS_TYPE, &["rw"]), + mount(EROFS_ROOTFS_TYPE, &[]), + mount("overlay", &[]), + ]; + assert!(is_erofs_multi_layer(&mounts)); + } + + #[test] + fn is_erofs_multi_layer_implicit_upper_erofs_and_overlay_variants() { + for overlay_type in ["overlay", "format/overlay", "format/mkdir/overlay"] { + let mounts = vec![mount(EROFS_ROOTFS_TYPE, &[]), mount(overlay_type, &[])]; + assert!( + is_erofs_multi_layer(&mounts), + "expected multi-layer for overlay type {}", + overlay_type + ); + } + } + + #[test] + fn is_erofs_multi_layer_erofs_without_overlay_or_rw_is_false() { + let mounts = 
vec![mount(EROFS_ROOTFS_TYPE, &[]), mount("btrfs", &[])]; + assert!(!is_erofs_multi_layer(&mounts)); + } + + #[test] + fn is_erofs_multi_layer_does_not_validate_optional_rwlayer_options() { + // The dispatcher only requires EROFS + overlay. Detailed rwlayer + // interpretation is handled by ErofsMultiLayerRootfs::new. + let mounts = vec![ + mount(RW_LAYER_ROOTFS_TYPE, &["ro"]), + mount(EROFS_ROOTFS_TYPE, &[]), + mount("overlay", &[]), + ]; + assert!(is_erofs_multi_layer(&mounts)); + } } diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 9f8c42eb17..ad6ad54e50 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -43,6 +43,7 @@ K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}" TEST_CLUSTER_NAMESPACE="${TEST_CLUSTER_NAMESPACE:-}" CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-containerd}" SNAPSHOTTER="${SNAPSHOTTER:-}" +EROFS_SNAPSHOTTER_MODE="${EROFS_SNAPSHOTTER_MODE:-}" # Wait for the Kubernetes API to recover after kata-deploy uninstall, then # retry the uninstall to purge any stale helm release state. 
On k3s/rke2, @@ -814,6 +815,31 @@ function helm_helper() { done fi + if [[ -n "${EROFS_SNAPSHOTTER_MODE}" ]]; then + if [[ "${SNAPSHOTTER}" != "erofs" ]]; then + die "EROFS_SNAPSHOTTER_MODE is only supported with SNAPSHOTTER=erofs" + fi + + local erofs_default_size + case "${EROFS_SNAPSHOTTER_MODE}" in + disk) + erofs_default_size="10G" + ;; + memory) + erofs_default_size="0" + ;; + *) + die "Unsupported EROFS_SNAPSHOTTER_MODE: ${EROFS_SNAPSHOTTER_MODE}" + ;; + esac + + HELM_CONTAINERD_USER_DROP_IN="[plugins.'io.containerd.snapshotter.v1.erofs']"$'\n' + HELM_CONTAINERD_USER_DROP_IN+=" default_size = \"${erofs_default_size}\"" + + HELM_CONTAINERD_USER_DROP_IN="${HELM_CONTAINERD_USER_DROP_IN}" \ + yq -i '.containerd.userDropIn = strenv(HELM_CONTAINERD_USER_DROP_IN)' "${values_yaml}" + fi + if [[ -z "${HELM_SHIMS}" ]]; then die "A list of shims is expected but none was provided" fi diff --git a/tests/integration/kubernetes/k8s-empty-image.bats b/tests/integration/kubernetes/k8s-empty-image.bats index 1d451b31cf..02b0a4f96e 100644 --- a/tests/integration/kubernetes/k8s-empty-image.bats +++ b/tests/integration/kubernetes/k8s-empty-image.bats @@ -43,7 +43,7 @@ setup() { kubectl create -f "${yaml_file}" local -r command="kubectl describe pod/${pod_name} | grep -E \ - 'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT|unsupported rootfs mounts count 2'" + 'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT|unsupported rootfs mounts count 2|unsupported rootfs Mount'" info "Waiting ${wait_time} seconds for: ${command}" waitForProcess "${wait_time}" "${sleep_time}" "${command}" >/dev/null 2>/dev/null } diff --git a/tools/packaging/kata-deploy/binary/src/config.rs b/tools/packaging/kata-deploy/binary/src/config.rs index 51659e3927..d7c372b426 100644 --- a/tools/packaging/kata-deploy/binary/src/config.rs +++ b/tools/packaging/kata-deploy/binary/src/config.rs @@ -159,6 +159,7 @@ pub struct Config { pub containerd_conf_file: String, pub 
containerd_conf_file_backup: String, pub containerd_drop_in_conf_file: String, + pub containerd_user_drop_in_source_file: Option, pub daemonset_name: String, pub custom_runtimes_enabled: bool, pub custom_runtimes: Vec, @@ -265,6 +266,10 @@ impl Config { let containerd_conf_file_backup = format!("{containerd_conf_file}.bak"); let containerd_drop_in_conf_file = format!("{dest_dir}/containerd/config.d/kata-deploy.toml"); + let containerd_user_drop_in_source_file = env::var("CONTAINERD_USER_DROP_IN_SOURCE_FILE") + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()); let helm_post_delete_hook = env::var("HELM_POST_DELETE_HOOK").unwrap_or_else(|_| "false".to_string()) == "true"; @@ -314,6 +319,7 @@ impl Config { containerd_conf_file, containerd_conf_file_backup, containerd_drop_in_conf_file, + containerd_user_drop_in_source_file, daemonset_name, custom_runtimes_enabled, custom_runtimes, @@ -522,6 +528,10 @@ impl Config { self.experimental_force_guest_pull_for_arch.join(",") ); info!("* CONTAINERD_CONF_FILE: {}", self.containerd_conf_file); + info!( + "* CONTAINERD_USER_DROP_IN_SOURCE_FILE: {:?}", + self.containerd_user_drop_in_source_file + ); info!( "* CUSTOM_RUNTIMES_ENABLED: {}", self.custom_runtimes_enabled diff --git a/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs b/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs index 9bfa3ff1c7..095107b30e 100644 --- a/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs +++ b/tools/packaging/kata-deploy/binary/src/runtime/containerd.rs @@ -143,6 +143,81 @@ fn get_containerd_output_path(paths: &ContainerdPaths) -> PathBuf { } } +fn get_user_containerd_drop_in_output_path(paths: &ContainerdPaths) -> Result<(PathBuf, String)> { + if !paths.use_drop_in { + anyhow::bail!( + "Containerd user drop-in requires drop-in support, but runtime config is in non-drop-in mode" + ); + } + + let (base_drop_in, base_import_path) = if paths.drop_in_file.starts_with("/etc/containerd/") { + ( + 
Path::new(&paths.drop_in_file).to_path_buf(), + paths.drop_in_file.clone(), + ) + } else { + ( + Path::new("/host").join(paths.drop_in_file.trim_start_matches('/')), + paths.drop_in_file.clone(), + ) + }; + + let parent = base_drop_in.parent().ok_or_else(|| { + anyhow::anyhow!("Failed to resolve parent directory for {:?}", base_drop_in) + })?; + let user_file_name = "zz-kata-deploy-user.toml"; + let host_path = parent.join(user_file_name); + + let import_parent = Path::new(&base_import_path) + .parent() + .ok_or_else(|| anyhow::anyhow!("Failed to resolve import parent for {base_import_path}"))?; + let import_path = import_parent + .join(user_file_name) + .to_string_lossy() + .to_string(); + + Ok((host_path, import_path)) +} + +fn configure_user_containerd_drop_in(config: &Config, paths: &ContainerdPaths) -> Result<()> { + let Some(source_file) = config.containerd_user_drop_in_source_file.as_ref() else { + return Ok(()); + }; + + let source_path = Path::new(source_file); + if !source_path.exists() { + anyhow::bail!( + "Configured CONTAINERD_USER_DROP_IN_SOURCE_FILE does not exist: {}", + source_file + ); + } + + let (user_drop_in_path, user_drop_in_import_path) = + get_user_containerd_drop_in_output_path(paths)?; + if let Some(parent) = user_drop_in_path.parent() { + fs::create_dir_all(parent).with_context(|| { + format!("Failed to create user containerd drop-in directory: {parent:?}") + })?; + } + + fs::copy(source_path, &user_drop_in_path).with_context(|| { + format!( + "Failed to copy user containerd drop-in from {:?} to {:?}", + source_path, user_drop_in_path + ) + })?; + + if let Some(imports_file) = &paths.imports_file { + toml_utils::append_to_toml_array( + Path::new(imports_file), + ".imports", + &format!("\"{}\"", user_drop_in_import_path), + )?; + } + + Ok(()) +} + fn write_containerd_runtime_config( config_file: &Path, pluginid: &str, @@ -445,6 +520,8 @@ pub async fn configure_containerd(config: &Config, runtime: &str) -> Result<()> } } + 
configure_user_containerd_drop_in(config, &paths)?; + log::info!("Successfully configured all containerd runtimes"); Ok(()) } @@ -454,6 +531,21 @@ pub async fn cleanup_containerd(config: &Config, runtime: &str) -> Result<()> { let paths = config.get_containerd_paths(runtime).await?; if paths.use_drop_in { + if config.containerd_user_drop_in_source_file.is_some() { + let (user_drop_in_path, user_drop_in_import_path) = + get_user_containerd_drop_in_output_path(&paths)?; + if let Some(imports_file) = &paths.imports_file { + toml_utils::remove_from_toml_array( + Path::new(imports_file), + ".imports", + &format!("\"{}\"", user_drop_in_import_path), + )?; + } + if user_drop_in_path.exists() { + fs::remove_file(&user_drop_in_path)?; + } + } + // Remove drop-in from imports array (if we added it; K3s/RKE2 have imports_file = None) if let Some(imports_file) = &paths.imports_file { toml_utils::remove_from_toml_array( diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/containerd-user-dropin-config.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/containerd-user-dropin-config.yaml new file mode 100644 index 0000000000..b9c0705627 --- /dev/null +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/containerd-user-dropin-config.yaml @@ -0,0 +1,16 @@ +{{- if .Values.containerd.userDropIn | trim }} +apiVersion: v1 +kind: ConfigMap +metadata: +{{- if .Values.env.multiInstallSuffix }} + name: {{ .Chart.Name }}-containerd-user-dropin-{{ .Values.env.multiInstallSuffix }} +{{- else }} + name: {{ .Chart.Name }}-containerd-user-dropin +{{- end }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "kata-deploy.labels" . 
| nindent 4 }} +data: + containerd-user-dropin.toml: | +{{ .Values.containerd.userDropIn | indent 4 }} +{{- end }} diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml index e9d254d0ea..89405f6747 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml +++ b/tools/packaging/kata-deploy/helm-chart/kata-deploy/templates/kata-deploy.yaml @@ -298,6 +298,10 @@ spec: - name: CONTAINERD_CONFIG_FILE_NAME value: {{ .Values.containerd.configFileName | trim | quote }} {{- end }} +{{- if .Values.containerd.userDropIn | trim }} + - name: CONTAINERD_USER_DROP_IN_SOURCE_FILE + value: "/custom-containerd-config/containerd-user-dropin.toml" +{{- end }} {{- with .Values.env.hostOS }} - name: HOST_OS value: {{ . | quote }} @@ -356,6 +360,11 @@ spec: mountPath: /etc/containerd/ - name: host mountPath: /host/ +{{- if .Values.containerd.userDropIn | trim }} + - name: custom-containerd-config + mountPath: /custom-containerd-config/ + readOnly: true +{{- end }} {{- if and .Values.customRuntimes.enabled .Values.customRuntimes.runtimes }} - name: custom-configs mountPath: /custom-configs/ @@ -371,6 +380,15 @@ spec: - name: host hostPath: path: / +{{- if .Values.containerd.userDropIn | trim }} + - name: custom-containerd-config + configMap: +{{- if .Values.env.multiInstallSuffix }} + name: {{ .Chart.Name }}-containerd-user-dropin-{{ .Values.env.multiInstallSuffix }} +{{- else }} + name: {{ .Chart.Name }}-containerd-user-dropin +{{- end }} +{{- end }} {{- if and .Values.customRuntimes.enabled .Values.customRuntimes.runtimes }} - name: custom-configs configMap: diff --git a/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml b/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml index 6469ebc4a9..bb3adb3368 100644 --- a/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml +++ 
b/tools/packaging/kata-deploy/helm-chart/kata-deploy/values.yaml @@ -25,6 +25,14 @@ containerd: # or auto-detects based on the runtime (k0s, microk8s, k3s/rke2). # Example: "my-config.toml" configFileName: "" + # Optional user-provided containerd drop-in TOML content. + # This is written as an extra drop-in loaded after kata-deploy's generated drop-in, + # so user keys can override kata-deploy defaults. + # Example: set erofs snapshotter default size to zero: + # userDropIn: | + # [plugins.'io.containerd.snapshotter.v1.erofs'] + # default_size = "0" + userDropIn: "" # Node selector and tolerations to control which nodes the kata-deploy daemonset runs on # Examples: