Merge pull request #12979 from manuelh-dev/mahuber/erofs-tmpfs-mount

runtime-rs/agent: support EROFS snapshots without a rwlayer
This commit is contained in:
manuelh-dev
2026-05-29 13:50:27 -07:00
committed by GitHub
10 changed files with 370 additions and 65 deletions

View File

@@ -399,7 +399,7 @@ jobs:
# Generate jobs for testing CoCo on non-TEE environments with erofs-snapshotter
run-k8s-tests-coco-nontee-with-erofs-snapshotter:
name: run-k8s-tests-coco-nontee-with-erofs-snapshotter
name: run-k8s-tests-coco-nontee-with-erofs-snapshotter-${{ matrix.erofs-mode }}
strategy:
fail-fast: false
matrix:
@@ -409,8 +409,11 @@ jobs:
- erofs
pull-type:
- default
erofs-mode:
- disk
- memory
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.event.pull_request.number || github.ref }}
group: ${{ github.workflow }}-${{ github.job }}-${{ github.event.pull_request.number || github.ref }}-${{ toJSON(matrix) }}
cancel-in-progress: true
runs-on: ubuntu-24.04
environment:
@@ -431,6 +434,7 @@ jobs:
CONTAINER_ENGINE_VERSION: "v2.3"
PULL_TYPE: ${{ matrix.pull-type }}
SNAPSHOTTER: ${{ matrix.snapshotter }}
EROFS_SNAPSHOTTER_MODE: ${{ matrix.erofs-mode }}
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER: "true"
K8S_TEST_HOST_TYPE: "all"
# We are skipping the auto generated policy tests for now,

View File

@@ -6,7 +6,8 @@
//! Multi-layer EROFS storage handler
//!
//! This handler implements the guest-side processing of multi-layer EROFS rootfs:
//! - Storage with X-kata.overlay-upper: ext4 rw layer (upperdir)
//! - Optional Storage with X-kata.overlay-upper: ext4 rw layer (upperdir)
//! - If no upper storage is provided, a directory under /run/kata-containers is used
//! - Storage with X-kata.overlay-lower: erofs layers (lowerdir)
//! - Creates overlay to combine them
//! - Supports X-kata.mkdir.path options to create directories in upper layer before overlay mount
@@ -23,6 +24,7 @@ use crate::device::block_device_handler::get_virtio_blk_pci_device_name;
use crate::device::scsi_device_handler::get_scsi_device_name;
use crate::linux_abi::pcipath_from_dev_tree_path;
use crate::mount::baremount;
use crate::rpc::CONTAINER_BASE;
use crate::sandbox::Sandbox;
use crate::storage::{StorageContext, StorageHandler};
use anyhow::{anyhow, Context, Result};
@@ -36,7 +38,7 @@ use tokio::sync::Mutex;
/// EROFS Type
const EROFS_TYPE: &str = "erofs";
/// ext4 Type
/// ext4 Type (upper virtio disk based rw layer)
const EXT4_TYPE: &str = "ext4";
/// Overlay Type
const OVERLAY_TYPE: &str = "overlay";
@@ -59,8 +61,8 @@ pub struct MultiLayerErofsHandler {}
pub struct MultiLayerErofsResult {
pub mount_point: String,
pub processed_mount_points: Vec<String>,
/// Temporary mount points (upper, lower-0, lower-1, …) that back the
/// overlay. These must be tracked so they are unmounted *after* the
/// Temporary mount points (explicit upper, lower-0, lower-1, …) that back
/// the overlay. These must be tracked so they are unmounted *after* the
/// overlay target during container teardown.
pub temp_mount_points: Vec<String>,
/// dm-verity device paths that need to be destroyed during cleanup
@@ -131,26 +133,33 @@ pub async fn handle_multi_layer_erofs_group(
return Err(anyhow!("no multi-layer storages found"));
}
let mut ext4_storage: Option<&Storage> = None;
let mut upper_storage: Option<&Storage> = None;
let mut erofs_storages: Vec<&Storage> = Vec::new();
let mut mkdir_dirs: Vec<MkdirDirective> = Vec::new();
let mut has_gpt_partition: bool = false;
for storage in &multi_layer_storages {
// Collect all X-kata.mkdir.path directives from this multi-layer EROFS group.
for opt in &storage.options {
if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) {
mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?);
}
}
if storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) && storage.fstype != EXT4_TYPE {
return Err(anyhow!(
"multi-layer erofs explicit upper layer must be ext4, got '{}'; omit the upper storage for the implicit /run-backed upper",
storage.fstype
));
}
if is_upper_storage(storage) {
if ext4_storage.is_some() {
if upper_storage.is_some() {
return Err(anyhow!(
"multi-layer erofs currently supports exactly one ext4 upper layer"
"multi-layer erofs currently supports exactly one explicit ext4 upper layer"
));
}
ext4_storage = Some(*storage);
// Extract mkdir directories from X-kata.mkdir.path options
for opt in &storage.options {
if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) {
mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?);
}
}
upper_storage = Some(*storage);
} else if is_lower_storage(storage) {
// Each GPT partition is provided as a separate storage entry by the host
if !has_gpt_partition && is_gpt_partitioned(storage) {
@@ -176,36 +185,57 @@ pub async fn handle_multi_layer_erofs_group(
erofs_storages.sort_by_key(|storage| get_partition_number(storage).unwrap_or(u32::MAX));
}
let ext4 = ext4_storage
.ok_or_else(|| anyhow!("multi-layer erofs missing ext4 upper layer storage"))?;
// With an explicit upper layer, the upper Storage carries the final overlay
// target. With an implicit /run-backed upper, the runtime puts that target
// on the first EROFS lower Storage.
let target_mount_point = upper_storage
.map(|upper| upper.mount_point.clone())
.unwrap_or_else(|| erofs_storages[0].mount_point.clone());
// Explicit uppers have a device source, while the implicit
// layout uses a directory under /run rather than a block device.
let upper_source = upper_storage
.map(|upper| upper.source.as_str())
.unwrap_or("run-backed directory");
info!(
logger,
"Handling multi-layer erofs group";
"ext4-device" => &ext4.source,
"upper-source" => upper_source,
"erofs-devices" => erofs_storages
.iter()
.map(|s| s.source.as_str())
.collect::<Vec<_>>()
.join(","),
"mount-point" => &ext4.mount_point,
"mount-point" => &target_mount_point,
"mkdir-dirs-count" => mkdir_dirs.len(),
);
// Create temporary mount points for upper and lower layers
// Create temporary backing paths for upper and lower layers
let cid_str = cid.as_deref().unwrap_or("sandbox");
// Validate container ID to prevent path traversal via crafted cid values
validate_container_id(cid_str)?;
let temp_base = PathBuf::from(format!("/run/kata-containers/{}/multi-layer", cid_str));
let container_base =
scoped_join(CONTAINER_BASE, cid_str).context("failed to build container temporary path")?;
fs::create_dir_all(&container_base).context("failed to create container temporary path")?;
let temp_base =
scoped_join(&container_base, "multi-layer").context("failed to build multi-layer path")?;
fs::create_dir_all(&temp_base).context("failed to create temp mount base")?;
// Validate mount point to prevent path traversal via crafted mount_point values
validate_mount_point(&ext4.mount_point)?;
validate_mount_point(&target_mount_point)?;
let upper_mount = temp_base.join("upper");
fs::create_dir_all(&upper_mount).context("failed to create upper mount dir")?;
wait_and_mount_layer(ext4, &upper_mount, sandbox, &logger, None).await?;
if let Some(upper) = upper_storage {
wait_and_mount_layer(upper, &upper_mount, sandbox, &logger, None).await?;
} else {
info!(
logger,
"Using /run-backed upper directory";
"mount-point" => upper_mount.display(),
);
}
for mkdir_dir in &mkdir_dirs {
// As {{ mount 1 }} refers to the first lower layer, which is not available until we mount it.
@@ -302,12 +332,12 @@ pub async fn handle_multi_layer_erofs_group(
"upperdir" => upperdir.display(),
"lowerdir" => &lowerdir,
"workdir" => workdir.display(),
"target" => &ext4.mount_point,
"target" => &target_mount_point,
);
create_mount_destination(
Path::new(OVERLAY_TYPE),
Path::new(&ext4.mount_point),
Path::new(&target_mount_point),
"",
OVERLAY_TYPE,
)
@@ -315,7 +345,7 @@ pub async fn handle_multi_layer_erofs_group(
let overlay_mount = kata_types::mount::Mount {
source: OVERLAY_TYPE.to_string(),
destination: PathBuf::from(&ext4.mount_point),
destination: PathBuf::from(&target_mount_point),
fs_type: OVERLAY_TYPE.to_string(),
options: vec![
format!("upperdir={}", upperdir.display()),
@@ -326,13 +356,13 @@ pub async fn handle_multi_layer_erofs_group(
};
overlay_mount
.mount(Path::new(&ext4.mount_point))
.mount(Path::new(&target_mount_point))
.context("failed to mount overlay")?;
info!(
logger,
"Multi-layer EROFS overlay mounted successfully";
"mount-point" => &ext4.mount_point,
"mount-point" => &target_mount_point,
);
// Collect all unique mount points to maintain a clean resource state.
@@ -352,16 +382,19 @@ pub async fn handle_multi_layer_erofs_group(
acc
});
// Collect the temporary mount points (upper first, then lowers) so the
// caller can register them in container_mounts for proper cleanup.
let mut temp_mount_points = Vec::with_capacity(1 + lower_mounts.len());
temp_mount_points.push(upper_mount.display().to_string());
// Collect temporary backing mounts. The implicit /run-backed upper is just
// a directory under the container bundle and is removed with that bundle.
let mut temp_mount_points =
Vec::with_capacity(usize::from(upper_storage.is_some()) + lower_mounts.len());
if upper_storage.is_some() {
temp_mount_points.push(upper_mount.display().to_string());
}
for lm in &lower_mounts {
temp_mount_points.push(lm.display().to_string());
}
Ok(MultiLayerErofsResult {
mount_point: ext4.mount_point.clone(),
mount_point: target_mount_point,
processed_mount_points,
temp_mount_points,
verity_devices,
@@ -393,8 +426,9 @@ async fn track_temporary_mount_for_cleanup(
}
fn is_upper_storage(storage: &Storage) -> bool {
storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER)
|| (storage.fstype == EXT4_TYPE && storage.options.iter().any(|o| o == OPT_MULTI_LAYER))
storage.fstype == EXT4_TYPE
&& (storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER)
|| storage.options.iter().any(|o| o == OPT_MULTI_LAYER))
}
fn is_lower_storage(storage: &Storage) -> bool {
@@ -508,6 +542,7 @@ fn resolve_mkdir_path(
Ok(safe)
}
/// Wait for a block-backed layer device, then mount it at `layer_mount`.
async fn wait_and_mount_layer(
layer: &Storage,
layer_mount: &Path,
@@ -821,6 +856,7 @@ mod tests {
let mut s = Storage::default();
assert!(!is_upper_storage(&s));
s.fstype = EXT4_TYPE.to_string();
s.options.push(OPT_OVERLAY_UPPER.to_string());
assert!(is_upper_storage(&s));
@@ -830,6 +866,13 @@ mod tests {
..Default::default()
};
assert!(is_upper_storage(&s2));
let s3 = Storage {
fstype: "tmpfs".to_string(),
options: vec![OPT_OVERLAY_UPPER.to_string(), OPT_MULTI_LAYER.to_string()],
..Default::default()
};
assert!(!is_upper_storage(&s3));
}
#[test]

View File

@@ -2,10 +2,13 @@
//
// SPDX-License-Identifier: Apache-2.0
//
// Handle multi-layer EROFS rootfs:
// Mount[0]: ext4 rw layer -> virtio-blk device (writable)
// Mount[1]: erofs with device= -> virtio-blk via VMDK (read-only)
// Mount[2]: overlay (format/mkdir/overlay) -> host mount OR guest agent
// Handle multi-layer EROFS rootfs.
//
// The containerd erofs snapshotter sends the active snapshot as either:
// - ext4 rwlayer.img + erofs lower + overlay when host rw backing is enabled.
// - erofs lower + overlay when default_size="0"; the agent then uses a
// guest-memory upper directory under /run.
//
// The overlay mount may be handled by the guest agent if it contains "{{"
// templates in upperdir/workdir.
@@ -504,13 +507,14 @@ fn extract_block_device_info(
/// EROFS Multi-Layer Rootfs with overlay support
///
/// Handles the EROFS Multi-Layer where rootfs consists of:
/// - Mount[0]: ext4 rw layer (writable container layer) -> virtio-blk device
/// - Mount[1]: erofs layers (fsmeta + flattened layers) -> virtio-blk via VMDK
/// - Mount[2]: overlay (to combine ext4 upper + erofs lower)
/// - Optional ext4 rw disk -> virtio-blk when host rw backing exists.
/// - EROFS layers (fsmeta + flattened layers) -> virtio-blk via VMDK.
/// - Overlay metadata that combines the writable upper with the EROFS lower.
pub(crate) struct ErofsMultiLayerRootfs {
guest_path: String,
device_ids: Vec<String>,
// Writable layer storage (upper layer), typically ext4
// Writable layer storage (upper layer), typically ext4 and optional when
// the agent creates a /run-backed upper.
rwlayer_storage: Option<Storage>,
// Read-only EROFS layer storages (lower layers), one per partition in GPT mode
erofs_storages: Vec<Storage>,
@@ -553,7 +557,10 @@ impl ErofsMultiLayerRootfs {
// Check block device count limit
let expected_device_count = rootfs_mounts
.iter()
.filter(|m| matches!(m.fs_type.as_str(), RW_LAYER_ROOTFS_TYPE | EROFS_ROOTFS_TYPE))
.filter(|m| {
m.fs_type.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE)
|| m.fs_type.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE)
})
.count();
// TODO(Alex Lyn): fsmerge mode with single erofs mount and multiple device= options
@@ -900,13 +907,15 @@ impl ErofsMultiLayerRootfs {
return Err(anyhow!("no devices attached for multi-layer erofs rootfs"));
}
// Add mkdir directives to rwlayer storage options for guest agent
if let Some(ref mut rwlayer) = rwlayer_storage {
rwlayer.options.extend(
mkdir_dirs
.iter()
.map(|dir| format!("{}{}", X_KATA_MKDIR_PATH, dir)),
);
// Forward overlay mkdir hints on the EROFS Storage only. The guest agent scans
// every multi-layer storage for X-kata.mkdir.path; attaching here avoids splitting
// the same metadata across rwlayer vs erofs when an ext4 upper exists.
let mkdir_options = mkdir_dirs
.iter()
.map(|dir| format!("{}{}", X_KATA_MKDIR_PATH, dir))
.collect::<Vec<_>>();
if let Some(erofs) = erofs_storages.first_mut() {
erofs.options.extend(mkdir_options);
}
Ok(Self {
@@ -936,9 +945,9 @@ impl Rootfs for ErofsMultiLayerRootfs {
}
async fn get_storage(&self) -> Option<Vec<Storage>> {
// Return all storages for multi-layer EROFS (rw layer + erofs layers) to guest agent.
// Guest agent needs all of them to create overlay mount.
// In GPT mode, each partition has its own storage entry.
// Return all storages for multi-layer EROFS. The rw layer is optional;
// when absent, the agent creates a /run-backed upper dir. In GPT mode,
// each partition has its own EROFS storage entry.
let mut storages = Vec::new();
if let Some(rwlayer) = self.rwlayer_storage.clone() {
@@ -989,23 +998,102 @@ impl Rootfs for ErofsMultiLayerRootfs {
}
}
/// Report whether `fs_type` names one of the overlay mount flavours recognised
/// here: `overlay`, `format/overlay` or `format/mkdir/overlay`.
///
/// The comparison ignores ASCII case; any other string yields `false`.
fn overlay_like(fs_type: &str) -> bool {
    const OVERLAY_FS_TYPES: [&str; 3] = ["overlay", "format/overlay", "format/mkdir/overlay"];

    OVERLAY_FS_TYPES
        .iter()
        .any(|candidate| fs_type.eq_ignore_ascii_case(candidate))
}
/// Check if mounts represent a multi-layer EROFS rootfs.
///
/// Returns `true` when `rootfs_mounts` contains at least two entries:
/// an ext4 rw layer (upper) and an erofs layer (lower).
/// Matches what the containerd erofs snapshotter sends for an active snapshot:
/// an EROFS lower layer plus an overlay mount. With host rw backing enabled,
/// the mount list also includes an ext4 `rwlayer.img`; with `default_size="0"`
/// it does not, and the agent creates the writable upper under `/run`.
///
/// This is only the coarse dispatcher check; `ErofsMultiLayerRootfs::new`
/// parses the optional rwlayer and overlay metadata.
pub fn is_erofs_multi_layer(rootfs_mounts: &[Mount]) -> bool {
if rootfs_mounts.len() < 2 {
return false;
}
let has_rwlayer = rootfs_mounts.iter().any(|m| {
m.fs_type.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE) && m.options.iter().any(|o| o == "rw")
});
let has_erofs = rootfs_mounts
.iter()
.any(|m| m.fs_type.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE));
// Must have rwlayer + erofs (multi-layer or single-layer)
has_rwlayer && has_erofs
if !has_erofs {
return false;
}
rootfs_mounts.iter().any(|m| overlay_like(&m.fs_type))
}
#[cfg(test)]
mod tests {
    use super::{is_erofs_multi_layer, EROFS_ROOTFS_TYPE, RW_LAYER_ROOTFS_TYPE};
    use kata_types::mount::Mount;
    use std::path::PathBuf;

    /// Build a `Mount` destined for `/` with the given filesystem type and
    /// options; every other field keeps its default value.
    fn mount(fs_type: &str, options: &[&str]) -> Mount {
        let options = options.iter().copied().map(String::from).collect();
        Mount {
            fs_type: String::from(fs_type),
            options,
            destination: PathBuf::from("/"),
            ..Default::default()
        }
    }

    /// Fewer than two mounts can never form a multi-layer rootfs.
    #[test]
    fn is_erofs_multi_layer_rejects_short_list() {
        let no_mounts: [Mount; 0] = [];
        assert!(!is_erofs_multi_layer(&no_mounts));

        let lone_erofs = [mount(EROFS_ROOTFS_TYPE, &[])];
        assert!(!is_erofs_multi_layer(&lone_erofs));
    }

    /// An rw layer plus an overlay is not enough without an EROFS lower.
    #[test]
    fn is_erofs_multi_layer_requires_erofs() {
        let without_erofs = [mount(RW_LAYER_ROOTFS_TYPE, &["rw"]), mount("overlay", &[])];
        assert!(!is_erofs_multi_layer(&without_erofs));
    }

    /// Explicit-upper layout: rw layer, EROFS lower and overlay.
    #[test]
    fn is_erofs_multi_layer_ext4_rw_erofs_and_overlay() {
        let explicit_upper = [
            mount(RW_LAYER_ROOTFS_TYPE, &["rw"]),
            mount(EROFS_ROOTFS_TYPE, &[]),
            mount("overlay", &[]),
        ];
        assert!(is_erofs_multi_layer(&explicit_upper));
    }

    /// Implicit-upper layout: EROFS lower plus any accepted overlay flavour.
    #[test]
    fn is_erofs_multi_layer_implicit_upper_erofs_and_overlay_variants() {
        const OVERLAY_TYPES: [&str; 3] = ["overlay", "format/overlay", "format/mkdir/overlay"];

        for overlay_type in OVERLAY_TYPES.iter().copied() {
            let implicit_upper = [mount(EROFS_ROOTFS_TYPE, &[]), mount(overlay_type, &[])];
            assert!(
                is_erofs_multi_layer(&implicit_upper),
                "expected multi-layer for overlay type {}",
                overlay_type
            );
        }
    }

    /// EROFS paired with a non-overlay mount is not multi-layer.
    #[test]
    fn is_erofs_multi_layer_erofs_without_overlay_or_rw_is_false() {
        let no_overlay = [mount(EROFS_ROOTFS_TYPE, &[]), mount("btrfs", &[])];
        assert!(!is_erofs_multi_layer(&no_overlay));
    }

    /// The dispatcher only requires EROFS + overlay. Detailed rwlayer
    /// interpretation is handled by ErofsMultiLayerRootfs::new.
    #[test]
    fn is_erofs_multi_layer_does_not_validate_optional_rwlayer_options() {
        let read_only_rwlayer = [
            mount(RW_LAYER_ROOTFS_TYPE, &["ro"]),
            mount(EROFS_ROOTFS_TYPE, &[]),
            mount("overlay", &[]),
        ];
        assert!(is_erofs_multi_layer(&read_only_rwlayer));
    }
}

View File

@@ -43,6 +43,7 @@ K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-small}"
TEST_CLUSTER_NAMESPACE="${TEST_CLUSTER_NAMESPACE:-}"
CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-containerd}"
SNAPSHOTTER="${SNAPSHOTTER:-}"
EROFS_SNAPSHOTTER_MODE="${EROFS_SNAPSHOTTER_MODE:-}"
# Wait for the Kubernetes API to recover after kata-deploy uninstall, then
# retry the uninstall to purge any stale helm release state. On k3s/rke2,
@@ -814,6 +815,31 @@ function helm_helper() {
done
fi
if [[ -n "${EROFS_SNAPSHOTTER_MODE}" ]]; then
if [[ "${SNAPSHOTTER}" != "erofs" ]]; then
die "EROFS_SNAPSHOTTER_MODE is only supported with SNAPSHOTTER=erofs"
fi
local erofs_default_size
case "${EROFS_SNAPSHOTTER_MODE}" in
disk)
erofs_default_size="10G"
;;
memory)
erofs_default_size="0"
;;
*)
die "Unsupported EROFS_SNAPSHOTTER_MODE: ${EROFS_SNAPSHOTTER_MODE}"
;;
esac
HELM_CONTAINERD_USER_DROP_IN="[plugins.'io.containerd.snapshotter.v1.erofs']"$'\n'
HELM_CONTAINERD_USER_DROP_IN+=" default_size = \"${erofs_default_size}\""
HELM_CONTAINERD_USER_DROP_IN="${HELM_CONTAINERD_USER_DROP_IN}" \
yq -i '.containerd.userDropIn = strenv(HELM_CONTAINERD_USER_DROP_IN)' "${values_yaml}"
fi
if [[ -z "${HELM_SHIMS}" ]]; then
die "A list of shims is expected but none was provided"
fi

View File

@@ -43,7 +43,7 @@ setup() {
kubectl create -f "${yaml_file}"
local -r command="kubectl describe pod/${pod_name} | grep -E \
'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT|unsupported rootfs mounts count 2'"
'the file sleep was not found|\[CDH\] \[ERROR\]: Image Pull error|ENOENT|unsupported rootfs mounts count 2|unsupported rootfs Mount'"
info "Waiting ${wait_time} seconds for: ${command}"
waitForProcess "${wait_time}" "${sleep_time}" "${command}" >/dev/null 2>/dev/null
}

View File

@@ -159,6 +159,7 @@ pub struct Config {
pub containerd_conf_file: String,
pub containerd_conf_file_backup: String,
pub containerd_drop_in_conf_file: String,
pub containerd_user_drop_in_source_file: Option<String>,
pub daemonset_name: String,
pub custom_runtimes_enabled: bool,
pub custom_runtimes: Vec<CustomRuntime>,
@@ -265,6 +266,10 @@ impl Config {
let containerd_conf_file_backup = format!("{containerd_conf_file}.bak");
let containerd_drop_in_conf_file =
format!("{dest_dir}/containerd/config.d/kata-deploy.toml");
let containerd_user_drop_in_source_file = env::var("CONTAINERD_USER_DROP_IN_SOURCE_FILE")
.ok()
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty());
let helm_post_delete_hook =
env::var("HELM_POST_DELETE_HOOK").unwrap_or_else(|_| "false".to_string()) == "true";
@@ -314,6 +319,7 @@ impl Config {
containerd_conf_file,
containerd_conf_file_backup,
containerd_drop_in_conf_file,
containerd_user_drop_in_source_file,
daemonset_name,
custom_runtimes_enabled,
custom_runtimes,
@@ -522,6 +528,10 @@ impl Config {
self.experimental_force_guest_pull_for_arch.join(",")
);
info!("* CONTAINERD_CONF_FILE: {}", self.containerd_conf_file);
info!(
"* CONTAINERD_USER_DROP_IN_SOURCE_FILE: {:?}",
self.containerd_user_drop_in_source_file
);
info!(
"* CUSTOM_RUNTIMES_ENABLED: {}",
self.custom_runtimes_enabled

View File

@@ -143,6 +143,81 @@ fn get_containerd_output_path(paths: &ContainerdPaths) -> PathBuf {
}
}
/// Compute where the user-supplied containerd drop-in is written and how it is imported.
///
/// Returns `(host_path, import_path)`:
/// - `host_path` is the sibling of `paths.drop_in_file` named `zz-kata-deploy-user.toml`.
///   A drop-in located under `/etc/containerd/` is used as-is; any other location is
///   re-rooted under `/host`.
/// - `import_path` is that same sibling file derived from the unprefixed
///   `paths.drop_in_file`, rendered as a string.
///
/// # Errors
///
/// Fails when `paths.use_drop_in` is false, or when either path has no parent directory.
fn get_user_containerd_drop_in_output_path(paths: &ContainerdPaths) -> Result<(PathBuf, String)> {
    const USER_FILE_NAME: &str = "zz-kata-deploy-user.toml";

    if !paths.use_drop_in {
        return Err(anyhow::anyhow!(
            "Containerd user drop-in requires drop-in support, but runtime config is in non-drop-in mode"
        ));
    }

    // The import path never carries the `/host` prefix, whichever branch is taken below.
    let base_import_path = paths.drop_in_file.clone();
    let base_drop_in = if paths.drop_in_file.starts_with("/etc/containerd/") {
        Path::new(&paths.drop_in_file).to_path_buf()
    } else {
        Path::new("/host").join(paths.drop_in_file.trim_start_matches('/'))
    };

    let host_path = match base_drop_in.parent() {
        Some(dir) => dir.join(USER_FILE_NAME),
        None => {
            return Err(anyhow::anyhow!(
                "Failed to resolve parent directory for {:?}",
                base_drop_in
            ))
        }
    };

    let import_path = match Path::new(&base_import_path).parent() {
        Some(dir) => dir.join(USER_FILE_NAME).to_string_lossy().into_owned(),
        None => {
            return Err(anyhow::anyhow!(
                "Failed to resolve import parent for {base_import_path}"
            ))
        }
    };

    Ok((host_path, import_path))
}
/// Install the optional user-provided containerd drop-in.
///
/// Does nothing when `config.containerd_user_drop_in_source_file` is unset. Otherwise the
/// source file is copied to the location chosen by
/// `get_user_containerd_drop_in_output_path`, creating the destination directory first,
/// and, when `paths.imports_file` is set, the quoted import path is appended to that
/// file's `.imports` array.
///
/// # Errors
///
/// Fails when the configured source file does not exist, when the output path cannot be
/// resolved, or when creating the directory, copying the file, or updating the imports
/// array fails.
fn configure_user_containerd_drop_in(config: &Config, paths: &ContainerdPaths) -> Result<()> {
    let source_file = match config.containerd_user_drop_in_source_file.as_ref() {
        Some(file) => file,
        None => return Ok(()),
    };

    let source_path = Path::new(source_file);
    if !source_path.exists() {
        anyhow::bail!(
            "Configured CONTAINERD_USER_DROP_IN_SOURCE_FILE does not exist: {}",
            source_file
        );
    }

    let (user_drop_in_path, user_drop_in_import_path) =
        get_user_containerd_drop_in_output_path(paths)?;

    if let Some(parent) = user_drop_in_path.parent() {
        fs::create_dir_all(parent).with_context(|| {
            format!("Failed to create user containerd drop-in directory: {parent:?}")
        })?;
    }

    fs::copy(source_path, &user_drop_in_path).with_context(|| {
        format!(
            "Failed to copy user containerd drop-in from {:?} to {:?}",
            source_path, user_drop_in_path
        )
    })?;

    // Without an imports file there is nothing to register; the copied file is left in place.
    if let Some(imports_file) = &paths.imports_file {
        let quoted_import = format!("\"{}\"", user_drop_in_import_path);
        toml_utils::append_to_toml_array(Path::new(imports_file), ".imports", &quoted_import)?;
    }

    Ok(())
}
fn write_containerd_runtime_config(
config_file: &Path,
pluginid: &str,
@@ -445,6 +520,8 @@ pub async fn configure_containerd(config: &Config, runtime: &str) -> Result<()>
}
}
configure_user_containerd_drop_in(config, &paths)?;
log::info!("Successfully configured all containerd runtimes");
Ok(())
}
@@ -454,6 +531,21 @@ pub async fn cleanup_containerd(config: &Config, runtime: &str) -> Result<()> {
let paths = config.get_containerd_paths(runtime).await?;
if paths.use_drop_in {
if config.containerd_user_drop_in_source_file.is_some() {
let (user_drop_in_path, user_drop_in_import_path) =
get_user_containerd_drop_in_output_path(&paths)?;
if let Some(imports_file) = &paths.imports_file {
toml_utils::remove_from_toml_array(
Path::new(imports_file),
".imports",
&format!("\"{}\"", user_drop_in_import_path),
)?;
}
if user_drop_in_path.exists() {
fs::remove_file(&user_drop_in_path)?;
}
}
// Remove drop-in from imports array (if we added it; K3s/RKE2 have imports_file = None)
if let Some(imports_file) = &paths.imports_file {
toml_utils::remove_from_toml_array(

View File

@@ -0,0 +1,16 @@
{{- if .Values.containerd.userDropIn | trim }}
apiVersion: v1
kind: ConfigMap
metadata:
{{- if .Values.env.multiInstallSuffix }}
name: {{ .Chart.Name }}-containerd-user-dropin-{{ .Values.env.multiInstallSuffix }}
{{- else }}
name: {{ .Chart.Name }}-containerd-user-dropin
{{- end }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "kata-deploy.labels" . | nindent 4 }}
data:
containerd-user-dropin.toml: |
{{ .Values.containerd.userDropIn | indent 4 }}
{{- end }}

View File

@@ -298,6 +298,10 @@ spec:
- name: CONTAINERD_CONFIG_FILE_NAME
value: {{ .Values.containerd.configFileName | trim | quote }}
{{- end }}
{{- if .Values.containerd.userDropIn | trim }}
- name: CONTAINERD_USER_DROP_IN_SOURCE_FILE
value: "/custom-containerd-config/containerd-user-dropin.toml"
{{- end }}
{{- with .Values.env.hostOS }}
- name: HOST_OS
value: {{ . | quote }}
@@ -356,6 +360,11 @@ spec:
mountPath: /etc/containerd/
- name: host
mountPath: /host/
{{- if .Values.containerd.userDropIn | trim }}
- name: custom-containerd-config
mountPath: /custom-containerd-config/
readOnly: true
{{- end }}
{{- if and .Values.customRuntimes.enabled .Values.customRuntimes.runtimes }}
- name: custom-configs
mountPath: /custom-configs/
@@ -371,6 +380,15 @@ spec:
- name: host
hostPath:
path: /
{{- if .Values.containerd.userDropIn | trim }}
- name: custom-containerd-config
configMap:
{{- if .Values.env.multiInstallSuffix }}
name: {{ .Chart.Name }}-containerd-user-dropin-{{ .Values.env.multiInstallSuffix }}
{{- else }}
name: {{ .Chart.Name }}-containerd-user-dropin
{{- end }}
{{- end }}
{{- if and .Values.customRuntimes.enabled .Values.customRuntimes.runtimes }}
- name: custom-configs
configMap:

View File

@@ -25,6 +25,14 @@ containerd:
# or auto-detects based on the runtime (k0s, microk8s, k3s/rke2).
# Example: "my-config.toml"
configFileName: ""
# Optional user-provided containerd drop-in TOML content.
# This is written as an extra drop-in loaded after kata-deploy's generated drop-in,
# so user keys can override kata-deploy defaults.
# Example: set erofs snapshotter default size to zero:
# userDropIn: |
# [plugins.'io.containerd.snapshotter.v1.erofs']
# default_size = "0"
userDropIn: ""
# Node selector and tolerations to control which nodes the kata-deploy daemonset runs on
# Examples: