mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-02 07:02:16 +00:00
agent: support run-backed EROFS upper
Support multi-layer EROFS storage without an explicit ext4 upper layer. When runtime-rs sends only EROFS lower storage and overlay metadata, create the overlay upper/work directories under the container bundle in /run/kata-containers. Keep the explicit ext4 rwlayer path for disk-backed snapshots, and only track real temporary mount points for cleanup. The implicit /run-backed upper is bundle-scoped state and is removed with the container bundle.

Assisted-by: OpenAI Codex <codex@openai.com>
Signed-off-by: Manuel Huber <manuelh@nvidia.com>
This commit is contained in:
@@ -6,7 +6,8 @@
|
||||
//! Multi-layer EROFS storage handler
|
||||
//!
|
||||
//! This handler implements the guest-side processing of multi-layer EROFS rootfs:
|
||||
//! - Storage with X-kata.overlay-upper: ext4 rw layer (upperdir)
|
||||
//! - Optional Storage with X-kata.overlay-upper: ext4 rw layer (upperdir)
|
||||
//! - If no upper storage is provided, a directory under /run/kata-containers is used
|
||||
//! - Storage with X-kata.overlay-lower: erofs layers (lowerdir)
|
||||
//! - Creates overlay to combine them
|
||||
//! - Supports X-kata.mkdir.path options to create directories in upper layer before overlay mount
|
||||
@@ -23,6 +24,7 @@ use crate::device::block_device_handler::get_virtio_blk_pci_device_name;
|
||||
use crate::device::scsi_device_handler::get_scsi_device_name;
|
||||
use crate::linux_abi::pcipath_from_dev_tree_path;
|
||||
use crate::mount::baremount;
|
||||
use crate::rpc::CONTAINER_BASE;
|
||||
use crate::sandbox::Sandbox;
|
||||
use crate::storage::{StorageContext, StorageHandler};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
@@ -36,7 +38,7 @@ use tokio::sync::Mutex;
|
||||
|
||||
/// EROFS Type
|
||||
const EROFS_TYPE: &str = "erofs";
|
||||
/// ext4 Type
|
||||
/// ext4 Type (upper virtio disk based rw layer)
|
||||
const EXT4_TYPE: &str = "ext4";
|
||||
/// Overlay Type
|
||||
const OVERLAY_TYPE: &str = "overlay";
|
||||
@@ -59,8 +61,8 @@ pub struct MultiLayerErofsHandler {}
|
||||
pub struct MultiLayerErofsResult {
|
||||
pub mount_point: String,
|
||||
pub processed_mount_points: Vec<String>,
|
||||
/// Temporary mount points (upper, lower-0, lower-1, …) that back the
|
||||
/// overlay. These must be tracked so they are unmounted *after* the
|
||||
/// Temporary mount points (explicit upper, lower-0, lower-1, …) that back
|
||||
/// the overlay. These must be tracked so they are unmounted *after* the
|
||||
/// overlay target during container teardown.
|
||||
pub temp_mount_points: Vec<String>,
|
||||
/// dm-verity device paths that need to be destroyed during cleanup
|
||||
@@ -131,26 +133,33 @@ pub async fn handle_multi_layer_erofs_group(
|
||||
return Err(anyhow!("no multi-layer storages found"));
|
||||
}
|
||||
|
||||
let mut ext4_storage: Option<&Storage> = None;
|
||||
let mut upper_storage: Option<&Storage> = None;
|
||||
let mut erofs_storages: Vec<&Storage> = Vec::new();
|
||||
let mut mkdir_dirs: Vec<MkdirDirective> = Vec::new();
|
||||
let mut has_gpt_partition: bool = false;
|
||||
|
||||
for storage in &multi_layer_storages {
|
||||
// Collect all X-kata.mkdir.path directives from this multi-layer EROFS group.
|
||||
for opt in &storage.options {
|
||||
if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) {
|
||||
mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?);
|
||||
}
|
||||
}
|
||||
|
||||
if storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER) && storage.fstype != EXT4_TYPE {
|
||||
return Err(anyhow!(
|
||||
"multi-layer erofs explicit upper layer must be ext4, got '{}'; omit the upper storage for the implicit /run-backed upper",
|
||||
storage.fstype
|
||||
));
|
||||
}
|
||||
|
||||
if is_upper_storage(storage) {
|
||||
if ext4_storage.is_some() {
|
||||
if upper_storage.is_some() {
|
||||
return Err(anyhow!(
|
||||
"multi-layer erofs currently supports exactly one ext4 upper layer"
|
||||
"multi-layer erofs currently supports exactly one explicit ext4 upper layer"
|
||||
));
|
||||
}
|
||||
ext4_storage = Some(*storage);
|
||||
|
||||
// Extract mkdir directories from X-kata.mkdir.path options
|
||||
for opt in &storage.options {
|
||||
if let Some(mkdir_spec) = opt.strip_prefix(OPT_MKDIR_PATH) {
|
||||
mkdir_dirs.push(parse_mkdir_directive(mkdir_spec)?);
|
||||
}
|
||||
}
|
||||
upper_storage = Some(*storage);
|
||||
} else if is_lower_storage(storage) {
|
||||
// Each GPT partition is provided as a separate storage entry by the host
|
||||
if !has_gpt_partition && is_gpt_partitioned(storage) {
|
||||
@@ -176,36 +185,57 @@ pub async fn handle_multi_layer_erofs_group(
|
||||
erofs_storages.sort_by_key(|storage| get_partition_number(storage).unwrap_or(u32::MAX));
|
||||
}
|
||||
|
||||
let ext4 = ext4_storage
|
||||
.ok_or_else(|| anyhow!("multi-layer erofs missing ext4 upper layer storage"))?;
|
||||
// With an explicit upper layer, the upper Storage carries the final overlay
|
||||
// target. With an implicit /run-backed upper, the runtime puts that target
|
||||
// on the first EROFS lower Storage.
|
||||
let target_mount_point = upper_storage
|
||||
.map(|upper| upper.mount_point.clone())
|
||||
.unwrap_or_else(|| erofs_storages[0].mount_point.clone());
|
||||
// Explicit uppers have a device source, while the implicit
|
||||
// layout uses a directory under /run rather than a block device.
|
||||
let upper_source = upper_storage
|
||||
.map(|upper| upper.source.as_str())
|
||||
.unwrap_or("run-backed directory");
|
||||
|
||||
info!(
|
||||
logger,
|
||||
"Handling multi-layer erofs group";
|
||||
"ext4-device" => &ext4.source,
|
||||
"upper-source" => upper_source,
|
||||
"erofs-devices" => erofs_storages
|
||||
.iter()
|
||||
.map(|s| s.source.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(","),
|
||||
"mount-point" => &ext4.mount_point,
|
||||
"mount-point" => &target_mount_point,
|
||||
"mkdir-dirs-count" => mkdir_dirs.len(),
|
||||
);
|
||||
|
||||
// Create temporary mount points for upper and lower layers
|
||||
// Create temporary backing paths for upper and lower layers
|
||||
let cid_str = cid.as_deref().unwrap_or("sandbox");
|
||||
// Validate container ID to prevent path traversal via crafted cid values
|
||||
validate_container_id(cid_str)?;
|
||||
let temp_base = PathBuf::from(format!("/run/kata-containers/{}/multi-layer", cid_str));
|
||||
let container_base =
|
||||
scoped_join(CONTAINER_BASE, cid_str).context("failed to build container temporary path")?;
|
||||
fs::create_dir_all(&container_base).context("failed to create container temporary path")?;
|
||||
let temp_base =
|
||||
scoped_join(&container_base, "multi-layer").context("failed to build multi-layer path")?;
|
||||
fs::create_dir_all(&temp_base).context("failed to create temp mount base")?;
|
||||
|
||||
// Validate mount point to prevent path traversal via crafted mount_point values
|
||||
validate_mount_point(&ext4.mount_point)?;
|
||||
validate_mount_point(&target_mount_point)?;
|
||||
|
||||
let upper_mount = temp_base.join("upper");
|
||||
fs::create_dir_all(&upper_mount).context("failed to create upper mount dir")?;
|
||||
|
||||
wait_and_mount_layer(ext4, &upper_mount, sandbox, &logger, None).await?;
|
||||
if let Some(upper) = upper_storage {
|
||||
wait_and_mount_layer(upper, &upper_mount, sandbox, &logger, None).await?;
|
||||
} else {
|
||||
info!(
|
||||
logger,
|
||||
"Using /run-backed upper directory";
|
||||
"mount-point" => upper_mount.display(),
|
||||
);
|
||||
}
|
||||
|
||||
for mkdir_dir in &mkdir_dirs {
|
||||
// As {{ mount 1 }} refers to the first lower layer, which is not available until we mount it.
|
||||
@@ -302,12 +332,12 @@ pub async fn handle_multi_layer_erofs_group(
|
||||
"upperdir" => upperdir.display(),
|
||||
"lowerdir" => &lowerdir,
|
||||
"workdir" => workdir.display(),
|
||||
"target" => &ext4.mount_point,
|
||||
"target" => &target_mount_point,
|
||||
);
|
||||
|
||||
create_mount_destination(
|
||||
Path::new(OVERLAY_TYPE),
|
||||
Path::new(&ext4.mount_point),
|
||||
Path::new(&target_mount_point),
|
||||
"",
|
||||
OVERLAY_TYPE,
|
||||
)
|
||||
@@ -315,7 +345,7 @@ pub async fn handle_multi_layer_erofs_group(
|
||||
|
||||
let overlay_mount = kata_types::mount::Mount {
|
||||
source: OVERLAY_TYPE.to_string(),
|
||||
destination: PathBuf::from(&ext4.mount_point),
|
||||
destination: PathBuf::from(&target_mount_point),
|
||||
fs_type: OVERLAY_TYPE.to_string(),
|
||||
options: vec![
|
||||
format!("upperdir={}", upperdir.display()),
|
||||
@@ -326,13 +356,13 @@ pub async fn handle_multi_layer_erofs_group(
|
||||
};
|
||||
|
||||
overlay_mount
|
||||
.mount(Path::new(&ext4.mount_point))
|
||||
.mount(Path::new(&target_mount_point))
|
||||
.context("failed to mount overlay")?;
|
||||
|
||||
info!(
|
||||
logger,
|
||||
"Multi-layer EROFS overlay mounted successfully";
|
||||
"mount-point" => &ext4.mount_point,
|
||||
"mount-point" => &target_mount_point,
|
||||
);
|
||||
|
||||
// Collect all unique mount points to maintain a clean resource state.
|
||||
@@ -352,16 +382,19 @@ pub async fn handle_multi_layer_erofs_group(
|
||||
acc
|
||||
});
|
||||
|
||||
// Collect the temporary mount points (upper first, then lowers) so the
|
||||
// caller can register them in container_mounts for proper cleanup.
|
||||
let mut temp_mount_points = Vec::with_capacity(1 + lower_mounts.len());
|
||||
temp_mount_points.push(upper_mount.display().to_string());
|
||||
// Collect temporary backing mounts. The implicit /run-backed upper is just
|
||||
// a directory under the container bundle and is removed with that bundle.
|
||||
let mut temp_mount_points =
|
||||
Vec::with_capacity(usize::from(upper_storage.is_some()) + lower_mounts.len());
|
||||
if upper_storage.is_some() {
|
||||
temp_mount_points.push(upper_mount.display().to_string());
|
||||
}
|
||||
for lm in &lower_mounts {
|
||||
temp_mount_points.push(lm.display().to_string());
|
||||
}
|
||||
|
||||
Ok(MultiLayerErofsResult {
|
||||
mount_point: ext4.mount_point.clone(),
|
||||
mount_point: target_mount_point,
|
||||
processed_mount_points,
|
||||
temp_mount_points,
|
||||
verity_devices,
|
||||
@@ -393,8 +426,9 @@ async fn track_temporary_mount_for_cleanup(
|
||||
}
|
||||
|
||||
fn is_upper_storage(storage: &Storage) -> bool {
|
||||
storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER)
|
||||
|| (storage.fstype == EXT4_TYPE && storage.options.iter().any(|o| o == OPT_MULTI_LAYER))
|
||||
storage.fstype == EXT4_TYPE
|
||||
&& (storage.options.iter().any(|o| o == OPT_OVERLAY_UPPER)
|
||||
|| storage.options.iter().any(|o| o == OPT_MULTI_LAYER))
|
||||
}
|
||||
|
||||
fn is_lower_storage(storage: &Storage) -> bool {
|
||||
@@ -508,6 +542,7 @@ fn resolve_mkdir_path(
|
||||
Ok(safe)
|
||||
}
|
||||
|
||||
/// Wait for a block-backed layer device, then mount it at `layer_mount`.
|
||||
async fn wait_and_mount_layer(
|
||||
layer: &Storage,
|
||||
layer_mount: &Path,
|
||||
@@ -821,6 +856,7 @@ mod tests {
|
||||
let mut s = Storage::default();
|
||||
assert!(!is_upper_storage(&s));
|
||||
|
||||
s.fstype = EXT4_TYPE.to_string();
|
||||
s.options.push(OPT_OVERLAY_UPPER.to_string());
|
||||
assert!(is_upper_storage(&s));
|
||||
|
||||
@@ -830,6 +866,13 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
assert!(is_upper_storage(&s2));
|
||||
|
||||
let s3 = Storage {
|
||||
fstype: "tmpfs".to_string(),
|
||||
options: vec![OPT_OVERLAY_UPPER.to_string(), OPT_MULTI_LAYER.to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
assert!(!is_upper_storage(&s3));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user