mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-04-09 21:42:14 +00:00
runtime-rs: Add support for erofs rootfs with multi-layer
Add erofs_rootfs.rs, which implements ErofsMultiLayerRootfs for multi-layer EROFS rootfs, including VMDK descriptor generation. It is the core implementation of EROFS rootfs support within the runtime. Signed-off-by: Alex Lyn <alex.lyn@antgroup.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
740e724e82
commit
620849fb34
640
src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs
Normal file
640
src/runtime-rs/crates/resource/src/rootfs/erofs_rootfs.rs
Normal file
@@ -0,0 +1,640 @@
|
||||
// Copyright (c) 2026 Ant Group
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Handle multi-layer EROFS rootfs:
|
||||
// Mount[0]: ext4 rw layer -> virtio-blk device (writable)
|
||||
// Mount[1]: erofs with device= -> virtio-blk via VMDK (read-only)
|
||||
// Mount[2]: overlay (format/mkdir/overlay) -> host mount OR guest agent
|
||||
// The overlay mount may be handled by the guest agent if it contains "{{"
|
||||
// templates in upperdir/workdir.
|
||||
|
||||
use super::{Rootfs, ROOTFS};
|
||||
use crate::share_fs::{do_get_guest_path, do_get_host_path};
|
||||
use agent::Storage;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::{
|
||||
device::{
|
||||
device_manager::{do_handle_device, get_block_device_info, DeviceManager},
|
||||
DeviceConfig, DeviceType,
|
||||
},
|
||||
BlockConfig, BlockDeviceAio, BlockDeviceFormat,
|
||||
};
|
||||
use kata_types::config::hypervisor::{
|
||||
VIRTIO_BLK_CCW, VIRTIO_BLK_MMIO, VIRTIO_BLK_PCI, VIRTIO_PMEM, VIRTIO_SCSI,
|
||||
};
|
||||
use kata_types::mount::Mount;
|
||||
use oci_spec::runtime as oci;
|
||||
use std::fs;
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
/// Filesystem type string that identifies an EROFS rootfs mount.
pub(crate) const EROFS_ROOTFS_TYPE: &str = "erofs";

/// Filesystem type string of the writable upper layer used with multi-layer EROFS.
/// Currently ext4; other filesystem types may be supported in the future.
pub(crate) const RW_LAYER_ROOTFS_TYPE: &str = "ext4";

/// File name of the generated VMDK descriptor that merges several EROFS images.
const EROFS_MERGED_VMDK: &str = "merged_fs.vmdk";

/// Upper bound on the number of virtio-blk devices attached for one rootfs.
const MAX_VIRTIO_BLK_DEVICES: usize = 10;

/// Largest size of a single extent line, in 512-byte sectors (2 GiB / 512).
const MAX_2GB_EXTENT_SECTORS: u64 = 2 * 1024 * 1024 * 1024 / 512;

/// `ddb.geometry.sectors` value written to the VMDK descriptor.
const SECTORS_PER_TRACK: u64 = 63;

/// `ddb.geometry.heads` value written to the VMDK descriptor.
const NUMBER_HEADS: u64 = 16;

/// `createType` written to the VMDK descriptor (flat extents of at most 2 GiB).
const VMDK_SUBFORMAT: &str = "twoGbMaxExtentFlat";

/// `ddb.adapterType` written to the VMDK descriptor.
const VMDK_ADAPTER_TYPE: &str = "ide";

/// `ddb.virtualHWVersion` written to the VMDK descriptor.
const VMDK_HW_VERSION: &str = "4";

/// Base directory under which per-sandbox/per-container VMDK descriptors are created.
const DEFAULT_KATA_GUEST_ROOT_SHARED_FS: &str = "/run/kata-containers/";

/// Prefix of the mkdir option found on the overlay mount (`X-containerd.mkdir.path=<spec>`).
const X_CONTAINERD_MKDIR_PATH: &str = "X-containerd.mkdir.path=";

/// Prefix of the mkdir option forwarded to the guest agent (`X-kata.mkdir.path=<spec>`).
const X_KATA_MKDIR_PATH: &str = "X-kata.mkdir.path=";
|
||||
|
||||
/// Generate merged VMDK file from multiple EROFS devices
|
||||
///
|
||||
/// Creates a VMDK descriptor that combines multiple EROFS images into a single
|
||||
/// virtual block device (flatten device). For a single device, the EROFS image
|
||||
/// is used directly without a VMDK wrapper.
|
||||
///
|
||||
/// And `erofs_devices` are for host paths to EROFS image files (from `source` and `device=` options)
|
||||
async fn generate_merged_erofs_vmdk(
|
||||
sid: &str,
|
||||
cid: &str,
|
||||
erofs_devices: &[String],
|
||||
) -> Result<(String, BlockDeviceFormat)> {
|
||||
if erofs_devices.is_empty() {
|
||||
return Err(anyhow!("no EROFS devices provided"));
|
||||
}
|
||||
|
||||
// Validate all device paths exist and are regular files before proceeding.
|
||||
for dev_path in erofs_devices {
|
||||
let metadata = fs::metadata(dev_path)
|
||||
.context(format!("EROFS device path not accessible: {}", dev_path))?;
|
||||
if !metadata.is_file() {
|
||||
return Err(anyhow!(
|
||||
"EROFS device path is not a regular file: {}",
|
||||
dev_path
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// For single device, use it directly with Raw format (no need for VMDK descriptor)
|
||||
if erofs_devices.len() == 1 {
|
||||
info!(
|
||||
sl!(),
|
||||
"single EROFS device, using directly with Raw format: {}", erofs_devices[0]
|
||||
);
|
||||
return Ok((erofs_devices[0].clone(), BlockDeviceFormat::Raw));
|
||||
}
|
||||
|
||||
// For multiple devices, create VMDK descriptor
|
||||
let sandbox_dir = PathBuf::from(DEFAULT_KATA_GUEST_ROOT_SHARED_FS).join(sid);
|
||||
let container_dir = sandbox_dir.join(cid);
|
||||
fs::create_dir_all(&container_dir).context(format!(
|
||||
"failed to create container directory: {}",
|
||||
container_dir.display()
|
||||
))?;
|
||||
|
||||
let vmdk_path = container_dir.join(EROFS_MERGED_VMDK);
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"creating VMDK descriptor for {} EROFS devices: {}",
|
||||
erofs_devices.len(),
|
||||
vmdk_path.display()
|
||||
);
|
||||
|
||||
// create_vmdk_descriptor uses atomic write (temp + rename) internally,
|
||||
// so a failure will not leave a corrupt descriptor file.
|
||||
create_vmdk_descriptor(&vmdk_path, erofs_devices)
|
||||
.context("failed to create VMDK descriptor")?;
|
||||
|
||||
Ok((vmdk_path.display().to_string(), BlockDeviceFormat::Vmdk))
|
||||
}
|
||||
|
||||
/// Create VMDK descriptor for multiple EROFS extents (flatten device)
|
||||
///
|
||||
/// Generates a VMDK descriptor file (twoGbMaxExtentFlat format) that references
|
||||
/// multiple EROFS images as flat extents, allowing them to be treated as a single
|
||||
/// contiguous block device in the VM.
|
||||
fn create_vmdk_descriptor(vmdk_path: &Path, erofs_paths: &[String]) -> Result<()> {
|
||||
if erofs_paths.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"empty EROFS path list, cannot create VMDK descriptor"
|
||||
));
|
||||
}
|
||||
|
||||
// collect extent information without writing anything.
|
||||
struct ExtentInfo {
|
||||
path: String,
|
||||
total_sectors: u64,
|
||||
}
|
||||
|
||||
let mut extents: Vec<ExtentInfo> = Vec::with_capacity(erofs_paths.len());
|
||||
let mut total_sectors: u64 = 0;
|
||||
|
||||
for erofs_path in erofs_paths {
|
||||
let metadata = fs::metadata(erofs_path)
|
||||
.context(format!("failed to stat EROFS file: {}", erofs_path))?;
|
||||
|
||||
let file_size = metadata.len();
|
||||
if file_size == 0 {
|
||||
warn!(sl!(), "EROFS file {} is zero-length, skipping", erofs_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
// round up to whole sectors to avoid losing tail bytes on non-aligned files.
|
||||
// VMDK extents are measured in 512-byte sectors; a file that is not sector-aligned
|
||||
// still needs the last partial sector to be addressable by the VM.
|
||||
let sectors = file_size.div_ceil(512);
|
||||
|
||||
if file_size % 512 != 0 {
|
||||
warn!(
|
||||
sl!(),
|
||||
"EROFS file {} size ({} bytes) is not 512-byte aligned, \
|
||||
rounding up to {} sectors ({} bytes addressable)",
|
||||
erofs_path,
|
||||
file_size,
|
||||
sectors,
|
||||
sectors * 512
|
||||
);
|
||||
}
|
||||
|
||||
total_sectors = total_sectors.checked_add(sectors).ok_or_else(|| {
|
||||
anyhow!(
|
||||
"total sector count overflow when adding {} ({} sectors)",
|
||||
erofs_path,
|
||||
sectors
|
||||
)
|
||||
})?;
|
||||
|
||||
extents.push(ExtentInfo {
|
||||
path: erofs_path.clone(),
|
||||
total_sectors: sectors,
|
||||
});
|
||||
}
|
||||
|
||||
if total_sectors == 0 {
|
||||
return Err(anyhow!(
|
||||
"no valid EROFS files to create VMDK descriptor (all files are empty)"
|
||||
));
|
||||
}
|
||||
|
||||
// write descriptor to a temp file, then atomically rename.
|
||||
let tmp_path = vmdk_path.with_extension("vmdk.tmp");
|
||||
|
||||
let file = fs::File::create(&tmp_path).context(format!(
|
||||
"failed to create temp VMDK file: {}",
|
||||
tmp_path.display()
|
||||
))?;
|
||||
let mut writer = BufWriter::new(file);
|
||||
|
||||
// Header
|
||||
writeln!(writer, "# Disk DescriptorFile")?;
|
||||
writeln!(writer, "version=1")?;
|
||||
writeln!(writer, "CID=fffffffe")?;
|
||||
writeln!(writer, "parentCID=ffffffff")?;
|
||||
writeln!(writer, "createType=\"{}\"", VMDK_SUBFORMAT)?;
|
||||
writeln!(writer)?;
|
||||
|
||||
// Extent descriptions
|
||||
writeln!(writer, "# Extent description")?;
|
||||
for extent in &extents {
|
||||
let mut remaining = extent.total_sectors;
|
||||
let mut file_offset: u64 = 0;
|
||||
|
||||
while remaining > 0 {
|
||||
let chunk = remaining.min(MAX_2GB_EXTENT_SECTORS);
|
||||
writeln!(
|
||||
writer,
|
||||
"RW {} FLAT \"{}\" {}",
|
||||
chunk, extent.path, file_offset
|
||||
)?;
|
||||
file_offset += chunk;
|
||||
remaining -= chunk;
|
||||
}
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"VMDK extent: {} ({} sectors, {} extent chunk(s))",
|
||||
extent.path,
|
||||
extent.total_sectors,
|
||||
extent.total_sectors.div_ceil(MAX_2GB_EXTENT_SECTORS)
|
||||
);
|
||||
}
|
||||
writeln!(writer)?;
|
||||
|
||||
// Disk Data Base (DDB)
|
||||
// Geometry: cylinders = ceil(total_sectors / (sectors_per_track * heads))
|
||||
let cylinders = total_sectors.div_ceil(SECTORS_PER_TRACK * NUMBER_HEADS);
|
||||
|
||||
writeln!(writer, "# The Disk Data Base")?;
|
||||
writeln!(writer, "#DDB")?;
|
||||
writeln!(writer)?;
|
||||
writeln!(writer, "ddb.virtualHWVersion = \"{}\"", VMDK_HW_VERSION)?;
|
||||
writeln!(writer, "ddb.geometry.cylinders = \"{}\"", cylinders)?;
|
||||
writeln!(writer, "ddb.geometry.heads = \"{}\"", NUMBER_HEADS)?;
|
||||
writeln!(writer, "ddb.geometry.sectors = \"{}\"", SECTORS_PER_TRACK)?;
|
||||
writeln!(writer, "ddb.adapterType = \"{}\"", VMDK_ADAPTER_TYPE)?;
|
||||
|
||||
// Flush the BufWriter to ensure all data is written before rename.
|
||||
writer.flush().context("failed to flush VMDK descriptor")?;
|
||||
// Explicitly drop to close the file handle before rename.
|
||||
drop(writer);
|
||||
|
||||
// atomic rename: tmp -> final path.
|
||||
fs::rename(&tmp_path, vmdk_path).context(format!(
|
||||
"failed to rename temp VMDK {} -> {}",
|
||||
tmp_path.display(),
|
||||
vmdk_path.display()
|
||||
))?;
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"VMDK descriptor created: {} (total {} sectors, {} extents, {} cylinders)",
|
||||
vmdk_path.display(),
|
||||
total_sectors,
|
||||
extents.len(),
|
||||
cylinders
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Extract block device information from hypervisor device info
|
||||
fn extract_block_device_info(
|
||||
device_info: &DeviceType,
|
||||
block_driver: &str,
|
||||
) -> Result<(String, String, String)> {
|
||||
if let DeviceType::Block(device) = device_info {
|
||||
let blk_driver = device.config.driver_option.clone();
|
||||
let device_id = device.device_id.clone();
|
||||
|
||||
// Use virt_path as guest device path (e.g., /dev/vda)
|
||||
// pci_path is PCI address (e.g., 02/00) which is not a valid mount source
|
||||
let guest_path = match block_driver {
|
||||
VIRTIO_BLK_PCI | VIRTIO_BLK_MMIO | VIRTIO_BLK_CCW => {
|
||||
// virt_path is the correct guest device path for all virtio-blk types
|
||||
if device.config.virt_path.is_empty() {
|
||||
return Err(anyhow!("virt_path is empty for block device"));
|
||||
}
|
||||
device.config.virt_path.clone()
|
||||
}
|
||||
VIRTIO_SCSI | VIRTIO_PMEM => {
|
||||
return Err(anyhow!(
|
||||
"Block driver {} not fully supported for EROFS",
|
||||
block_driver
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
return Err(anyhow!("Unknown block driver: {}", block_driver));
|
||||
}
|
||||
};
|
||||
|
||||
Ok((device_id, guest_path, blk_driver))
|
||||
} else {
|
||||
Err(anyhow!("Expected block device, got {:?}", device_info))
|
||||
}
|
||||
}
|
||||
|
||||
/// EROFS Multi-Layer Rootfs with overlay support
|
||||
///
|
||||
/// Handles the EROFS Multi-Layer where rootfs consists of:
|
||||
/// - Mount[0]: ext4 rw layer (writable container layer) -> virtio-blk device
|
||||
/// - Mount[1]: erofs layers (fsmeta + flattened layers) -> virtio-blk via VMDK
|
||||
/// - Mount[2]: overlay (to combine ext4 upper + erofs lower)
|
||||
pub(crate) struct ErofsMultiLayerRootfs {
|
||||
guest_path: String,
|
||||
device_ids: Vec<String>,
|
||||
mount: oci::Mount,
|
||||
rwlayer_storage: Option<Storage>, // Writable layer storage (upper layer), typically ext4
|
||||
erofs_storage: Option<Storage>,
|
||||
/// Path to generated VMDK descriptor (only set when multiple EROFS devices are merged)
|
||||
vmdk_path: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ErofsMultiLayerRootfs {
|
||||
pub async fn new(
|
||||
device_manager: &RwLock<DeviceManager>,
|
||||
sid: &str,
|
||||
cid: &str,
|
||||
rootfs_mounts: &[Mount],
|
||||
_share_fs: &Option<Arc<dyn crate::share_fs::ShareFs>>,
|
||||
) -> Result<Self> {
|
||||
let container_path = do_get_guest_path(ROOTFS, cid, false, false);
|
||||
let host_path = do_get_host_path(ROOTFS, sid, cid, false, false);
|
||||
|
||||
fs::create_dir_all(&host_path)
|
||||
.map_err(|e| anyhow!("failed to create rootfs dir {}: {:?}", host_path, e))?;
|
||||
|
||||
let mut device_ids = Vec::new();
|
||||
let mut rwlayer_storage: Option<Storage> = None;
|
||||
let mut erofs_storage: Option<Storage> = None;
|
||||
let mut vmdk_path: Option<PathBuf> = None;
|
||||
|
||||
// Directories to create (X-containerd.mkdir.path)
|
||||
let mut mkdir_dirs: Vec<String> = Vec::new();
|
||||
|
||||
let blkdev_info = get_block_device_info(device_manager).await;
|
||||
let block_driver = blkdev_info.block_device_driver.clone();
|
||||
|
||||
// Process each mount in rootfs_mounts to set up devices and storages
|
||||
for mount in rootfs_mounts {
|
||||
match mount.fs_type.as_str() {
|
||||
fmt if fmt.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE) => {
|
||||
// Mount[0]: rw layer -> virtio-blk device /dev/vdX1
|
||||
info!(
|
||||
sl!(),
|
||||
"multi-layer erofs: adding rw layer: {}", mount.source
|
||||
);
|
||||
|
||||
let device_config = &mut BlockConfig {
|
||||
driver_option: block_driver.clone(),
|
||||
format: BlockDeviceFormat::Raw, // rw layer should be raw format
|
||||
path_on_host: mount.source.clone(),
|
||||
blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let device_info = do_handle_device(
|
||||
device_manager,
|
||||
&DeviceConfig::BlockCfg(device_config.clone()),
|
||||
)
|
||||
.await
|
||||
.context("failed to attach rw block device")?;
|
||||
|
||||
let (device_id, guest_path, blk_driver) =
|
||||
extract_block_device_info(&device_info, &block_driver)?;
|
||||
info!(
|
||||
sl!(),
|
||||
"writable block device attached - device_id: {} guest_path: {}",
|
||||
device_id,
|
||||
guest_path
|
||||
);
|
||||
|
||||
// Filter out "loop" option which is not needed in VM (device is already /dev/vdX)
|
||||
let mut options: Vec<String> = mount
|
||||
.options
|
||||
.iter()
|
||||
.filter(|o| *o != "loop")
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
// RW layer is the writable upper layer (marked with X-kata.overlay-upper)
|
||||
options.push("X-kata.overlay-upper".to_string());
|
||||
options.push("X-kata.multi-layer=true".to_string());
|
||||
|
||||
// Set up storage for rw layer (upper layer)
|
||||
rwlayer_storage = Some(Storage {
|
||||
driver: blk_driver,
|
||||
source: guest_path.clone(),
|
||||
fs_type: RW_LAYER_ROOTFS_TYPE.to_string(),
|
||||
mount_point: container_path.clone(),
|
||||
options,
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
device_ids.push(device_id);
|
||||
}
|
||||
fmt if fmt.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE) => {
|
||||
// Mount[1]: erofs layers -> virtio-blk via VMDK /dev/vdX2
|
||||
info!(
|
||||
sl!(),
|
||||
"multi-layer erofs: adding erofs layers: {}", mount.source
|
||||
);
|
||||
|
||||
// Collect all EROFS devices: source + `device=` options
|
||||
let mut erofs_devices = vec![mount.source.clone()];
|
||||
for opt in &mount.options {
|
||||
if let Some(device_path) = opt.strip_prefix("device=") {
|
||||
erofs_devices.push(device_path.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
info!(sl!(), "EROFS devices count: {}", erofs_devices.len());
|
||||
|
||||
// Generate merged VMDK file from all EROFS devices
|
||||
// Returns (path, format) - format is Vmdk for multiple devices, Raw for single device
|
||||
let (erofs_path, erofs_format) =
|
||||
generate_merged_erofs_vmdk(sid, cid, &erofs_devices)
|
||||
.await
|
||||
.context("failed to generate EROFS VMDK")?;
|
||||
|
||||
// Track VMDK path for cleanup (only when VMDK is actually created)
|
||||
if erofs_format == BlockDeviceFormat::Vmdk {
|
||||
vmdk_path = Some(PathBuf::from(&erofs_path));
|
||||
}
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"EROFS block device config - path: {}, format: {:?}",
|
||||
erofs_path,
|
||||
erofs_format
|
||||
);
|
||||
|
||||
let device_config = &mut BlockConfig {
|
||||
driver_option: block_driver.clone(),
|
||||
format: erofs_format, // Vmdk for multiple devices, Raw for single device
|
||||
path_on_host: erofs_path,
|
||||
blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
|
||||
is_readonly: true, // EROFS layer is read-only
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let device_info = do_handle_device(
|
||||
device_manager,
|
||||
&DeviceConfig::BlockCfg(device_config.clone()),
|
||||
)
|
||||
.await
|
||||
.context("failed to attach erofs block device")?;
|
||||
|
||||
let (device_id, guest_path, blk_driver) =
|
||||
extract_block_device_info(&device_info, &block_driver)?;
|
||||
info!(
|
||||
sl!(),
|
||||
"erofs device attached - device_id: {} guest_path: {}",
|
||||
device_id,
|
||||
guest_path
|
||||
);
|
||||
|
||||
let mut options: Vec<String> = mount
|
||||
.options
|
||||
.iter()
|
||||
.filter(|o| {
|
||||
// Filter out options that are not valid erofs mount parameters:
|
||||
// 1. "loop" - not needed in VM, device is already /dev/vdX
|
||||
// 2. "device=" prefix - used for VMDK generation only, not for mount
|
||||
// 3. "X-kata." prefix - metadata markers for kata internals
|
||||
*o != "loop" && !o.starts_with("device=") && !o.starts_with("X-kata.")
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
// Erofs layers are read-only lower layers (marked with X-kata.overlay-lower)
|
||||
options.push("X-kata.overlay-lower".to_string());
|
||||
options.push("X-kata.multi-layer=true".to_string());
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"erofs storage options filtered: {:?} -> {:?}", mount.options, options
|
||||
);
|
||||
|
||||
erofs_storage = Some(Storage {
|
||||
driver: blk_driver,
|
||||
source: guest_path.clone(),
|
||||
fs_type: EROFS_ROOTFS_TYPE.to_string(),
|
||||
mount_point: container_path.clone(),
|
||||
options,
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
device_ids.push(device_id);
|
||||
}
|
||||
fmt if fmt.eq_ignore_ascii_case("overlay")
|
||||
|| fmt.eq_ignore_ascii_case("format/overlay")
|
||||
|| fmt.eq_ignore_ascii_case("format/mkdir/overlay") =>
|
||||
{
|
||||
// Mount[2]: overlay to combine rwlayer (upper) + erofs (lower)
|
||||
info!(
|
||||
sl!(),
|
||||
"multi-layer erofs: parsing overlay mount, options: {:?}", mount.options
|
||||
);
|
||||
|
||||
// Parse mkdir options (X-containerd.mkdir.path)
|
||||
for opt in &mount.options {
|
||||
if let Some(mkdir_spec) = opt.strip_prefix(X_CONTAINERD_MKDIR_PATH) {
|
||||
// Keep the full spec (path:mode or path:mode:uid:gid) for guest agent
|
||||
mkdir_dirs.push(mkdir_spec.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
info!(
|
||||
sl!(),
|
||||
"multi-layer erofs: ignoring unknown mount type: {}", mount.fs_type
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if device_ids.is_empty() {
|
||||
return Err(anyhow!("no devices attached for multi-layer erofs rootfs"));
|
||||
}
|
||||
|
||||
// Check device count limit
|
||||
if device_ids.len() > MAX_VIRTIO_BLK_DEVICES {
|
||||
return Err(anyhow!(
|
||||
"exceeded maximum virtio disk count: {} > {}",
|
||||
device_ids.len(),
|
||||
MAX_VIRTIO_BLK_DEVICES
|
||||
));
|
||||
}
|
||||
|
||||
// Add mkdir directives to rwlayer storage options for guest agent
|
||||
if let Some(ref mut rwlayer) = rwlayer_storage {
|
||||
rwlayer.options.extend(
|
||||
mkdir_dirs
|
||||
.iter()
|
||||
.map(|dir| format!("{}{}", X_KATA_MKDIR_PATH, dir)),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
guest_path: container_path,
|
||||
device_ids,
|
||||
mount: oci::Mount::default(),
|
||||
rwlayer_storage,
|
||||
erofs_storage,
|
||||
vmdk_path,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Rootfs for ErofsMultiLayerRootfs {
|
||||
async fn get_guest_rootfs_path(&self) -> Result<String> {
|
||||
Ok(self.guest_path.clone())
|
||||
}
|
||||
|
||||
async fn get_rootfs_mount(&self) -> Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
}
|
||||
|
||||
async fn get_storage(&self) -> Option<Vec<Storage>> {
|
||||
// Return all storages for multi-layer EROFS (rw layer + erofs layer) to guest agent.
|
||||
// Guest agent needs both to create overlay mount
|
||||
let mut storages = Vec::new();
|
||||
|
||||
if let Some(rwlayer) = self.rwlayer_storage.clone() {
|
||||
storages.push(rwlayer);
|
||||
}
|
||||
|
||||
if let Some(erofs) = self.erofs_storage.clone() {
|
||||
storages.push(erofs);
|
||||
}
|
||||
|
||||
if storages.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(storages)
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_device_id(&self) -> Result<Option<String>> {
|
||||
Ok(self.device_ids.first().cloned())
|
||||
}
|
||||
|
||||
async fn cleanup(&self, device_manager: &RwLock<DeviceManager>) -> Result<()> {
|
||||
let mut dm = device_manager.write().await;
|
||||
for device_id in &self.device_ids {
|
||||
dm.try_remove_device(device_id).await?;
|
||||
}
|
||||
|
||||
// Clean up generated VMDK descriptor file if it exists (only for multi-device case)
|
||||
if let Some(ref vmdk) = self.vmdk_path {
|
||||
if vmdk.exists() {
|
||||
if let Err(e) = fs::remove_file(vmdk) {
|
||||
warn!(
|
||||
sl!(),
|
||||
"failed to remove VMDK descriptor {}: {}",
|
||||
vmdk.display(),
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if mounts represent multi-layer EROFS rootfs(with or without `device=` options):
|
||||
/// - Must have at least 2 mounts (rw layer + erofs layer)
|
||||
/// - Multi-layer: erofs with `device=` options
|
||||
/// - Single-layer: erofs without `device=` options (just layer.erofs)
|
||||
pub fn is_erofs_multi_layer(rootfs_mounts: &[Mount]) -> bool {
|
||||
if rootfs_mounts.len() < 2 {
|
||||
return false;
|
||||
}
|
||||
|
||||
let has_rwlayer = rootfs_mounts.iter().any(|m| {
|
||||
m.fs_type.eq_ignore_ascii_case(RW_LAYER_ROOTFS_TYPE) && m.options.iter().any(|o| o == "rw")
|
||||
});
|
||||
|
||||
let has_erofs = rootfs_mounts
|
||||
.iter()
|
||||
.any(|m| m.fs_type.eq_ignore_ascii_case(EROFS_ROOTFS_TYPE));
|
||||
|
||||
// Must have rwlayer + erofs (multi-layer or single-layer)
|
||||
has_rwlayer && has_erofs
|
||||
}
|
||||
Reference in New Issue
Block a user