From d5483aaf7cd3c36b6169b8d2e15ccd56d1e13b91 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 24 Aug 2023 09:40:09 +0800 Subject: [PATCH] agent: move storage device related code into dedicated files Move storage device related code into dedicated files. Signed-off-by: Jiang Liu --- src/agent/src/main.rs | 1 + src/agent/src/mount.rs | 1146 +---------------- src/agent/src/rpc.rs | 3 +- src/agent/src/storage/bind_watcher_handler.rs | 36 + src/agent/src/storage/block_handler.rs | 145 +++ src/agent/src/storage/ephemeral_handler.rs | 293 +++++ src/agent/src/storage/fs_handler.rs | 88 ++ src/agent/src/storage/local_handler.rs | 61 + src/agent/src/storage/mod.rs | 648 ++++++++++ 9 files changed, 1279 insertions(+), 1142 deletions(-) create mode 100644 src/agent/src/storage/bind_watcher_handler.rs create mode 100644 src/agent/src/storage/block_handler.rs create mode 100644 src/agent/src/storage/ephemeral_handler.rs create mode 100644 src/agent/src/storage/fs_handler.rs create mode 100644 src/agent/src/storage/local_handler.rs create mode 100644 src/agent/src/storage/mod.rs diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index a869e5afa3..7e59e2daab 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -48,6 +48,7 @@ mod pci; pub mod random; mod sandbox; mod signal; +mod storage; mod uevent; mod util; mod version; diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index 59bdfdf555..e7bbf96f8d 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -5,53 +5,23 @@ use std::collections::HashMap; use std::fmt::Debug; -use std::fs::{self, File, OpenOptions}; -use std::io::{BufRead, BufReader, Write}; -use std::iter; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; use std::ops::Deref; -use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::path::Path; -use std::str::FromStr; -use std::sync::Arc; use anyhow::{anyhow, Context, Result}; -use kata_sys_util::mount::{create_mount_destination, get_linux_mount_info, parse_mount_options}; -use kata_types::mount::{ - StorageDeviceGeneric, StorageHandlerManager, KATA_MOUNT_OPTION_FS_GID, - KATA_SHAREDFS_GUEST_PREMOUNT_TAG, -}; +use kata_sys_util::mount::{get_linux_mount_info, parse_mount_options}; use nix::mount::MsFlags; -use nix::unistd::{Gid, Uid}; use regex::Regex; use slog::Logger; -use tokio::sync::Mutex; use tracing::instrument; -use crate::device::{ - get_scsi_device_name, get_virtio_blk_pci_device_name, get_virtio_mmio_device_name, - online_device, wait_for_pmem_device, DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, - DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, - DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, FS_TYPE_HUGETLB, -}; +use crate::device::online_device; use crate::linux_abi::*; -use crate::pci; -use crate::protocols::agent::Storage; -use crate::protocols::types::FSGroupChangePolicy; -use crate::sandbox::StorageDeviceObject; -use crate::Sandbox; -#[cfg(target_arch = "s390x")] -use crate::{ccw, device::get_virtio_blk_ccw_device_name}; pub const TYPE_ROOTFS: &str = "rootfs"; -const FS_GID_EQ: &str = "fsgid="; -const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages"; - -const RW_MASK: u32 = 0o660; -const RO_MASK: u32 = 0o440; -const EXEC_MASK: u32 = 0o110; -const MODE_SETGID: u32 = 0o2000; - #[derive(Debug, PartialEq)] pub struct InitMount<'a> { fstype: &'a str, @@ -60,24 +30,6 @@ pub struct InitMount<'a> { options: Vec<&'a str>, } -#[derive(Debug)] -pub struct StorageContext<'a> { - cid: &'a 
Option, - logger: &'a Logger, - sandbox: &'a Arc>, -} - -/// Trait object to handle storage device. -#[async_trait::async_trait] -pub trait StorageHandler: Send + Sync { - /// Create a new storage device. - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result; -} - #[rustfmt::skip] lazy_static!{ static ref CGROUPS: HashMap<&'static str, &'static str> = { @@ -111,26 +63,6 @@ lazy_static! { ]; } -#[rustfmt::skip] -lazy_static! { - pub static ref STORAGE_HANDLERS: StorageHandlerManager> = { - let mut manager: StorageHandlerManager> = StorageHandlerManager::new(); - manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap(); - #[cfg(target_arch = "s390x")] - manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(VirtioBlkCcwHandler{})).unwrap(); - manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap(); - manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap(); - manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap(); - manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap(); - manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap(); - manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap(); - manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap(); - manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap(); - manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap(); - manager - }; -} - #[instrument] pub fn baremount( source: &Path, @@ -189,608 +121,6 @@ pub fn baremount( }) } -#[derive(Debug)] -struct EphemeralHandler {} - -#[async_trait::async_trait] -impl StorageHandler for EphemeralHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - // hugetlbfs - if storage.fstype == FS_TYPE_HUGETLB { - info!(ctx.logger, "handle hugetlbfs storage"); - // Allocate hugepages before mount - // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages - // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages - // options eg "pagesize=2097152,size=524288000"(2M, 500M) - Self::allocate_hugepages(ctx.logger, &storage.options.to_vec()) - .context("allocate hugepages")?; - common_storage_handler(ctx.logger, &storage)?; - } else if !storage.options.is_empty() { - // By now we only support one option field: "fsGroup" which - // isn't an valid mount option, thus we should remove it when - // do mount. - let opts = parse_options(&storage.options); - storage.options = Default::default(); - common_storage_handler(ctx.logger, &storage)?; - - // ephemeral_storage didn't support mount options except fsGroup. 
- if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { - let gid = fsgid.parse::()?; - - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - - let meta = fs::metadata(&storage.mount_point)?; - let mut permission = meta.permissions(); - - let o_mode = meta.mode() | MODE_SETGID; - permission.set_mode(o_mode); - fs::set_permissions(&storage.mount_point, permission)?; - } - } else { - common_storage_handler(ctx.logger, &storage)?; - } - - new_device("".to_string()) - } -} - -impl EphemeralHandler { - // Allocate hugepages by writing to sysfs - fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { - info!(logger, "mounting hugePages storage options: {:?}", options); - - let (pagesize, size) = Self::get_pagesize_and_size_from_option(options) - .context(format!("parse mount options: {:?}", &options))?; - - info!( - logger, - "allocate hugepages. pageSize: {}, size: {}", pagesize, size - ); - - // sysfs entry is always of the form hugepages-${pagesize}kB - // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt - let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) - .join(format!("hugepages-{}kB", pagesize / 1024)) - .join("nr_hugepages"); - - // write numpages to nr_hugepages file. - let numpages = format!("{}", size / pagesize); - info!(logger, "write {} pages to {:?}", &numpages, &path); - - let mut file = OpenOptions::new() - .write(true) - .open(&path) - .context(format!("open nr_hugepages directory {:?}", &path))?; - - file.write_all(numpages.as_bytes()) - .context(format!("write nr_hugepages failed: {:?}", &path))?; - - // Even if the write succeeds, the kernel isn't guaranteed to be - // able to allocate all the pages we requested. Verify that it - // did. - let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; - let allocated = verify - .trim_end() - .parse::() - .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; - if allocated != size / pagesize { - return Err(anyhow!( - "Only allocated {} of {} hugepages of size {}", - allocated, - numpages, - pagesize - )); - } - - Ok(()) - } - - // Parse filesystem options string to retrieve hugepage details - // options eg "pagesize=2048,size=107374182" - fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { - let mut pagesize_str: Option<&str> = None; - let mut size_str: Option<&str> = None; - - for option in options { - let vars: Vec<&str> = option.trim().split(',').collect(); - - for var in vars { - if let Some(stripped) = var.strip_prefix("pagesize=") { - pagesize_str = Some(stripped); - } else if let Some(stripped) = var.strip_prefix("size=") { - size_str = Some(stripped); - } - - if pagesize_str.is_some() && size_str.is_some() { - break; - } - } - } - - if pagesize_str.is_none() || size_str.is_none() { - return Err(anyhow!("no pagesize/size options found")); - } - - let pagesize = pagesize_str - .unwrap() - .parse::() - .context(format!("parse pagesize: {:?}", &pagesize_str))?; - let size = size_str - .unwrap() - .parse::() - .context(format!("parse size: {:?}", &pagesize_str))?; - - Ok((pagesize, size)) - } -} - -// update_ephemeral_mounts takes a list of ephemeral mounts and remounts them -// with mount options passed by the caller -#[instrument] -pub async fn update_ephemeral_mounts( - logger: Logger, - storages: &[Storage], - _sandbox: &Arc>, -) -> Result<()> { - for storage in storages { - let handler_name = &storage.driver; - let logger = logger.new(o!( - "msg" => "updating tmpfs storage", - 
"subsystem" => "storage", - "storage-type" => handler_name.to_owned())); - - match handler_name.as_str() { - DRIVER_EPHEMERAL_TYPE => { - fs::create_dir_all(&storage.mount_point)?; - - if storage.options.is_empty() { - continue; - } else { - // assume that fsGid has already been set - let mount_path = Path::new(&storage.mount_point); - let src_path = Path::new(&storage.source); - let opts: Vec<&String> = storage - .options - .iter() - .filter(|&opt| !opt.starts_with(FS_GID_EQ)) - .collect(); - let (flags, options) = parse_mount_options(&opts)?; - - info!(logger, "mounting storage"; - "mount-source" => src_path.display(), - "mount-destination" => mount_path.display(), - "mount-fstype" => storage.fstype.as_str(), - "mount-options" => options.as_str(), - ); - - baremount( - src_path, - mount_path, - storage.fstype.as_str(), - flags, - options.as_str(), - &logger, - )?; - } - } - _ => { - return Err(anyhow!( - "Unsupported storage type for syncing mounts {}. Only ephemeral storage update is supported", - storage.driver - )); - } - }; - } - - Ok(()) -} - -#[derive(Debug)] -struct OverlayfsHandler {} - -#[async_trait::async_trait] -impl StorageHandler for OverlayfsHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - if storage - .options - .iter() - .any(|e| e == "io.katacontainers.fs-opt.overlay-rw") - { - let cid = ctx - .cid - .clone() - .ok_or_else(|| anyhow!("No container id in rw overlay"))?; - let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid); - let work = cpath.join("work"); - let upper = cpath.join("upper"); - - fs::create_dir_all(&work).context("Creating overlay work directory")?; - fs::create_dir_all(&upper).context("Creating overlay upper directory")?; - - storage.fstype = "overlay".into(); - storage - .options - .push(format!("upperdir={}", upper.to_string_lossy())); - storage - .options - .push(format!("workdir={}", work.to_string_lossy())); - } - - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct LocalHandler {} - -#[async_trait::async_trait] -impl StorageHandler for LocalHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - _ctx: &mut StorageContext, - ) -> Result { - fs::create_dir_all(&storage.mount_point).context(format!( - "failed to create dir all {:?}", - &storage.mount_point - ))?; - - let opts = parse_options(&storage.options); - - let mut need_set_fsgid = false; - if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { - let gid = fsgid.parse::()?; - - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - need_set_fsgid = true; - } - - if let Some(mode) = opts.get("mode") { - let mut permission = fs::metadata(&storage.mount_point)?.permissions(); - - let mut o_mode = u32::from_str_radix(mode, 8)?; - - if need_set_fsgid { - // set SetGid mode mask. 
- o_mode |= MODE_SETGID; - } - permission.set_mode(o_mode); - - fs::set_permissions(&storage.mount_point, permission)?; - } - - new_device("".to_string()) - } -} - -#[derive(Debug)] -struct Virtio9pHandler {} - -#[async_trait::async_trait] -impl StorageHandler for Virtio9pHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioFsHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioFsHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioBlkMmioHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioBlkMmioHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - if !Path::new(&storage.source).exists() { - get_virtio_mmio_device_name(ctx.sandbox, &storage.source) - .await - .context("failed to get mmio device name")?; - } - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioBlkPciHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioBlkPciHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - // If hot-plugged, get the device node path based on the PCI path - // otherwise use the virt path provided in Storage Source - if storage.source.starts_with("/dev") { - let metadata = fs::metadata(&storage.source) - .context(format!("get metadata on file {:?}", &storage.source))?; - let mode = metadata.permissions().mode(); - if mode & libc::S_IFBLK == 0 { - return Err(anyhow!("Invalid device {}", &storage.source)); - } - } else { - let pcipath = pci::Path::from_str(&storage.source)?; - let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?; - storage.source = dev_path; - } - - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioBlkCcwHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioBlkCcwHandler { - #[cfg(target_arch = "s390x")] - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - let ccw_device = ccw::Device::from_str(&storage.source)?; - let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?; - storage.source = dev_path; - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } - - #[cfg(not(target_arch = "s390x"))] - #[instrument] - async fn create_device( - &self, - _storage: Storage, - _ctx: &mut StorageContext, - ) -> Result { - Err(anyhow!("CCW is only supported on s390x")) - } -} - -#[derive(Debug)] -struct ScsiHandler {} - -#[async_trait::async_trait] -impl StorageHandler for ScsiHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - // Retrieve the device path from SCSI address. 
-        let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?;
-        storage.source = dev_path;
-
-        let path = common_storage_handler(ctx.logger, &storage)?;
-        new_device(path)
-    }
-}
-
-#[derive(Debug)]
-struct PmemHandler {}
-
-#[async_trait::async_trait]
-impl StorageHandler for PmemHandler {
-    #[instrument]
-    async fn create_device(
-        &self,
-        storage: Storage,
-        ctx: &mut StorageContext,
-    ) -> Result<StorageDeviceObject> {
-        // Retrieve the device path from NVDIMM address.
-        wait_for_pmem_device(ctx.sandbox, &storage.source).await?;
-
-        let path = common_storage_handler(ctx.logger, &storage)?;
-        new_device(path)
-    }
-}
-
-#[derive(Debug)]
-struct BindWatcherHandler {}
-
-#[async_trait::async_trait]
-impl StorageHandler for BindWatcherHandler {
-    #[instrument]
-    async fn create_device(
-        &self,
-        storage: Storage,
-        ctx: &mut StorageContext,
-    ) -> Result<StorageDeviceObject> {
-        if let Some(cid) = ctx.cid {
-            ctx.sandbox
-                .lock()
-                .await
-                .bind_watcher
-                .add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger)
-                .await?;
-        }
-        new_device("".to_string())
-    }
-}
-
-fn new_device(path: String) -> Result<StorageDeviceObject> {
-    let device = StorageDeviceGeneric::new(path);
-    Ok(Arc::new(Mutex::new(device)))
-}
-
-#[instrument]
-fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
-    mount_storage(logger, storage)?;
-    set_ownership(logger, storage)?;
-    Ok(storage.mount_point.clone())
-}
-
-// mount_storage performs the mount described by the storage structure.
-#[instrument]
-fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
-    let logger = logger.new(o!("subsystem" => "mount"));
-
-    // There's a special mechanism to create mountpoint from a `sharedfs` instance before
-    // starting the kata-agent. Check for such cases.
-    if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)?
{ - warn!( - logger, - "{} already mounted on {}, ignoring...", - KATA_SHAREDFS_GUEST_PREMOUNT_TAG, - &storage.mount_point - ); - return Ok(()); - } - - let (flags, options) = parse_mount_options(&storage.options)?; - let mount_path = Path::new(&storage.mount_point); - let src_path = Path::new(&storage.source); - create_mount_destination(src_path, mount_path, "", &storage.fstype) - .context("Could not create mountpoint")?; - - info!(logger, "mounting storage"; - "mount-source" => src_path.display(), - "mount-destination" => mount_path.display(), - "mount-fstype" => storage.fstype.as_str(), - "mount-options" => options.as_str(), - ); - - baremount( - src_path, - mount_path, - storage.fstype.as_str(), - flags, - options.as_str(), - &logger, - ) -} - -#[instrument] -pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> { - let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership")); - - // If fsGroup is not set, skip performing ownership change - if storage.fs_group.is_none() { - return Ok(()); - } - - let fs_group = storage.fs_group(); - let read_only = storage.options.contains(&String::from("ro")); - let mount_path = Path::new(&storage.mount_point); - let metadata = mount_path.metadata().map_err(|err| { - error!(logger, "failed to obtain metadata for mount path"; - "mount-path" => mount_path.to_str(), - "error" => err.to_string(), - ); - err - })?; - - if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch.into() - && metadata.gid() == fs_group.group_id - { - let mut mask = if read_only { RO_MASK } else { RW_MASK }; - mask |= EXEC_MASK; - - // With fsGroup change policy to OnRootMismatch, if the current - // gid of the mount path root directory matches the desired gid - // and the current permission of mount path root directory is correct, - // then ownership change will be skipped. - let current_mode = metadata.permissions().mode(); - if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) { - info!(logger, "skipping ownership change for volume"; - "mount-path" => mount_path.to_str(), - "fs-group" => fs_group.group_id.to_string(), - ); - return Ok(()); - } - } - - info!(logger, "performing recursive ownership change"; - "mount-path" => mount_path.to_str(), - "fs-group" => fs_group.group_id.to_string(), - ); - recursive_ownership_change( - mount_path, - None, - Some(Gid::from_raw(fs_group.group_id)), - read_only, - ) -} - -#[instrument] -pub fn recursive_ownership_change( - path: &Path, - uid: Option, - gid: Option, - read_only: bool, -) -> Result<()> { - let mut mask = if read_only { RO_MASK } else { RW_MASK }; - if path.is_dir() { - for entry in fs::read_dir(path)? { - recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?; - } - mask |= EXEC_MASK; - mask |= MODE_SETGID; - } - - // We do not want to change the permission of the underlying file - // using symlink. Hence we skip symlinks from recursive ownership - // and permission changes. - if path.is_symlink() { - return Ok(()); - } - - nix::unistd::chown(path, uid, gid)?; - - if gid.is_some() { - let metadata = path.metadata()?; - let mut permission = metadata.permissions(); - let target_mode = metadata.mode() | mask; - permission.set_mode(target_mode); - fs::set_permissions(path, permission)?; - } - - Ok(()) -} - /// Looks for `mount_point` entry in the /proc/mounts. 
#[instrument] pub fn is_mounted(mount_point: &str) -> Result { @@ -799,80 +129,6 @@ pub fn is_mounted(mount_point: &str) -> Result { Ok(found) } -// add_storages takes a list of storages passed by the caller, and perform the -// associated operations such as waiting for the device to show up, and mount -// it to a specific location, according to the type of handler chosen, and for -// each storage. -#[instrument] -pub async fn add_storages( - logger: Logger, - storages: Vec, - sandbox: &Arc>, - cid: Option, -) -> Result> { - let mut mount_list = Vec::new(); - - for storage in storages { - let path = storage.mount_point.clone(); - let state = sandbox.lock().await.add_sandbox_storage(&path).await; - if state.ref_count().await > 1 { - // The device already exists. - continue; - } - - if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) { - let logger = - logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone())); - let mut ctx = StorageContext { - cid: &cid, - logger: &logger, - sandbox, - }; - - match handler.create_device(storage, &mut ctx).await { - Ok(device) => { - match sandbox - .lock() - .await - .update_sandbox_storage(&path, device.clone()) - { - Ok(d) => { - let path = device.lock().await.path().to_string(); - if !path.is_empty() { - mount_list.push(path.clone()); - } - drop(d); - } - Err(device) => { - error!(logger, "failed to update device for storage"); - if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await - { - warn!(logger, "failed to remove dummy sandbox storage {:?}", e); - } - device.lock().await.cleanup(); - return Err(anyhow!("failed to update device for storage")); - } - } - } - Err(e) => { - error!(logger, "failed to create device for storage, error: {e:?}"); - if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await { - warn!(logger, "failed to remove dummy sandbox storage {e:?}"); - } - return Err(e); - } - } - } else { - return Err(anyhow!( - "Failed to find the storage handler {}", - storage.driver - )); - } - } - - Ok(mount_list) -} - #[instrument] fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> { fs::create_dir_all(m.dest).context("could not create directory")?; @@ -1054,26 +310,14 @@ pub fn remove_mounts + std::fmt::Debug>(mounts: &[P]) -> Result<() Ok(()) } -#[instrument] -fn parse_options(option_list: &[String]) -> HashMap { - let mut options = HashMap::new(); - for opt in option_list { - let fields: Vec<&str> = opt.split('=').collect(); - if fields.len() == 2 { - options.insert(fields[0].to_string(), fields[1].to_string()); - } - } - options -} - #[cfg(test)] mod tests { use super::*; - use protocols::agent::FSGroup; use slog::Drain; use std::fs::File; use std::fs::OpenOptions; use std::io::Write; + use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; use tempfile::tempdir; use test_utils::TestUserType; @@ -1643,127 +887,6 @@ mod tests { } } - #[test] - fn test_mount_storage() { - #[derive(Debug)] - struct TestData<'a> { - test_user: TestUserType, - storage: Storage, - error_contains: &'a str, - - make_source_dir: bool, - make_mount_dir: bool, - deny_mount_permission: bool, - } - - impl Default for TestData<'_> { - fn default() -> Self { - TestData { - test_user: TestUserType::Any, - storage: Storage { - mount_point: "mnt".to_string(), - source: "src".to_string(), - fstype: "tmpfs".to_string(), - ..Default::default() - }, - make_source_dir: true, - make_mount_dir: false, - deny_mount_permission: false, - error_contains: "", - } - } - } - - let tests = &[ - 
TestData { - test_user: TestUserType::NonRootOnly, - error_contains: "EPERM: Operation not permitted", - ..Default::default() - }, - TestData { - test_user: TestUserType::RootOnly, - ..Default::default() - }, - TestData { - storage: Storage { - mount_point: "mnt".to_string(), - source: "src".to_string(), - fstype: "bind".to_string(), - ..Default::default() - }, - make_source_dir: false, - make_mount_dir: true, - error_contains: "Could not create mountpoint", - ..Default::default() - }, - TestData { - test_user: TestUserType::NonRootOnly, - deny_mount_permission: true, - error_contains: "Could not create mountpoint", - ..Default::default() - }, - ]; - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - skip_loop_by_user!(msg, d.test_user); - - let drain = slog::Discard; - let logger = slog::Logger::root(drain, o!()); - - let tempdir = tempdir().unwrap(); - - let source = tempdir.path().join(&d.storage.source); - let mount_point = tempdir.path().join(&d.storage.mount_point); - - let storage = Storage { - source: source.to_str().unwrap().to_string(), - mount_point: mount_point.to_str().unwrap().to_string(), - ..d.storage.clone() - }; - - if d.make_source_dir { - fs::create_dir_all(&storage.source).unwrap(); - } - if d.make_mount_dir { - fs::create_dir_all(&storage.mount_point).unwrap(); - } - - if d.deny_mount_permission { - fs::set_permissions( - mount_point.parent().unwrap(), - fs::Permissions::from_mode(0o000), - ) - .unwrap(); - } - - let result = mount_storage(&logger, &storage); - - // restore permissions so tempdir can be cleaned up - if d.deny_mount_permission { - fs::set_permissions( - mount_point.parent().unwrap(), - fs::Permissions::from_mode(0o755), - ) - .unwrap(); - } - - if result.is_ok() { - nix::mount::umount(&mount_point).unwrap(); - } - - let msg = format!("{}: result: {:?}", msg, result); - if d.error_contains.is_empty() { - assert!(result.is_ok(), "{}", msg); - } else { - assert!(result.is_err(), "{}", msg); - let error_msg = format!("{}", result.unwrap_err()); - assert!(error_msg.contains(d.error_contains), "{}", msg); - } - } - } - #[test] fn test_mount_to_rootfs() { #[derive(Debug)] @@ -1863,62 +986,6 @@ mod tests { } } - #[test] - fn test_get_pagesize_and_size_from_option() { - let expected_pagesize = 2048; - let expected_size = 107374182; - let expected = (expected_pagesize, expected_size); - - let data = vec![ - // (input, expected, is_ok) - ("size-1=107374182,pagesize-1=2048", expected, false), - ("size-1=107374182,pagesize=2048", expected, false), - ("size=107374182,pagesize-1=2048", expected, false), - ("size=107374182,pagesize=abc", expected, false), - ("size=abc,pagesize=2048", expected, false), - ("size=,pagesize=2048", expected, false), - ("size=107374182,pagesize=", expected, false), - ("size=107374182,pagesize=2048", expected, true), - ("pagesize=2048,size=107374182", expected, true), - ("foo=bar,pagesize=2048,size=107374182", expected, true), - ( - "foo=bar,pagesize=2048,foo1=bar1,size=107374182", - expected, - true, - ), - ( - "pagesize=2048,foo1=bar1,foo=bar,size=107374182", - expected, - true, - ), - ( - "foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2", - expected, - true, - ), - ( - "foo=bar,size=107374182,foo1=bar1,pagesize=2048", - expected, - true, - ), - ]; - - for case in data { - let input = case.0; - let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]); - - let is_ok = case.2; - if is_ok { - let expected = case.1; - let (pagesize, size) = r.unwrap(); - 
assert_eq!(expected.0, pagesize); - assert_eq!(expected.1, size); - } else { - assert!(r.is_err()); - } - } - } - #[test] fn test_parse_mount_flags_and_options() { #[derive(Debug)] @@ -1969,207 +1036,4 @@ mod tests { assert_eq!(expected_result, result, "{}", msg); } } - - #[test] - fn test_set_ownership() { - skip_if_not_root!(); - - let logger = slog::Logger::root(slog::Discard, o!()); - - #[derive(Debug)] - struct TestData<'a> { - mount_path: &'a str, - fs_group: Option, - read_only: bool, - expected_group_id: u32, - expected_permission: u32, - } - - let tests = &[ - TestData { - mount_path: "foo", - fs_group: None, - read_only: false, - expected_group_id: 0, - expected_permission: 0, - }, - TestData { - mount_path: "rw_mount", - fs_group: Some(FSGroup { - group_id: 3000, - group_change_policy: FSGroupChangePolicy::Always.into(), - ..Default::default() - }), - read_only: false, - expected_group_id: 3000, - expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID, - }, - TestData { - mount_path: "ro_mount", - fs_group: Some(FSGroup { - group_id: 3000, - group_change_policy: FSGroupChangePolicy::OnRootMismatch.into(), - ..Default::default() - }), - read_only: true, - expected_group_id: 3000, - expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID, - }, - ]; - - let tempdir = tempdir().expect("failed to create tmpdir"); - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - let mount_dir = tempdir.path().join(d.mount_path); - fs::create_dir(&mount_dir) - .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); - - let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); - let mut storage_data = Storage::new(); - if d.read_only { - storage_data.set_options(vec!["foo".to_string(), "ro".to_string()]); - } - if let Some(fs_group) = d.fs_group.clone() { - storage_data.set_fs_group(fs_group); - } - storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap(); - - let result = set_ownership(&logger, &storage_data); - assert!(result.is_ok()); - - assert_eq!( - mount_dir.as_path().metadata().unwrap().gid(), - d.expected_group_id - ); - assert_eq!( - mount_dir.as_path().metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission) - ); - } - } - - #[test] - fn test_recursive_ownership_change() { - skip_if_not_root!(); - - const COUNT: usize = 5; - - #[derive(Debug)] - struct TestData<'a> { - // Directory where the recursive ownership change should be performed on - path: &'a str, - - // User ID for ownership change - uid: u32, - - // Group ID for ownership change - gid: u32, - - // Set when the permission should be read-only - read_only: bool, - - // The expected permission of all directories after ownership change - expected_permission_directory: u32, - - // The expected permission of all files after ownership change - expected_permission_file: u32, - } - - let tests = &[ - TestData { - path: "no_gid_change", - uid: 0, - gid: 0, - read_only: false, - expected_permission_directory: 0, - expected_permission_file: 0, - }, - TestData { - path: "rw_gid_change", - uid: 0, - gid: 3000, - read_only: false, - expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID, - expected_permission_file: RW_MASK, - }, - TestData { - path: "ro_gid_change", - uid: 0, - gid: 3000, - read_only: true, - expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID, - expected_permission_file: RO_MASK, - }, - ]; - - let tempdir = tempdir().expect("failed to create tmpdir"); - - for (i, d) 
in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - let mount_dir = tempdir.path().join(d.path); - fs::create_dir(&mount_dir) - .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); - - let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); - let mut file_mode: u32 = 0; - - // create testing directories and files - for n in 1..COUNT { - let nest_dir = mount_dir.join(format!("nested{}", n)); - fs::create_dir(&nest_dir) - .unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg)); - - for f in 1..COUNT { - let filename = nest_dir.join(format!("file{}", f)); - File::create(&filename) - .unwrap_or_else(|_| panic!("{}: failed to create file", msg)); - file_mode = filename.as_path().metadata().unwrap().permissions().mode(); - } - } - - let uid = if d.uid > 0 { - Some(Uid::from_raw(d.uid)) - } else { - None - }; - let gid = if d.gid > 0 { - Some(Gid::from_raw(d.gid)) - } else { - None - }; - let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only); - - assert!(result.is_ok()); - - assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid); - assert_eq!( - mount_dir.as_path().metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission_directory) - ); - - for n in 1..COUNT { - let nest_dir = mount_dir.join(format!("nested{}", n)); - for f in 1..COUNT { - let filename = nest_dir.join(format!("file{}", f)); - let file = Path::new(&filename); - - assert_eq!(file.metadata().unwrap().gid(), d.gid); - assert_eq!( - file.metadata().unwrap().permissions().mode(), - (file_mode | d.expected_permission_file) - ); - } - - let dir = Path::new(&nest_dir); - assert_eq!(dir.metadata().unwrap().gid(), d.gid); - assert_eq!( - dir.metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission_directory) - ); - } - } - } } diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 3ef0cc688f..6db6bbcc0b 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -59,12 +59,13 @@ use crate::device::{ use crate::image_rpc; use crate::linux_abi::*; use crate::metrics::get_metrics; -use crate::mount::{add_storages, baremount, update_ephemeral_mounts, STORAGE_HANDLERS}; +use crate::mount::baremount; use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS}; use crate::network::setup_guest_dns; use crate::pci; use crate::random; use crate::sandbox::Sandbox; +use crate::storage::{add_storages, update_ephemeral_mounts, STORAGE_HANDLERS}; use crate::version::{AGENT_VERSION, API_VERSION}; use crate::AGENT_CONFIG; diff --git a/src/agent/src/storage/bind_watcher_handler.rs b/src/agent/src/storage/bind_watcher_handler.rs new file mode 100644 index 0000000000..44f7094e8b --- /dev/null +++ b/src/agent/src/storage/bind_watcher_handler.rs @@ -0,0 +1,36 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use protocols::agent::Storage; +use std::iter; +use tracing::instrument; + +use crate::sandbox::StorageDeviceObject; +use crate::storage::{new_device, StorageContext, StorageHandler}; + +#[derive(Debug)] +pub struct BindWatcherHandler {} + +#[async_trait::async_trait] +impl StorageHandler for BindWatcherHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if let Some(cid) = ctx.cid { + ctx.sandbox + .lock() + .await + .bind_watcher + .add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger) + 
+                .await?;
+        }
+        new_device("".to_string())
+    }
+}
diff --git a/src/agent/src/storage/block_handler.rs b/src/agent/src/storage/block_handler.rs
new file mode 100644
index 0000000000..7d676211b0
--- /dev/null
+++ b/src/agent/src/storage/block_handler.rs
@@ -0,0 +1,145 @@
+// Copyright (c) 2019 Ant Financial
+// Copyright (c) 2023 Alibaba Cloud
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::fs;
+use std::os::unix::fs::PermissionsExt;
+use std::path::Path;
+use std::str::FromStr;
+
+use anyhow::{anyhow, Context, Result};
+use protocols::agent::Storage;
+use tracing::instrument;
+
+use crate::device::{
+    get_scsi_device_name, get_virtio_blk_pci_device_name, get_virtio_mmio_device_name,
+    wait_for_pmem_device,
+};
+use crate::pci;
+use crate::sandbox::StorageDeviceObject;
+use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler};
+#[cfg(target_arch = "s390x")]
+use crate::{ccw, device::get_virtio_blk_ccw_device_name};
+
+#[derive(Debug)]
+pub struct VirtioBlkMmioHandler {}
+
+#[async_trait::async_trait]
+impl StorageHandler for VirtioBlkMmioHandler {
+    #[instrument]
+    async fn create_device(
+        &self,
+        storage: Storage,
+        ctx: &mut StorageContext,
+    ) -> Result<StorageDeviceObject> {
+        if !Path::new(&storage.source).exists() {
+            get_virtio_mmio_device_name(ctx.sandbox, &storage.source)
+                .await
+                .context("failed to get mmio device name")?;
+        }
+        let path = common_storage_handler(ctx.logger, &storage)?;
+        new_device(path)
+    }
+}
+
+#[derive(Debug)]
+pub struct VirtioBlkPciHandler {}
+
+#[async_trait::async_trait]
+impl StorageHandler for VirtioBlkPciHandler {
+    #[instrument]
+    async fn create_device(
+        &self,
+        mut storage: Storage,
+        ctx: &mut StorageContext,
+    ) -> Result<StorageDeviceObject> {
+        // If hot-plugged, get the device node path based on the PCI path
+        // otherwise use the virt path provided in Storage Source
+        if storage.source.starts_with("/dev") {
+            let metadata = fs::metadata(&storage.source)
+                .context(format!("get metadata on file {:?}", &storage.source))?;
+            let mode = metadata.permissions().mode();
+            if mode & libc::S_IFBLK == 0 {
+                return Err(anyhow!("Invalid device {}", &storage.source));
+            }
+        } else {
+            let pcipath = pci::Path::from_str(&storage.source)?;
+            let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?;
+            storage.source = dev_path;
+        }
+
+        let path = common_storage_handler(ctx.logger, &storage)?;
+        new_device(path)
+    }
+}
+
+#[derive(Debug)]
+pub struct VirtioBlkCcwHandler {}
+
+#[async_trait::async_trait]
+impl StorageHandler for VirtioBlkCcwHandler {
+    #[cfg(target_arch = "s390x")]
+    #[instrument]
+    async fn create_device(
+        &self,
+        mut storage: Storage,
+        ctx: &mut StorageContext,
+    ) -> Result<StorageDeviceObject> {
+        let ccw_device = ccw::Device::from_str(&storage.source)?;
+        let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?;
+        storage.source = dev_path;
+        let path = common_storage_handler(ctx.logger, &storage)?;
+        new_device(path)
+    }
+
+    #[cfg(not(target_arch = "s390x"))]
+    #[instrument]
+    async fn create_device(
+        &self,
+        _storage: Storage,
+        _ctx: &mut StorageContext,
+    ) -> Result<StorageDeviceObject> {
+        Err(anyhow!("CCW is only supported on s390x"))
+    }
+}
+
+#[derive(Debug)]
+pub struct ScsiHandler {}
+
+#[async_trait::async_trait]
+impl StorageHandler for ScsiHandler {
+    #[instrument]
+    async fn create_device(
+        &self,
+        mut storage: Storage,
+        ctx: &mut StorageContext,
+    ) -> Result<StorageDeviceObject> {
+        // Retrieve the device path from SCSI address.
+ let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?; + storage.source = dev_path; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct PmemHandler {} + +#[async_trait::async_trait] +impl StorageHandler for PmemHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // Retrieve the device for pmem storage + wait_for_pmem_device(ctx.sandbox, &storage.source).await?; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} diff --git a/src/agent/src/storage/ephemeral_handler.rs b/src/agent/src/storage/ephemeral_handler.rs new file mode 100644 index 0000000000..38ceb8f556 --- /dev/null +++ b/src/agent/src/storage/ephemeral_handler.rs @@ -0,0 +1,293 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::fs::OpenOptions; +use std::io::Write; +use std::os::unix::fs::{MetadataExt, PermissionsExt}; +use std::path::Path; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::mount::parse_mount_options; +use kata_types::mount::KATA_MOUNT_OPTION_FS_GID; +use nix::unistd::Gid; +use protocols::agent::Storage; +use slog::Logger; +use tokio::sync::Mutex; +use tracing::instrument; + +use crate::device::{DRIVER_EPHEMERAL_TYPE, FS_TYPE_HUGETLB}; +use crate::mount::baremount; +use crate::sandbox::{Sandbox, StorageDeviceObject}; +use crate::storage::{ + common_storage_handler, new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID, +}; + +const FS_GID_EQ: &str = "fsgid="; +const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages"; + +#[derive(Debug)] +pub struct EphemeralHandler {} + +#[async_trait::async_trait] +impl StorageHandler for EphemeralHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // hugetlbfs + if storage.fstype == FS_TYPE_HUGETLB { + info!(ctx.logger, "handle hugetlbfs storage"); + // Allocate hugepages before mount + // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages + // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + // options eg "pagesize=2097152,size=524288000"(2M, 500M) + Self::allocate_hugepages(ctx.logger, &storage.options.to_vec()) + .context("allocate hugepages")?; + common_storage_handler(ctx.logger, &storage)?; + } else if !storage.options.is_empty() { + // By now we only support one option field: "fsGroup" which + // isn't an valid mount option, thus we should remove it when + // do mount. + let opts = parse_options(&storage.options); + storage.options = Default::default(); + common_storage_handler(ctx.logger, &storage)?; + + // ephemeral_storage didn't support mount options except fsGroup. 
+ if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; + + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; + + let meta = fs::metadata(&storage.mount_point)?; + let mut permission = meta.permissions(); + + let o_mode = meta.mode() | MODE_SETGID; + permission.set_mode(o_mode); + fs::set_permissions(&storage.mount_point, permission)?; + } + } else { + common_storage_handler(ctx.logger, &storage)?; + } + + new_device("".to_string()) + } +} + +impl EphemeralHandler { + // Allocate hugepages by writing to sysfs + fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { + info!(logger, "mounting hugePages storage options: {:?}", options); + + let (pagesize, size) = Self::get_pagesize_and_size_from_option(options) + .context(format!("parse mount options: {:?}", &options))?; + + info!( + logger, + "allocate hugepages. pageSize: {}, size: {}", pagesize, size + ); + + // sysfs entry is always of the form hugepages-${pagesize}kB + // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt + let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) + .join(format!("hugepages-{}kB", pagesize / 1024)) + .join("nr_hugepages"); + + // write numpages to nr_hugepages file. + let numpages = format!("{}", size / pagesize); + info!(logger, "write {} pages to {:?}", &numpages, &path); + + let mut file = OpenOptions::new() + .write(true) + .open(&path) + .context(format!("open nr_hugepages directory {:?}", &path))?; + + file.write_all(numpages.as_bytes()) + .context(format!("write nr_hugepages failed: {:?}", &path))?; + + // Even if the write succeeds, the kernel isn't guaranteed to be + // able to allocate all the pages we requested. Verify that it + // did. + let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; + let allocated = verify + .trim_end() + .parse::() + .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; + if allocated != size / pagesize { + return Err(anyhow!( + "Only allocated {} of {} hugepages of size {}", + allocated, + numpages, + pagesize + )); + } + + Ok(()) + } + + // Parse filesystem options string to retrieve hugepage details + // options eg "pagesize=2048,size=107374182" + fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { + let mut pagesize_str: Option<&str> = None; + let mut size_str: Option<&str> = None; + + for option in options { + let vars: Vec<&str> = option.trim().split(',').collect(); + + for var in vars { + if let Some(stripped) = var.strip_prefix("pagesize=") { + pagesize_str = Some(stripped); + } else if let Some(stripped) = var.strip_prefix("size=") { + size_str = Some(stripped); + } + + if pagesize_str.is_some() && size_str.is_some() { + break; + } + } + } + + if pagesize_str.is_none() || size_str.is_none() { + return Err(anyhow!("no pagesize/size options found")); + } + + let pagesize = pagesize_str + .unwrap() + .parse::() + .context(format!("parse pagesize: {:?}", &pagesize_str))?; + let size = size_str + .unwrap() + .parse::() + .context(format!("parse size: {:?}", &pagesize_str))?; + + Ok((pagesize, size)) + } +} + +// update_ephemeral_mounts takes a list of ephemeral mounts and remounts them +// with mount options passed by the caller +#[instrument] +pub async fn update_ephemeral_mounts( + logger: Logger, + storages: &[Storage], + _sandbox: &Arc>, +) -> Result<()> { + for storage in storages { + let handler_name = &storage.driver; + let logger = logger.new(o!( + "msg" => "updating tmpfs storage", + 
"subsystem" => "storage", + "storage-type" => handler_name.to_owned())); + + match handler_name.as_str() { + DRIVER_EPHEMERAL_TYPE => { + fs::create_dir_all(&storage.mount_point)?; + + if storage.options.is_empty() { + continue; + } else { + // assume that fsGid has already been set + let mount_path = Path::new(&storage.mount_point); + let src_path = Path::new(&storage.source); + let opts: Vec<&String> = storage + .options + .iter() + .filter(|&opt| !opt.starts_with(FS_GID_EQ)) + .collect(); + let (flags, options) = parse_mount_options(&opts)?; + + info!(logger, "mounting storage"; + "mount-source" => src_path.display(), + "mount-destination" => mount_path.display(), + "mount-fstype" => storage.fstype.as_str(), + "mount-options" => options.as_str(), + ); + + baremount( + src_path, + mount_path, + storage.fstype.as_str(), + flags, + options.as_str(), + &logger, + )?; + } + } + _ => { + return Err(anyhow!( + "Unsupported storage type for syncing mounts {}. Only ephemeral storage update is supported", + storage.driver + )); + } + }; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_pagesize_and_size_from_option() { + let expected_pagesize = 2048; + let expected_size = 107374182; + let expected = (expected_pagesize, expected_size); + + let data = vec![ + // (input, expected, is_ok) + ("size-1=107374182,pagesize-1=2048", expected, false), + ("size-1=107374182,pagesize=2048", expected, false), + ("size=107374182,pagesize-1=2048", expected, false), + ("size=107374182,pagesize=abc", expected, false), + ("size=abc,pagesize=2048", expected, false), + ("size=,pagesize=2048", expected, false), + ("size=107374182,pagesize=", expected, false), + ("size=107374182,pagesize=2048", expected, true), + ("pagesize=2048,size=107374182", expected, true), + ("foo=bar,pagesize=2048,size=107374182", expected, true), + ( + "foo=bar,pagesize=2048,foo1=bar1,size=107374182", + expected, + true, + ), + ( + "pagesize=2048,foo1=bar1,foo=bar,size=107374182", + expected, + true, + ), + ( + "foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2", + expected, + true, + ), + ( + "foo=bar,size=107374182,foo1=bar1,pagesize=2048", + expected, + true, + ), + ]; + + for case in data { + let input = case.0; + let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]); + + let is_ok = case.2; + if is_ok { + let expected = case.1; + let (pagesize, size) = r.unwrap(); + assert_eq!(expected.0, pagesize); + assert_eq!(expected.1, size); + } else { + assert!(r.is_err()); + } + } + } +} diff --git a/src/agent/src/storage/fs_handler.rs b/src/agent/src/storage/fs_handler.rs new file mode 100644 index 0000000000..97f3cb2589 --- /dev/null +++ b/src/agent/src/storage/fs_handler.rs @@ -0,0 +1,88 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::path::Path; + +use anyhow::{anyhow, Context, Result}; +use protocols::agent::Storage; +use tracing::instrument; + +use crate::sandbox::StorageDeviceObject; +use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; + +#[derive(Debug)] +pub struct OverlayfsHandler {} + +#[async_trait::async_trait] +impl StorageHandler for OverlayfsHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if storage + .options + .iter() + .any(|e| e == "io.katacontainers.fs-opt.overlay-rw") + { + let cid = ctx + .cid + .clone() + .ok_or_else(|| anyhow!("No 
container id in rw overlay"))?; + let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid); + let work = cpath.join("work"); + let upper = cpath.join("upper"); + + fs::create_dir_all(&work).context("Creating overlay work directory")?; + fs::create_dir_all(&upper).context("Creating overlay upper directory")?; + + storage.fstype = "overlay".into(); + storage + .options + .push(format!("upperdir={}", upper.to_string_lossy())); + storage + .options + .push(format!("workdir={}", work.to_string_lossy())); + } + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct Virtio9pHandler {} + +#[async_trait::async_trait] +impl StorageHandler for Virtio9pHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct VirtioFsHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioFsHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} diff --git a/src/agent/src/storage/local_handler.rs b/src/agent/src/storage/local_handler.rs new file mode 100644 index 0000000000..0ff6f26f8e --- /dev/null +++ b/src/agent/src/storage/local_handler.rs @@ -0,0 +1,61 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::os::unix::fs::PermissionsExt; + +use anyhow::{Context, Result}; +use kata_types::mount::KATA_MOUNT_OPTION_FS_GID; +use nix::unistd::Gid; +use protocols::agent::Storage; +use tracing::instrument; + +use crate::sandbox::StorageDeviceObject; +use crate::storage::{new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID}; + +#[derive(Debug)] +pub struct LocalHandler {} + +#[async_trait::async_trait] +impl StorageHandler for LocalHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + _ctx: &mut StorageContext, + ) -> Result { + fs::create_dir_all(&storage.mount_point).context(format!( + "failed to create dir all {:?}", + &storage.mount_point + ))?; + + let opts = parse_options(&storage.options); + + let mut need_set_fsgid = false; + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; + + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; + need_set_fsgid = true; + } + + if let Some(mode) = opts.get("mode") { + let mut permission = fs::metadata(&storage.mount_point)?.permissions(); + + let mut o_mode = u32::from_str_radix(mode, 8)?; + + if need_set_fsgid { + // set SetGid mode mask. 
+                o_mode |= MODE_SETGID;
+            }
+            permission.set_mode(o_mode);
+
+            fs::set_permissions(&storage.mount_point, permission)?;
+        }
+
+        new_device("".to_string())
+    }
+}
diff --git a/src/agent/src/storage/mod.rs b/src/agent/src/storage/mod.rs
new file mode 100644
index 0000000000..11bf170c6a
--- /dev/null
+++ b/src/agent/src/storage/mod.rs
@@ -0,0 +1,648 @@
+// Copyright (c) 2019 Ant Financial
+// Copyright (c) 2023 Alibaba Cloud
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::collections::HashMap;
+use std::fs;
+use std::os::unix::fs::{MetadataExt, PermissionsExt};
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::{anyhow, Context, Result};
+use kata_sys_util::mount::{create_mount_destination, parse_mount_options};
+use kata_types::mount::{
+    StorageDeviceGeneric, StorageHandlerManager, KATA_SHAREDFS_GUEST_PREMOUNT_TAG,
+};
+use nix::unistd::{Gid, Uid};
+use protocols::agent::Storage;
+use protocols::types::FSGroupChangePolicy;
+use slog::Logger;
+use tokio::sync::Mutex;
+use tracing::instrument;
+
+use self::bind_watcher_handler::BindWatcherHandler;
+use self::block_handler::{PmemHandler, ScsiHandler, VirtioBlkMmioHandler, VirtioBlkPciHandler};
+use self::ephemeral_handler::EphemeralHandler;
+use self::fs_handler::{OverlayfsHandler, Virtio9pHandler, VirtioFsHandler};
+use self::local_handler::LocalHandler;
+use crate::device::{
+    DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_EPHEMERAL_TYPE,
+    DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE,
+    DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE,
+};
+use crate::mount::{baremount, is_mounted};
+use crate::sandbox::{Sandbox, StorageDeviceObject};
+
+pub use self::ephemeral_handler::update_ephemeral_mounts;
+
+mod bind_watcher_handler;
+mod block_handler;
+mod ephemeral_handler;
+mod fs_handler;
+mod local_handler;
+
+const RW_MASK: u32 = 0o660;
+const RO_MASK: u32 = 0o440;
+const EXEC_MASK: u32 = 0o110;
+const MODE_SETGID: u32 = 0o2000;
+
+#[derive(Debug)]
+pub struct StorageContext<'a> {
+    cid: &'a Option<String>,
+    logger: &'a Logger,
+    sandbox: &'a Arc<Mutex<Sandbox>>,
+}
+
+/// Trait object to handle storage device.
+#[async_trait::async_trait]
+pub trait StorageHandler: Send + Sync {
+    /// Create a new storage device.
+    async fn create_device(
+        &self,
+        storage: Storage,
+        ctx: &mut StorageContext,
+    ) -> Result<StorageDeviceObject>;
+}
+
+#[rustfmt::skip]
+lazy_static! {
+    pub static ref STORAGE_HANDLERS: StorageHandlerManager<Arc<dyn StorageHandler>> = {
+        let mut manager: StorageHandlerManager<Arc<dyn StorageHandler>> = StorageHandlerManager::new();
+        manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap();
+        #[cfg(target_arch = "s390x")]
+        manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(self::block_handler::VirtioBlkCcwHandler{})).unwrap();
+        manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap();
+        manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap();
+        manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap();
+        manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap();
+        manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap();
+        manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap();
+        manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap();
+        manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap();
+        manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap();
+        manager
+    };
+}
+
+// add_storages takes a list of storages passed by the caller, and perform the
+// associated operations such as waiting for the device to show up, and mount
+// it to a specific location, according to the type of handler chosen, and for
+// each storage.
+#[instrument]
+pub async fn add_storages(
+    logger: Logger,
+    storages: Vec<Storage>,
+    sandbox: &Arc<Mutex<Sandbox>>,
+    cid: Option<String>,
+) -> Result<Vec<String>> {
+    let mut mount_list = Vec::new();
+
+    for storage in storages {
+        let path = storage.mount_point.clone();
+        let state = sandbox.lock().await.add_sandbox_storage(&path).await;
+        if state.ref_count().await > 1 {
+            // The device already exists.
+            continue;
+        }
+
+        if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) {
+            let logger =
+                logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone()));
+            let mut ctx = StorageContext {
+                cid: &cid,
+                logger: &logger,
+                sandbox,
+            };
+
+            match handler.create_device(storage, &mut ctx).await {
+                Ok(device) => {
+                    match sandbox
+                        .lock()
+                        .await
+                        .update_sandbox_storage(&path, device.clone())
+                    {
+                        Ok(d) => {
+                            let path = device.lock().await.path().to_string();
+                            if !path.is_empty() {
+                                mount_list.push(path.clone());
+                            }
+                            drop(d);
+                        }
+                        Err(device) => {
+                            error!(logger, "failed to update device for storage");
+                            if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await
+                            {
+                                warn!(logger, "failed to remove dummy sandbox storage {:?}", e);
+                            }
+                            device.lock().await.cleanup();
+                            return Err(anyhow!("failed to update device for storage"));
+                        }
+                    }
+                }
+                Err(e) => {
+                    error!(logger, "failed to create device for storage, error: {e:?}");
+                    if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await {
+                        warn!(logger, "failed to remove dummy sandbox storage {e:?}");
+                    }
+                    return Err(e);
+                }
+            }
+        } else {
+            return Err(anyhow!(
+                "Failed to find the storage handler {}",
+                storage.driver
+            ));
+        }
+    }
+
+    Ok(mount_list)
+}
+
+pub(crate) fn new_device(path: String) -> Result<StorageDeviceObject> {
+    let device = StorageDeviceGeneric::new(path);
+    Ok(Arc::new(Mutex::new(device)))
+}
+
+#[instrument]
+pub(crate) fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result<String> {
+    mount_storage(logger, storage)?;
+    set_ownership(logger, storage)?;
+    Ok(storage.mount_point.clone())
+}
+
+// mount_storage performs the mount described by the storage structure.
+#[instrument]
+fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "mount"));
+
+    // There's a special mechanism to create mountpoint from a `sharedfs` instance before
+    // starting the kata-agent. Check for such cases.
+    if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)? {
+        warn!(
+            logger,
+            "{} already mounted on {}, ignoring...",
+            KATA_SHAREDFS_GUEST_PREMOUNT_TAG,
+            &storage.mount_point
+        );
+        return Ok(());
+    }
+
+    let (flags, options) = parse_mount_options(&storage.options)?;
+    let mount_path = Path::new(&storage.mount_point);
+    let src_path = Path::new(&storage.source);
+    create_mount_destination(src_path, mount_path, "", &storage.fstype)
+        .context("Could not create mountpoint")?;
+
+    info!(logger, "mounting storage";
+        "mount-source" => src_path.display(),
+        "mount-destination" => mount_path.display(),
+        "mount-fstype" => storage.fstype.as_str(),
+        "mount-options" => options.as_str(),
+    );
+
+    baremount(
+        src_path,
+        mount_path,
+        storage.fstype.as_str(),
+        flags,
+        options.as_str(),
+        &logger,
+    )
+}
+
+#[instrument]
+pub(crate) fn parse_options(option_list: &[String]) -> HashMap<String, String> {
+    let mut options = HashMap::new();
+    for opt in option_list {
+        let fields: Vec<&str> = opt.split('=').collect();
+        if fields.len() == 2 {
+            options.insert(fields[0].to_string(), fields[1].to_string());
+        }
+    }
+    options
+}
+
+#[instrument]
+pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> {
+    let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership"));
+
+    // If fsGroup is not set, skip performing ownership change
+    if storage.fs_group.is_none() {
+        return Ok(());
+    }
+
+    let fs_group = storage.fs_group();
+    let read_only = storage.options.contains(&String::from("ro"));
+    let mount_path = Path::new(&storage.mount_point);
+    let metadata = mount_path.metadata().map_err(|err| {
+        error!(logger, "failed to obtain metadata for mount path";
+            "mount-path" => mount_path.to_str(),
+            "error" => err.to_string(),
+        );
+        err
+    })?;
+
+    if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch.into()
+        && metadata.gid() == fs_group.group_id
+    {
+        let mut mask = if read_only { RO_MASK } else { RW_MASK };
+        mask |= EXEC_MASK;
+
+        // With fsGroup change policy to OnRootMismatch, if the current
+        // gid of the mount path root directory matches the desired gid
+        // and the current permission of mount path root directory is correct,
+        // then ownership change will be skipped.
+        let current_mode = metadata.permissions().mode();
+        if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) {
+            info!(logger, "skipping ownership change for volume";
+                "mount-path" => mount_path.to_str(),
+                "fs-group" => fs_group.group_id.to_string(),
+            );
+            return Ok(());
+        }
+    }
+
+    info!(logger, "performing recursive ownership change";
+        "mount-path" => mount_path.to_str(),
+        "fs-group" => fs_group.group_id.to_string(),
+    );
+    recursive_ownership_change(
+        mount_path,
+        None,
+        Some(Gid::from_raw(fs_group.group_id)),
+        read_only,
+    )
+}
+
+#[instrument]
+pub fn recursive_ownership_change(
+    path: &Path,
+    uid: Option<Uid>,
+    gid: Option<Gid>,
+    read_only: bool,
+) -> Result<()> {
+    let mut mask = if read_only { RO_MASK } else { RW_MASK };
+    if path.is_dir() {
+        for entry in fs::read_dir(path)? {
+            recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?;
+        }
+        mask |= EXEC_MASK;
+        mask |= MODE_SETGID;
+    }
+
+    // We do not want to change the permission of the underlying file
+    // using symlink. Hence we skip symlinks from recursive ownership
+    // and permission changes.
+    if path.is_symlink() {
+        return Ok(());
+    }
+
+    nix::unistd::chown(path, uid, gid)?;
+
+    if gid.is_some() {
+        let metadata = path.metadata()?;
+        let mut permission = metadata.permissions();
+        let target_mode = metadata.mode() | mask;
+        permission.set_mode(target_mode);
+        fs::set_permissions(path, permission)?;
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use protocols::agent::FSGroup;
+    use std::fs::File;
+    use tempfile::tempdir;
+    use test_utils::{
+        skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root, TestUserType,
+    };
+
+    #[test]
+    fn test_mount_storage() {
+        #[derive(Debug)]
+        struct TestData<'a> {
+            test_user: TestUserType,
+            storage: Storage,
+            error_contains: &'a str,
+
+            make_source_dir: bool,
+            make_mount_dir: bool,
+            deny_mount_permission: bool,
+        }
+
+        impl Default for TestData<'_> {
+            fn default() -> Self {
+                TestData {
+                    test_user: TestUserType::Any,
+                    storage: Storage {
+                        mount_point: "mnt".to_string(),
+                        source: "src".to_string(),
+                        fstype: "tmpfs".to_string(),
+                        ..Default::default()
+                    },
+                    make_source_dir: true,
+                    make_mount_dir: false,
+                    deny_mount_permission: false,
+                    error_contains: "",
+                }
+            }
+        }
+
+        let tests = &[
+            TestData {
+                test_user: TestUserType::NonRootOnly,
+                error_contains: "EPERM: Operation not permitted",
+                ..Default::default()
+            },
+            TestData {
+                test_user: TestUserType::RootOnly,
+                ..Default::default()
+            },
+            TestData {
+                storage: Storage {
+                    mount_point: "mnt".to_string(),
+                    source: "src".to_string(),
+                    fstype: "bind".to_string(),
+                    ..Default::default()
+                },
+                make_source_dir: false,
+                make_mount_dir: true,
+                error_contains: "Could not create mountpoint",
+                ..Default::default()
+            },
+            TestData {
+                test_user: TestUserType::NonRootOnly,
+                deny_mount_permission: true,
+                error_contains: "Could not create mountpoint",
+                ..Default::default()
+            },
+        ];
+
+        for (i, d) in tests.iter().enumerate() {
+            let msg = format!("test[{}]: {:?}", i, d);
+
+            skip_loop_by_user!(msg, d.test_user);
+
+            let drain = slog::Discard;
+            let logger = slog::Logger::root(drain, o!());
+
+            let tempdir = tempdir().unwrap();
+
+            let source = tempdir.path().join(&d.storage.source);
+            let mount_point = tempdir.path().join(&d.storage.mount_point);
+
+            let storage = Storage {
+                source: source.to_str().unwrap().to_string(),
+                mount_point: mount_point.to_str().unwrap().to_string(),
+                ..d.storage.clone()
+            };
+
+            if d.make_source_dir {
+                fs::create_dir_all(&storage.source).unwrap();
+            }
+            if d.make_mount_dir {
+                fs::create_dir_all(&storage.mount_point).unwrap();
+            }
+
+            if d.deny_mount_permission {
+                fs::set_permissions(
+                    mount_point.parent().unwrap(),
+                    fs::Permissions::from_mode(0o000),
+                )
+                .unwrap();
+            }
+
+            let result = mount_storage(&logger, &storage);
+
+            // restore permissions so tempdir can be cleaned up
+            if d.deny_mount_permission {
+                fs::set_permissions(
+                    mount_point.parent().unwrap(),
+                    fs::Permissions::from_mode(0o755),
+                )
+                .unwrap();
+            }
+
+            if result.is_ok() {
+                nix::mount::umount(&mount_point).unwrap();
+            }
+
+            let msg = format!("{}: result: {:?}", msg, result);
+            if d.error_contains.is_empty() {
+                assert!(result.is_ok(), "{}", msg);
+            } else {
+                assert!(result.is_err(), "{}", msg);
+                let error_msg = format!("{}", result.unwrap_err());
+                assert!(error_msg.contains(d.error_contains), "{}", msg);
+            }
+        }
+    }
+
+    #[test]
+    fn test_set_ownership() {
+        skip_if_not_root!();
+
+        let logger = slog::Logger::root(slog::Discard, o!());
+
+        #[derive(Debug)]
+        struct TestData<'a> {
+            mount_path: &'a str,
+            fs_group: Option<FSGroup>,
+            read_only: bool,
+            expected_group_id: u32,
+            expected_permission: u32,
+        }
+
+        let tests = &[
+            TestData {
+                mount_path: "foo",
+                fs_group: None,
+                read_only: false,
+                expected_group_id: 0,
+                expected_permission: 0,
+            },
+            TestData {
+                mount_path: "rw_mount",
+                fs_group: Some(FSGroup {
+                    group_id: 3000,
+                    group_change_policy: FSGroupChangePolicy::Always.into(),
+                    ..Default::default()
+                }),
+                read_only: false,
+                expected_group_id: 3000,
+                expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID,
+            },
+            TestData {
+                mount_path: "ro_mount",
+                fs_group: Some(FSGroup {
+                    group_id: 3000,
+                    group_change_policy: FSGroupChangePolicy::OnRootMismatch.into(),
+                    ..Default::default()
+                }),
+                read_only: true,
+                expected_group_id: 3000,
+                expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID,
+            },
+        ];
+
+        let tempdir = tempdir().expect("failed to create tmpdir");
+
+        for (i, d) in tests.iter().enumerate() {
+            let msg = format!("test[{}]: {:?}", i, d);
+
+            let mount_dir = tempdir.path().join(d.mount_path);
+            fs::create_dir(&mount_dir)
+                .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
+
+            let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode();
+            let mut storage_data = Storage::new();
+            if d.read_only {
+                storage_data.set_options(vec!["foo".to_string(), "ro".to_string()]);
+            }
+            if let Some(fs_group) = d.fs_group.clone() {
+                storage_data.set_fs_group(fs_group);
+            }
+            storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap();
+
+            let result = set_ownership(&logger, &storage_data);
+            assert!(result.is_ok());
+
+            assert_eq!(
+                mount_dir.as_path().metadata().unwrap().gid(),
+                d.expected_group_id
+            );
+            assert_eq!(
+                mount_dir.as_path().metadata().unwrap().permissions().mode(),
+                (directory_mode | d.expected_permission)
+            );
+        }
+    }
+
+    #[test]
+    fn test_recursive_ownership_change() {
+        skip_if_not_root!();
+
+        const COUNT: usize = 5;
+
+        #[derive(Debug)]
+        struct TestData<'a> {
+            // Directory where the recursive ownership change should be performed on
+            path: &'a str,
+
+            // User ID for ownership change
+            uid: u32,
+
+            // Group ID for ownership change
+            gid: u32,
+
+            // Set when the permission should be read-only
+            read_only: bool,
+
+            // The expected permission of all directories after ownership change
+            expected_permission_directory: u32,
+
+            // The expected permission of all files after ownership change
+            expected_permission_file: u32,
+        }
+
+        let tests = &[
+            TestData {
+                path: "no_gid_change",
+                uid: 0,
+                gid: 0,
+                read_only: false,
+                expected_permission_directory: 0,
+                expected_permission_file: 0,
+            },
+            TestData {
+                path: "rw_gid_change",
+                uid: 0,
+                gid: 3000,
+                read_only: false,
+                expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID,
+                expected_permission_file: RW_MASK,
+            },
+            TestData {
+                path: "ro_gid_change",
+                uid: 0,
+                gid: 3000,
+                read_only: true,
+                expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID,
+                expected_permission_file: RO_MASK,
+            },
+        ];
+
+        let tempdir = tempdir().expect("failed to create tmpdir");
+
+        for (i, d) in tests.iter().enumerate() {
+            let msg = format!("test[{}]: {:?}", i, d);
+
+            let mount_dir = tempdir.path().join(d.path);
+            fs::create_dir(&mount_dir)
+                .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg));
panic!("{}: failed to create root directory", msg)); + + let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); + let mut file_mode: u32 = 0; + + // create testing directories and files + for n in 1..COUNT { + let nest_dir = mount_dir.join(format!("nested{}", n)); + fs::create_dir(&nest_dir) + .unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg)); + + for f in 1..COUNT { + let filename = nest_dir.join(format!("file{}", f)); + File::create(&filename) + .unwrap_or_else(|_| panic!("{}: failed to create file", msg)); + file_mode = filename.as_path().metadata().unwrap().permissions().mode(); + } + } + + let uid = if d.uid > 0 { + Some(Uid::from_raw(d.uid)) + } else { + None + }; + let gid = if d.gid > 0 { + Some(Gid::from_raw(d.gid)) + } else { + None + }; + let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only); + + assert!(result.is_ok()); + + assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid); + assert_eq!( + mount_dir.as_path().metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission_directory) + ); + + for n in 1..COUNT { + let nest_dir = mount_dir.join(format!("nested{}", n)); + for f in 1..COUNT { + let filename = nest_dir.join(format!("file{}", f)); + let file = Path::new(&filename); + + assert_eq!(file.metadata().unwrap().gid(), d.gid); + assert_eq!( + file.metadata().unwrap().permissions().mode(), + (file_mode | d.expected_permission_file) + ); + } + + let dir = Path::new(&nest_dir); + assert_eq!(dir.metadata().unwrap().gid(), d.gid); + assert_eq!( + dir.metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission_directory) + ); + } + } + } +}