From 7f12e27a68d4722fe09beb8ae194782029f1dfbb Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 7 Aug 2023 10:28:50 +0800 Subject: [PATCH 1/9] types: add more mount related constants Add more mount related constants. Signed-off-by: Jiang Liu --- src/agent/src/mount.rs | 14 +++++++------- src/libs/kata-types/src/mount.rs | 6 ++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index ca4253aea1..b941fe57be 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use kata_sys_util::mount::get_linux_mount_info; +use kata_types::mount::{KATA_MOUNT_OPTION_FS_GID, KATA_SHAREDFS_GUEST_PREMOUNT_TAG}; use nix::mount::MsFlags; use nix::unistd::{Gid, Uid}; use regex::Regex; @@ -39,10 +40,7 @@ use crate::Sandbox; use crate::{ccw, device::get_virtio_blk_ccw_device_name}; pub const TYPE_ROOTFS: &str = "rootfs"; -pub const MOUNT_GUEST_TAG: &str = "kataShared"; -// Allocating an FSGroup that owns the pod's volumes -const FS_GID: &str = "fsgid"; const FS_GID_EQ: &str = "fsgid="; const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages"; @@ -233,7 +231,7 @@ async fn ephemeral_storage_handler( let opts = parse_options(&storage.options); // ephemeral_storage didn't support mount options except fsGroup. 
- if let Some(fsgid) = opts.get(FS_GID) { + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { let gid = fsgid.parse::()?; nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; @@ -360,7 +358,7 @@ async fn local_storage_handler( let opts = parse_options(&storage.options); let mut need_set_fsgid = false; - if let Some(fsgid) = opts.get(FS_GID) { + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { let gid = fsgid.parse::()?; nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; @@ -638,10 +636,12 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> { // There's a special mechanism to create mountpoint from a `sharedfs` instance before // starting the kata-agent. Check for such cases. - if storage.source == MOUNT_GUEST_TAG && is_mounted(&storage.mount_point)? { + if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)? { warn!( logger, - "{} already mounted on {}, ignoring...", MOUNT_GUEST_TAG, &storage.mount_point + "{} already mounted on {}, ignoring...", + KATA_SHAREDFS_GUEST_PREMOUNT_TAG, + &storage.mount_point ); return Ok(()); } diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index 1c0e69d3ec..473e7d5c08 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -14,6 +14,9 @@ pub const KATA_VOLUME_TYPE_PREFIX: &str = "kata:"; /// The Mount should be ignored by the host and handled by the guest. pub const KATA_GUEST_MOUNT_PREFIX: &str = "kata:guest-mount:"; +/// The sharedfs volume is mounted by guest OS before starting the kata-agent. +pub const KATA_SHAREDFS_GUEST_PREMOUNT_TAG: &str = "kataShared"; + /// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers. 
pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral"; @@ -23,6 +26,9 @@ pub const KATA_HOST_DIR_VOLUME_TYPE: &str = "kata:hostdir"; /// KATA_MOUNT_INFO_FILE_NAME is used for the file that holds direct-volume mount info pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json"; +/// Specify `fsgid` for a volume or mount, `fsgid=1`. +pub const KATA_MOUNT_OPTION_FS_GID: &str = "fsgid"; + /// KATA_DIRECT_VOLUME_ROOT_PATH is the root path used for concatenating with the direct-volume mount info file path pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes"; From cf15777eddd8820560ca39630d8079bb415ff281 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 7 Aug 2023 16:41:07 +0800 Subject: [PATCH 2/9] agent: use create_mount_destination() from kata-sys-util Use create_mount_destination() from kata-sys-util crate to reduce redundant code. Signed-off-by: Jiang Liu --- src/agent/src/mount.rs | 59 ++++++------------------------------------ 1 file changed, 8 insertions(+), 51 deletions(-) diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index b941fe57be..f2dd9de3ab 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -15,7 +15,7 @@ use std::str::FromStr; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; -use kata_sys_util::mount::get_linux_mount_info; +use kata_sys_util::mount::{create_mount_destination, get_linux_mount_info}; use kata_types::mount::{KATA_MOUNT_OPTION_FS_GID, KATA_SHAREDFS_GUEST_PREMOUNT_TAG}; use nix::mount::MsFlags; use nix::unistd::{Gid, Uid}; @@ -646,16 +646,11 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> { return Ok(()); } + let (flags, options) = parse_mount_flags_and_options(&storage.options); let mount_path = Path::new(&storage.mount_point); let src_path = Path::new(&storage.source); - if storage.fstype == "bind" && !src_path.is_dir() { - ensure_destination_file_exists(mount_path).context("Could not create mountpoint file")?; - } else { - 
fs::create_dir_all(mount_path) - .map_err(anyhow::Error::from) - .context("Could not create mountpoint")?; - } - let (flags, options) = parse_mount_flags_and_options(storage.options.iter()); + create_mount_destination(src_path, mount_path, "", &storage.fstype) + .context("Could not create mountpoint")?; info!(logger, "mounting storage"; "mount-source" => src_path.display(), @@ -923,7 +918,7 @@ pub fn get_mount_fs_type(mount_point: &str) -> Result { } // get_mount_fs_type_from_file returns the FS type corresponding to the passed mount point and -// any error ecountered. +// any error encountered. #[instrument] pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Result { if mount_point.is_empty() { @@ -1058,37 +1053,17 @@ pub fn cgroups_mount(logger: &Logger, unified_cgroup_hierarchy: bool) -> Result< // Enable memory hierarchical account. // For more information see https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt - online_device("/sys/fs/cgroup/memory/memory.use_hierarchy")?; - Ok(()) + online_device("/sys/fs/cgroup/memory/memory.use_hierarchy") } #[instrument] -pub fn remove_mounts(mounts: &[String]) -> Result<()> { +pub fn remove_mounts + std::fmt::Debug>(mounts: &[P]) -> Result<()> { for m in mounts.iter() { - nix::mount::umount(m.as_str()).context(format!("failed to umount {:?}", m))?; + nix::mount::umount(m.as_ref()).context(format!("failed to umount {:?}", m.as_ref()))?; } Ok(()) } -#[instrument] -fn ensure_destination_file_exists(path: &Path) -> Result<()> { - if path.is_file() { - return Ok(()); - } else if path.exists() { - return Err(anyhow!("{:?} exists but is not a regular file", path)); - } - - let dir = path - .parent() - .ok_or_else(|| anyhow!("failed to find parent path for {:?}", path))?; - - fs::create_dir_all(dir).context(format!("create_dir_all {:?}", dir))?; - - fs::File::create(path).context(format!("create empty file {:?}", path))?; - - Ok(()) -} - #[instrument] fn parse_options(option_list: &[String]) -> 
HashMap { let mut options = HashMap::new(); @@ -1678,24 +1653,6 @@ mod tests { } } - #[test] - fn test_ensure_destination_file_exists() { - let dir = tempdir().expect("failed to create tmpdir"); - - let mut testfile = dir.into_path(); - testfile.push("testfile"); - - let result = ensure_destination_file_exists(&testfile); - - assert!(result.is_ok()); - assert!(testfile.exists()); - - let result = ensure_destination_file_exists(&testfile); - assert!(result.is_ok()); - - assert!(testfile.is_file()); - } - #[test] fn test_mount_storage() { #[derive(Debug)] From 6dcd164c4eb4e4b9af9672e26600ff252a4d1ff6 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 7 Aug 2023 10:26:12 +0800 Subject: [PATCH 3/9] sys-util: support more mount flags in parse_mount_options() Support more mount flags in parse_mount_options(). Signed-off-by: Jiang Liu --- src/agent/src/mount.rs | 133 +++++----------------------- src/agent/src/namespace.rs | 14 +-- src/libs/kata-sys-util/src/mount.rs | 21 +++-- 3 files changed, 40 insertions(+), 128 deletions(-) diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index f2dd9de3ab..1602d7ac43 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -15,7 +15,7 @@ use std::str::FromStr; use std::sync::Arc; use anyhow::{anyhow, Context, Result}; -use kata_sys_util::mount::{create_mount_destination, get_linux_mount_info}; +use kata_sys_util::mount::{create_mount_destination, get_linux_mount_info, parse_mount_options}; use kata_types::mount::{KATA_MOUNT_OPTION_FS_GID, KATA_SHAREDFS_GUEST_PREMOUNT_TAG}; use nix::mount::MsFlags; use nix::unistd::{Gid, Uid}; @@ -49,47 +49,6 @@ const RO_MASK: u32 = 0o440; const EXEC_MASK: u32 = 0o110; const MODE_SETGID: u32 = 0o2000; -#[rustfmt::skip] -lazy_static! 
{ - pub static ref FLAGS: HashMap<&'static str, (bool, MsFlags)> = { - let mut m = HashMap::new(); - m.insert("defaults", (false, MsFlags::empty())); - m.insert("ro", (false, MsFlags::MS_RDONLY)); - m.insert("rw", (true, MsFlags::MS_RDONLY)); - m.insert("suid", (true, MsFlags::MS_NOSUID)); - m.insert("nosuid", (false, MsFlags::MS_NOSUID)); - m.insert("dev", (true, MsFlags::MS_NODEV)); - m.insert("nodev", (false, MsFlags::MS_NODEV)); - m.insert("exec", (true, MsFlags::MS_NOEXEC)); - m.insert("noexec", (false, MsFlags::MS_NOEXEC)); - m.insert("sync", (false, MsFlags::MS_SYNCHRONOUS)); - m.insert("async", (true, MsFlags::MS_SYNCHRONOUS)); - m.insert("dirsync", (false, MsFlags::MS_DIRSYNC)); - m.insert("remount", (false, MsFlags::MS_REMOUNT)); - m.insert("mand", (false, MsFlags::MS_MANDLOCK)); - m.insert("nomand", (true, MsFlags::MS_MANDLOCK)); - m.insert("atime", (true, MsFlags::MS_NOATIME)); - m.insert("noatime", (false, MsFlags::MS_NOATIME)); - m.insert("diratime", (true, MsFlags::MS_NODIRATIME)); - m.insert("nodiratime", (false, MsFlags::MS_NODIRATIME)); - m.insert("bind", (false, MsFlags::MS_BIND)); - m.insert("rbind", (false, MsFlags::MS_BIND | MsFlags::MS_REC)); - m.insert("unbindable", (false, MsFlags::MS_UNBINDABLE)); - m.insert("runbindable", (false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC)); - m.insert("private", (false, MsFlags::MS_PRIVATE)); - m.insert("rprivate", (false, MsFlags::MS_PRIVATE | MsFlags::MS_REC)); - m.insert("shared", (false, MsFlags::MS_SHARED)); - m.insert("rshared", (false, MsFlags::MS_SHARED | MsFlags::MS_REC)); - m.insert("slave", (false, MsFlags::MS_SLAVE)); - m.insert("rslave", (false, MsFlags::MS_SLAVE | MsFlags::MS_REC)); - m.insert("relatime", (false, MsFlags::MS_RELATIME)); - m.insert("norelatime", (true, MsFlags::MS_RELATIME)); - m.insert("strictatime", (false, MsFlags::MS_STRICTATIME)); - m.insert("nostrictatime", (true, MsFlags::MS_STRICTATIME)); - m - }; -} - #[derive(Debug, PartialEq)] pub struct InitMount<'a> { fstype: &'a 
str, @@ -168,16 +127,12 @@ pub fn baremount( } let destination_str = destination.to_string_lossy(); - let mut already_mounted = false; if let Ok(m) = get_linux_mount_info(destination_str.deref()) { if m.fs_type == fs_type { - already_mounted = true; + slog_info!(logger, "{source:?} is already mounted at {destination:?}"); + return Ok(()); } } - if already_mounted { - slog_info!(logger, "{source:?} is already mounted at {destination:?}"); - return Ok(()); - } info!( logger, @@ -275,11 +230,12 @@ pub async fn update_ephemeral_mounts( // assume that fsGid has already been set let mount_path = Path::new(&storage.mount_point); let src_path = Path::new(&storage.source); - let opts = storage + let opts: Vec<&String> = storage .options .iter() - .filter(|&opt| !opt.starts_with(FS_GID_EQ)); - let (flags, options) = parse_mount_flags_and_options(opts); + .filter(|&opt| !opt.starts_with(FS_GID_EQ)) + .collect(); + let (flags, options) = parse_mount_options(&opts)?; info!(logger, "mounting storage"; "mount-source" => src_path.display(), @@ -646,7 +602,7 @@ fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> { return Ok(()); } - let (flags, options) = parse_mount_flags_and_options(&storage.options); + let (flags, options) = parse_mount_options(&storage.options)?; let mount_path = Path::new(&storage.mount_point); let src_path = Path::new(&storage.source); create_mount_destination(src_path, mount_path, "", &storage.fstype) @@ -765,42 +721,6 @@ pub fn is_mounted(mount_point: &str) -> Result { Ok(found) } -#[instrument] -fn parse_mount_flags_and_options( - opts_iter: impl Iterator> + Debug, -) -> (MsFlags, String) { - let mut flags = MsFlags::empty(); - let mut options: String = "".to_string(); - - for opt in opts_iter { - let opt = opt.as_ref(); - if !opt.is_empty() { - match FLAGS.get(opt) { - Some(x) => { - let (clear, f) = *x; - if clear { - flags &= !f; - } else { - flags |= f; - } - } - None => { - if opt.starts_with("io.katacontainers.") { - continue; - } - - 
if !options.is_empty() { - options.push_str(format!(",{}", opt).as_str()); - } else { - options.push_str(opt); - } - } - }; - } - } - (flags, options) -} - // add_storages takes a list of storages passed by the caller, and perform the // associated operations such as waiting for the device to show up, and mount // it to a specific location, according to the type of handler chosen, and for @@ -878,27 +798,23 @@ pub async fn add_storages( #[instrument] fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> { - let (flags, options) = parse_mount_flags_and_options(m.options.iter()); - fs::create_dir_all(m.dest).context("could not create directory")?; + let (flags, options) = parse_mount_options(&m.options)?; let source = Path::new(m.src); let dest = Path::new(m.dest); baremount(source, dest, m.fstype, flags, &options, logger).or_else(|e| { - if m.src != "dev" { - return Err(e); + if m.src == "dev" { + error!( + logger, + "Could not mount filesystem from {} to {}", m.src, m.dest + ); + Ok(()) + } else { + Err(e) } - - error!( - logger, - "Could not mount filesystem from {} to {}", m.src, m.dest - ); - - Ok(()) - })?; - - Ok(()) + }) } #[instrument] @@ -932,13 +848,10 @@ pub fn get_mount_fs_type_from_file(mount_file: &str, mount_point: &str) -> Resul // Read the file line by line using the lines() iterator from std::io::BufRead. 
for (_index, line) in content.lines().enumerate() { - let capes = match re.captures(line) { - Some(c) => c, - None => continue, - }; - - if capes.len() > 1 { - return Ok(capes[1].to_string()); + if let Some(capes) = re.captures(line) { + if capes.len() > 1 { + return Ok(capes[1].to_string()); + } } } @@ -1971,7 +1884,7 @@ mod tests { for (i, d) in tests.iter().enumerate() { let msg = format!("test[{}]: {:?}", i, d); - let result = parse_mount_flags_and_options(d.options_vec.iter()); + let result = parse_mount_options(&d.options_vec)?; let msg = format!("{}: result: {:?}", msg, result); diff --git a/src/agent/src/namespace.rs b/src/agent/src/namespace.rs index a3ad266162..bf24cd1048 100644 --- a/src/agent/src/namespace.rs +++ b/src/agent/src/namespace.rs @@ -7,14 +7,14 @@ use anyhow::{anyhow, Result}; use nix::mount::MsFlags; use nix::sched::{unshare, CloneFlags}; use nix::unistd::{getpid, gettid}; +use slog::Logger; use std::fmt; use std::fs; use std::fs::File; use std::path::{Path, PathBuf}; use tracing::instrument; -use crate::mount::{baremount, FLAGS}; -use slog::Logger; +use crate::mount::baremount; const PERSISTENT_NS_DIR: &str = "/var/run/sandbox-ns"; pub const NSTYPEIPC: &str = "ipc"; @@ -116,15 +116,7 @@ impl Namespace { // Bind mount the new namespace from the current thread onto the mount point to persist it. 
let mut flags = MsFlags::empty(); - - if let Some(x) = FLAGS.get("rbind") { - let (clear, f) = *x; - if clear { - flags &= !f; - } else { - flags |= f; - } - }; + flags |= MsFlags::MS_BIND | MsFlags::MS_REC; baremount(source, destination, "none", flags, "", &logger).map_err(|e| { anyhow!( diff --git a/src/libs/kata-sys-util/src/mount.rs b/src/libs/kata-sys-util/src/mount.rs index 522ce3acb1..e649ab436d 100644 --- a/src/libs/kata-sys-util/src/mount.rs +++ b/src/libs/kata-sys-util/src/mount.rs @@ -390,19 +390,17 @@ fn do_rebind_mount>(path: P, readonly: bool, flags: MsFlags) -> R } /// Take fstab style mount options and parses them for use with a standard mount() syscall. -fn parse_mount_options(options: &[String]) -> Result<(MsFlags, String)> { +pub fn parse_mount_options>(options: &[T]) -> Result<(MsFlags, String)> { let mut flags: MsFlags = MsFlags::empty(); let mut data: Vec = Vec::new(); for opt in options.iter() { - if opt == "defaults" { - continue; - } else if opt == "loop" { + if opt.as_ref() == "loop" { return Err(Error::InvalidMountOption("loop".to_string())); - } else if let Some(v) = parse_mount_flags(flags, opt) { + } else if let Some(v) = parse_mount_flags(flags, opt.as_ref()) { flags = v; } else { - data.push(opt.clone()); + data.push(opt.as_ref().to_string()); } } @@ -441,6 +439,7 @@ fn parse_mount_flags(mut flags: MsFlags, flag_str: &str) -> Option { // overridden by subsequent options, as in the option line users,exec,dev,suid). 
match flag_str { // Clear flags + "defaults" => {} "async" => flags &= !MsFlags::MS_SYNCHRONOUS, "atime" => flags &= !MsFlags::MS_NOATIME, "dev" => flags &= !MsFlags::MS_NODEV, @@ -464,6 +463,14 @@ fn parse_mount_flags(mut flags: MsFlags, flag_str: &str) -> Option { "noexec" => flags |= MsFlags::MS_NOEXEC, "nosuid" => flags |= MsFlags::MS_NOSUID, "rbind" => flags |= MsFlags::MS_BIND | MsFlags::MS_REC, + "unbindable" => flags |= MsFlags::MS_UNBINDABLE, + "runbindable" => flags |= MsFlags::MS_UNBINDABLE | MsFlags::MS_REC, + "private" => flags |= MsFlags::MS_PRIVATE, + "rprivate" => flags |= MsFlags::MS_PRIVATE | MsFlags::MS_REC, + "shared" => flags |= MsFlags::MS_SHARED, + "rshared" => flags |= MsFlags::MS_SHARED | MsFlags::MS_REC, + "slave" => flags |= MsFlags::MS_SLAVE, + "rslave" => flags |= MsFlags::MS_SLAVE | MsFlags::MS_REC, "relatime" => flags |= MsFlags::MS_RELATIME, "remount" => flags |= MsFlags::MS_REMOUNT, "ro" => flags |= MsFlags::MS_RDONLY, @@ -1030,7 +1037,7 @@ mod tests { #[test] fn test_parse_mount_options() { - let options = vec![]; + let options: Vec<&str> = vec![]; let (flags, data) = parse_mount_options(&options).unwrap(); assert!(flags.is_empty()); assert!(data.is_empty()); From 0d5a9eaeff4d794879ee0eb5add77dbfc44c8f02 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Tue, 8 Aug 2023 21:52:02 +0800 Subject: [PATCH 4/9] agent: simplify the way to manage storage object Simplify the way to manage storage objects, and introduce StorageStateCommon structures for coming extensions. 
Signed-off-by: Jiang Liu --- src/agent/src/mount.rs | 8 +- src/agent/src/rpc.rs | 10 +- src/agent/src/sandbox.rs | 169 +++++++++++++++++-------------- src/libs/kata-types/src/mount.rs | 52 ++++++++++ 4 files changed, 154 insertions(+), 85 deletions(-) diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index 1602d7ac43..73fa9b183d 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -741,8 +741,8 @@ pub async fn add_storages( { let mut sb = sandbox.lock().await; - let new_storage = sb.set_sandbox_storage(&storage.mount_point); - if !new_storage { + let state = sb.add_sandbox_storage(&storage.mount_point).await; + if state.ref_count().await > 1 { continue; } } @@ -780,7 +780,7 @@ pub async fn add_storages( "add_storages failed, storage: {:?}, error: {:?} ", storage, e ); let mut sb = sandbox.lock().await; - if let Err(e) = sb.unset_sandbox_storage(&storage.mount_point) { + if let Err(e) = sb.remove_sandbox_storage(&storage.mount_point).await { warn!(logger, "fail to unset sandbox storage {:?}", e); } return Err(e); @@ -1884,7 +1884,7 @@ mod tests { for (i, d) in tests.iter().enumerate() { let msg = format!("test[{}]: {:?}", i, d); - let result = parse_mount_options(&d.options_vec)?; + let result = parse_mount_options(&d.options_vec).unwrap(); let msg = format!("{}: result: {:?}", msg, result); diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index effedb0b13..12bce34139 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -296,7 +296,7 @@ impl AgentService { if let Err(e) = ctr.destroy().await { error!(sl(), "failed to destroy container: {:?}", e); } - if let Err(e) = remove_container_resources(&mut s, &cid) { + if let Err(e) = remove_container_resources(&mut s, &cid).await { error!(sl(), "failed to remove container resources: {:?}", e); } return Err(err); @@ -348,7 +348,7 @@ impl AgentService { .ok_or_else(|| anyhow!("Invalid container id"))? 
.destroy() .await?; - remove_container_resources(&mut sandbox, &cid)?; + remove_container_resources(&mut sandbox, &cid).await?; return Ok(()); } @@ -370,7 +370,7 @@ impl AgentService { .await .map_err(|_| anyhow!(nix::Error::ETIME))???; - remove_container_resources(&mut *self.sandbox.lock().await, &cid) + remove_container_resources(&mut *self.sandbox.lock().await, &cid).await } #[instrument] @@ -1780,7 +1780,7 @@ fn update_container_namespaces( Ok(()) } -fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> { +async fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> { let mut cmounts: Vec = vec![]; // Find the sandbox storage used by this container @@ -1794,7 +1794,7 @@ fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result<()> { } for m in cmounts.iter() { - if let Err(err) = sandbox.unset_and_remove_sandbox_storage(m) { + if let Err(err) = sandbox.remove_sandbox_storage(m).await { error!( sl(), "failed to unset_and_remove_sandbox_storage for container {}, error: {:?}", diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index 473c7cee7f..7ffd81b56b 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // +use std::collections::hash_map::Entry; use std::collections::HashMap; use std::fs; use std::os::unix::fs::PermissionsExt; @@ -13,6 +14,7 @@ use std::{thread, time}; use anyhow::{anyhow, Context, Result}; use kata_types::cpu::CpuSet; +use kata_types::mount::StorageDeviceGeneric; use libc::pid_t; use oci::{Hook, Hooks}; use protocols::agent::OnlineCPUMemRequest; @@ -28,7 +30,7 @@ use tokio::sync::Mutex; use tracing::instrument; use crate::linux_abi::*; -use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS}; +use crate::mount::{get_mount_fs_type, is_mounted, remove_mounts, TYPE_ROOTFS}; use crate::namespace::Namespace; use crate::netlink::Handle; use crate::network::Network; @@ -40,6 +42,31 @@ pub const 
ERR_INVALID_CONTAINER_ID: &str = "Invalid container id"; type UeventWatcher = (Box, oneshot::Sender); +#[derive(Clone, Debug)] +pub struct StorageState { + inner: Arc>, +} + +impl StorageState { + fn new() -> Self { + StorageState { + inner: Arc::new(Mutex::new(StorageDeviceGeneric::new())), + } + } + + pub async fn ref_count(&self) -> u32 { + self.inner.lock().await.ref_count() + } + + async fn inc_ref_count(&self) { + self.inner.lock().await.inc_ref_count() + } + + async fn dec_and_test_ref_count(&self) -> bool { + self.inner.lock().await.dec_and_test_ref_count() + } +} + #[derive(Debug)] pub struct Sandbox { pub logger: Logger, @@ -54,7 +81,7 @@ pub struct Sandbox { pub shared_utsns: Namespace, pub shared_ipcns: Namespace, pub sandbox_pidns: Option, - pub storages: HashMap, + pub storages: HashMap, pub running: bool, pub no_pivot_root: bool, pub sender: Option>, @@ -100,52 +127,38 @@ impl Sandbox { }) } - // set_sandbox_storage sets the sandbox level reference - // counter for the sandbox storage. - // This method also returns a boolean to let - // callers know if the storage already existed or not. - // It will return true if storage is new. + /// Add a new storage object or increase reference count of existing one. + /// The caller may detect new storage object by checking `StorageState.refcount == 1`. #[instrument] - pub fn set_sandbox_storage(&mut self, path: &str) -> bool { - match self.storages.get_mut(path) { - None => { - self.storages.insert(path.to_string(), 1); - true + pub async fn add_sandbox_storage(&mut self, path: &str) -> StorageState { + match self.storages.entry(path.to_string()) { + Entry::Occupied(e) => { + let state = e.get().clone(); + state.inc_ref_count().await; + state } - Some(count) => { - *count += 1; - false + Entry::Vacant(e) => { + let state = StorageState::new(); + e.insert(state.clone()); + state } } } - // unset_sandbox_storage will decrement the sandbox storage - // reference counter. 
If there aren't any containers using - // that sandbox storage, this method will remove the - // storage reference from the sandbox and return 'true' to - // let the caller know that they can clean up the storage - // related directories by calling remove_sandbox_storage + // Clean mount and directory of a mountpoint. #[instrument] - pub fn unset_sandbox_storage(&mut self, path: &str) -> Result { - match self.storages.get_mut(path) { - None => Err(anyhow!("Sandbox storage with path {} not found", path)), - Some(count) => { - *count -= 1; - if *count == 0 { - self.storages.remove(path); - return Ok(true); - } - Ok(false) - } + fn cleanup_sandbox_storage(&mut self, path: &str) -> Result<()> { + if path.is_empty() { + return Err(anyhow!("mountpoint path is empty")); + } else if !Path::new(path).exists() { + return Ok(()); + } + + if matches!(is_mounted(path), Ok(true)) { + let mounts = vec![path.to_string()]; + remove_mounts(&mounts)?; } - } - // remove_sandbox_storage removes the sandbox storage if no - // containers are using that storage. - #[instrument] - pub fn remove_sandbox_storage(&mut self, path: &str) -> Result<()> { - let mounts = vec![path.to_string()]; - remove_mounts(&mounts)?; // "remove_dir" will fail if the mount point is backed by a read-only filesystem. // This is the case with the device mapper snapshotter, where we mount the block device directly // at the underlying sandbox path which was provided from the base RO kataShared path from the host. @@ -155,16 +168,23 @@ impl Sandbox { Ok(()) } - // unset_and_remove_sandbox_storage unsets the storage from sandbox - // and if there are no containers using this storage it will - // remove it from the sandbox. + /// Decrease reference count and destroy the storage object if reference count reaches zero. + /// Returns `Ok(true)` if the reference count has reached zero and the storage object has been + /// removed. 
#[instrument] - pub fn unset_and_remove_sandbox_storage(&mut self, path: &str) -> Result<()> { - if self.unset_sandbox_storage(path)? { - return self.remove_sandbox_storage(path); + pub async fn remove_sandbox_storage(&mut self, path: &str) -> Result { + match self.storages.get(path) { + None => Err(anyhow!("Sandbox storage with path {} not found", path)), + Some(state) => { + if state.dec_and_test_ref_count().await { + self.storages.remove(path); + self.cleanup_sandbox_storage(path)?; + Ok(true) + } else { + Ok(false) + } + } } - - Ok(()) } #[instrument] @@ -493,24 +513,22 @@ mod tests { let tmpdir_path = tmpdir.path().to_str().unwrap(); // Add a new sandbox storage - let new_storage = s.set_sandbox_storage(tmpdir_path); + let new_storage = s.add_sandbox_storage(tmpdir_path).await; // Check the reference counter - let ref_count = s.storages[tmpdir_path]; + let ref_count = new_storage.ref_count().await; assert_eq!( ref_count, 1, "Invalid refcount, got {} expected 1.", ref_count ); - assert!(new_storage); // Use the existing sandbox storage - let new_storage = s.set_sandbox_storage(tmpdir_path); - assert!(!new_storage, "Should be false as already exists."); + let new_storage = s.add_sandbox_storage(tmpdir_path).await; // Since we are using existing storage, the reference counter // should be 2 by now. 
- let ref_count = s.storages[tmpdir_path]; + let ref_count = new_storage.ref_count().await; assert_eq!( ref_count, 2, "Invalid refcount, got {} expected 2.", @@ -546,22 +564,20 @@ mod tests { .tempdir_in(tmpdir_path) .unwrap(); - assert!( - s.remove_sandbox_storage(srcdir_path).is_err(), - "Expect Err as the directory is not a mountpoint" - ); - - assert!(s.remove_sandbox_storage("").is_err()); + assert!(s.cleanup_sandbox_storage("").is_err()); let invalid_dir = emptydir.path().join("invalid"); assert!(s - .remove_sandbox_storage(invalid_dir.to_str().unwrap()) - .is_err()); + .cleanup_sandbox_storage(invalid_dir.to_str().unwrap()) + .is_ok()); assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok()); - assert!(s.remove_sandbox_storage(destdir_path).is_ok()); + assert!(s.cleanup_sandbox_storage(destdir_path).is_ok()); + + // remove a directory without umount + s.cleanup_sandbox_storage(srcdir_path).unwrap(); } #[tokio::test] @@ -573,8 +589,7 @@ mod tests { let mut s = Sandbox::new(&logger).unwrap(); assert!( - s.unset_and_remove_sandbox_storage("/tmp/testEphePath") - .is_err(), + s.remove_sandbox_storage("/tmp/testEphePath").await.is_err(), "Should fail because sandbox storage doesn't exist" ); @@ -595,8 +610,8 @@ mod tests { assert!(bind_mount(srcdir_path, destdir_path, &logger).is_ok()); - assert!(s.set_sandbox_storage(destdir_path)); - assert!(s.unset_and_remove_sandbox_storage(destdir_path).is_ok()); + s.add_sandbox_storage(destdir_path).await; + assert!(s.remove_sandbox_storage(destdir_path).await.is_ok()); let other_dir_str; { @@ -609,10 +624,10 @@ mod tests { let other_dir_path = other_dir.path().to_str().unwrap(); other_dir_str = other_dir_path.to_string(); - assert!(s.set_sandbox_storage(other_dir_path)); + s.add_sandbox_storage(other_dir_path).await; } - assert!(s.unset_and_remove_sandbox_storage(&other_dir_str).is_err()); + assert!(s.remove_sandbox_storage(&other_dir_str).await.is_ok()); } #[tokio::test] @@ -624,28 +639,30 @@ mod tests { let 
storage_path = "/tmp/testEphe"; // Add a new sandbox storage - assert!(s.set_sandbox_storage(storage_path)); + s.add_sandbox_storage(storage_path).await; // Use the existing sandbox storage + let state = s.add_sandbox_storage(storage_path).await; assert!( - !s.set_sandbox_storage(storage_path), + state.ref_count().await > 1, "Expects false as the storage is not new." ); assert!( - !s.unset_sandbox_storage(storage_path).unwrap(), + !s.remove_sandbox_storage(storage_path).await.unwrap(), "Expects false as there is still a storage." ); // Reference counter should decrement to 1. - let ref_count = s.storages[storage_path]; + let storage = &s.storages[storage_path]; + let refcount = storage.ref_count().await; assert_eq!( - ref_count, 1, + refcount, 1, "Invalid refcount, got {} expected 1.", - ref_count + refcount ); assert!( - s.unset_sandbox_storage(storage_path).unwrap(), + s.remove_sandbox_storage(storage_path).await.unwrap(), "Expects true as there is still a storage." ); @@ -661,7 +678,7 @@ mod tests { // If no container is using the sandbox storage, the reference // counter for it should not exist. assert!( - s.unset_sandbox_storage(storage_path).is_err(), + s.remove_sandbox_storage(storage_path).await.is_err(), "Expects false as the reference counter should no exist." ); } diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index 473e7d5c08..91a061797a 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -6,6 +6,7 @@ use anyhow::{anyhow, Context, Error, Result}; use std::convert::TryFrom; +use std::fmt::Formatter; use std::{collections::HashMap, fs, path::PathBuf}; /// Prefix to mark a volume as Kata special. @@ -427,6 +428,43 @@ impl TryFrom<&NydusExtraOptions> for KataVirtualVolume { } } +/// An implementation of generic storage device. 
+pub struct StorageDeviceGeneric { + refcount: u32, +} + +impl std::fmt::Debug for StorageDeviceGeneric { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StorageState") + .field("refcount", &self.refcount) + .finish() + } +} + +impl StorageDeviceGeneric { + /// Create a new instance of `StorageDeviceGeneric`. + pub fn new() -> Self { + StorageDeviceGeneric { refcount: 0 } + } + + /// Get reference count. + pub fn ref_count(&self) -> u32 { + self.refcount + } + + /// Increase reference count by one. + pub fn inc_ref_count(&mut self) { + self.refcount += 1; + } + + /// Decrease reference count and return true if it reaches zero. + pub fn dec_and_test_ref_count(&mut self) -> bool { + assert!(self.refcount > 0); + self.refcount -= 1; + self.refcount == 0 + } +} + /// Join user provided volume path with kata direct-volume root path. /// /// The `volume_path` is base64-encoded and then safely joined to the `prefix` @@ -659,4 +697,18 @@ mod tests { ); assert_eq!(volume.fs_type.as_str(), "rafsv6") } + + #[test] + fn test_storage_state_common() { + let mut state = StorageDeviceGeneric::new(); + assert_eq!(state.ref_count(), 0); + state.inc_ref_count(); + assert_eq!(state.ref_count(), 1); + state.inc_ref_count(); + assert_eq!(state.ref_count(), 2); + assert!(!state.dec_and_test_ref_count()); + assert_eq!(state.ref_count(), 1); + assert!(state.dec_and_test_ref_count()); + assert_eq!(state.ref_count(), 0); + } } From aee71b16f1fc05f5cfacf3f480fb4a6094622f6f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 9 Aug 2023 10:12:47 +0800 Subject: [PATCH 5/9] kata-types: introduce StorageDevice and StorageHandlerManager Introduce StorageDevice and StorageHandlerManager, which will be used to refine storage device management for kata-agent.
Signed-off-by: Jiang Liu --- src/agent/src/rpc.rs | 2 +- src/agent/src/sandbox.rs | 40 +++++++++++++-- src/libs/kata-types/src/mount.rs | 85 +++++++++++++++++++++++++++----- 3 files changed, 111 insertions(+), 16 deletions(-) diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 12bce34139..4b9929339f 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -1787,7 +1787,7 @@ async fn remove_container_resources(sandbox: &mut Sandbox, cid: &str) -> Result< let mounts = sandbox.container_mounts.get(cid); if let Some(mounts) = mounts { for m in mounts.iter() { - if sandbox.storages.get(m).is_some() { + if sandbox.storages.contains_key(m) { cmounts.push(m.to_string()); } } diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index 7ffd81b56b..759d932065 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -5,6 +5,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::Path; @@ -14,7 +15,7 @@ use std::{thread, time}; use anyhow::{anyhow, Context, Result}; use kata_types::cpu::CpuSet; -use kata_types::mount::StorageDeviceGeneric; +use kata_types::mount::{StorageDevice, StorageDeviceGeneric}; use libc::pid_t; use oci::{Hook, Hooks}; use protocols::agent::OnlineCPUMemRequest; @@ -42,18 +43,31 @@ pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id"; type UeventWatcher = (Box, oneshot::Sender); -#[derive(Clone, Debug)] +pub type StorageDeviceObject = Arc>; + +#[derive(Clone)] pub struct StorageState { - inner: Arc>, + inner: StorageDeviceObject, +} + +impl Debug for StorageState { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StorageState").finish() + } } impl StorageState { fn new() -> Self { StorageState { - inner: Arc::new(Mutex::new(StorageDeviceGeneric::new())), + inner: Arc::new(Mutex::new(StorageDeviceGeneric::new("".to_string()))), } } + #[allow(dead_code)] 
+ pub fn from_device(device: StorageDeviceObject) -> Self { + Self { inner: device } + } + pub async fn ref_count(&self) -> u32 { self.inner.lock().await.ref_count() } @@ -145,7 +159,25 @@ impl Sandbox { } } + /// Update the storage device associated with a path. + #[allow(dead_code)] + pub fn update_sandbox_storage( + &mut self, + path: &str, + device: StorageDeviceObject, + ) -> std::result::Result { + if !self.storages.contains_key(path) { + return Err(device); + } + + let state = StorageState::from_device(device); + // Safe to unwrap() because we have just ensured existence of entry. + let state = self.storages.insert(path.to_string(), state).unwrap(); + Ok(state.inner) + } + // Clean mount and directory of a mountpoint. + // This is actually StorageDeviceGeneric::cleanup(), kept here due to dependency chain. #[instrument] fn cleanup_sandbox_storage(&mut self, path: &str) -> Result<()> { if path.is_empty() { diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index 91a061797a..44ccd26045 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -5,6 +5,7 @@ // use anyhow::{anyhow, Context, Error, Result}; +use std::collections::hash_map::Entry; use std::convert::TryFrom; use std::fmt::Formatter; use std::{collections::HashMap, fs, path::PathBuf}; @@ -431,6 +432,7 @@ impl TryFrom<&NydusExtraOptions> for KataVirtualVolume { /// An implementation of generic storage device. pub struct StorageDeviceGeneric { refcount: u32, + path: String, } impl std::fmt::Debug for StorageDeviceGeneric { @@ -443,26 +445,85 @@ impl std::fmt::Debug for StorageDeviceGeneric { impl StorageDeviceGeneric { /// Create a new instance of `StorageStateCommon`. - pub fn new() -> Self { - StorageDeviceGeneric { refcount: 0 } + pub fn new(path: String) -> Self { + StorageDeviceGeneric { refcount: 1, path } + } +} + +impl StorageDevice for StorageDeviceGeneric { + fn path(&self) -> &str { + &self.path } - /// Get reference count. 
- pub fn ref_count(&self) -> u32 { + fn ref_count(&self) -> u32 { self.refcount } - /// Decrease reference count and return true if it reaches zero. - pub fn inc_ref_count(&mut self) { + fn inc_ref_count(&mut self) { self.refcount += 1; } - /// Decrease reference count and return true if it reaches zero. - pub fn dec_and_test_ref_count(&mut self) -> bool { + fn dec_and_test_ref_count(&mut self) -> bool { assert!(self.refcount > 0); self.refcount -= 1; self.refcount == 0 } + + fn cleanup(&self) {} +} + +/// Trait object for storage device. +pub trait StorageDevice: Send + Sync { + /// Path + fn path(&self) -> &str; + + /// Get reference count. + fn ref_count(&self) -> u32; + + /// Increase reference count. + fn inc_ref_count(&mut self); + + /// Decrease reference count and return true if it reaches zero. + fn dec_and_test_ref_count(&mut self) -> bool; + + /// Clean up resources related to the storage device. + fn cleanup(&self); +} + +/// Manager to manage registered storage device handlers. +pub struct StorageHandlerManager { + handlers: HashMap, +} + +impl Default for StorageHandlerManager { + fn default() -> Self { + Self::new() + } +} + +impl StorageHandlerManager { + /// Create a new instance of `StorageHandlerManager`. + pub fn new() -> Self { + Self { + handlers: HashMap::new(), + } + } + + /// Register a storage device handler. + pub fn add_handler(&mut self, id: &str, handler: H) -> Result<()> { + match self.handlers.entry(id.to_string()) { + Entry::Occupied(_) => Err(anyhow!("storage handler for {} already exists", id)), + Entry::Vacant(entry) => { + entry.insert(handler); + Ok(()) + } + } + } + + /// Get storage handler with specified `id`. + pub fn handler(&self, id: &str) -> Option<&H> { + self.handlers.get(id) + } } /// Join user provided volume path with kata direct-volume root path. 
@@ -700,12 +761,14 @@ mod tests { #[test] fn test_storage_state_common() { - let mut state = StorageDeviceGeneric::new(); - assert_eq!(state.ref_count(), 0); - state.inc_ref_count(); + let mut state = StorageDeviceGeneric::new("".to_string()); assert_eq!(state.ref_count(), 1); state.inc_ref_count(); assert_eq!(state.ref_count(), 2); + state.inc_ref_count(); + assert_eq!(state.ref_count(), 3); + assert!(!state.dec_and_test_ref_count()); + assert_eq!(state.ref_count(), 2); assert!(!state.dec_and_test_ref_count()); assert_eq!(state.ref_count(), 1); assert!(state.dec_and_test_ref_count()); From 331e35bc1ad7578f8b1d8aa6a45c91f6a4f5fd1c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 14 Aug 2023 20:26:58 +0800 Subject: [PATCH 6/9] agent: switch to new storage subsystem Switch to new storage subsystem to create a StorageDevice for each storage object. Fixes: #7614 Signed-off-by: Jiang Liu --- src/agent/src/device.rs | 8 +- src/agent/src/mount.rs | 676 +++++++++++++++++++++++----------------- src/agent/src/rpc.rs | 10 +- 3 files changed, 389 insertions(+), 305 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 24b8be55b8..b789d24f27 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -35,9 +35,9 @@ const VM_ROOTFS: &str = "/"; const BLOCK: &str = "block"; pub const DRIVER_9P_TYPE: &str = "9p"; pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs"; -pub const DRIVER_BLK_TYPE: &str = "blk"; +pub const DRIVER_BLK_PCI_TYPE: &str = "blk"; pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw"; -pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk"; +pub const DRIVER_BLK_MMIO_TYPE: &str = "mmioblk"; pub const DRIVER_SCSI_TYPE: &str = "scsi"; pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm"; pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral"; @@ -937,9 +937,9 @@ async fn add_device(device: &Device, sandbox: &Arc>) -> Result virtio_blk_device_handler(device, sandbox).await, + DRIVER_BLK_PCI_TYPE => virtio_blk_device_handler(device, 
sandbox).await, DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_device_handler(device, sandbox).await, - DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, sandbox).await, + DRIVER_BLK_MMIO_TYPE => virtiommio_blk_device_handler(device, sandbox).await, DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, sandbox).await, DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, sandbox).await, DRIVER_VFIO_PCI_GK_TYPE | DRIVER_VFIO_PCI_TYPE => { diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index 73fa9b183d..bdf10329ba 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -3,6 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#![allow(dead_code)] + use std::collections::HashMap; use std::fmt::Debug; use std::fs::{self, File, OpenOptions}; @@ -16,7 +18,10 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use kata_sys_util::mount::{create_mount_destination, get_linux_mount_info, parse_mount_options}; -use kata_types::mount::{KATA_MOUNT_OPTION_FS_GID, KATA_SHAREDFS_GUEST_PREMOUNT_TAG}; +use kata_types::mount::{ + StorageDeviceGeneric, StorageHandlerManager, KATA_MOUNT_OPTION_FS_GID, + KATA_SHAREDFS_GUEST_PREMOUNT_TAG, +}; use nix::mount::MsFlags; use nix::unistd::{Gid, Uid}; use regex::Regex; @@ -26,15 +31,15 @@ use tracing::instrument; use crate::device::{ get_scsi_device_name, get_virtio_blk_pci_device_name, get_virtio_mmio_device_name, - online_device, wait_for_pmem_device, DRIVER_9P_TYPE, DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, - DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, - DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, - FS_TYPE_HUGETLB, + online_device, wait_for_pmem_device, DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, + DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, + DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, FS_TYPE_HUGETLB, }; use crate::linux_abi::*; use 
crate::pci; use crate::protocols::agent::Storage; use crate::protocols::types::FSGroupChangePolicy; +use crate::sandbox::StorageDeviceObject; use crate::Sandbox; #[cfg(target_arch = "s390x")] use crate::{ccw, device::get_virtio_blk_ccw_device_name}; @@ -57,6 +62,24 @@ pub struct InitMount<'a> { options: Vec<&'a str>, } +#[derive(Debug)] +pub struct StorageContext<'a> { + cid: &'a Option, + logger: &'a Logger, + sandbox: &'a Arc>, +} + +/// Trait object to handle storage device. +#[async_trait::async_trait] +pub trait StorageHandler: Send + Sync { + /// Create a new storage device. + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result; +} + #[rustfmt::skip] lazy_static!{ static ref CGROUPS: HashMap<&'static str, &'static str> = { @@ -90,17 +113,37 @@ lazy_static! { ]; } +#[rustfmt::skip] +lazy_static! { + pub static ref STORAGE_HANDLERS: StorageHandlerManager> = { + let mut manager: StorageHandlerManager> = StorageHandlerManager::new(); + manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap(); + #[cfg(target_arch = "s390x")] + manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(VirtioBlkCcwHandler{})).unwrap(); + manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap(); + manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap(); + manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap(); + manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap(); + manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap(); + manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap(); + manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap(); + manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap(); + manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap(); + manager + }; +} + pub const 
STORAGE_HANDLER_LIST: &[&str] = &[ - DRIVER_BLK_TYPE, - DRIVER_9P_TYPE, - DRIVER_VIRTIOFS_TYPE, - DRIVER_EPHEMERAL_TYPE, - DRIVER_OVERLAYFS_TYPE, - DRIVER_MMIO_BLK_TYPE, - DRIVER_LOCAL_TYPE, - DRIVER_SCSI_TYPE, - DRIVER_NVDIMM_TYPE, - DRIVER_WATCHABLE_BIND_TYPE, + //DRIVER_BLK_TYPE, + //DRIVER_9P_TYPE, + //DRIVER_VIRTIOFS_TYPE, + //DRIVER_EPHEMERAL_TYPE, + //DRIVER_OVERLAYFS_TYPE, + //DRIVER_MMIO_BLK_TYPE, + //DRIVER_LOCAL_TYPE, + //DRIVER_SCSI_TYPE, + //DRIVER_NVDIMM_TYPE, + //DRIVER_WATCHABLE_BIND_TYPE, ]; #[instrument] @@ -161,48 +204,54 @@ pub fn baremount( }) } -#[instrument] -async fn ephemeral_storage_handler( - logger: &Logger, - storage: &Storage, - _sandbox: &Arc>, -) -> Result { - // hugetlbfs - if storage.fstype == FS_TYPE_HUGETLB { - return handle_hugetlbfs_storage(logger, storage).await; - } +#[derive(Debug)] +struct EphemeralHandler {} - // normal ephemeral storage - fs::create_dir_all(&storage.mount_point)?; +#[async_trait::async_trait] +impl StorageHandler for EphemeralHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // hugetlbfs + if storage.fstype == FS_TYPE_HUGETLB { + info!(ctx.logger, "handle hugetlbfs storage"); + // Allocate hugepages before mount + // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages + // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + // options eg "pagesize=2097152,size=524288000"(2M, 500M) + allocate_hugepages(ctx.logger, &storage.options.to_vec()) + .context("allocate hugepages")?; + common_storage_handler(ctx.logger, &storage)?; + } else if !storage.options.is_empty() { + // By now we only support one option field: "fsGroup" which + // isn't a valid mount option, thus we should remove it when + // do mount.
+ let opts = parse_options(&storage.options); + storage.options = Default::default(); + common_storage_handler(ctx.logger, &storage)?; - if !storage.options.is_empty() { - // By now we only support one option field: "fsGroup" which - // isn't an valid mount option, thus we should remove it when - // do mount. - let mut new_storage = storage.clone(); - new_storage.options = Default::default(); - common_storage_handler(logger, &new_storage)?; + // ephemeral_storage didn't support mount options except fsGroup. + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; - let opts = parse_options(&storage.options); + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - // ephemeral_storage didn't support mount options except fsGroup. - if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { - let gid = fsgid.parse::()?; + let meta = fs::metadata(&storage.mount_point)?; + let mut permission = meta.permissions(); - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - - let meta = fs::metadata(&storage.mount_point)?; - let mut permission = meta.permissions(); - - let o_mode = meta.mode() | MODE_SETGID; - permission.set_mode(o_mode); - fs::set_permissions(&storage.mount_point, permission)?; + let o_mode = meta.mode() | MODE_SETGID; + permission.set_mode(o_mode); + fs::set_permissions(&storage.mount_point, permission)?; + } + } else { + common_storage_handler(ctx.logger, &storage)?; } - } else { - common_storage_handler(logger, storage)?; - } - Ok("".to_string()) + new_device("".to_string()) + } } // update_ephemeral_mounts takes a list of ephemeral mounts and remounts them @@ -266,101 +315,105 @@ pub async fn update_ephemeral_mounts( Ok(()) } -#[instrument] -async fn overlayfs_storage_handler( - logger: &Logger, - storage: &Storage, - cid: Option<&str>, - _sandbox: &Arc>, -) -> Result { - if storage - .options - .iter() - .any(|e| e == "io.katacontainers.fs-opt.overlay-rw") - { 
- let cid = cid.ok_or_else(|| anyhow!("No container id in rw overlay"))?; - let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid); - let work = cpath.join("work"); - let upper = cpath.join("upper"); +#[derive(Debug)] +struct OverlayfsHandler {} - fs::create_dir_all(&work).context("Creating overlay work directory")?; - fs::create_dir_all(&upper).context("Creating overlay upper directory")?; - - let mut storage = storage.clone(); - storage.fstype = "overlay".into(); - storage +#[async_trait::async_trait] +impl StorageHandler for OverlayfsHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if storage .options - .push(format!("upperdir={}", upper.to_string_lossy())); - storage - .options - .push(format!("workdir={}", work.to_string_lossy())); - return common_storage_handler(logger, &storage); - } + .iter() + .any(|e| e == "io.katacontainers.fs-opt.overlay-rw") + { + let cid = ctx + .cid + .clone() + .ok_or_else(|| anyhow!("No container id in rw overlay"))?; + let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid); + let work = cpath.join("work"); + let upper = cpath.join("upper"); - common_storage_handler(logger, storage) -} + fs::create_dir_all(&work).context("Creating overlay work directory")?; + fs::create_dir_all(&upper).context("Creating overlay upper directory")?; -#[instrument] -async fn local_storage_handler( - _logger: &Logger, - storage: &Storage, - _sandbox: &Arc>, -) -> Result { - fs::create_dir_all(&storage.mount_point).context(format!( - "failed to create dir all {:?}", - &storage.mount_point - ))?; - - let opts = parse_options(&storage.options); - - let mut need_set_fsgid = false; - if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { - let gid = fsgid.parse::()?; - - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - need_set_fsgid = true; - } - - if let Some(mode) = opts.get("mode") { - let mut permission = 
fs::metadata(&storage.mount_point)?.permissions(); - - let mut o_mode = u32::from_str_radix(mode, 8)?; - - if need_set_fsgid { - // set SetGid mode mask. - o_mode |= MODE_SETGID; + storage.fstype = "overlay".into(); + storage + .options + .push(format!("upperdir={}", upper.to_string_lossy())); + storage + .options + .push(format!("workdir={}", work.to_string_lossy())); } - permission.set_mode(o_mode); - fs::set_permissions(&storage.mount_point, permission)?; + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) } - - Ok("".to_string()) } -#[instrument] -async fn virtio9p_storage_handler( - logger: &Logger, - storage: &Storage, - _sandbox: &Arc>, -) -> Result { - common_storage_handler(logger, storage) +#[derive(Debug)] +struct LocalHandler {} + +#[async_trait::async_trait] +impl StorageHandler for LocalHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + _ctx: &mut StorageContext, + ) -> Result { + fs::create_dir_all(&storage.mount_point).context(format!( + "failed to create dir all {:?}", + &storage.mount_point + ))?; + + let opts = parse_options(&storage.options); + + let mut need_set_fsgid = false; + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; + + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; + need_set_fsgid = true; + } + + if let Some(mode) = opts.get("mode") { + let mut permission = fs::metadata(&storage.mount_point)?.permissions(); + + let mut o_mode = u32::from_str_radix(mode, 8)?; + + if need_set_fsgid { + // set SetGid mode mask. 
+ o_mode |= MODE_SETGID; + } + permission.set_mode(o_mode); + + fs::set_permissions(&storage.mount_point, permission)?; + } + + new_device("".to_string()) + } } -#[instrument] -async fn handle_hugetlbfs_storage(logger: &Logger, storage: &Storage) -> Result { - info!(logger, "handle hugetlbfs storage"); - // Allocate hugepages before mount - // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages - // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages - // options eg "pagesize=2097152,size=524288000"(2M, 500M) - allocate_hugepages(logger, &storage.options).context("allocate hugepages")?; +#[derive(Debug)] +struct Virtio9pHandler {} - common_storage_handler(logger, storage)?; - - // hugetlbfs return empty string as ephemeral_storage_handler do. - // this is a sandbox level storage, but not a container-level mount. - Ok("".to_string()) +#[async_trait::async_trait] +impl StorageHandler for Virtio9pHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } } // Allocate hugepages by writing to sysfs @@ -451,98 +504,169 @@ fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { Ok((pagesize, size)) } -// virtiommio_blk_storage_handler handles the storage for mmio blk driver. 
-#[instrument] -async fn virtiommio_blk_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: &Arc>, -) -> Result { - let storage = storage.clone(); - if !Path::new(&storage.source).exists() { - get_virtio_mmio_device_name(sandbox, &storage.source) - .await - .context("failed to get mmio device name")?; +#[derive(Debug)] +struct VirtioFsHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioFsHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) } - //The source path is VmPath - common_storage_handler(logger, &storage) } -// virtiofs_storage_handler handles the storage for virtio-fs. -#[instrument] -async fn virtiofs_storage_handler( - logger: &Logger, - storage: &Storage, - _sandbox: &Arc>, -) -> Result { - common_storage_handler(logger, storage) -} +#[derive(Debug)] +struct VirtioBlkMmioHandler {} -// virtio_blk_storage_handler handles the storage for blk driver. 
-#[instrument] -async fn virtio_blk_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: &Arc>, -) -> Result { - let mut storage = storage.clone(); - // If hot-plugged, get the device node path based on the PCI path - // otherwise use the virt path provided in Storage Source - if storage.source.starts_with("/dev") { - let metadata = fs::metadata(&storage.source) - .context(format!("get metadata on file {:?}", &storage.source))?; - let mode = metadata.permissions().mode(); - if mode & libc::S_IFBLK == 0 { - return Err(anyhow!("Invalid device {}", &storage.source)); +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkMmioHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if !Path::new(&storage.source).exists() { + get_virtio_mmio_device_name(ctx.sandbox, &storage.source) + .await + .context("failed to get mmio device name")?; } - } else { - let pcipath = pci::Path::from_str(&storage.source)?; - let dev_path = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?; + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +struct VirtioBlkPciHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkPciHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // If hot-plugged, get the device node path based on the PCI path + // otherwise use the virt path provided in Storage Source + if storage.source.starts_with("/dev") { + let metadata = fs::metadata(&storage.source) + .context(format!("get metadata on file {:?}", &storage.source))?; + let mode = metadata.permissions().mode(); + if mode & libc::S_IFBLK == 0 { + return Err(anyhow!("Invalid device {}", &storage.source)); + } + } else { + let pcipath = pci::Path::from_str(&storage.source)?; + let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?; + 
storage.source = dev_path; + } + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +struct VirtioBlkCcwHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkCcwHandler { + #[cfg(target_arch = "s390x")] + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let ccw_device = ccw::Device::from_str(&storage.source)?; + let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?; storage.source = dev_path; + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) } - common_storage_handler(logger, &storage) + #[cfg(not(target_arch = "s390x"))] + #[instrument] + async fn create_device( + &self, + _storage: Storage, + _ctx: &mut StorageContext, + ) -> Result { + Err(anyhow!("CCW is only supported on s390x")) + } } -// virtio_blk_ccw_storage_handler handles storage for the blk-ccw driver (s390x) -#[cfg(target_arch = "s390x")] -#[instrument] -async fn virtio_blk_ccw_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: &Arc>, -) -> Result { - let mut storage = storage.clone(); - let ccw_device = ccw::Device::from_str(&storage.source)?; - let dev_path = get_virtio_blk_ccw_device_name(sandbox, &ccw_device).await?; - storage.source = dev_path; - common_storage_handler(logger, &storage) +#[derive(Debug)] +struct ScsiHandler {} + +#[async_trait::async_trait] +impl StorageHandler for ScsiHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // Retrieve the device path from SCSI address. 
+ let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?; + storage.source = dev_path; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } } -#[cfg(not(target_arch = "s390x"))] -#[instrument] -async fn virtio_blk_ccw_storage_handler( - _: &Logger, - _: &Storage, - _: &Arc>, -) -> Result { - Err(anyhow!("CCW is only supported on s390x")) +#[derive(Debug)] +struct PmemHandler {} + +#[async_trait::async_trait] +impl StorageHandler for PmemHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // Retrieve the device path from NVDIMM address. + wait_for_pmem_device(ctx.sandbox, &storage.source).await?; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } } -// virtio_scsi_storage_handler handles the storage for scsi driver. -#[instrument] -async fn virtio_scsi_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: &Arc>, -) -> Result { - let mut storage = storage.clone(); +#[derive(Debug)] +struct BindWatcherHandler {} - // Retrieve the device path from SCSI address. 
- let dev_path = get_scsi_device_name(sandbox, &storage.source).await?; - storage.source = dev_path; +#[async_trait::async_trait] +impl StorageHandler for BindWatcherHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if let Some(cid) = ctx.cid { + ctx.sandbox + .lock() + .await + .bind_watcher + .add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger) + .await?; + } + new_device("".to_string()) + } +} - common_storage_handler(logger, &storage) +fn new_device(path: String) -> Result { + let device = StorageDeviceGeneric::new(path); + Ok(Arc::new(Mutex::new(device))) } #[instrument] @@ -552,39 +676,6 @@ fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result Ok(storage.mount_point.clone()) } -// nvdimm_storage_handler handles the storage for NVDIMM driver. -#[instrument] -async fn nvdimm_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: &Arc>, -) -> Result { - let storage = storage.clone(); - - // Retrieve the device path from NVDIMM address. - wait_for_pmem_device(sandbox, &storage.source).await?; - - common_storage_handler(logger, &storage) -} - -async fn bind_watcher_storage_handler( - logger: &Logger, - storage: &Storage, - sandbox: &Arc>, - cid: Option, -) -> Result<()> { - let mut locked = sandbox.lock().await; - - if let Some(cid) = cid { - locked - .bind_watcher - .add_container(cid, iter::once(storage.clone()), logger) - .await - } else { - Ok(()) - } -} - // mount_storage performs the mount described by the storage structure. 
#[instrument] fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> { @@ -728,68 +819,67 @@ pub fn is_mounted(mount_point: &str) -> Result { #[instrument] pub async fn add_storages( logger: Logger, - storages: &[Storage], + storages: Vec, sandbox: &Arc>, cid: Option, ) -> Result> { let mut mount_list = Vec::new(); for storage in storages { - let handler_name = &storage.driver; - let logger = - logger.new(o!( "subsystem" => "storage", "storage-type" => handler_name.to_string())); - - { - let mut sb = sandbox.lock().await; - let state = sb.add_sandbox_storage(&storage.mount_point).await; - if state.ref_count().await > 1 { - continue; - } + let path = storage.mount_point.clone(); + let state = sandbox.lock().await.add_sandbox_storage(&path).await; + if state.ref_count().await > 1 { + // The device already exists. + continue; } - let res = match handler_name.as_str() { - DRIVER_BLK_TYPE => virtio_blk_storage_handler(&logger, storage, sandbox).await, - DRIVER_BLK_CCW_TYPE => virtio_blk_ccw_storage_handler(&logger, storage, sandbox).await, - DRIVER_9P_TYPE => virtio9p_storage_handler(&logger, storage, sandbox).await, - DRIVER_VIRTIOFS_TYPE => virtiofs_storage_handler(&logger, storage, sandbox).await, - DRIVER_EPHEMERAL_TYPE => ephemeral_storage_handler(&logger, storage, sandbox).await, - DRIVER_OVERLAYFS_TYPE => { - overlayfs_storage_handler(&logger, storage, cid.as_deref(), sandbox).await - } - DRIVER_MMIO_BLK_TYPE => virtiommio_blk_storage_handler(&logger, storage, sandbox).await, - DRIVER_LOCAL_TYPE => local_storage_handler(&logger, storage, sandbox).await, - DRIVER_SCSI_TYPE => virtio_scsi_storage_handler(&logger, storage, sandbox).await, - DRIVER_NVDIMM_TYPE => nvdimm_storage_handler(&logger, storage, sandbox).await, - DRIVER_WATCHABLE_BIND_TYPE => { - bind_watcher_storage_handler(&logger, storage, sandbox, cid.clone()).await?; - // Don't register watch mounts, they're handled separately by the watcher. 
- Ok(String::new()) - } - _ => { - return Err(anyhow!( - "Failed to find the storage handler {}", - storage.driver.to_owned() - )); - } - }; + if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) { + let logger = + logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone())); + let mut ctx = StorageContext { + cid: &cid, + logger: &logger, + sandbox, + }; - let mount_point = match res { - Err(e) => { - error!( - logger, - "add_storages failed, storage: {:?}, error: {:?} ", storage, e - ); - let mut sb = sandbox.lock().await; - if let Err(e) = sb.remove_sandbox_storage(&storage.mount_point).await { - warn!(logger, "fail to unset sandbox storage {:?}", e); + match handler.create_device(storage, &mut ctx).await { + Ok(device) => { + match sandbox + .lock() + .await + .update_sandbox_storage(&path, device.clone()) + { + Ok(d) => { + let path = device.lock().await.path().to_string(); + if !path.is_empty() { + mount_list.push(path.clone()); + } + drop(d); + } + Err(device) => { + error!(logger, "failed to update device for storage"); + if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await + { + warn!(logger, "failed to remove dummy sandbox storage {:?}", e); + } + device.lock().await.cleanup(); + return Err(anyhow!("failed to update device for storage")); + } + } + } + Err(e) => { + error!(logger, "failed to create device for storage, error: {e:?}"); + if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await { + warn!(logger, "failed to remove dummy sandbox storage {e:?}"); + } + return Err(e); } - return Err(e); } - Ok(m) => m, - }; - - if !mount_point.is_empty() { - mount_list.push(mount_point); + } else { + return Err(anyhow!( + "Failed to find the storage handler {}", + storage.driver + )); } } diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 4b9929339f..4b405becb5 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -231,13 +231,7 @@ impl AgentService { // After all those 
storages have been processed, no matter the order // here, the agent will rely on rustjail (using the oci.Mounts // list) to bind mount all of them inside the container. - let m = add_storages( - sl(), - &req.storages, - &self.sandbox, - Some(req.container_id.clone()), - ) - .await?; + let m = add_storages(sl(), req.storages, &self.sandbox, Some(req.container_id)).await?; let mut s = self.sandbox.lock().await; s.container_mounts.insert(cid.clone(), m); @@ -1268,7 +1262,7 @@ impl agent_ttrpc::AgentService for AgentService { .map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e))?; } - match add_storages(sl(), &req.storages, &self.sandbox, None).await { + match add_storages(sl(), req.storages, &self.sandbox, None).await { Ok(m) => { self.sandbox.lock().await.mounts = m; } From 3b1af40b16875da919cbf313010f00743058cc63 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 10 Aug 2023 22:42:24 +0800 Subject: [PATCH 7/9] agent: refine storage related code a bit Refine storage related code by: - remove the STORAGE_HANDLER_LIST - define type alias - move code near to its caller Signed-off-by: Jiang Liu --- src/agent/src/mount.rs | 197 +++++++++++++--------------- src/agent/src/rpc.rs | 4 +- src/agent/src/sandbox.rs | 2 - src/libs/kata-sys-util/src/mount.rs | 10 +- src/libs/kata-types/src/mount.rs | 5 + 5 files changed, 106 insertions(+), 112 deletions(-) diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index bdf10329ba..59bdfdf555 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -3,8 +3,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#![allow(dead_code)] - use std::collections::HashMap; use std::fmt::Debug; use std::fs::{self, File, OpenOptions}; @@ -133,19 +131,6 @@ lazy_static! 
{ }; } -pub const STORAGE_HANDLER_LIST: &[&str] = &[ - //DRIVER_BLK_TYPE, - //DRIVER_9P_TYPE, - //DRIVER_VIRTIOFS_TYPE, - //DRIVER_EPHEMERAL_TYPE, - //DRIVER_OVERLAYFS_TYPE, - //DRIVER_MMIO_BLK_TYPE, - //DRIVER_LOCAL_TYPE, - //DRIVER_SCSI_TYPE, - //DRIVER_NVDIMM_TYPE, - //DRIVER_WATCHABLE_BIND_TYPE, -]; - #[instrument] pub fn baremount( source: &Path, @@ -222,7 +207,7 @@ impl StorageHandler for EphemeralHandler { // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages // options eg "pagesize=2097152,size=524288000"(2M, 500M) - allocate_hugepages(ctx.logger, &storage.options.to_vec()) + Self::allocate_hugepages(ctx.logger, &storage.options.to_vec()) .context("allocate hugepages")?; common_storage_handler(ctx.logger, &storage)?; } else if !storage.options.is_empty() { @@ -254,6 +239,96 @@ impl StorageHandler for EphemeralHandler { } } +impl EphemeralHandler { + // Allocate hugepages by writing to sysfs + fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { + info!(logger, "mounting hugePages storage options: {:?}", options); + + let (pagesize, size) = Self::get_pagesize_and_size_from_option(options) + .context(format!("parse mount options: {:?}", &options))?; + + info!( + logger, + "allocate hugepages. pageSize: {}, size: {}", pagesize, size + ); + + // sysfs entry is always of the form hugepages-${pagesize}kB + // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt + let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) + .join(format!("hugepages-{}kB", pagesize / 1024)) + .join("nr_hugepages"); + + // write numpages to nr_hugepages file. 
+ let numpages = format!("{}", size / pagesize); + info!(logger, "write {} pages to {:?}", &numpages, &path); + + let mut file = OpenOptions::new() + .write(true) + .open(&path) + .context(format!("open nr_hugepages directory {:?}", &path))?; + + file.write_all(numpages.as_bytes()) + .context(format!("write nr_hugepages failed: {:?}", &path))?; + + // Even if the write succeeds, the kernel isn't guaranteed to be + // able to allocate all the pages we requested. Verify that it + // did. + let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; + let allocated = verify + .trim_end() + .parse::() + .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; + if allocated != size / pagesize { + return Err(anyhow!( + "Only allocated {} of {} hugepages of size {}", + allocated, + numpages, + pagesize + )); + } + + Ok(()) + } + + // Parse filesystem options string to retrieve hugepage details + // options eg "pagesize=2048,size=107374182" + fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { + let mut pagesize_str: Option<&str> = None; + let mut size_str: Option<&str> = None; + + for option in options { + let vars: Vec<&str> = option.trim().split(',').collect(); + + for var in vars { + if let Some(stripped) = var.strip_prefix("pagesize=") { + pagesize_str = Some(stripped); + } else if let Some(stripped) = var.strip_prefix("size=") { + size_str = Some(stripped); + } + + if pagesize_str.is_some() && size_str.is_some() { + break; + } + } + } + + if pagesize_str.is_none() || size_str.is_none() { + return Err(anyhow!("no pagesize/size options found")); + } + + let pagesize = pagesize_str + .unwrap() + .parse::() + .context(format!("parse pagesize: {:?}", &pagesize_str))?; + let size = size_str + .unwrap() + .parse::() + .context(format!("parse size: {:?}", &pagesize_str))?; + + Ok((pagesize, size)) + } +} + // update_ephemeral_mounts takes a list of ephemeral mounts and remounts them // with mount options 
passed by the caller #[instrument] @@ -416,94 +491,6 @@ impl StorageHandler for Virtio9pHandler { } } -// Allocate hugepages by writing to sysfs -fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { - info!(logger, "mounting hugePages storage options: {:?}", options); - - let (pagesize, size) = get_pagesize_and_size_from_option(options) - .context(format!("parse mount options: {:?}", &options))?; - - info!( - logger, - "allocate hugepages. pageSize: {}, size: {}", pagesize, size - ); - - // sysfs entry is always of the form hugepages-${pagesize}kB - // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt - let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) - .join(format!("hugepages-{}kB", pagesize / 1024)) - .join("nr_hugepages"); - - // write numpages to nr_hugepages file. - let numpages = format!("{}", size / pagesize); - info!(logger, "write {} pages to {:?}", &numpages, &path); - - let mut file = OpenOptions::new() - .write(true) - .open(&path) - .context(format!("open nr_hugepages directory {:?}", &path))?; - - file.write_all(numpages.as_bytes()) - .context(format!("write nr_hugepages failed: {:?}", &path))?; - - // Even if the write succeeds, the kernel isn't guaranteed to be - // able to allocate all the pages we requested. Verify that it - // did. 
- let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; - let allocated = verify - .trim_end() - .parse::() - .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; - if allocated != size / pagesize { - return Err(anyhow!( - "Only allocated {} of {} hugepages of size {}", - allocated, - numpages, - pagesize - )); - } - - Ok(()) -} - -// Parse filesystem options string to retrieve hugepage details -// options eg "pagesize=2048,size=107374182" -fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { - let mut pagesize_str: Option<&str> = None; - let mut size_str: Option<&str> = None; - - for option in options { - let vars: Vec<&str> = option.trim().split(',').collect(); - - for var in vars { - if let Some(stripped) = var.strip_prefix("pagesize=") { - pagesize_str = Some(stripped); - } else if let Some(stripped) = var.strip_prefix("size=") { - size_str = Some(stripped); - } - - if pagesize_str.is_some() && size_str.is_some() { - break; - } - } - } - - if pagesize_str.is_none() || size_str.is_none() { - return Err(anyhow!("no pagesize/size options found")); - } - - let pagesize = pagesize_str - .unwrap() - .parse::() - .context(format!("parse pagesize: {:?}", &pagesize_str))?; - let size = size_str - .unwrap() - .parse::() - .context(format!("parse size: {:?}", &pagesize_str))?; - - Ok((pagesize, size)) -} - #[derive(Debug)] struct VirtioFsHandler {} @@ -1918,7 +1905,7 @@ mod tests { for case in data { let input = case.0; - let r = get_pagesize_and_size_from_option(&[input.to_string()]); + let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]); let is_ok = case.2; if is_ok { diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 4b405becb5..3ef0cc688f 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -59,7 +59,7 @@ use crate::device::{ use crate::image_rpc; use crate::linux_abi::*; use crate::metrics::get_metrics; -use crate::mount::{add_storages, 
baremount, update_ephemeral_mounts, STORAGE_HANDLER_LIST}; +use crate::mount::{add_storages, baremount, update_ephemeral_mounts, STORAGE_HANDLERS}; use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS}; use crate::network::setup_guest_dns; use crate::pci; @@ -1669,7 +1669,7 @@ fn get_agent_details() -> AgentDetails { detail.init_daemon = unistd::getpid() == Pid::from_raw(1); detail.device_handlers = Vec::new(); - detail.storage_handlers = STORAGE_HANDLER_LIST.iter().map(|x| x.to_string()).collect(); + detail.storage_handlers = STORAGE_HANDLERS.get_handlers(); detail } diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index 759d932065..05ae044197 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -63,7 +63,6 @@ impl StorageState { } } - #[allow(dead_code)] pub fn from_device(device: StorageDeviceObject) -> Self { Self { inner: device } } @@ -160,7 +159,6 @@ impl Sandbox { } /// Update the storage device associated with a path. - #[allow(dead_code)] pub fn update_sandbox_storage( &mut self, path: &str, diff --git a/src/libs/kata-sys-util/src/mount.rs b/src/libs/kata-sys-util/src/mount.rs index e649ab436d..873db5f5b9 100644 --- a/src/libs/kata-sys-util/src/mount.rs +++ b/src/libs/kata-sys-util/src/mount.rs @@ -58,7 +58,8 @@ use crate::fs::is_symlink; use crate::sl; /// Default permission for directories created for mountpoint. 
-const MOUNT_PERM: u32 = 0o755; +const MOUNT_DIR_PERM: u32 = 0o755; +const MOUNT_FILE_PERM: u32 = 0o644; pub const PROC_MOUNTS_FILE: &str = "/proc/mounts"; const PROC_FIELDS_PER_LINE: usize = 6; @@ -187,13 +188,16 @@ pub fn create_mount_destination, D: AsRef, R: AsRef>( .parent() .ok_or_else(|| Error::InvalidPath(dst.to_path_buf()))?; let mut builder = fs::DirBuilder::new(); - builder.mode(MOUNT_PERM).recursive(true).create(parent)?; + builder + .mode(MOUNT_DIR_PERM) + .recursive(true) + .create(parent)?; if fs_type == "bind" { // The source and destination for bind mounting must be the same type: file or directory. if !src.as_ref().is_dir() { fs::OpenOptions::new() - .mode(MOUNT_PERM) + .mode(MOUNT_FILE_PERM) .write(true) .create(true) .open(dst)?; diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index 44ccd26045..a3747af51c 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -524,6 +524,11 @@ impl StorageHandlerManager { pub fn handler(&self, id: &str) -> Option<&H> { self.handlers.get(id) } + + /// Get names of registered handlers. + pub fn get_handlers(&self) -> Vec { + self.handlers.keys().map(|v| v.to_string()).collect() + } } /// Join user provided volume path with kata direct-volume root path. From d5483aaf7cd3c36b6169b8d2e15ccd56d1e13b91 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 24 Aug 2023 09:40:09 +0800 Subject: [PATCH 8/9] agent: move storage device related code into dedicated files Move storage device related code into dedicated files. 
Signed-off-by: Jiang Liu --- src/agent/src/main.rs | 1 + src/agent/src/mount.rs | 1146 +---------------- src/agent/src/rpc.rs | 3 +- src/agent/src/storage/bind_watcher_handler.rs | 36 + src/agent/src/storage/block_handler.rs | 145 +++ src/agent/src/storage/ephemeral_handler.rs | 293 +++++ src/agent/src/storage/fs_handler.rs | 88 ++ src/agent/src/storage/local_handler.rs | 61 + src/agent/src/storage/mod.rs | 648 ++++++++++ 9 files changed, 1279 insertions(+), 1142 deletions(-) create mode 100644 src/agent/src/storage/bind_watcher_handler.rs create mode 100644 src/agent/src/storage/block_handler.rs create mode 100644 src/agent/src/storage/ephemeral_handler.rs create mode 100644 src/agent/src/storage/fs_handler.rs create mode 100644 src/agent/src/storage/local_handler.rs create mode 100644 src/agent/src/storage/mod.rs diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index a869e5afa3..7e59e2daab 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -48,6 +48,7 @@ mod pci; pub mod random; mod sandbox; mod signal; +mod storage; mod uevent; mod util; mod version; diff --git a/src/agent/src/mount.rs b/src/agent/src/mount.rs index 59bdfdf555..e7bbf96f8d 100644 --- a/src/agent/src/mount.rs +++ b/src/agent/src/mount.rs @@ -5,53 +5,23 @@ use std::collections::HashMap; use std::fmt::Debug; -use std::fs::{self, File, OpenOptions}; -use std::io::{BufRead, BufReader, Write}; -use std::iter; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; use std::ops::Deref; -use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::path::Path; -use std::str::FromStr; -use std::sync::Arc; use anyhow::{anyhow, Context, Result}; -use kata_sys_util::mount::{create_mount_destination, get_linux_mount_info, parse_mount_options}; -use kata_types::mount::{ - StorageDeviceGeneric, StorageHandlerManager, KATA_MOUNT_OPTION_FS_GID, - KATA_SHAREDFS_GUEST_PREMOUNT_TAG, -}; +use kata_sys_util::mount::{get_linux_mount_info, parse_mount_options}; use nix::mount::MsFlags; 
-use nix::unistd::{Gid, Uid}; use regex::Regex; use slog::Logger; -use tokio::sync::Mutex; use tracing::instrument; -use crate::device::{ - get_scsi_device_name, get_virtio_blk_pci_device_name, get_virtio_mmio_device_name, - online_device, wait_for_pmem_device, DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, - DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, - DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, FS_TYPE_HUGETLB, -}; +use crate::device::online_device; use crate::linux_abi::*; -use crate::pci; -use crate::protocols::agent::Storage; -use crate::protocols::types::FSGroupChangePolicy; -use crate::sandbox::StorageDeviceObject; -use crate::Sandbox; -#[cfg(target_arch = "s390x")] -use crate::{ccw, device::get_virtio_blk_ccw_device_name}; pub const TYPE_ROOTFS: &str = "rootfs"; -const FS_GID_EQ: &str = "fsgid="; -const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages"; - -const RW_MASK: u32 = 0o660; -const RO_MASK: u32 = 0o440; -const EXEC_MASK: u32 = 0o110; -const MODE_SETGID: u32 = 0o2000; - #[derive(Debug, PartialEq)] pub struct InitMount<'a> { fstype: &'a str, @@ -60,24 +30,6 @@ pub struct InitMount<'a> { options: Vec<&'a str>, } -#[derive(Debug)] -pub struct StorageContext<'a> { - cid: &'a Option, - logger: &'a Logger, - sandbox: &'a Arc>, -} - -/// Trait object to handle storage device. -#[async_trait::async_trait] -pub trait StorageHandler: Send + Sync { - /// Create a new storage device. - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result; -} - #[rustfmt::skip] lazy_static!{ static ref CGROUPS: HashMap<&'static str, &'static str> = { @@ -111,26 +63,6 @@ lazy_static! { ]; } -#[rustfmt::skip] -lazy_static! 
{ - pub static ref STORAGE_HANDLERS: StorageHandlerManager> = { - let mut manager: StorageHandlerManager> = StorageHandlerManager::new(); - manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap(); - #[cfg(target_arch = "s390x")] - manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(VirtioBlkCcwHandler{})).unwrap(); - manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap(); - manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap(); - manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap(); - manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap(); - manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap(); - manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap(); - manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap(); - manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap(); - manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap(); - manager - }; -} - #[instrument] pub fn baremount( source: &Path, @@ -189,608 +121,6 @@ pub fn baremount( }) } -#[derive(Debug)] -struct EphemeralHandler {} - -#[async_trait::async_trait] -impl StorageHandler for EphemeralHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - // hugetlbfs - if storage.fstype == FS_TYPE_HUGETLB { - info!(ctx.logger, "handle hugetlbfs storage"); - // Allocate hugepages before mount - // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages - // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages - // options eg "pagesize=2097152,size=524288000"(2M, 500M) - Self::allocate_hugepages(ctx.logger, &storage.options.to_vec()) - .context("allocate hugepages")?; - common_storage_handler(ctx.logger, &storage)?; - } else if !storage.options.is_empty() { - // By now we only support one 
option field: "fsGroup" which - // isn't an valid mount option, thus we should remove it when - // do mount. - let opts = parse_options(&storage.options); - storage.options = Default::default(); - common_storage_handler(ctx.logger, &storage)?; - - // ephemeral_storage didn't support mount options except fsGroup. - if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { - let gid = fsgid.parse::()?; - - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - - let meta = fs::metadata(&storage.mount_point)?; - let mut permission = meta.permissions(); - - let o_mode = meta.mode() | MODE_SETGID; - permission.set_mode(o_mode); - fs::set_permissions(&storage.mount_point, permission)?; - } - } else { - common_storage_handler(ctx.logger, &storage)?; - } - - new_device("".to_string()) - } -} - -impl EphemeralHandler { - // Allocate hugepages by writing to sysfs - fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { - info!(logger, "mounting hugePages storage options: {:?}", options); - - let (pagesize, size) = Self::get_pagesize_and_size_from_option(options) - .context(format!("parse mount options: {:?}", &options))?; - - info!( - logger, - "allocate hugepages. pageSize: {}, size: {}", pagesize, size - ); - - // sysfs entry is always of the form hugepages-${pagesize}kB - // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt - let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) - .join(format!("hugepages-{}kB", pagesize / 1024)) - .join("nr_hugepages"); - - // write numpages to nr_hugepages file. 
- let numpages = format!("{}", size / pagesize); - info!(logger, "write {} pages to {:?}", &numpages, &path); - - let mut file = OpenOptions::new() - .write(true) - .open(&path) - .context(format!("open nr_hugepages directory {:?}", &path))?; - - file.write_all(numpages.as_bytes()) - .context(format!("write nr_hugepages failed: {:?}", &path))?; - - // Even if the write succeeds, the kernel isn't guaranteed to be - // able to allocate all the pages we requested. Verify that it - // did. - let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; - let allocated = verify - .trim_end() - .parse::() - .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; - if allocated != size / pagesize { - return Err(anyhow!( - "Only allocated {} of {} hugepages of size {}", - allocated, - numpages, - pagesize - )); - } - - Ok(()) - } - - // Parse filesystem options string to retrieve hugepage details - // options eg "pagesize=2048,size=107374182" - fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { - let mut pagesize_str: Option<&str> = None; - let mut size_str: Option<&str> = None; - - for option in options { - let vars: Vec<&str> = option.trim().split(',').collect(); - - for var in vars { - if let Some(stripped) = var.strip_prefix("pagesize=") { - pagesize_str = Some(stripped); - } else if let Some(stripped) = var.strip_prefix("size=") { - size_str = Some(stripped); - } - - if pagesize_str.is_some() && size_str.is_some() { - break; - } - } - } - - if pagesize_str.is_none() || size_str.is_none() { - return Err(anyhow!("no pagesize/size options found")); - } - - let pagesize = pagesize_str - .unwrap() - .parse::() - .context(format!("parse pagesize: {:?}", &pagesize_str))?; - let size = size_str - .unwrap() - .parse::() - .context(format!("parse size: {:?}", &pagesize_str))?; - - Ok((pagesize, size)) - } -} - -// update_ephemeral_mounts takes a list of ephemeral mounts and remounts them -// with mount options 
passed by the caller -#[instrument] -pub async fn update_ephemeral_mounts( - logger: Logger, - storages: &[Storage], - _sandbox: &Arc>, -) -> Result<()> { - for storage in storages { - let handler_name = &storage.driver; - let logger = logger.new(o!( - "msg" => "updating tmpfs storage", - "subsystem" => "storage", - "storage-type" => handler_name.to_owned())); - - match handler_name.as_str() { - DRIVER_EPHEMERAL_TYPE => { - fs::create_dir_all(&storage.mount_point)?; - - if storage.options.is_empty() { - continue; - } else { - // assume that fsGid has already been set - let mount_path = Path::new(&storage.mount_point); - let src_path = Path::new(&storage.source); - let opts: Vec<&String> = storage - .options - .iter() - .filter(|&opt| !opt.starts_with(FS_GID_EQ)) - .collect(); - let (flags, options) = parse_mount_options(&opts)?; - - info!(logger, "mounting storage"; - "mount-source" => src_path.display(), - "mount-destination" => mount_path.display(), - "mount-fstype" => storage.fstype.as_str(), - "mount-options" => options.as_str(), - ); - - baremount( - src_path, - mount_path, - storage.fstype.as_str(), - flags, - options.as_str(), - &logger, - )?; - } - } - _ => { - return Err(anyhow!( - "Unsupported storage type for syncing mounts {}. 
Only ephemeral storage update is supported", - storage.driver - )); - } - }; - } - - Ok(()) -} - -#[derive(Debug)] -struct OverlayfsHandler {} - -#[async_trait::async_trait] -impl StorageHandler for OverlayfsHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - if storage - .options - .iter() - .any(|e| e == "io.katacontainers.fs-opt.overlay-rw") - { - let cid = ctx - .cid - .clone() - .ok_or_else(|| anyhow!("No container id in rw overlay"))?; - let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid); - let work = cpath.join("work"); - let upper = cpath.join("upper"); - - fs::create_dir_all(&work).context("Creating overlay work directory")?; - fs::create_dir_all(&upper).context("Creating overlay upper directory")?; - - storage.fstype = "overlay".into(); - storage - .options - .push(format!("upperdir={}", upper.to_string_lossy())); - storage - .options - .push(format!("workdir={}", work.to_string_lossy())); - } - - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct LocalHandler {} - -#[async_trait::async_trait] -impl StorageHandler for LocalHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - _ctx: &mut StorageContext, - ) -> Result { - fs::create_dir_all(&storage.mount_point).context(format!( - "failed to create dir all {:?}", - &storage.mount_point - ))?; - - let opts = parse_options(&storage.options); - - let mut need_set_fsgid = false; - if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { - let gid = fsgid.parse::()?; - - nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; - need_set_fsgid = true; - } - - if let Some(mode) = opts.get("mode") { - let mut permission = fs::metadata(&storage.mount_point)?.permissions(); - - let mut o_mode = u32::from_str_radix(mode, 8)?; - - if need_set_fsgid { - // set SetGid mode mask. 
- o_mode |= MODE_SETGID; - } - permission.set_mode(o_mode); - - fs::set_permissions(&storage.mount_point, permission)?; - } - - new_device("".to_string()) - } -} - -#[derive(Debug)] -struct Virtio9pHandler {} - -#[async_trait::async_trait] -impl StorageHandler for Virtio9pHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioFsHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioFsHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioBlkMmioHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioBlkMmioHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - if !Path::new(&storage.source).exists() { - get_virtio_mmio_device_name(ctx.sandbox, &storage.source) - .await - .context("failed to get mmio device name")?; - } - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioBlkPciHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioBlkPciHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - // If hot-plugged, get the device node path based on the PCI path - // otherwise use the virt path provided in Storage Source - if storage.source.starts_with("/dev") { - let metadata = fs::metadata(&storage.source) - .context(format!("get metadata on file {:?}", &storage.source))?; - let mode = metadata.permissions().mode(); - if mode & libc::S_IFBLK == 0 { - return Err(anyhow!("Invalid device {}", &storage.source)); - } - } else { - let 
pcipath = pci::Path::from_str(&storage.source)?; - let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?; - storage.source = dev_path; - } - - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct VirtioBlkCcwHandler {} - -#[async_trait::async_trait] -impl StorageHandler for VirtioBlkCcwHandler { - #[cfg(target_arch = "s390x")] - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - let ccw_device = ccw::Device::from_str(&storage.source)?; - let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?; - storage.source = dev_path; - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } - - #[cfg(not(target_arch = "s390x"))] - #[instrument] - async fn create_device( - &self, - _storage: Storage, - _ctx: &mut StorageContext, - ) -> Result { - Err(anyhow!("CCW is only supported on s390x")) - } -} - -#[derive(Debug)] -struct ScsiHandler {} - -#[async_trait::async_trait] -impl StorageHandler for ScsiHandler { - #[instrument] - async fn create_device( - &self, - mut storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - // Retrieve the device path from SCSI address. - let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?; - storage.source = dev_path; - - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct PmemHandler {} - -#[async_trait::async_trait] -impl StorageHandler for PmemHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - // Retrieve the device path from NVDIMM address. 
- wait_for_pmem_device(ctx.sandbox, &storage.source).await?; - - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) - } -} - -#[derive(Debug)] -struct BindWatcherHandler {} - -#[async_trait::async_trait] -impl StorageHandler for BindWatcherHandler { - #[instrument] - async fn create_device( - &self, - storage: Storage, - ctx: &mut StorageContext, - ) -> Result { - if let Some(cid) = ctx.cid { - ctx.sandbox - .lock() - .await - .bind_watcher - .add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger) - .await?; - } - new_device("".to_string()) - } -} - -fn new_device(path: String) -> Result { - let device = StorageDeviceGeneric::new(path); - Ok(Arc::new(Mutex::new(device))) -} - -#[instrument] -fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result { - mount_storage(logger, storage)?; - set_ownership(logger, storage)?; - Ok(storage.mount_point.clone()) -} - -// mount_storage performs the mount described by the storage structure. -#[instrument] -fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> { - let logger = logger.new(o!("subsystem" => "mount")); - - // There's a special mechanism to create mountpoint from a `sharedfs` instance before - // starting the kata-agent. Check for such cases. - if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)? 
{ - warn!( - logger, - "{} already mounted on {}, ignoring...", - KATA_SHAREDFS_GUEST_PREMOUNT_TAG, - &storage.mount_point - ); - return Ok(()); - } - - let (flags, options) = parse_mount_options(&storage.options)?; - let mount_path = Path::new(&storage.mount_point); - let src_path = Path::new(&storage.source); - create_mount_destination(src_path, mount_path, "", &storage.fstype) - .context("Could not create mountpoint")?; - - info!(logger, "mounting storage"; - "mount-source" => src_path.display(), - "mount-destination" => mount_path.display(), - "mount-fstype" => storage.fstype.as_str(), - "mount-options" => options.as_str(), - ); - - baremount( - src_path, - mount_path, - storage.fstype.as_str(), - flags, - options.as_str(), - &logger, - ) -} - -#[instrument] -pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> { - let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership")); - - // If fsGroup is not set, skip performing ownership change - if storage.fs_group.is_none() { - return Ok(()); - } - - let fs_group = storage.fs_group(); - let read_only = storage.options.contains(&String::from("ro")); - let mount_path = Path::new(&storage.mount_point); - let metadata = mount_path.metadata().map_err(|err| { - error!(logger, "failed to obtain metadata for mount path"; - "mount-path" => mount_path.to_str(), - "error" => err.to_string(), - ); - err - })?; - - if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch.into() - && metadata.gid() == fs_group.group_id - { - let mut mask = if read_only { RO_MASK } else { RW_MASK }; - mask |= EXEC_MASK; - - // With fsGroup change policy to OnRootMismatch, if the current - // gid of the mount path root directory matches the desired gid - // and the current permission of mount path root directory is correct, - // then ownership change will be skipped. 
- let current_mode = metadata.permissions().mode(); - if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) { - info!(logger, "skipping ownership change for volume"; - "mount-path" => mount_path.to_str(), - "fs-group" => fs_group.group_id.to_string(), - ); - return Ok(()); - } - } - - info!(logger, "performing recursive ownership change"; - "mount-path" => mount_path.to_str(), - "fs-group" => fs_group.group_id.to_string(), - ); - recursive_ownership_change( - mount_path, - None, - Some(Gid::from_raw(fs_group.group_id)), - read_only, - ) -} - -#[instrument] -pub fn recursive_ownership_change( - path: &Path, - uid: Option, - gid: Option, - read_only: bool, -) -> Result<()> { - let mut mask = if read_only { RO_MASK } else { RW_MASK }; - if path.is_dir() { - for entry in fs::read_dir(path)? { - recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?; - } - mask |= EXEC_MASK; - mask |= MODE_SETGID; - } - - // We do not want to change the permission of the underlying file - // using symlink. Hence we skip symlinks from recursive ownership - // and permission changes. - if path.is_symlink() { - return Ok(()); - } - - nix::unistd::chown(path, uid, gid)?; - - if gid.is_some() { - let metadata = path.metadata()?; - let mut permission = metadata.permissions(); - let target_mode = metadata.mode() | mask; - permission.set_mode(target_mode); - fs::set_permissions(path, permission)?; - } - - Ok(()) -} - /// Looks for `mount_point` entry in the /proc/mounts. #[instrument] pub fn is_mounted(mount_point: &str) -> Result { @@ -799,80 +129,6 @@ pub fn is_mounted(mount_point: &str) -> Result { Ok(found) } -// add_storages takes a list of storages passed by the caller, and perform the -// associated operations such as waiting for the device to show up, and mount -// it to a specific location, according to the type of handler chosen, and for -// each storage. 
-#[instrument] -pub async fn add_storages( - logger: Logger, - storages: Vec, - sandbox: &Arc>, - cid: Option, -) -> Result> { - let mut mount_list = Vec::new(); - - for storage in storages { - let path = storage.mount_point.clone(); - let state = sandbox.lock().await.add_sandbox_storage(&path).await; - if state.ref_count().await > 1 { - // The device already exists. - continue; - } - - if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) { - let logger = - logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone())); - let mut ctx = StorageContext { - cid: &cid, - logger: &logger, - sandbox, - }; - - match handler.create_device(storage, &mut ctx).await { - Ok(device) => { - match sandbox - .lock() - .await - .update_sandbox_storage(&path, device.clone()) - { - Ok(d) => { - let path = device.lock().await.path().to_string(); - if !path.is_empty() { - mount_list.push(path.clone()); - } - drop(d); - } - Err(device) => { - error!(logger, "failed to update device for storage"); - if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await - { - warn!(logger, "failed to remove dummy sandbox storage {:?}", e); - } - device.lock().await.cleanup(); - return Err(anyhow!("failed to update device for storage")); - } - } - } - Err(e) => { - error!(logger, "failed to create device for storage, error: {e:?}"); - if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await { - warn!(logger, "failed to remove dummy sandbox storage {e:?}"); - } - return Err(e); - } - } - } else { - return Err(anyhow!( - "Failed to find the storage handler {}", - storage.driver - )); - } - } - - Ok(mount_list) -} - #[instrument] fn mount_to_rootfs(logger: &Logger, m: &InitMount) -> Result<()> { fs::create_dir_all(m.dest).context("could not create directory")?; @@ -1054,26 +310,14 @@ pub fn remove_mounts + std::fmt::Debug>(mounts: &[P]) -> Result<() Ok(()) } -#[instrument] -fn parse_options(option_list: &[String]) -> HashMap { - let mut 
options = HashMap::new(); - for opt in option_list { - let fields: Vec<&str> = opt.split('=').collect(); - if fields.len() == 2 { - options.insert(fields[0].to_string(), fields[1].to_string()); - } - } - options -} - #[cfg(test)] mod tests { use super::*; - use protocols::agent::FSGroup; use slog::Drain; use std::fs::File; use std::fs::OpenOptions; use std::io::Write; + use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; use tempfile::tempdir; use test_utils::TestUserType; @@ -1643,127 +887,6 @@ mod tests { } } - #[test] - fn test_mount_storage() { - #[derive(Debug)] - struct TestData<'a> { - test_user: TestUserType, - storage: Storage, - error_contains: &'a str, - - make_source_dir: bool, - make_mount_dir: bool, - deny_mount_permission: bool, - } - - impl Default for TestData<'_> { - fn default() -> Self { - TestData { - test_user: TestUserType::Any, - storage: Storage { - mount_point: "mnt".to_string(), - source: "src".to_string(), - fstype: "tmpfs".to_string(), - ..Default::default() - }, - make_source_dir: true, - make_mount_dir: false, - deny_mount_permission: false, - error_contains: "", - } - } - } - - let tests = &[ - TestData { - test_user: TestUserType::NonRootOnly, - error_contains: "EPERM: Operation not permitted", - ..Default::default() - }, - TestData { - test_user: TestUserType::RootOnly, - ..Default::default() - }, - TestData { - storage: Storage { - mount_point: "mnt".to_string(), - source: "src".to_string(), - fstype: "bind".to_string(), - ..Default::default() - }, - make_source_dir: false, - make_mount_dir: true, - error_contains: "Could not create mountpoint", - ..Default::default() - }, - TestData { - test_user: TestUserType::NonRootOnly, - deny_mount_permission: true, - error_contains: "Could not create mountpoint", - ..Default::default() - }, - ]; - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - skip_loop_by_user!(msg, d.test_user); - - let drain = slog::Discard; - let logger = 
slog::Logger::root(drain, o!()); - - let tempdir = tempdir().unwrap(); - - let source = tempdir.path().join(&d.storage.source); - let mount_point = tempdir.path().join(&d.storage.mount_point); - - let storage = Storage { - source: source.to_str().unwrap().to_string(), - mount_point: mount_point.to_str().unwrap().to_string(), - ..d.storage.clone() - }; - - if d.make_source_dir { - fs::create_dir_all(&storage.source).unwrap(); - } - if d.make_mount_dir { - fs::create_dir_all(&storage.mount_point).unwrap(); - } - - if d.deny_mount_permission { - fs::set_permissions( - mount_point.parent().unwrap(), - fs::Permissions::from_mode(0o000), - ) - .unwrap(); - } - - let result = mount_storage(&logger, &storage); - - // restore permissions so tempdir can be cleaned up - if d.deny_mount_permission { - fs::set_permissions( - mount_point.parent().unwrap(), - fs::Permissions::from_mode(0o755), - ) - .unwrap(); - } - - if result.is_ok() { - nix::mount::umount(&mount_point).unwrap(); - } - - let msg = format!("{}: result: {:?}", msg, result); - if d.error_contains.is_empty() { - assert!(result.is_ok(), "{}", msg); - } else { - assert!(result.is_err(), "{}", msg); - let error_msg = format!("{}", result.unwrap_err()); - assert!(error_msg.contains(d.error_contains), "{}", msg); - } - } - } - #[test] fn test_mount_to_rootfs() { #[derive(Debug)] @@ -1863,62 +986,6 @@ mod tests { } } - #[test] - fn test_get_pagesize_and_size_from_option() { - let expected_pagesize = 2048; - let expected_size = 107374182; - let expected = (expected_pagesize, expected_size); - - let data = vec![ - // (input, expected, is_ok) - ("size-1=107374182,pagesize-1=2048", expected, false), - ("size-1=107374182,pagesize=2048", expected, false), - ("size=107374182,pagesize-1=2048", expected, false), - ("size=107374182,pagesize=abc", expected, false), - ("size=abc,pagesize=2048", expected, false), - ("size=,pagesize=2048", expected, false), - ("size=107374182,pagesize=", expected, false), - 
("size=107374182,pagesize=2048", expected, true), - ("pagesize=2048,size=107374182", expected, true), - ("foo=bar,pagesize=2048,size=107374182", expected, true), - ( - "foo=bar,pagesize=2048,foo1=bar1,size=107374182", - expected, - true, - ), - ( - "pagesize=2048,foo1=bar1,foo=bar,size=107374182", - expected, - true, - ), - ( - "foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2", - expected, - true, - ), - ( - "foo=bar,size=107374182,foo1=bar1,pagesize=2048", - expected, - true, - ), - ]; - - for case in data { - let input = case.0; - let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]); - - let is_ok = case.2; - if is_ok { - let expected = case.1; - let (pagesize, size) = r.unwrap(); - assert_eq!(expected.0, pagesize); - assert_eq!(expected.1, size); - } else { - assert!(r.is_err()); - } - } - } - #[test] fn test_parse_mount_flags_and_options() { #[derive(Debug)] @@ -1969,207 +1036,4 @@ mod tests { assert_eq!(expected_result, result, "{}", msg); } } - - #[test] - fn test_set_ownership() { - skip_if_not_root!(); - - let logger = slog::Logger::root(slog::Discard, o!()); - - #[derive(Debug)] - struct TestData<'a> { - mount_path: &'a str, - fs_group: Option, - read_only: bool, - expected_group_id: u32, - expected_permission: u32, - } - - let tests = &[ - TestData { - mount_path: "foo", - fs_group: None, - read_only: false, - expected_group_id: 0, - expected_permission: 0, - }, - TestData { - mount_path: "rw_mount", - fs_group: Some(FSGroup { - group_id: 3000, - group_change_policy: FSGroupChangePolicy::Always.into(), - ..Default::default() - }), - read_only: false, - expected_group_id: 3000, - expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID, - }, - TestData { - mount_path: "ro_mount", - fs_group: Some(FSGroup { - group_id: 3000, - group_change_policy: FSGroupChangePolicy::OnRootMismatch.into(), - ..Default::default() - }), - read_only: true, - expected_group_id: 3000, - expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID, 
- }, - ]; - - let tempdir = tempdir().expect("failed to create tmpdir"); - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - let mount_dir = tempdir.path().join(d.mount_path); - fs::create_dir(&mount_dir) - .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); - - let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); - let mut storage_data = Storage::new(); - if d.read_only { - storage_data.set_options(vec!["foo".to_string(), "ro".to_string()]); - } - if let Some(fs_group) = d.fs_group.clone() { - storage_data.set_fs_group(fs_group); - } - storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap(); - - let result = set_ownership(&logger, &storage_data); - assert!(result.is_ok()); - - assert_eq!( - mount_dir.as_path().metadata().unwrap().gid(), - d.expected_group_id - ); - assert_eq!( - mount_dir.as_path().metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission) - ); - } - } - - #[test] - fn test_recursive_ownership_change() { - skip_if_not_root!(); - - const COUNT: usize = 5; - - #[derive(Debug)] - struct TestData<'a> { - // Directory where the recursive ownership change should be performed on - path: &'a str, - - // User ID for ownership change - uid: u32, - - // Group ID for ownership change - gid: u32, - - // Set when the permission should be read-only - read_only: bool, - - // The expected permission of all directories after ownership change - expected_permission_directory: u32, - - // The expected permission of all files after ownership change - expected_permission_file: u32, - } - - let tests = &[ - TestData { - path: "no_gid_change", - uid: 0, - gid: 0, - read_only: false, - expected_permission_directory: 0, - expected_permission_file: 0, - }, - TestData { - path: "rw_gid_change", - uid: 0, - gid: 3000, - read_only: false, - expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID, - expected_permission_file: 
RW_MASK, - }, - TestData { - path: "ro_gid_change", - uid: 0, - gid: 3000, - read_only: true, - expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID, - expected_permission_file: RO_MASK, - }, - ]; - - let tempdir = tempdir().expect("failed to create tmpdir"); - - for (i, d) in tests.iter().enumerate() { - let msg = format!("test[{}]: {:?}", i, d); - - let mount_dir = tempdir.path().join(d.path); - fs::create_dir(&mount_dir) - .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); - - let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); - let mut file_mode: u32 = 0; - - // create testing directories and files - for n in 1..COUNT { - let nest_dir = mount_dir.join(format!("nested{}", n)); - fs::create_dir(&nest_dir) - .unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg)); - - for f in 1..COUNT { - let filename = nest_dir.join(format!("file{}", f)); - File::create(&filename) - .unwrap_or_else(|_| panic!("{}: failed to create file", msg)); - file_mode = filename.as_path().metadata().unwrap().permissions().mode(); - } - } - - let uid = if d.uid > 0 { - Some(Uid::from_raw(d.uid)) - } else { - None - }; - let gid = if d.gid > 0 { - Some(Gid::from_raw(d.gid)) - } else { - None - }; - let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only); - - assert!(result.is_ok()); - - assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid); - assert_eq!( - mount_dir.as_path().metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission_directory) - ); - - for n in 1..COUNT { - let nest_dir = mount_dir.join(format!("nested{}", n)); - for f in 1..COUNT { - let filename = nest_dir.join(format!("file{}", f)); - let file = Path::new(&filename); - - assert_eq!(file.metadata().unwrap().gid(), d.gid); - assert_eq!( - file.metadata().unwrap().permissions().mode(), - (file_mode | d.expected_permission_file) - ); - } - - let dir = Path::new(&nest_dir); - 
assert_eq!(dir.metadata().unwrap().gid(), d.gid); - assert_eq!( - dir.metadata().unwrap().permissions().mode(), - (directory_mode | d.expected_permission_directory) - ); - } - } - } } diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 3ef0cc688f..6db6bbcc0b 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -59,12 +59,13 @@ use crate::device::{ use crate::image_rpc; use crate::linux_abi::*; use crate::metrics::get_metrics; -use crate::mount::{add_storages, baremount, update_ephemeral_mounts, STORAGE_HANDLERS}; +use crate::mount::baremount; use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS}; use crate::network::setup_guest_dns; use crate::pci; use crate::random; use crate::sandbox::Sandbox; +use crate::storage::{add_storages, update_ephemeral_mounts, STORAGE_HANDLERS}; use crate::version::{AGENT_VERSION, API_VERSION}; use crate::AGENT_CONFIG; diff --git a/src/agent/src/storage/bind_watcher_handler.rs b/src/agent/src/storage/bind_watcher_handler.rs new file mode 100644 index 0000000000..44f7094e8b --- /dev/null +++ b/src/agent/src/storage/bind_watcher_handler.rs @@ -0,0 +1,36 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::Result; +use protocols::agent::Storage; +use std::iter; +use tracing::instrument; + +use crate::sandbox::StorageDeviceObject; +use crate::storage::{new_device, StorageContext, StorageHandler}; + +#[derive(Debug)] +pub struct BindWatcherHandler {} + +#[async_trait::async_trait] +impl StorageHandler for BindWatcherHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if let Some(cid) = ctx.cid { + ctx.sandbox + .lock() + .await + .bind_watcher + .add_container(cid.to_string(), iter::once(storage.clone()), ctx.logger) + .await?; + } + new_device("".to_string()) + } +} diff --git a/src/agent/src/storage/block_handler.rs b/src/agent/src/storage/block_handler.rs 
new file mode 100644 index 0000000000..7d676211b0 --- /dev/null +++ b/src/agent/src/storage/block_handler.rs @@ -0,0 +1,145 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::os::unix::fs::PermissionsExt; +use std::path::Path; +use std::str::FromStr; + +use anyhow::{anyhow, Context, Result}; +use protocols::agent::Storage; +use tracing::instrument; + +use crate::device::{ + get_scsi_device_name, get_virtio_blk_pci_device_name, get_virtio_mmio_device_name, + wait_for_pmem_device, +}; +use crate::pci; +use crate::sandbox::StorageDeviceObject; +use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; +#[cfg(target_arch = "s390x")] +use crate::{ccw, device::get_virtio_blk_ccw_device_name}; + +#[derive(Debug)] +pub struct VirtioBlkMmioHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkMmioHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if !Path::new(&storage.source).exists() { + get_virtio_mmio_device_name(ctx.sandbox, &storage.source) + .await + .context("failed to get mmio device name")?; + } + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct VirtioBlkPciHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkPciHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // If hot-plugged, get the device node path based on the PCI path + // otherwise use the virt path provided in Storage Source + if storage.source.starts_with("/dev") { + let metadata = fs::metadata(&storage.source) + .context(format!("get metadata on file {:?}", &storage.source))?; + let mode = metadata.permissions().mode(); + if mode & libc::S_IFBLK == 0 { + return Err(anyhow!("Invalid device {}", 
&storage.source)); + } + } else { + let pcipath = pci::Path::from_str(&storage.source)?; + let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?; + storage.source = dev_path; + } + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct VirtioBlkCcwHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioBlkCcwHandler { + #[cfg(target_arch = "s390x")] + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let ccw_device = ccw::Device::from_str(&storage.source)?; + let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?; + storage.source = dev_path; + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } + + #[cfg(not(target_arch = "s390x"))] + #[instrument] + async fn create_device( + &self, + _storage: Storage, + _ctx: &mut StorageContext, + ) -> Result { + Err(anyhow!("CCW is only supported on s390x")) + } +} + +#[derive(Debug)] +pub struct ScsiHandler {} + +#[async_trait::async_trait] +impl StorageHandler for ScsiHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // Retrieve the device path from SCSI address. 
+ let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?; + storage.source = dev_path; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct PmemHandler {} + +#[async_trait::async_trait] +impl StorageHandler for PmemHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + // Retrieve the device for pmem storage + wait_for_pmem_device(ctx.sandbox, &storage.source).await?; + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} diff --git a/src/agent/src/storage/ephemeral_handler.rs b/src/agent/src/storage/ephemeral_handler.rs new file mode 100644 index 0000000000..38ceb8f556 --- /dev/null +++ b/src/agent/src/storage/ephemeral_handler.rs @@ -0,0 +1,293 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::fs::OpenOptions; +use std::io::Write; +use std::os::unix::fs::{MetadataExt, PermissionsExt}; +use std::path::Path; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::mount::parse_mount_options; +use kata_types::mount::KATA_MOUNT_OPTION_FS_GID; +use nix::unistd::Gid; +use protocols::agent::Storage; +use slog::Logger; +use tokio::sync::Mutex; +use tracing::instrument; + +use crate::device::{DRIVER_EPHEMERAL_TYPE, FS_TYPE_HUGETLB}; +use crate::mount::baremount; +use crate::sandbox::{Sandbox, StorageDeviceObject}; +use crate::storage::{ + common_storage_handler, new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID, +}; + +const FS_GID_EQ: &str = "fsgid="; +const SYS_FS_HUGEPAGES_PREFIX: &str = "/sys/kernel/mm/hugepages"; + +#[derive(Debug)] +pub struct EphemeralHandler {} + +#[async_trait::async_trait] +impl StorageHandler for EphemeralHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut 
StorageContext, + ) -> Result { + // hugetlbfs + if storage.fstype == FS_TYPE_HUGETLB { + info!(ctx.logger, "handle hugetlbfs storage"); + // Allocate hugepages before mount + // /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages + // /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + // options eg "pagesize=2097152,size=524288000"(2M, 500M) + Self::allocate_hugepages(ctx.logger, &storage.options.to_vec()) + .context("allocate hugepages")?; + common_storage_handler(ctx.logger, &storage)?; + } else if !storage.options.is_empty() { + // By now we only support one option field: "fsGroup" which + // isn't an valid mount option, thus we should remove it when + // do mount. + let opts = parse_options(&storage.options); + storage.options = Default::default(); + common_storage_handler(ctx.logger, &storage)?; + + // ephemeral_storage didn't support mount options except fsGroup. + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; + + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; + + let meta = fs::metadata(&storage.mount_point)?; + let mut permission = meta.permissions(); + + let o_mode = meta.mode() | MODE_SETGID; + permission.set_mode(o_mode); + fs::set_permissions(&storage.mount_point, permission)?; + } + } else { + common_storage_handler(ctx.logger, &storage)?; + } + + new_device("".to_string()) + } +} + +impl EphemeralHandler { + // Allocate hugepages by writing to sysfs + fn allocate_hugepages(logger: &Logger, options: &[String]) -> Result<()> { + info!(logger, "mounting hugePages storage options: {:?}", options); + + let (pagesize, size) = Self::get_pagesize_and_size_from_option(options) + .context(format!("parse mount options: {:?}", &options))?; + + info!( + logger, + "allocate hugepages. 
pageSize: {}, size: {}", pagesize, size + ); + + // sysfs entry is always of the form hugepages-${pagesize}kB + // Ref: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt + let path = Path::new(SYS_FS_HUGEPAGES_PREFIX) + .join(format!("hugepages-{}kB", pagesize / 1024)) + .join("nr_hugepages"); + + // write numpages to nr_hugepages file. + let numpages = format!("{}", size / pagesize); + info!(logger, "write {} pages to {:?}", &numpages, &path); + + let mut file = OpenOptions::new() + .write(true) + .open(&path) + .context(format!("open nr_hugepages directory {:?}", &path))?; + + file.write_all(numpages.as_bytes()) + .context(format!("write nr_hugepages failed: {:?}", &path))?; + + // Even if the write succeeds, the kernel isn't guaranteed to be + // able to allocate all the pages we requested. Verify that it + // did. + let verify = fs::read_to_string(&path).context(format!("reading {:?}", &path))?; + let allocated = verify + .trim_end() + .parse::() + .map_err(|_| anyhow!("Unexpected text {:?} in {:?}", &verify, &path))?; + if allocated != size / pagesize { + return Err(anyhow!( + "Only allocated {} of {} hugepages of size {}", + allocated, + numpages, + pagesize + )); + } + + Ok(()) + } + + // Parse filesystem options string to retrieve hugepage details + // options eg "pagesize=2048,size=107374182" + fn get_pagesize_and_size_from_option(options: &[String]) -> Result<(u64, u64)> { + let mut pagesize_str: Option<&str> = None; + let mut size_str: Option<&str> = None; + + for option in options { + let vars: Vec<&str> = option.trim().split(',').collect(); + + for var in vars { + if let Some(stripped) = var.strip_prefix("pagesize=") { + pagesize_str = Some(stripped); + } else if let Some(stripped) = var.strip_prefix("size=") { + size_str = Some(stripped); + } + + if pagesize_str.is_some() && size_str.is_some() { + break; + } + } + } + + if pagesize_str.is_none() || size_str.is_none() { + return Err(anyhow!("no pagesize/size options found")); + } + + let 
pagesize = pagesize_str + .unwrap() + .parse::() + .context(format!("parse pagesize: {:?}", &pagesize_str))?; + let size = size_str + .unwrap() + .parse::() + .context(format!("parse size: {:?}", &pagesize_str))?; + + Ok((pagesize, size)) + } +} + +// update_ephemeral_mounts takes a list of ephemeral mounts and remounts them +// with mount options passed by the caller +#[instrument] +pub async fn update_ephemeral_mounts( + logger: Logger, + storages: &[Storage], + _sandbox: &Arc>, +) -> Result<()> { + for storage in storages { + let handler_name = &storage.driver; + let logger = logger.new(o!( + "msg" => "updating tmpfs storage", + "subsystem" => "storage", + "storage-type" => handler_name.to_owned())); + + match handler_name.as_str() { + DRIVER_EPHEMERAL_TYPE => { + fs::create_dir_all(&storage.mount_point)?; + + if storage.options.is_empty() { + continue; + } else { + // assume that fsGid has already been set + let mount_path = Path::new(&storage.mount_point); + let src_path = Path::new(&storage.source); + let opts: Vec<&String> = storage + .options + .iter() + .filter(|&opt| !opt.starts_with(FS_GID_EQ)) + .collect(); + let (flags, options) = parse_mount_options(&opts)?; + + info!(logger, "mounting storage"; + "mount-source" => src_path.display(), + "mount-destination" => mount_path.display(), + "mount-fstype" => storage.fstype.as_str(), + "mount-options" => options.as_str(), + ); + + baremount( + src_path, + mount_path, + storage.fstype.as_str(), + flags, + options.as_str(), + &logger, + )?; + } + } + _ => { + return Err(anyhow!( + "Unsupported storage type for syncing mounts {}. 
Only ephemeral storage update is supported", + storage.driver + )); + } + }; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_pagesize_and_size_from_option() { + let expected_pagesize = 2048; + let expected_size = 107374182; + let expected = (expected_pagesize, expected_size); + + let data = vec![ + // (input, expected, is_ok) + ("size-1=107374182,pagesize-1=2048", expected, false), + ("size-1=107374182,pagesize=2048", expected, false), + ("size=107374182,pagesize-1=2048", expected, false), + ("size=107374182,pagesize=abc", expected, false), + ("size=abc,pagesize=2048", expected, false), + ("size=,pagesize=2048", expected, false), + ("size=107374182,pagesize=", expected, false), + ("size=107374182,pagesize=2048", expected, true), + ("pagesize=2048,size=107374182", expected, true), + ("foo=bar,pagesize=2048,size=107374182", expected, true), + ( + "foo=bar,pagesize=2048,foo1=bar1,size=107374182", + expected, + true, + ), + ( + "pagesize=2048,foo1=bar1,foo=bar,size=107374182", + expected, + true, + ), + ( + "foo=bar,pagesize=2048,foo1=bar1,size=107374182,foo2=bar2", + expected, + true, + ), + ( + "foo=bar,size=107374182,foo1=bar1,pagesize=2048", + expected, + true, + ), + ]; + + for case in data { + let input = case.0; + let r = EphemeralHandler::get_pagesize_and_size_from_option(&[input.to_string()]); + + let is_ok = case.2; + if is_ok { + let expected = case.1; + let (pagesize, size) = r.unwrap(); + assert_eq!(expected.0, pagesize); + assert_eq!(expected.1, size); + } else { + assert!(r.is_err()); + } + } + } +} diff --git a/src/agent/src/storage/fs_handler.rs b/src/agent/src/storage/fs_handler.rs new file mode 100644 index 0000000000..97f3cb2589 --- /dev/null +++ b/src/agent/src/storage/fs_handler.rs @@ -0,0 +1,88 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::path::Path; + +use anyhow::{anyhow, Context, Result}; +use 
protocols::agent::Storage; +use tracing::instrument; + +use crate::sandbox::StorageDeviceObject; +use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; + +#[derive(Debug)] +pub struct OverlayfsHandler {} + +#[async_trait::async_trait] +impl StorageHandler for OverlayfsHandler { + #[instrument] + async fn create_device( + &self, + mut storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + if storage + .options + .iter() + .any(|e| e == "io.katacontainers.fs-opt.overlay-rw") + { + let cid = ctx + .cid + .clone() + .ok_or_else(|| anyhow!("No container id in rw overlay"))?; + let cpath = Path::new(crate::rpc::CONTAINER_BASE).join(cid); + let work = cpath.join("work"); + let upper = cpath.join("upper"); + + fs::create_dir_all(&work).context("Creating overlay work directory")?; + fs::create_dir_all(&upper).context("Creating overlay upper directory")?; + + storage.fstype = "overlay".into(); + storage + .options + .push(format!("upperdir={}", upper.to_string_lossy())); + storage + .options + .push(format!("workdir={}", work.to_string_lossy())); + } + + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct Virtio9pHandler {} + +#[async_trait::async_trait] +impl StorageHandler for Virtio9pHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} + +#[derive(Debug)] +pub struct VirtioFsHandler {} + +#[async_trait::async_trait] +impl StorageHandler for VirtioFsHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } +} diff --git a/src/agent/src/storage/local_handler.rs b/src/agent/src/storage/local_handler.rs new file mode 100644 index 0000000000..0ff6f26f8e --- /dev/null 
+++ b/src/agent/src/storage/local_handler.rs @@ -0,0 +1,61 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::fs; +use std::os::unix::fs::PermissionsExt; + +use anyhow::{Context, Result}; +use kata_types::mount::KATA_MOUNT_OPTION_FS_GID; +use nix::unistd::Gid; +use protocols::agent::Storage; +use tracing::instrument; + +use crate::sandbox::StorageDeviceObject; +use crate::storage::{new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID}; + +#[derive(Debug)] +pub struct LocalHandler {} + +#[async_trait::async_trait] +impl StorageHandler for LocalHandler { + #[instrument] + async fn create_device( + &self, + storage: Storage, + _ctx: &mut StorageContext, + ) -> Result { + fs::create_dir_all(&storage.mount_point).context(format!( + "failed to create dir all {:?}", + &storage.mount_point + ))?; + + let opts = parse_options(&storage.options); + + let mut need_set_fsgid = false; + if let Some(fsgid) = opts.get(KATA_MOUNT_OPTION_FS_GID) { + let gid = fsgid.parse::()?; + + nix::unistd::chown(storage.mount_point.as_str(), None, Some(Gid::from_raw(gid)))?; + need_set_fsgid = true; + } + + if let Some(mode) = opts.get("mode") { + let mut permission = fs::metadata(&storage.mount_point)?.permissions(); + + let mut o_mode = u32::from_str_radix(mode, 8)?; + + if need_set_fsgid { + // set SetGid mode mask. 
+ o_mode |= MODE_SETGID; + } + permission.set_mode(o_mode); + + fs::set_permissions(&storage.mount_point, permission)?; + } + + new_device("".to_string()) + } +} diff --git a/src/agent/src/storage/mod.rs b/src/agent/src/storage/mod.rs new file mode 100644 index 0000000000..11bf170c6a --- /dev/null +++ b/src/agent/src/storage/mod.rs @@ -0,0 +1,648 @@ +// Copyright (c) 2019 Ant Financial +// Copyright (c) 2023 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::fs; +use std::os::unix::fs::{MetadataExt, PermissionsExt}; +use std::path::Path; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use kata_sys_util::mount::{create_mount_destination, parse_mount_options}; +use kata_types::mount::{ + StorageDeviceGeneric, StorageHandlerManager, KATA_SHAREDFS_GUEST_PREMOUNT_TAG, +}; +use nix::unistd::{Gid, Uid}; +use protocols::agent::Storage; +use protocols::types::FSGroupChangePolicy; +use slog::Logger; +use tokio::sync::Mutex; +use tracing::instrument; + +use self::bind_watcher_handler::BindWatcherHandler; +use self::block_handler::{PmemHandler, ScsiHandler, VirtioBlkMmioHandler, VirtioBlkPciHandler}; +use self::ephemeral_handler::EphemeralHandler; +use self::fs_handler::{OverlayfsHandler, Virtio9pHandler, VirtioFsHandler}; +use self::local_handler::LocalHandler; +use crate::device::{ + DRIVER_9P_TYPE, DRIVER_BLK_MMIO_TYPE, DRIVER_BLK_PCI_TYPE, DRIVER_EPHEMERAL_TYPE, + DRIVER_LOCAL_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_OVERLAYFS_TYPE, DRIVER_SCSI_TYPE, + DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, +}; +use crate::mount::{baremount, is_mounted}; +use crate::sandbox::{Sandbox, StorageDeviceObject}; + +pub use self::ephemeral_handler::update_ephemeral_mounts; + +mod bind_watcher_handler; +mod block_handler; +mod ephemeral_handler; +mod fs_handler; +mod local_handler; + +const RW_MASK: u32 = 0o660; +const RO_MASK: u32 = 0o440; +const EXEC_MASK: u32 = 0o110; +const MODE_SETGID: u32 = 0o2000; + +#[derive(Debug)] 
+pub struct StorageContext<'a> { + cid: &'a Option, + logger: &'a Logger, + sandbox: &'a Arc>, +} + +/// Trait object to handle storage device. +#[async_trait::async_trait] +pub trait StorageHandler: Send + Sync { + /// Create a new storage device. + async fn create_device( + &self, + storage: Storage, + ctx: &mut StorageContext, + ) -> Result; +} + +#[rustfmt::skip] +lazy_static! { + pub static ref STORAGE_HANDLERS: StorageHandlerManager> = { + let mut manager: StorageHandlerManager> = StorageHandlerManager::new(); + manager.add_handler(DRIVER_9P_TYPE, Arc::new(Virtio9pHandler{})).unwrap(); + #[cfg(target_arch = "s390x")] + manager.add_handler(crate::device::DRIVER_BLK_CCW_TYPE, Arc::new(self::block_handler::VirtioBlkCcwHandler{})).unwrap(); + manager.add_handler(DRIVER_BLK_MMIO_TYPE, Arc::new(VirtioBlkMmioHandler{})).unwrap(); + manager.add_handler(DRIVER_BLK_PCI_TYPE, Arc::new(VirtioBlkPciHandler{})).unwrap(); + manager.add_handler(DRIVER_EPHEMERAL_TYPE, Arc::new(EphemeralHandler{})).unwrap(); + manager.add_handler(DRIVER_LOCAL_TYPE, Arc::new(LocalHandler{})).unwrap(); + manager.add_handler(DRIVER_NVDIMM_TYPE, Arc::new(PmemHandler{})).unwrap(); + manager.add_handler(DRIVER_OVERLAYFS_TYPE, Arc::new(OverlayfsHandler{})).unwrap(); + manager.add_handler(DRIVER_SCSI_TYPE, Arc::new(ScsiHandler{})).unwrap(); + manager.add_handler(DRIVER_VIRTIOFS_TYPE, Arc::new(VirtioFsHandler{})).unwrap(); + manager.add_handler(DRIVER_WATCHABLE_BIND_TYPE, Arc::new(BindWatcherHandler{})).unwrap(); + manager + }; +} + +// add_storages takes a list of storages passed by the caller, and perform the +// associated operations such as waiting for the device to show up, and mount +// it to a specific location, according to the type of handler chosen, and for +// each storage. 
+#[instrument] +pub async fn add_storages( + logger: Logger, + storages: Vec, + sandbox: &Arc>, + cid: Option, +) -> Result> { + let mut mount_list = Vec::new(); + + for storage in storages { + let path = storage.mount_point.clone(); + let state = sandbox.lock().await.add_sandbox_storage(&path).await; + if state.ref_count().await > 1 { + // The device already exists. + continue; + } + + if let Some(handler) = STORAGE_HANDLERS.handler(&storage.driver) { + let logger = + logger.new(o!( "subsystem" => "storage", "storage-type" => storage.driver.clone())); + let mut ctx = StorageContext { + cid: &cid, + logger: &logger, + sandbox, + }; + + match handler.create_device(storage, &mut ctx).await { + Ok(device) => { + match sandbox + .lock() + .await + .update_sandbox_storage(&path, device.clone()) + { + Ok(d) => { + let path = device.lock().await.path().to_string(); + if !path.is_empty() { + mount_list.push(path.clone()); + } + drop(d); + } + Err(device) => { + error!(logger, "failed to update device for storage"); + if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await + { + warn!(logger, "failed to remove dummy sandbox storage {:?}", e); + } + device.lock().await.cleanup(); + return Err(anyhow!("failed to update device for storage")); + } + } + } + Err(e) => { + error!(logger, "failed to create device for storage, error: {e:?}"); + if let Err(e) = sandbox.lock().await.remove_sandbox_storage(&path).await { + warn!(logger, "failed to remove dummy sandbox storage {e:?}"); + } + return Err(e); + } + } + } else { + return Err(anyhow!( + "Failed to find the storage handler {}", + storage.driver + )); + } + } + + Ok(mount_list) +} + +pub(crate) fn new_device(path: String) -> Result { + let device = StorageDeviceGeneric::new(path); + Ok(Arc::new(Mutex::new(device))) +} + +#[instrument] +pub(crate) fn common_storage_handler(logger: &Logger, storage: &Storage) -> Result { + mount_storage(logger, storage)?; + set_ownership(logger, storage)?; + 
Ok(storage.mount_point.clone()) +} + +// mount_storage performs the mount described by the storage structure. +#[instrument] +fn mount_storage(logger: &Logger, storage: &Storage) -> Result<()> { + let logger = logger.new(o!("subsystem" => "mount")); + + // There's a special mechanism to create mountpoint from a `sharedfs` instance before + // starting the kata-agent. Check for such cases. + if storage.source == KATA_SHAREDFS_GUEST_PREMOUNT_TAG && is_mounted(&storage.mount_point)? { + warn!( + logger, + "{} already mounted on {}, ignoring...", + KATA_SHAREDFS_GUEST_PREMOUNT_TAG, + &storage.mount_point + ); + return Ok(()); + } + + let (flags, options) = parse_mount_options(&storage.options)?; + let mount_path = Path::new(&storage.mount_point); + let src_path = Path::new(&storage.source); + create_mount_destination(src_path, mount_path, "", &storage.fstype) + .context("Could not create mountpoint")?; + + info!(logger, "mounting storage"; + "mount-source" => src_path.display(), + "mount-destination" => mount_path.display(), + "mount-fstype" => storage.fstype.as_str(), + "mount-options" => options.as_str(), + ); + + baremount( + src_path, + mount_path, + storage.fstype.as_str(), + flags, + options.as_str(), + &logger, + ) +} + +#[instrument] +pub(crate) fn parse_options(option_list: &[String]) -> HashMap { + let mut options = HashMap::new(); + for opt in option_list { + let fields: Vec<&str> = opt.split('=').collect(); + if fields.len() == 2 { + options.insert(fields[0].to_string(), fields[1].to_string()); + } + } + options +} + +#[instrument] +pub fn set_ownership(logger: &Logger, storage: &Storage) -> Result<()> { + let logger = logger.new(o!("subsystem" => "mount", "fn" => "set_ownership")); + + // If fsGroup is not set, skip performing ownership change + if storage.fs_group.is_none() { + return Ok(()); + } + + let fs_group = storage.fs_group(); + let read_only = storage.options.contains(&String::from("ro")); + let mount_path = Path::new(&storage.mount_point); + let 
metadata = mount_path.metadata().map_err(|err| { + error!(logger, "failed to obtain metadata for mount path"; + "mount-path" => mount_path.to_str(), + "error" => err.to_string(), + ); + err + })?; + + if fs_group.group_change_policy == FSGroupChangePolicy::OnRootMismatch.into() + && metadata.gid() == fs_group.group_id + { + let mut mask = if read_only { RO_MASK } else { RW_MASK }; + mask |= EXEC_MASK; + + // With fsGroup change policy to OnRootMismatch, if the current + // gid of the mount path root directory matches the desired gid + // and the current permission of mount path root directory is correct, + // then ownership change will be skipped. + let current_mode = metadata.permissions().mode(); + if (mask & current_mode == mask) && (current_mode & MODE_SETGID != 0) { + info!(logger, "skipping ownership change for volume"; + "mount-path" => mount_path.to_str(), + "fs-group" => fs_group.group_id.to_string(), + ); + return Ok(()); + } + } + + info!(logger, "performing recursive ownership change"; + "mount-path" => mount_path.to_str(), + "fs-group" => fs_group.group_id.to_string(), + ); + recursive_ownership_change( + mount_path, + None, + Some(Gid::from_raw(fs_group.group_id)), + read_only, + ) +} + +#[instrument] +pub fn recursive_ownership_change( + path: &Path, + uid: Option, + gid: Option, + read_only: bool, +) -> Result<()> { + let mut mask = if read_only { RO_MASK } else { RW_MASK }; + if path.is_dir() { + for entry in fs::read_dir(path)? { + recursive_ownership_change(entry?.path().as_path(), uid, gid, read_only)?; + } + mask |= EXEC_MASK; + mask |= MODE_SETGID; + } + + // We do not want to change the permission of the underlying file + // using symlink. Hence we skip symlinks from recursive ownership + // and permission changes. 
+ if path.is_symlink() { + return Ok(()); + } + + nix::unistd::chown(path, uid, gid)?; + + if gid.is_some() { + let metadata = path.metadata()?; + let mut permission = metadata.permissions(); + let target_mode = metadata.mode() | mask; + permission.set_mode(target_mode); + fs::set_permissions(path, permission)?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use protocols::agent::FSGroup; + use std::fs::File; + use tempfile::tempdir; + use test_utils::{ + skip_if_not_root, skip_loop_by_user, skip_loop_if_not_root, skip_loop_if_root, TestUserType, + }; + + #[test] + fn test_mount_storage() { + #[derive(Debug)] + struct TestData<'a> { + test_user: TestUserType, + storage: Storage, + error_contains: &'a str, + + make_source_dir: bool, + make_mount_dir: bool, + deny_mount_permission: bool, + } + + impl Default for TestData<'_> { + fn default() -> Self { + TestData { + test_user: TestUserType::Any, + storage: Storage { + mount_point: "mnt".to_string(), + source: "src".to_string(), + fstype: "tmpfs".to_string(), + ..Default::default() + }, + make_source_dir: true, + make_mount_dir: false, + deny_mount_permission: false, + error_contains: "", + } + } + } + + let tests = &[ + TestData { + test_user: TestUserType::NonRootOnly, + error_contains: "EPERM: Operation not permitted", + ..Default::default() + }, + TestData { + test_user: TestUserType::RootOnly, + ..Default::default() + }, + TestData { + storage: Storage { + mount_point: "mnt".to_string(), + source: "src".to_string(), + fstype: "bind".to_string(), + ..Default::default() + }, + make_source_dir: false, + make_mount_dir: true, + error_contains: "Could not create mountpoint", + ..Default::default() + }, + TestData { + test_user: TestUserType::NonRootOnly, + deny_mount_permission: true, + error_contains: "Could not create mountpoint", + ..Default::default() + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + skip_loop_by_user!(msg, d.test_user); + + 
let drain = slog::Discard; + let logger = slog::Logger::root(drain, o!()); + + let tempdir = tempdir().unwrap(); + + let source = tempdir.path().join(&d.storage.source); + let mount_point = tempdir.path().join(&d.storage.mount_point); + + let storage = Storage { + source: source.to_str().unwrap().to_string(), + mount_point: mount_point.to_str().unwrap().to_string(), + ..d.storage.clone() + }; + + if d.make_source_dir { + fs::create_dir_all(&storage.source).unwrap(); + } + if d.make_mount_dir { + fs::create_dir_all(&storage.mount_point).unwrap(); + } + + if d.deny_mount_permission { + fs::set_permissions( + mount_point.parent().unwrap(), + fs::Permissions::from_mode(0o000), + ) + .unwrap(); + } + + let result = mount_storage(&logger, &storage); + + // restore permissions so tempdir can be cleaned up + if d.deny_mount_permission { + fs::set_permissions( + mount_point.parent().unwrap(), + fs::Permissions::from_mode(0o755), + ) + .unwrap(); + } + + if result.is_ok() { + nix::mount::umount(&mount_point).unwrap(); + } + + let msg = format!("{}: result: {:?}", msg, result); + if d.error_contains.is_empty() { + assert!(result.is_ok(), "{}", msg); + } else { + assert!(result.is_err(), "{}", msg); + let error_msg = format!("{}", result.unwrap_err()); + assert!(error_msg.contains(d.error_contains), "{}", msg); + } + } + } + + #[test] + fn test_set_ownership() { + skip_if_not_root!(); + + let logger = slog::Logger::root(slog::Discard, o!()); + + #[derive(Debug)] + struct TestData<'a> { + mount_path: &'a str, + fs_group: Option, + read_only: bool, + expected_group_id: u32, + expected_permission: u32, + } + + let tests = &[ + TestData { + mount_path: "foo", + fs_group: None, + read_only: false, + expected_group_id: 0, + expected_permission: 0, + }, + TestData { + mount_path: "rw_mount", + fs_group: Some(FSGroup { + group_id: 3000, + group_change_policy: FSGroupChangePolicy::Always.into(), + ..Default::default() + }), + read_only: false, + expected_group_id: 3000, + 
expected_permission: RW_MASK | EXEC_MASK | MODE_SETGID, + }, + TestData { + mount_path: "ro_mount", + fs_group: Some(FSGroup { + group_id: 3000, + group_change_policy: FSGroupChangePolicy::OnRootMismatch.into(), + ..Default::default() + }), + read_only: true, + expected_group_id: 3000, + expected_permission: RO_MASK | EXEC_MASK | MODE_SETGID, + }, + ]; + + let tempdir = tempdir().expect("failed to create tmpdir"); + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let mount_dir = tempdir.path().join(d.mount_path); + fs::create_dir(&mount_dir) + .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); + + let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); + let mut storage_data = Storage::new(); + if d.read_only { + storage_data.set_options(vec!["foo".to_string(), "ro".to_string()]); + } + if let Some(fs_group) = d.fs_group.clone() { + storage_data.set_fs_group(fs_group); + } + storage_data.mount_point = mount_dir.clone().into_os_string().into_string().unwrap(); + + let result = set_ownership(&logger, &storage_data); + assert!(result.is_ok()); + + assert_eq!( + mount_dir.as_path().metadata().unwrap().gid(), + d.expected_group_id + ); + assert_eq!( + mount_dir.as_path().metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission) + ); + } + } + + #[test] + fn test_recursive_ownership_change() { + skip_if_not_root!(); + + const COUNT: usize = 5; + + #[derive(Debug)] + struct TestData<'a> { + // Directory where the recursive ownership change should be performed on + path: &'a str, + + // User ID for ownership change + uid: u32, + + // Group ID for ownership change + gid: u32, + + // Set when the permission should be read-only + read_only: bool, + + // The expected permission of all directories after ownership change + expected_permission_directory: u32, + + // The expected permission of all files after ownership change + expected_permission_file: u32, + } 
+ + let tests = &[ + TestData { + path: "no_gid_change", + uid: 0, + gid: 0, + read_only: false, + expected_permission_directory: 0, + expected_permission_file: 0, + }, + TestData { + path: "rw_gid_change", + uid: 0, + gid: 3000, + read_only: false, + expected_permission_directory: RW_MASK | EXEC_MASK | MODE_SETGID, + expected_permission_file: RW_MASK, + }, + TestData { + path: "ro_gid_change", + uid: 0, + gid: 3000, + read_only: true, + expected_permission_directory: RO_MASK | EXEC_MASK | MODE_SETGID, + expected_permission_file: RO_MASK, + }, + ]; + + let tempdir = tempdir().expect("failed to create tmpdir"); + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let mount_dir = tempdir.path().join(d.path); + fs::create_dir(&mount_dir) + .unwrap_or_else(|_| panic!("{}: failed to create root directory", msg)); + + let directory_mode = mount_dir.as_path().metadata().unwrap().permissions().mode(); + let mut file_mode: u32 = 0; + + // create testing directories and files + for n in 1..COUNT { + let nest_dir = mount_dir.join(format!("nested{}", n)); + fs::create_dir(&nest_dir) + .unwrap_or_else(|_| panic!("{}: failed to create nest directory", msg)); + + for f in 1..COUNT { + let filename = nest_dir.join(format!("file{}", f)); + File::create(&filename) + .unwrap_or_else(|_| panic!("{}: failed to create file", msg)); + file_mode = filename.as_path().metadata().unwrap().permissions().mode(); + } + } + + let uid = if d.uid > 0 { + Some(Uid::from_raw(d.uid)) + } else { + None + }; + let gid = if d.gid > 0 { + Some(Gid::from_raw(d.gid)) + } else { + None + }; + let result = recursive_ownership_change(&mount_dir, uid, gid, d.read_only); + + assert!(result.is_ok()); + + assert_eq!(mount_dir.as_path().metadata().unwrap().gid(), d.gid); + assert_eq!( + mount_dir.as_path().metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission_directory) + ); + + for n in 1..COUNT { + let nest_dir = 
mount_dir.join(format!("nested{}", n)); + for f in 1..COUNT { + let filename = nest_dir.join(format!("file{}", f)); + let file = Path::new(&filename); + + assert_eq!(file.metadata().unwrap().gid(), d.gid); + assert_eq!( + file.metadata().unwrap().permissions().mode(), + (file_mode | d.expected_permission_file) + ); + } + + let dir = Path::new(&nest_dir); + assert_eq!(dir.metadata().unwrap().gid(), d.gid); + assert_eq!( + dir.metadata().unwrap().permissions().mode(), + (directory_mode | d.expected_permission_directory) + ); + } + } + } +} From 412e8554f3147a9c44f4bf51d0eaf548f43adb15 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 25 Aug 2023 17:21:03 +0800 Subject: [PATCH 9/9] agent: simplify storage device by removing StorageDeviceObject Simplify storage device implementation by removing StorageDeviceObject. Signed-off-by: Jiang Liu --- src/agent/src/sandbox.rs | 28 +++++++----- src/agent/src/storage/bind_watcher_handler.rs | 5 ++- src/agent/src/storage/block_handler.rs | 15 ++++--- src/agent/src/storage/ephemeral_handler.rs | 6 +-- src/agent/src/storage/fs_handler.rs | 9 ++-- src/agent/src/storage/local_handler.rs | 6 +-- src/agent/src/storage/mod.rs | 14 +++--- src/libs/kata-types/src/mount.rs | 44 +------------------ 8 files changed, 47 insertions(+), 80 deletions(-) diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index 05ae044197..788f29278f 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -10,6 +10,7 @@ use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::Path; use std::str::FromStr; +use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; use std::{thread, time}; @@ -43,11 +44,10 @@ pub const ERR_INVALID_CONTAINER_ID: &str = "Invalid container id"; type UeventWatcher = (Box, oneshot::Sender); -pub type StorageDeviceObject = Arc>; - #[derive(Clone)] pub struct StorageState { - inner: StorageDeviceObject, + count: Arc, + device: Arc, } impl Debug for StorageState { @@ -59,24 +59,28 @@ impl 
Debug for StorageState { impl StorageState { fn new() -> Self { StorageState { - inner: Arc::new(Mutex::new(StorageDeviceGeneric::new("".to_string()))), + count: Arc::new(AtomicU32::new(1)), + device: Arc::new(StorageDeviceGeneric::new("".to_string())), } } - pub fn from_device(device: StorageDeviceObject) -> Self { - Self { inner: device } + pub fn from_device(device: Arc) -> Self { + Self { + count: Arc::new(AtomicU32::new(1)), + device, + } } pub async fn ref_count(&self) -> u32 { - self.inner.lock().await.ref_count() + self.count.load(Ordering::Relaxed) } async fn inc_ref_count(&self) { - self.inner.lock().await.inc_ref_count() + self.count.fetch_add(1, Ordering::Acquire); } async fn dec_and_test_ref_count(&self) -> bool { - self.inner.lock().await.dec_and_test_ref_count() + self.count.fetch_sub(1, Ordering::AcqRel) == 1 } } @@ -162,8 +166,8 @@ impl Sandbox { pub fn update_sandbox_storage( &mut self, path: &str, - device: StorageDeviceObject, - ) -> std::result::Result { + device: Arc, + ) -> std::result::Result, Arc> { if !self.storages.contains_key(path) { return Err(device); } @@ -171,7 +175,7 @@ impl Sandbox { let state = StorageState::from_device(device); // Safe to unwrap() because we have just ensured existence of entry. let state = self.storages.insert(path.to_string(), state).unwrap(); - Ok(state.inner) + Ok(state.device) } // Clean mount and directory of a mountpoint. 
diff --git a/src/agent/src/storage/bind_watcher_handler.rs b/src/agent/src/storage/bind_watcher_handler.rs index 44f7094e8b..3b50327d12 100644 --- a/src/agent/src/storage/bind_watcher_handler.rs +++ b/src/agent/src/storage/bind_watcher_handler.rs @@ -5,11 +5,12 @@ // use anyhow::Result; +use kata_types::mount::StorageDevice; use protocols::agent::Storage; use std::iter; +use std::sync::Arc; use tracing::instrument; -use crate::sandbox::StorageDeviceObject; use crate::storage::{new_device, StorageContext, StorageHandler}; #[derive(Debug)] @@ -22,7 +23,7 @@ impl StorageHandler for BindWatcherHandler { &self, storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { if let Some(cid) = ctx.cid { ctx.sandbox .lock() diff --git a/src/agent/src/storage/block_handler.rs b/src/agent/src/storage/block_handler.rs index 7d676211b0..60330253ce 100644 --- a/src/agent/src/storage/block_handler.rs +++ b/src/agent/src/storage/block_handler.rs @@ -8,8 +8,10 @@ use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::Path; use std::str::FromStr; +use std::sync::Arc; use anyhow::{anyhow, Context, Result}; +use kata_types::mount::StorageDevice; use protocols::agent::Storage; use tracing::instrument; @@ -18,7 +20,6 @@ use crate::device::{ wait_for_pmem_device, }; use crate::pci; -use crate::sandbox::StorageDeviceObject; use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; #[cfg(target_arch = "s390x")] use crate::{ccw, device::get_virtio_blk_ccw_device_name}; @@ -33,7 +34,7 @@ impl StorageHandler for VirtioBlkMmioHandler { &self, storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { if !Path::new(&storage.source).exists() { get_virtio_mmio_device_name(ctx.sandbox, &storage.source) .await @@ -54,7 +55,7 @@ impl StorageHandler for VirtioBlkPciHandler { &self, mut storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { // If hot-plugged, get the device node path based on the PCI 
path // otherwise use the virt path provided in Storage Source if storage.source.starts_with("/dev") { @@ -86,7 +87,7 @@ impl StorageHandler for VirtioBlkCcwHandler { &self, mut storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { let ccw_device = ccw::Device::from_str(&storage.source)?; let dev_path = get_virtio_blk_ccw_device_name(ctx.sandbox, &ccw_device).await?; storage.source = dev_path; @@ -100,7 +101,7 @@ impl StorageHandler for VirtioBlkCcwHandler { &self, _storage: Storage, _ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { Err(anyhow!("CCW is only supported on s390x")) } } @@ -115,7 +116,7 @@ impl StorageHandler for ScsiHandler { &self, mut storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { // Retrieve the device path from SCSI address. let dev_path = get_scsi_device_name(ctx.sandbox, &storage.source).await?; storage.source = dev_path; @@ -135,7 +136,7 @@ impl StorageHandler for PmemHandler { &self, storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { // Retrieve the device for pmem storage wait_for_pmem_device(ctx.sandbox, &storage.source).await?; diff --git a/src/agent/src/storage/ephemeral_handler.rs b/src/agent/src/storage/ephemeral_handler.rs index 38ceb8f556..8fc70f6959 100644 --- a/src/agent/src/storage/ephemeral_handler.rs +++ b/src/agent/src/storage/ephemeral_handler.rs @@ -13,7 +13,7 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use kata_sys_util::mount::parse_mount_options; -use kata_types::mount::KATA_MOUNT_OPTION_FS_GID; +use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID}; use nix::unistd::Gid; use protocols::agent::Storage; use slog::Logger; @@ -22,7 +22,7 @@ use tracing::instrument; use crate::device::{DRIVER_EPHEMERAL_TYPE, FS_TYPE_HUGETLB}; use crate::mount::baremount; -use crate::sandbox::{Sandbox, StorageDeviceObject}; +use crate::sandbox::Sandbox; use crate::storage::{ common_storage_handler, new_device, parse_options, 
StorageContext, StorageHandler, MODE_SETGID, }; @@ -40,7 +40,7 @@ impl StorageHandler for EphemeralHandler { &self, mut storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { // hugetlbfs if storage.fstype == FS_TYPE_HUGETLB { info!(ctx.logger, "handle hugetlbfs storage"); diff --git a/src/agent/src/storage/fs_handler.rs b/src/agent/src/storage/fs_handler.rs index 97f3cb2589..fce59c0b14 100644 --- a/src/agent/src/storage/fs_handler.rs +++ b/src/agent/src/storage/fs_handler.rs @@ -6,12 +6,13 @@ use std::fs; use std::path::Path; +use std::sync::Arc; use anyhow::{anyhow, Context, Result}; +use kata_types::mount::StorageDevice; use protocols::agent::Storage; use tracing::instrument; -use crate::sandbox::StorageDeviceObject; use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; #[derive(Debug)] @@ -24,7 +25,7 @@ impl StorageHandler for OverlayfsHandler { &self, mut storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { if storage .options .iter() @@ -65,7 +66,7 @@ impl StorageHandler for Virtio9pHandler { &self, storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { let path = common_storage_handler(ctx.logger, &storage)?; new_device(path) } @@ -81,7 +82,7 @@ impl StorageHandler for VirtioFsHandler { &self, storage: Storage, ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { let path = common_storage_handler(ctx.logger, &storage)?; new_device(path) } diff --git a/src/agent/src/storage/local_handler.rs b/src/agent/src/storage/local_handler.rs index 0ff6f26f8e..5bcee2d01f 100644 --- a/src/agent/src/storage/local_handler.rs +++ b/src/agent/src/storage/local_handler.rs @@ -6,14 +6,14 @@ use std::fs; use std::os::unix::fs::PermissionsExt; +use std::sync::Arc; use anyhow::{Context, Result}; -use kata_types::mount::KATA_MOUNT_OPTION_FS_GID; +use kata_types::mount::{StorageDevice, KATA_MOUNT_OPTION_FS_GID}; use nix::unistd::Gid; use protocols::agent::Storage; use 
tracing::instrument; -use crate::sandbox::StorageDeviceObject; use crate::storage::{new_device, parse_options, StorageContext, StorageHandler, MODE_SETGID}; #[derive(Debug)] @@ -26,7 +26,7 @@ impl StorageHandler for LocalHandler { &self, storage: Storage, _ctx: &mut StorageContext, - ) -> Result { + ) -> Result> { fs::create_dir_all(&storage.mount_point).context(format!( "failed to create dir all {:?}", &storage.mount_point diff --git a/src/agent/src/storage/mod.rs b/src/agent/src/storage/mod.rs index 11bf170c6a..84348c972c 100644 --- a/src/agent/src/storage/mod.rs +++ b/src/agent/src/storage/mod.rs @@ -13,7 +13,7 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use kata_sys_util::mount::{create_mount_destination, parse_mount_options}; use kata_types::mount::{ - StorageDeviceGeneric, StorageHandlerManager, KATA_SHAREDFS_GUEST_PREMOUNT_TAG, + StorageDevice, StorageDeviceGeneric, StorageHandlerManager, KATA_SHAREDFS_GUEST_PREMOUNT_TAG, }; use nix::unistd::{Gid, Uid}; use protocols::agent::Storage; @@ -33,7 +33,7 @@ use crate::device::{ DRIVER_VIRTIOFS_TYPE, DRIVER_WATCHABLE_BIND_TYPE, }; use crate::mount::{baremount, is_mounted}; -use crate::sandbox::{Sandbox, StorageDeviceObject}; +use crate::sandbox::Sandbox; pub use self::ephemeral_handler::update_ephemeral_mounts; @@ -63,7 +63,7 @@ pub trait StorageHandler: Send + Sync { &self, storage: Storage, ctx: &mut StorageContext, - ) -> Result; + ) -> Result>; } #[rustfmt::skip] @@ -124,7 +124,7 @@ pub async fn add_storages( .update_sandbox_storage(&path, device.clone()) { Ok(d) => { - let path = device.lock().await.path().to_string(); + let path = device.path().to_string(); if !path.is_empty() { mount_list.push(path.clone()); } @@ -136,7 +136,7 @@ pub async fn add_storages( { warn!(logger, "failed to remove dummy sandbox storage {:?}", e); } - device.lock().await.cleanup(); + device.cleanup(); return Err(anyhow!("failed to update device for storage")); } } @@ -160,9 +160,9 @@ pub async fn add_storages( 
Ok(mount_list) } -pub(crate) fn new_device(path: String) -> Result { +pub(crate) fn new_device(path: String) -> Result> { let device = StorageDeviceGeneric::new(path); - Ok(Arc::new(Mutex::new(device))) + Ok(Arc::new(device)) } #[instrument] diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index a3747af51c..521a24a4e7 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -431,14 +431,13 @@ impl TryFrom<&NydusExtraOptions> for KataVirtualVolume { /// An implementation of generic storage device. pub struct StorageDeviceGeneric { - refcount: u32, path: String, } impl std::fmt::Debug for StorageDeviceGeneric { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("StorageState") - .field("refcount", &self.refcount) + .field("path", &self.path) .finish() } } @@ -446,7 +445,7 @@ impl std::fmt::Debug for StorageDeviceGeneric { impl StorageDeviceGeneric { /// Create a new instance of `StorageStateCommon`. pub fn new(path: String) -> Self { - StorageDeviceGeneric { refcount: 1, path } + StorageDeviceGeneric { path } } } @@ -455,20 +454,6 @@ impl StorageDevice for StorageDeviceGeneric { &self.path } - fn ref_count(&self) -> u32 { - self.refcount - } - - fn inc_ref_count(&mut self) { - self.refcount += 1; - } - - fn dec_and_test_ref_count(&mut self) -> bool { - assert!(self.refcount > 0); - self.refcount -= 1; - self.refcount == 0 - } - fn cleanup(&self) {} } @@ -477,15 +462,6 @@ pub trait StorageDevice: Send + Sync { /// Path fn path(&self) -> &str; - /// Get reference count. - fn ref_count(&self) -> u32; - - /// Increase reference count. - fn inc_ref_count(&mut self); - - /// Decrease reference count and return true if it reaches zero. - fn dec_and_test_ref_count(&mut self) -> bool; - /// Clean up resources related to the storage device. 
fn cleanup(&self); } @@ -763,20 +739,4 @@ mod tests { ); assert_eq!(volume.fs_type.as_str(), "rafsv6") } - - #[test] - fn test_storage_state_common() { - let mut state = StorageDeviceGeneric::new("".to_string()); - assert_eq!(state.ref_count(), 1); - state.inc_ref_count(); - assert_eq!(state.ref_count(), 2); - state.inc_ref_count(); - assert_eq!(state.ref_count(), 3); - assert!(!state.dec_and_test_ref_count()); - assert_eq!(state.ref_count(), 2); - assert!(!state.dec_and_test_ref_count()); - assert_eq!(state.ref_count(), 1); - assert!(state.dec_and_test_ref_count()); - assert_eq!(state.ref_count(), 0); - } }