diff --git a/src/libs/kata-types/src/annotations/mod.rs b/src/libs/kata-types/src/annotations/mod.rs
index 3af0563c19..f094ddd70b 100644
--- a/src/libs/kata-types/src/annotations/mod.rs
+++ b/src/libs/kata-types/src/annotations/mod.rs
@@ -316,6 +316,10 @@ pub const KATA_ANNO_CFG_VFIO_MODE: &str = "io.katacontainers.config.runtime.vfio
 pub const KATA_ANNO_CFG_HYPERVISOR_PREFETCH_FILES_LIST: &str =
     "io.katacontainers.config.hypervisor.prefetch_files.list";
 
+/// A sandbox annotation for sandbox level volume sharing with host.
+pub const KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS: &str =
+    "io.katacontainers.config.runtime.sandbox_bind_mounts";
+
 /// A helper structure to query configuration information by check annotations.
 #[derive(Debug, Default, Deserialize)]
 pub struct Annotation {
@@ -950,6 +954,13 @@ impl Annotation {
                     KATA_ANNO_CFG_VFIO_MODE => {
                         config.runtime.vfio_mode = value.to_string();
                     }
+                    KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS => {
+                        // The value is a whitespace-separated list of bind mount entries.
+                        config
+                            .runtime
+                            .sandbox_bind_mounts
+                            .extend(value.split_ascii_whitespace().map(str::to_string));
+                    }
                     _ => {
                         warn!(sl!(), "Annotation {} not enabled", key);
                     }
diff --git a/src/libs/kata-types/src/config/runtime.rs b/src/libs/kata-types/src/config/runtime.rs
index 067ff6776a..dddd3adc5b 100644
--- a/src/libs/kata-types/src/config/runtime.rs
+++ b/src/libs/kata-types/src/config/runtime.rs
@@ -8,7 +8,8 @@ use std::path::Path;
 
 use super::default;
 use crate::config::{ConfigOps, TomlConfig};
-use crate::{eother, resolve_path, validate_path};
+use crate::mount::split_bind_mounts;
+use crate::{eother, validate_path};
 
 /// Type of runtime VirtContainer.
pub const RUNTIME_NAME_VIRTCONTAINER: &str = "virt_container";
@@ -146,7 +147,22 @@ impl ConfigOps for Runtime {
         }
 
         for bind in conf.runtime.sandbox_bind_mounts.iter_mut() {
-            resolve_path!(*bind, "sandbox bind mount `{}` is invalid: {}")?;
+            // Split off the optional `:ro`/`:rw` suffix, canonicalize the path, then re-append
+            // the suffix so the configured access mode is kept.
+            let (real_path, mode) = split_bind_mounts(bind);
+            let path = Path::new(real_path)
+                .canonicalize()
+                .map_err(|e| eother!("sandbox bind mount `{}` is invalid: {}", bind, e))?;
+            // `Path::display()` is lossy: reject a non-UTF-8 path instead of silently
+            // storing a different one.
+            let path = path.to_str().ok_or_else(|| {
+                eother!(
+                    "sandbox bind mount `{}` is invalid: {}",
+                    bind,
+                    "canonical path is not valid UTF-8"
+                )
+            })?;
+            *bind = format!("{}{}", path, mode);
         }
 
         Ok(())
@@ -176,7 +192,12 @@ impl ConfigOps for Runtime {
         }
 
         for bind in conf.runtime.sandbox_bind_mounts.iter() {
-            validate_path!(*bind, "sandbox bind mount `{}` is invalid: {}")?;
+            // Only the path part is validated; the `:ro`/`:rw` suffix is not part of the path.
+            let (real_path, _mode) = split_bind_mounts(bind);
+            validate_path!(
+                real_path.to_owned(),
+                "sandbox bind mount `{}` is invalid: {}"
+            )?;
         }
 
         Ok(())
diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs
index f66e828bd1..d779438484 100644
--- a/src/libs/kata-types/src/mount.rs
+++ b/src/libs/kata-types/src/mount.rs
@@ -25,6 +25,15 @@ pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json";
 /// KATA_DIRECT_VOLUME_ROOT_PATH is the root path used for concatenating with the direct-volume mount info file path
 pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes";
 
+/// SANDBOX_BIND_MOUNTS_DIR is for sandbox bindmounts
+pub const SANDBOX_BIND_MOUNTS_DIR: &str = "sandbox-mounts";
+
+/// SANDBOX_BIND_MOUNTS_RO is for sandbox bindmounts with readonly
+pub const SANDBOX_BIND_MOUNTS_RO: &str = ":ro";
+
+/// SANDBOX_BIND_MOUNTS_RW is for sandbox bindmounts with readwrite
+pub const SANDBOX_BIND_MOUNTS_RW: &str = ":rw";
+
 /// Information about a mount.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
 pub struct Mount {
@@ -128,6 +137,24 @@ impl NydusExtraOptions {
         serde_json::from_slice(&extra_options_buf).context("deserialize nydus's extraoption")
     }
 }
+
+/// Split a sandbox bind mount into its host path and access-mode suffix.
+///
+/// Accepted formats: `/path/to/dir`, `/path/to/dir:ro` and `/path/to/dir:rw`.
+/// Returns `(real_path, mode)`; `mode` is `":ro"`, `":rw"`, or `""` when the
+/// bind mount carries neither suffix. Only the final suffix is removed, so
+/// `"/dir:ro:rw"` yields `("/dir:ro", ":rw")`.
+pub fn split_bind_mounts(bindmount: &str) -> (&str, &str) {
+    if let Some(real_path) = bindmount.strip_suffix(SANDBOX_BIND_MOUNTS_RO) {
+        (real_path, SANDBOX_BIND_MOUNTS_RO)
+    } else if let Some(real_path) = bindmount.strip_suffix(SANDBOX_BIND_MOUNTS_RW) {
+        (real_path, SANDBOX_BIND_MOUNTS_RW)
+    } else {
+        // default bindmount format
+        (bindmount, "")
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -137,6 +164,23 @@ mod tests {
         assert!(!is_kata_special_volume("kata:"));
     }
 
+    #[test]
+    fn test_split_bind_mounts() {
+        let test01 = "xxx0:ro";
+        let test02 = "xxx2:rw";
+        let test03 = "xxx3:is";
+        let test04 = "xxx4";
+        // only the last suffix is stripped
+        let test05 = "xxx5:ro:ro";
+        let test06 = "xxx6:ro:rw";
+        assert_eq!(split_bind_mounts(test01), ("xxx0", ":ro"));
+        assert_eq!(split_bind_mounts(test02), ("xxx2", ":rw"));
+        assert_eq!(split_bind_mounts(test03), ("xxx3:is", ""));
+        assert_eq!(split_bind_mounts(test04), ("xxx4", ""));
+        assert_eq!(split_bind_mounts(test05), ("xxx5:ro", ":ro"));
+        assert_eq!(split_bind_mounts(test06), ("xxx6:ro", ":rw"));
+    }
+
     #[test]
     fn test_is_kata_guest_mount_volume() {
         assert!(is_kata_guest_mount_volume("kata:guest-mount:nfs"));
diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in
index 4c7d3db053..e9c2b8c197 100644
--- a/src/runtime-rs/config/configuration-dragonball.toml.in
+++ b/src/runtime-rs/config/configuration-dragonball.toml.in
@@ -309,3 +309,12 @@ experimental=@DEFAULTEXPFEATURES@
 # - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
 
+# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro, rw) into the sandbox's shared path.
+# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
+# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
+# These will not be exposed to the container workloads, and are only provided for potential guest services.
+# Now it supports three kinds of bind mount format:
+# - "/path/to", default readonly mode.
+# - "/path/to:ro", readonly mode.
+# - "/path/to:rw", readwrite mode.
+sandbox_bind_mounts=@DEFBINDMOUNTS@
diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs
index 7e50485ae8..16f55a36a6 100644
--- a/src/runtime-rs/crates/resource/src/manager_inner.rs
+++ b/src/runtime-rs/crates/resource/src/manager_inner.rs
@@ -25,7 +25,7 @@ use crate::{
     manager::ManagerArgs,
     network::{self, Network},
     rootfs::{RootFsResource, Rootfs},
-    share_fs::{self, ShareFs},
+    share_fs::{self, sandbox_bind_mounts::SandboxBindMounts, ShareFs},
     volume::{Volume, VolumeResource},
     ResourceConfig,
 };
@@ -97,6 +97,12 @@ impl ResourceManagerInner {
                             .setup_device_before_start_vm(self.hypervisor.as_ref())
                             .await
                             .context("setup share fs device before start vm")?;
+
+                        // setup sandbox bind mounts: setup = true
+                        self.handle_sandbox_bindmounts(true)
+                            .await
+                            .context("failed setup sandbox bindmounts")?;
+
                         Some(share_fs)
                     } else {
                         None
@@ -308,6 +314,24 @@ impl ResourceManagerInner {
         Ok(devices)
     }
 
+    /// Set up (`setup == true`) or clean up (`setup == false`) the sandbox bind mounts
+    /// listed in the runtime configuration; a no-op when none are configured.
+    async fn handle_sandbox_bindmounts(&self, setup: bool) -> Result<()> {
+        let bindmounts = self.toml_config.runtime.sandbox_bind_mounts.clone();
+        if bindmounts.is_empty() {
+            info!(sl!(), "sandbox bindmounts empty, just skip it.");
+            return Ok(());
+        }
+
+        let sb_bindmnt = SandboxBindMounts::new(self.sid.clone(), bindmounts)?;
+
+        if setup {
+            sb_bindmnt.setup_sandbox_bind_mounts()
+        } else {
+            sb_bindmnt.cleanup_sandbox_bind_mounts()
+        }
+    }
+
     pub async fn update_cgroups(
         &self,
         cid: &str,
@@ -324,6 +348,15 @@ impl ResourceManagerInner {
             .delete()
             .await
             .context("delete cgroup")?;
+
+        // Sandbox bind mounts are only set up together with the share fs, so only clean
+        // them up when a share fs exists; otherwise there is nothing mounted to undo.
+        if self.share_fs.is_some() {
+            self.handle_sandbox_bindmounts(false)
+                .await
+                .context("failed to cleanup sandbox bindmounts")?;
+        }
+
         // clean up share fs mount
         if let Some(share_fs) = &self.share_fs {
             share_fs
diff --git a/src/runtime-rs/crates/resource/src/share_fs/mod.rs b/src/runtime-rs/crates/resource/src/share_fs/mod.rs
index 12bb64420d..4d70a6c7b4 100644
--- a/src/runtime-rs/crates/resource/src/share_fs/mod.rs
+++ b/src/runtime-rs/crates/resource/src/share_fs/mod.rs
@@ -18,6 +18,7 @@ pub use utils::{
 mod virtio_fs_share_mount;
 use virtio_fs_share_mount::VirtiofsShareMount;
 pub use virtio_fs_share_mount::EPHEMERAL_PATH;
+pub mod sandbox_bind_mounts;
 
 use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};
 
diff --git a/src/runtime-rs/crates/resource/src/share_fs/sandbox_bind_mounts.rs b/src/runtime-rs/crates/resource/src/share_fs/sandbox_bind_mounts.rs
new file mode 100644
index 0000000000..13bd281033
--- /dev/null
+++ b/src/runtime-rs/crates/resource/src/share_fs/sandbox_bind_mounts.rs
@@ -0,0 +1,155 @@
+// Copyright (c) 2023 Alibaba Cloud
+// Copyright (c) 2023 Ant Group
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Note:
+// sandbox_bind_mounts supports kinds of mount patterns, for example:
+// (1) "/path/to", with default readonly mode.
+// (2) "/path/to:ro", same as (1).
+// (3) "/path/to:rw", with readwrite mode.
+//
+// sandbox_bind_mounts: ["/path/to", "/path/to:rw", "/mnt/to:ro"]
+//
+
+use std::{
+    collections::HashSet,
+    fs,
+    path::{Path, PathBuf},
+};
+
+use anyhow::{anyhow, Context, Result};
+
+use super::utils::{do_get_host_path, mkdir_with_permissions};
+use kata_sys_util::{fs::get_base_name, mount};
+use kata_types::mount::{SANDBOX_BIND_MOUNTS_DIR, SANDBOX_BIND_MOUNTS_RO, SANDBOX_BIND_MOUNTS_RW};
+
+/// Host paths that are bind mounted into a sandbox's shared directory.
+#[derive(Clone, Default, Debug)]
+pub struct SandboxBindMounts {
+    sid: String,
+    host_mounts_path: PathBuf,
+    sandbox_bindmounts: Vec<String>,
+}
+
+impl SandboxBindMounts {
+    pub fn new(sid: String, sandbox_bindmounts: Vec<String>) -> Result<Self> {
+        // /run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/sandbox-mounts
+        let bindmounts_path =
+            do_get_host_path(SANDBOX_BIND_MOUNTS_DIR, sid.as_str(), "", true, false);
+        let host_mounts_path = PathBuf::from(bindmounts_path);
+
+        Ok(SandboxBindMounts {
+            sid,
+            host_mounts_path,
+            sandbox_bindmounts,
+        })
+    }
+
+    /// Split `bindmnt_src` into `(mode, path)`; a missing suffix means read-only.
+    fn parse_sandbox_bind_mounts<'a>(&self, bindmnt_src: &'a str) -> Result<(&'a str, &'a str)> {
+        // Strip exactly one suffix, so a path such as `/a:ro:ro` keeps its inner `:ro`.
+        if let Some(bindmount) = bindmnt_src.strip_suffix(SANDBOX_BIND_MOUNTS_RW) {
+            return Ok((SANDBOX_BIND_MOUNTS_RW, bindmount));
+        }
+        let bindmount = bindmnt_src
+            .strip_suffix(SANDBOX_BIND_MOUNTS_RO)
+            .unwrap_or(bindmnt_src);
+        Ok((SANDBOX_BIND_MOUNTS_RO, bindmount))
+    }
+
+    /// Bind mount every configured path; on failure, undo the mounts already made.
+    pub fn setup_sandbox_bind_mounts(&self) -> Result<()> {
+        let mut mounted_list: Vec<PathBuf> = Vec::new();
+        let result = self.do_setup_sandbox_bind_mounts(&mut mounted_list);
+        if result.is_err() {
+            // Best-effort rollback: only log umount failures so the original error is kept.
+            for p in mounted_list.iter().rev() {
+                if let Err(e) = nix::mount::umount(p) {
+                    warn!(sl!(), "do umount failed: {:?}", e);
+                }
+            }
+        }
+        result
+    }
+
+    /// Mount each entry, recording every successful mount point in `mounted_list`.
+    fn do_setup_sandbox_bind_mounts(&self, mounted_list: &mut Vec<PathBuf>) -> Result<()> {
+        let mut mounted_names: HashSet<String> = HashSet::new();
+        for src in &self.sandbox_bindmounts {
+            let (bindmount_mode, bindmount) = self
+                .parse_sandbox_bind_mounts(src)
+                .context("parse sandbox bind mounts failed")?;
+
+            // get the basename of the canonicalized mount path mnt_name: dirX
+            let mnt_name = get_base_name(bindmount)?
+                .into_string()
+                .map_err(|e| anyhow!("failed to get base name {:?}", e))?;
+
+            // two sources with the same basename would share one mount point
+            if !mounted_names.insert(mnt_name.clone()) {
+                return Err(anyhow!(
+                    "sandbox-bindmounts: path {} is already specified.",
+                    bindmount
+                ));
+            }
+
+            // mount_dest: /run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/sandbox-mounts/dirX
+            let mount_dest = self.host_mounts_path.join(mnt_name.as_str());
+            mkdir_with_permissions(self.host_mounts_path.clone(), 0o750).context(format!(
+                "create host mounts path {:?}",
+                self.host_mounts_path
+            ))?;
+
+            info!(
+                sl!(),
+                "sandbox-bindmounts mount_src: {:?} => mount_dest: {:?}", bindmount, &mount_dest
+            );
+
+            // mount -o bind,ro host_shared mount_dest
+            // NOTE(review): `true` is passed for ":rw" entries as well - confirm this is intended.
+            mount::bind_mount_unchecked(Path::new(bindmount), &mount_dest, true)?;
+            // Record it right away so a failing remount below is rolled back too.
+            mounted_list.push(mount_dest);
+
+            // default sandbox bind mounts mode is ro.
+            if bindmount_mode == SANDBOX_BIND_MOUNTS_RO {
+                info!(sl!(), "sandbox readonly bind mount.");
+                // dest_ro: /run/kata-containers/shared/sandboxes/<sid>/ro/passthrough/sandbox-mounts
+                let mount_dest_ro =
+                    do_get_host_path(SANDBOX_BIND_MOUNTS_DIR, &self.sid, "", true, true);
+                let sandbox_bindmounts_ro = [mount_dest_ro, mnt_name.clone()].join("/");
+
+                mount::bind_remount(sandbox_bindmounts_ro, true)
+                    .context("remount ro directory with ro permission")?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Unmount every sandbox bind mount, then remove the mount point directory.
+    pub fn cleanup_sandbox_bind_mounts(&self) -> Result<()> {
+        for src in &self.sandbox_bindmounts {
+            let (_mode, bindmount) = self
+                .parse_sandbox_bind_mounts(src)
+                .context("parse sandbox bind mounts")?;
+
+            let mnt_name = get_base_name(bindmount)?
+                .into_string()
+                .map_err(|e| anyhow!("failed to convert to string{:?}", e))?;
+            // /run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/sandbox-mounts/dir
+            let mnt_dest = self.host_mounts_path.join(mnt_name.as_str());
+            // Stop on failure: `remove_dir_all` below could recurse into a still-mounted path.
+            mount::umount_timeout(mnt_dest, 0).context("umount bindmount failed")?;
+        }
+
+        // A missing directory (setup never created it) is not an error.
+        if self.host_mounts_path.is_dir() {
+            fs::remove_dir_all(&self.host_mounts_path).context(format!(
+                "remove sandbox bindmount point {:?}.",
+                self.host_mounts_path
+            ))?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/runtime-rs/crates/resource/src/share_fs/utils.rs b/src/runtime-rs/crates/resource/src/share_fs/utils.rs
index c93cbec547..3300c74ef3 100644
--- a/src/runtime-rs/crates/resource/src/share_fs/utils.rs
+++ b/src/runtime-rs/crates/resource/src/share_fs/utils.rs
@@ -4,13 +4,32 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-use std::path::{Path, PathBuf};
+use std::{
+    os::unix::fs::PermissionsExt,
+    path::{Path, PathBuf},
+};
 
 use anyhow::Result;
 use kata_sys_util::mount;
 
 use super::*;
 
+/// Create `path_target`, including missing parents, then set its permission bits to `mode`.
+///
+/// `mode` is applied to `path_target` itself only (e.g. `0o750`); parents created along the
+/// way keep the permissions `create_dir_all` gave them.
+pub(crate) fn mkdir_with_permissions(path_target: PathBuf, mode: u32) -> Result<()> {
+    std::fs::create_dir_all(&path_target)
+        .context(format!("unable to create new path: {:?}", path_target))?;
+
+    // mode format: 0o750, ...
+    std::fs::set_permissions(&path_target, std::fs::Permissions::from_mode(mode)).context(
+        format!("unable to set permissions {:o} on {:?}", mode, path_target),
+    )?;
+
+    Ok(())
+}
+
 pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> {
     if !path.exists() {
         std::fs::create_dir_all(path).context(format!("failed to create directory {:?}", path))?;
diff --git a/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs b/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs
index 2cea9904d9..6f875d29ed 100644
--- a/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs
+++ b/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs
@@ -12,7 +12,6 @@ use kata_types::k8s::is_watchable_mount;
 use kata_types::mount;
 use nix::sys::stat::stat;
 use std::fs;
-use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 
 const WATCHABLE_PATH_NAME: &str = "watchable";
@@ -21,7 +20,10 @@ pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
 
 use super::{
     get_host_rw_shared_path,
-    utils::{self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path},
+    utils::{
+        self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path,
+        mkdir_with_permissions,
+    },
     ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
     KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
 };
@@ -79,13 +81,11 @@ impl ShareFsMount for VirtiofsShareMount {
                     .join(PASSTHROUGH_FS_DIR)
                     .join(WATCHABLE_PATH_NAME);
 
-                fs::create_dir_all(&watchable_host_path).context(format!(
-                    "unable to create watchable path: {:?}",
-                    &watchable_host_path,
+                mkdir_with_permissions(watchable_host_path.clone(), 0o750).context(format!(
+                    "unable to create watchable path {:?}",
+                    watchable_host_path
                 ))?;
 
-                fs::set_permissions(watchable_host_path, fs::Permissions::from_mode(0o750))?;
-
                 // path: /run/kata-containers/shared/containers/passthrough/watchable/config-map-name
                 let file_name = Path::new(&guest_path)
                     .file_name()