runtime-rs/sandbox_bindmounts: add support for sandbox bindmounts

sandbox_bind_mounts supports several kinds of mount patterns, for example:

(1) "/path/to", default readonly mode.
(2) "/path/to:ro", same as (1).
(3) "/path/to:rw", readwrite mode.

Both configuration and annotation are supported:
(1)[runtime]
sandbox_bind_mounts=["/path/to", "/path/to:rw", "/mnt/to:ro"]
(2) annotation will also be supported, restricted as below:
io.katacontainers.config.runtime.sandbox_bind_mounts
                         = "/path/to /path/to:rw /mnt/to:ro"

Fixes: #6597

Signed-off-by: alex.lyn <alex.lyn@antgroup.com>
This commit is contained in:
alex.lyn 2023-05-25 20:00:25 +08:00
parent 62b2838962
commit eee7aae71d
9 changed files with 289 additions and 12 deletions

View File

@ -316,6 +316,10 @@ pub const KATA_ANNO_CFG_VFIO_MODE: &str = "io.katacontainers.config.runtime.vfio
pub const KATA_ANNO_CFG_HYPERVISOR_PREFETCH_FILES_LIST: &str =
"io.katacontainers.config.hypervisor.prefetch_files.list";
/// A sandbox annotation for sandbox level volume sharing with host.
pub const KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS: &str =
"io.katacontainers.config.runtime.sandbox_bind_mounts";
/// A helper structure to query configuration information by check annotations.
#[derive(Debug, Default, Deserialize)]
pub struct Annotation {
@ -950,6 +954,16 @@ impl Annotation {
KATA_ANNO_CFG_VFIO_MODE => {
config.runtime.vfio_mode = value.to_string();
}
KATA_ANNO_CFG_SANDBOX_BIND_MOUNTS => {
let args: Vec<String> = value
.to_string()
.split_ascii_whitespace()
.map(str::to_string)
.collect();
for arg in args {
config.runtime.sandbox_bind_mounts.push(arg.to_string());
}
}
_ => {
warn!(sl!(), "Annotation {} not enabled", key);
}

View File

@ -8,7 +8,8 @@ use std::path::Path;
use super::default;
use crate::config::{ConfigOps, TomlConfig};
use crate::{eother, resolve_path, validate_path};
use crate::mount::split_bind_mounts;
use crate::{eother, validate_path};
/// Type of runtime VirtContainer.
pub const RUNTIME_NAME_VIRTCONTAINER: &str = "virt_container";
@ -146,7 +147,14 @@ impl ConfigOps for Runtime {
}
for bind in conf.runtime.sandbox_bind_mounts.iter_mut() {
resolve_path!(*bind, "sandbox bind mount `{}` is invalid: {}")?;
// Split the bind mount, canonicalize the path and then append rw mode to it.
let (real_path, mode) = split_bind_mounts(bind);
match Path::new(real_path).canonicalize() {
Err(e) => return Err(eother!("sandbox bind mount `{}` is invalid: {}", bind, e)),
Ok(path) => {
*bind = format!("{}{}", path.display(), mode);
}
}
}
Ok(())
@ -176,7 +184,12 @@ impl ConfigOps for Runtime {
}
for bind in conf.runtime.sandbox_bind_mounts.iter() {
validate_path!(*bind, "sandbox bind mount `{}` is invalid: {}")?;
// Just validate the real_path.
let (real_path, _mode) = split_bind_mounts(bind);
validate_path!(
real_path.to_owned(),
"sandbox bind mount `{}` is invalid: {}"
)?;
}
Ok(())

View File

@ -25,6 +25,15 @@ pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json";
/// KATA_DIRECT_VOLUME_ROOT_PATH is the root path used for concatenating with the direct-volume mount info file path
pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes";
/// SANDBOX_BIND_MOUNTS_DIR is for sandbox bindmounts
pub const SANDBOX_BIND_MOUNTS_DIR: &str = "sandbox-mounts";
/// SANDBOX_BIND_MOUNTS_RO is for sandbox bindmounts with readonly
pub const SANDBOX_BIND_MOUNTS_RO: &str = ":ro";
/// SANDBOX_BIND_MOUNTS_RW is for sandbox bindmounts with readwrite
pub const SANDBOX_BIND_MOUNTS_RW: &str = ":rw";
/// Information about a mount.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct Mount {
@ -128,6 +137,28 @@ impl NydusExtraOptions {
serde_json::from_slice(&extra_options_buf).context("deserialize nydus's extraoption")
}
}
/// Splits a sandbox bind mount entry into its host path and its access-mode suffix.
///
/// Accepted entry formats are `/path/to/dir`, `/path/to/dir:ro` and `/path/to/dir:rw`.
///
/// Returns `(real_path, mode)`:
/// - `mode` is `":ro"` or `":rw"` when the entry ends with that suffix, and `real_path`
///   is the entry with exactly that one trailing suffix removed;
/// - otherwise `mode` is `""` and `real_path` is the whole entry.
///
/// Concatenating `real_path` and `mode` always yields the original entry.
pub fn split_bind_mounts(bindmount: &str) -> (&str, &str) {
    // `strip_suffix` removes a single occurrence only. `trim_end_matches` would keep
    // stripping repeated suffixes, turning "/data:ro:ro" into "/data" and silently
    // pointing the mount at a different host path.
    if let Some(real_path) = bindmount.strip_suffix(SANDBOX_BIND_MOUNTS_RO) {
        (real_path, SANDBOX_BIND_MOUNTS_RO)
    } else if let Some(real_path) = bindmount.strip_suffix(SANDBOX_BIND_MOUNTS_RW) {
        (real_path, SANDBOX_BIND_MOUNTS_RW)
    } else {
        // default bindmount format: no explicit mode suffix
        (bindmount, "")
    }
}
#[cfg(test)]
mod tests {
use super::*;
@ -137,6 +168,18 @@ mod tests {
assert!(!is_kata_special_volume("kata:"));
}
#[test]
fn test_split_bind_mounts() {
    // Each case pairs an input entry with the expected `(real_path, mode)` split.
    // Only the exact `:ro` / `:rw` suffixes are recognised as a mode.
    let cases = [
        ("xxx0:ro", ("xxx0", ":ro")),
        ("xxx2:rw", ("xxx2", ":rw")),
        ("xxx3:is", ("xxx3:is", "")),
        ("xxx4", ("xxx4", "")),
    ];

    for (input, expected) in cases.iter() {
        assert_eq!(split_bind_mounts(input), *expected);
    }
}
#[test]
fn test_is_kata_guest_mount_volume() {
assert!(is_kata_guest_mount_volume("kata:guest-mount:nfs"));

View File

@ -309,3 +309,12 @@ experimental=@DEFAULTEXPFEATURES@
# - When running single containers using a tool like ctr, container sizing information will be available.
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT_DB@
# If specified, sandbox_bind_mounts identifies host paths to be mounted (ro, rw) into the sandbox's shared path.
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
# These will not be exposed to the container workloads, and are only provided for potential guest services.
# Now it supports three kinds of bind mount format:
# - "/path/to", default readonly mode.
# - "/path/to:ro", readonly mode.
# - "/path/to:rw", readwrite mode.
sandbox_bind_mounts=@DEFBINDMOUNTS@

View File

@ -25,7 +25,7 @@ use crate::{
manager::ManagerArgs,
network::{self, Network},
rootfs::{RootFsResource, Rootfs},
share_fs::{self, ShareFs},
share_fs::{self, sandbox_bind_mounts::SandboxBindMounts, ShareFs},
volume::{Volume, VolumeResource},
ResourceConfig,
};
@ -97,6 +97,12 @@ impl ResourceManagerInner {
.setup_device_before_start_vm(self.hypervisor.as_ref())
.await
.context("setup share fs device before start vm")?;
// setup sandbox bind mounts: setup = true
self.handle_sandbox_bindmounts(true)
.await
.context("failed setup sandbox bindmounts")?;
Some(share_fs)
} else {
None
@ -308,6 +314,22 @@ impl ResourceManagerInner {
Ok(devices)
}
/// Sets up (`setup == true`) or cleans up (`setup == false`) the sandbox bind mounts
/// listed in `runtime.sandbox_bind_mounts` of the TOML configuration.
///
/// Does nothing and returns `Ok(())` when no sandbox bind mount is configured.
async fn handle_sandbox_bindmounts(&self, setup: bool) -> Result<()> {
    let configured = &self.toml_config.runtime.sandbox_bind_mounts;
    if configured.is_empty() {
        info!(sl!(), "sandbox bindmounts empty, just skip it.");
        return Ok(());
    }

    let manager = SandboxBindMounts::new(self.sid.clone(), configured.clone())?;
    if !setup {
        return manager.cleanup_sandbox_bind_mounts();
    }

    manager.setup_sandbox_bind_mounts()
}
pub async fn update_cgroups(
&self,
cid: &str,
@ -324,6 +346,12 @@ impl ResourceManagerInner {
.delete()
.await
.context("delete cgroup")?;
// cleanup sandbox bind mounts: setup = false
self.handle_sandbox_bindmounts(false)
.await
.context("failed to cleanup sandbox bindmounts")?;
// clean up share fs mount
if let Some(share_fs) = &self.share_fs {
share_fs

View File

@ -18,6 +18,7 @@ pub use utils::{
mod virtio_fs_share_mount;
use virtio_fs_share_mount::VirtiofsShareMount;
pub use virtio_fs_share_mount::EPHEMERAL_PATH;
pub mod sandbox_bind_mounts;
use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};

View File

@ -0,0 +1,155 @@
// Copyright (c) 2023 Alibaba Cloud
// Copyright (c) 2023 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
// Note:
// sandbox_bind_mounts supports several kinds of mount patterns, for example:
// (1) "/path/to", with default readonly mode.
// (2) "/path/to:ro", same as (1).
// (3) "/path/to:rw", with readwrite mode.
//
// sandbox_bind_mounts: ["/path/to", "/path/to:rw", "/mnt/to:ro"]
//
use std::{
collections::HashMap,
fs,
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use super::utils::{do_get_host_path, mkdir_with_permissions};
use kata_sys_util::{fs::get_base_name, mount};
use kata_types::mount::{SANDBOX_BIND_MOUNTS_DIR, SANDBOX_BIND_MOUNTS_RO, SANDBOX_BIND_MOUNTS_RW};
/// Host-side handler for the sandbox-level bind mounts configured through
/// `sandbox_bind_mounts`.
///
/// Every entry is a host path optionally followed by `:ro` or `:rw`; entries without a
/// suffix are treated as read-only.
#[derive(Clone, Default, Debug)]
pub struct SandboxBindMounts {
    /// Sandbox id, used to compute the per-sandbox host paths.
    sid: String,
    /// Directory under which each bind mount destination is created.
    host_mounts_path: PathBuf,
    /// Raw entries as configured, possibly carrying a `:ro` / `:rw` suffix.
    sandbox_bindmounts: Vec<String>,
}

impl SandboxBindMounts {
    /// Builds a handler for sandbox `sid` and the given raw bind mount entries.
    ///
    /// No filesystem operation is performed here; the current implementation always
    /// returns `Ok`.
    pub fn new(sid: String, sandbox_bindmounts: Vec<String>) -> Result<Self> {
        // Expected to resolve to something like
        // /run/kata-containers/shared/sandboxes/<sid>/rw/passthrough/sandbox-mounts
        // (layout is decided by `do_get_host_path`).
        let host_mounts_path = PathBuf::from(do_get_host_path(
            SANDBOX_BIND_MOUNTS_DIR,
            sid.as_str(),
            "",
            true,
            false,
        ));

        Ok(SandboxBindMounts {
            sid,
            host_mounts_path,
            sandbox_bindmounts,
        })
    }

    /// Splits a raw entry into `(mode, path)`.
    ///
    /// `mode` is `":rw"` only when the entry ends with `:rw`; every other entry
    /// (explicit `:ro` or no suffix) is read-only. Exactly one trailing suffix is removed
    /// from the path, so an entry such as `/data:rw:rw` keeps `/data:rw` as its path
    /// instead of being silently shortened to `/data`.
    fn parse_sandbox_bind_mounts<'a>(&self, bindmnt_src: &'a str) -> Result<(&'a str, &'a str)> {
        let parsed = match bindmnt_src.strip_suffix(SANDBOX_BIND_MOUNTS_RW) {
            Some(path) => (SANDBOX_BIND_MOUNTS_RW, path),
            None => (
                SANDBOX_BIND_MOUNTS_RO,
                bindmnt_src
                    .strip_suffix(SANDBOX_BIND_MOUNTS_RO)
                    .unwrap_or(bindmnt_src),
            ),
        };

        Ok(parsed)
    }

    /// Bind mounts every configured entry under the sandbox's mounts directory.
    ///
    /// The destination of an entry is named after the base name of its host path, so two
    /// entries sharing a base name are rejected. On any failure, every mount created by
    /// this call is unmounted again (best effort) and the original error is returned.
    pub fn setup_sandbox_bind_mounts(&self) -> Result<()> {
        let mut mounted_list: Vec<PathBuf> = Vec::new();

        let outcome = self.mount_all(&mut mounted_list);
        if outcome.is_err() {
            Self::rollback_mounts(&mounted_list);
        }

        outcome
    }

    /// Mounts the entries in order, recording each successful destination in `mounted_list`
    /// so that the caller can roll back on error.
    fn mount_all(&self, mounted_list: &mut Vec<PathBuf>) -> Result<()> {
        let mut mounted_map: HashMap<String, bool> = HashMap::new();

        for src in &self.sandbox_bindmounts {
            let (bindmount_mode, bindmount) = self
                .parse_sandbox_bind_mounts(src)
                .context("parse sandbox bind mounts failed")?;

            // mnt_name is the last component of the host path, e.g. dirX
            let mnt_name = get_base_name(bindmount)?
                .into_string()
                .map_err(|e| anyhow!("failed to get base name {:?}", e))?;

            // Two entries with the same base name would collide on the same destination.
            if mounted_map.insert(mnt_name.clone(), true).is_some() {
                return Err(anyhow!(
                    "sandbox-bindmounts: path {} is already specified.",
                    bindmount
                ));
            }

            // mount_dest: <host_mounts_path>/dirX
            let mount_dest = self.host_mounts_path.join(mnt_name.as_str());
            mkdir_with_permissions(self.host_mounts_path.clone(), 0o750).with_context(|| {
                format!("create host mounts path {:?}", self.host_mounts_path)
            })?;

            info!(
                sl!(),
                "sandbox-bindmounts mount_src: {:?} => mount_dest: {:?}", bindmount, &mount_dest
            );

            // NOTE(review): the third argument is `true` for every entry, including `:rw`
            // ones. Confirm against `bind_mount_unchecked` that this does not force the
            // mount read-only.
            mount::bind_mount_unchecked(Path::new(bindmount), &mount_dest, true)?;
            // Record the mount immediately so that a failure below also unmounts it.
            mounted_list.push(mount_dest);

            // default sandbox bind mounts mode is ro.
            if bindmount_mode == SANDBOX_BIND_MOUNTS_RO {
                info!(sl!(), "sandbox readonly bind mount.");
                // Read-only view of the mounts directory, expected to look like
                // /run/kata-containers/shared/sandboxes/<sid>/ro/passthrough/sandbox-mounts
                let mount_dest_ro =
                    do_get_host_path(SANDBOX_BIND_MOUNTS_DIR, &self.sid, "", true, true);
                let sandbox_bindmounts_ro = [mount_dest_ro, mnt_name].join("/");
                mount::bind_remount(sandbox_bindmounts_ro, true)
                    .context("remount ro directory with ro permission")?;
            }
        }

        Ok(())
    }

    /// Best-effort unmount of every path in `mounted_list`, most recent first.
    ///
    /// Failures are logged instead of propagated so that one stuck mount does not prevent
    /// the remaining ones from being released.
    fn rollback_mounts(mounted_list: &[PathBuf]) {
        for p in mounted_list.iter().rev() {
            if let Err(e) = nix::mount::umount(p) {
                info!(
                    sl!(),
                    "sandbox-bindmounts: failed to umount {:?} during rollback: {:?}", p, e
                );
            }
        }
    }

    /// Unmounts every configured entry and then removes the sandbox's mounts directory.
    ///
    /// All entries are attempted even if one of them fails; the first failure is returned
    /// afterwards. The directory is removed only when every unmount succeeded, because
    /// deleting it recursively while a bind mount is still active would delete the content
    /// of the mounted host path. A directory that does not exist is not an error.
    pub fn cleanup_sandbox_bind_mounts(&self) -> Result<()> {
        let mut first_err: Option<anyhow::Error> = None;

        for src in &self.sandbox_bindmounts {
            if let Err(e) = self.umount_entry(src) {
                info!(
                    sl!(),
                    "sandbox-bindmounts: cleanup of {} failed: {:?}", src, e
                );
                first_err.get_or_insert(e);
            }
        }

        if let Some(e) = first_err {
            return Err(e);
        }

        match fs::metadata(&self.host_mounts_path) {
            Ok(meta) if meta.is_dir() => {
                fs::remove_dir_all(&self.host_mounts_path).with_context(|| {
                    format!(
                        "remove sandbox bindmount point {:?}.",
                        self.host_mounts_path
                    )
                })
            }
            Ok(_) => Ok(()),
            // Nothing was ever created (or it is already gone): cleanup is a no-op.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
            Err(e) => Err(e.into()),
        }
    }

    /// Unmounts the destination that corresponds to one raw entry.
    fn umount_entry(&self, src: &str) -> Result<()> {
        let (_mode, bindmount) = self
            .parse_sandbox_bind_mounts(src)
            .context("parse sandbox bind mounts")?;

        let mnt_name = get_base_name(bindmount)?
            .into_string()
            .map_err(|e| anyhow!("failed to convert to string{:?}", e))?;

        // mnt_dest: <host_mounts_path>/dirX
        let mnt_dest = self.host_mounts_path.join(mnt_name.as_str());
        mount::umount_timeout(mnt_dest, 0).context("umount bindmount failed")
    }
}

View File

@ -4,13 +4,27 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::path::{Path, PathBuf};
use std::{
os::unix::fs::PermissionsExt,
path::{Path, PathBuf},
};
use anyhow::Result;
use kata_sys_util::mount;
use super::*;
pub(crate) fn mkdir_with_permissions(path_target: PathBuf, mode: u32) -> Result<()> {
let new_path = &path_target;
std::fs::create_dir_all(new_path)
.context(format!("unable to create new path: {:?}", new_path))?;
// mode format: 0o750, ...
std::fs::set_permissions(new_path, std::fs::Permissions::from_mode(mode))?;
Ok(())
}
pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> {
if !path.exists() {
std::fs::create_dir_all(path).context(format!("failed to create directory {:?}", path))?;

View File

@ -12,7 +12,6 @@ use kata_types::k8s::is_watchable_mount;
use kata_types::mount;
use nix::sys::stat::stat;
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
const WATCHABLE_PATH_NAME: &str = "watchable";
@ -21,7 +20,10 @@ pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
use super::{
get_host_rw_shared_path,
utils::{self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path},
utils::{
self, do_get_host_path, get_host_ro_shared_path, get_host_shared_path,
mkdir_with_permissions,
},
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
};
@ -79,13 +81,11 @@ impl ShareFsMount for VirtiofsShareMount {
.join(PASSTHROUGH_FS_DIR)
.join(WATCHABLE_PATH_NAME);
fs::create_dir_all(&watchable_host_path).context(format!(
"unable to create watchable path: {:?}",
&watchable_host_path,
mkdir_with_permissions(watchable_host_path.clone(), 0o750).context(format!(
"unable to create watchable path {:?}",
watchable_host_path
))?;
fs::set_permissions(watchable_host_path, fs::Permissions::from_mode(0o750))?;
// path: /run/kata-containers/shared/containers/passthrough/watchable/config-map-name
let file_name = Path::new(&guest_path)
.file_name()