runtime-rs: bind mount volumes in sandbox level

Implemented bind mount management on the sandbox side: a volume is bind
mounted if it has not been mounted before, and its permission is upgraded to
readwrite if a new container needs it.

Fixes: #5588

Signed-off-by: Xuewei Niu <justxuewei@apache.org>
This commit is contained in:
Xuewei Niu 2022-11-04 13:35:31 +08:00
parent 8246de821f
commit 527b871414
10 changed files with 253 additions and 44 deletions

2
.gitignore vendored
View File

@ -4,6 +4,8 @@
**/*.rej
**/target
**/.vscode
**/.idea
**/.fleet
pkg/logging/Cargo.lock
src/agent/src/version.rs
src/agent/kata-agent.service

View File

@ -213,11 +213,11 @@ pub fn create_mount_destination<S: AsRef<Path>, D: AsRef<Path>, R: AsRef<Path>>(
}
}
/// Remount a bind mount into readonly mode.
/// Remount a bind mount
///
/// # Safety
/// Caller needs to ensure safety of the `dst` to avoid possible file path based attacks.
pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
pub fn bind_remount<P: AsRef<Path>>(dst: P, readonly: bool) -> Result<()> {
let dst = dst.as_ref();
if dst.is_empty() {
return Err(Error::NullMountPointPath);
@ -226,7 +226,7 @@ pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
.canonicalize()
.map_err(|_e| Error::InvalidPath(dst.to_path_buf()))?;
do_rebind_mount_read_only(dst, MsFlags::empty())
do_rebind_mount(dst, readonly, MsFlags::empty())
}
/// Bind mount `src` to `dst` in slave mode, optionally in readonly mode if `readonly` is true.
@ -239,7 +239,7 @@ pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
pub fn bind_mount_unchecked<S: AsRef<Path>, D: AsRef<Path>>(
src: S,
dst: D,
read_only: bool,
readonly: bool,
) -> Result<()> {
fail::fail_point!("bind_mount", |_| {
Err(Error::FailureInject(
@ -275,8 +275,8 @@ pub fn bind_mount_unchecked<S: AsRef<Path>, D: AsRef<Path>>(
.map_err(|e| Error::Mount(PathBuf::new(), dst.to_path_buf(), e))?;
// Optionally rebind into readonly mode.
if read_only {
do_rebind_mount_read_only(dst, MsFlags::empty())?;
if readonly {
do_rebind_mount(dst, readonly, MsFlags::empty())?;
}
Ok(())
@ -356,7 +356,7 @@ impl Mounter for kata_types::mount::Mount {
// Bind mount readonly.
let bro_flag = MsFlags::MS_BIND | MsFlags::MS_RDONLY;
if (o_flag & bro_flag) == bro_flag {
do_rebind_mount_read_only(target, o_flag)?;
do_rebind_mount(target, true, o_flag)?;
}
Ok(())
@ -364,12 +364,16 @@ impl Mounter for kata_types::mount::Mount {
}
#[inline]
fn do_rebind_mount_read_only<P: AsRef<Path>>(path: P, flags: MsFlags) -> Result<()> {
fn do_rebind_mount<P: AsRef<Path>>(path: P, readonly: bool, flags: MsFlags) -> Result<()> {
mount(
Some(""),
path.as_ref(),
Some(""),
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY,
if readonly {
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY
} else {
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT
},
Some(""),
)
.map_err(|e| Error::Remount(path.as_ref().to_path_buf(), e))
@ -820,21 +824,21 @@ mod tests {
#[test]
#[ignore]
fn test_bind_remount_read_only() {
fn test_bind_remount() {
let tmpdir = tempfile::tempdir().unwrap();
let tmpdir2 = tempfile::tempdir().unwrap();
assert!(matches!(
bind_remount_read_only(&PathBuf::from("")),
bind_remount(&PathBuf::from(""), true),
Err(Error::NullMountPointPath)
));
assert!(matches!(
bind_remount_read_only(&PathBuf::from("../______doesn't____exist____nnn")),
bind_remount(&PathBuf::from("../______doesn't____exist____nnn"), true),
Err(Error::InvalidPath(_))
));
bind_mount_unchecked(tmpdir2.path(), tmpdir.path(), true).unwrap();
bind_remount_read_only(tmpdir.path()).unwrap();
bind_remount(tmpdir.path(), true).unwrap();
umount_timeout(tmpdir.path().to_str().unwrap(), 0).unwrap();
}

View File

@ -249,6 +249,12 @@ dependencies = [
"rustc-demangle",
]
[[package]]
name = "base64"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -1352,6 +1358,8 @@ dependencies = [
name = "kata-types"
version = "0.1.0"
dependencies = [
"anyhow",
"base64",
"bitmask-enum",
"byte-unit",
"glob",
@ -2288,6 +2296,7 @@ dependencies = [
"rtnetlink",
"scopeguard",
"serde",
"serde_json",
"slog",
"slog-scope",
"test-utils",

View File

@ -15,10 +15,10 @@ pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_p
mod virtio_fs_share_mount;
use virtio_fs_share_mount::VirtiofsShareMount;
use std::sync::Arc;
use std::{path::PathBuf, sync::Arc};
use agent::Storage;
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Ok, Result};
use async_trait::async_trait;
use hypervisor::Hypervisor;
use kata_types::config::hypervisor::SharedFsInfo;
@ -43,8 +43,16 @@ pub trait ShareFs: Send + Sync {
async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
async fn get_storages(&self) -> Result<Vec<Storage>>;
/// Get the mounted info recorded in the ShareFs for `source`.
/// The source is the original path on the host (not a path under `/run/kata-containers/...`).
async fn get_mounted_info(&self, source: &str) -> Option<MountedInfo>;
/// Record the mounted info for `source` in the ShareFs.
/// The source is the original path on the host (not a path under `/run/kata-containers/...`).
async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()>;
}
#[derive(Debug)]
pub struct ShareFsRootfsConfig {
// TODO: for nydus v5/v6 need to update ShareFsMount
pub cid: String,
@ -54,6 +62,7 @@ pub struct ShareFsRootfsConfig {
pub is_rafs: bool,
}
#[derive(Debug)]
pub struct ShareFsVolumeConfig {
pub cid: String,
pub source: String,
@ -69,10 +78,59 @@ pub struct ShareFsMountResult {
pub storages: Vec<agent::Storage>,
}
/// Save mounted info for sandbox-level shared files.
#[derive(Clone, Debug)]
pub struct MountedInfo {
    /// Guest path
    pub guest_path: PathBuf,
    /// Ref count of containers that use this volume with read only permission
    pub ro_ref_count: usize,
    /// Ref count of containers that use this volume with read write permission
    pub rw_ref_count: usize,
}
impl MountedInfo {
    /// Create the record for a volume together with its first user.
    ///
    /// The first user is counted in `ro_ref_count` when `readonly` is true and in
    /// `rw_ref_count` otherwise; the other counter starts at zero.
    pub fn new(guest_path: PathBuf, readonly: bool) -> Self {
        let (ro_ref_count, rw_ref_count) = if readonly { (1, 0) } else { (0, 1) };
        Self {
            guest_path,
            ro_ref_count,
            rw_ref_count,
        }
    }

    /// Check if the mount has read only permission, i.e. no user is counted
    /// in `rw_ref_count`.
    pub fn readonly(&self) -> bool {
        self.rw_ref_count == 0
    }

    /// Ref count for all permissions (read only plus read write).
    pub fn ref_count(&self) -> usize {
        self.ro_ref_count + self.rw_ref_count
    }

    /// Last component of `guest_path` as an owned string.
    ///
    /// File/dir name in the form of "sandbox-<uuid>-<file/dir name>".
    ///
    /// # Errors
    /// Fails when `guest_path` has no file name component, or when that
    /// component is not valid UTF-8.
    pub fn name(&self) -> Result<String> {
        let file_name = self.guest_path.file_name().ok_or_else(|| {
            anyhow!(
                "failed to get file name from the guest_path {:?}",
                self.guest_path
            )
        })?;
        file_name
            .to_str()
            .map(str::to_owned)
            .ok_or_else(|| anyhow!("failed to get string from {:?}", file_name))
    }
}
/// Mount operations of a shared filesystem: sharing rootfs and volumes, and
/// switching an already shared file/dir between readonly and readwrite.
#[async_trait]
pub trait ShareFsMount: Send + Sync {
    /// Share a rootfs described by `config`.
    async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result<ShareFsMountResult>;
    /// Share a volume described by `config`.
    async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result<ShareFsMountResult>;
    /// Upgrade to readwrite permission
    // NOTE(review): the caller visible in this change passes `MountedInfo::name()`
    // as `file_name` — confirm this is the expected form for all callers.
    async fn upgrade(&self, file_name: &str) -> Result<()>;
    /// Downgrade to readonly permission
    async fn downgrade(&self, file_name: &str) -> Result<()>;
}
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {

View File

@ -4,11 +4,14 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::collections::HashMap;
use agent::Storage;
use anyhow::{Context, Result};
use async_trait::async_trait;
use hypervisor::Hypervisor;
use kata_types::config::hypervisor::SharedFsInfo;
use tokio::sync::RwLock;
use super::{
share_virtio_fs::{
@ -27,9 +30,15 @@ pub struct ShareVirtioFsInlineConfig {
pub id: String,
}
/// State of `ShareVirtioFsInline` that is kept behind its lock.
#[derive(Default)]
pub struct ShareVirtioFsInlineInner {
    // Mounted info of shared volumes, keyed by the `source` string given to
    // `set_mounted_info`.
    mounted_info_set: HashMap<String, MountedInfo>,
}
/// A `ShareFs` implementation; see the `impl ShareFs for ShareVirtioFsInline` block.
pub struct ShareVirtioFsInline {
    config: ShareVirtioFsInlineConfig,
    share_fs_mount: Arc<dyn ShareFsMount>,
    // Bookkeeping shared behind an async `RwLock`; holds the mounted info map.
    inner: Arc<RwLock<ShareVirtioFsInlineInner>>,
}
impl ShareVirtioFsInline {
@ -37,6 +46,7 @@ impl ShareVirtioFsInline {
Ok(Self {
config: ShareVirtioFsInlineConfig { id: id.to_string() },
share_fs_mount: Arc::new(VirtiofsShareMount::new(id)),
inner: Arc::new(RwLock::new(ShareVirtioFsInlineInner::default())),
})
}
}
@ -77,4 +87,17 @@ impl ShareFs for ShareVirtioFsInline {
storages.push(shared_volume);
Ok(storages)
}
/// Look up the mounted info stored for `source`.
///
/// Returns a clone of the stored entry, or `None` when no entry exists for
/// that key. The read lock is held only for the duration of the lookup.
async fn get_mounted_info(&self, source: &str) -> Option<MountedInfo> {
    let state = self.inner.read().await;
    state.mounted_info_set.get(source).cloned()
}
/// Store `mounted_info` under `source`, replacing any entry already stored
/// for that key.
///
/// The write lock is held for the duration of the insert. This
/// implementation always returns `Ok(())`.
async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()> {
    let mut inner = self.inner.write().await;
    // `mounted_info` is already owned, so it is moved into the map; cloning it
    // first would only allocate a copy that is dropped immediately.
    inner
        .mounted_info_set
        .insert(source.to_owned(), mounted_info);
    Ok(())
}
}

View File

@ -4,7 +4,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::{process::Stdio, sync::Arc};
use std::{collections::HashMap, process::Stdio, sync::Arc};
use agent::Storage;
use anyhow::{anyhow, Context, Result};
@ -22,7 +22,7 @@ use tokio::{
use super::{
share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path,
virtio_fs_share_mount::VirtiofsShareMount, ShareFs, ShareFsMount,
virtio_fs_share_mount::VirtiofsShareMount, MountedInfo, ShareFs, ShareFsMount,
};
#[derive(Debug, Clone)]
@ -41,6 +41,7 @@ pub struct ShareVirtioFsStandaloneConfig {
/// State of `ShareVirtioFsStandalone` that is kept behind its lock.
#[derive(Default)]
struct ShareVirtioFsStandaloneInner {
    // NOTE(review): presumably the pid of the spawned virtiofsd process — confirm.
    pid: Option<u32>,
    // Mounted info of shared volumes, keyed by the `source` string given to
    // `set_mounted_info`.
    mounted_info_set: HashMap<String, MountedInfo>,
}
pub(crate) struct ShareVirtioFsStandalone {
inner: Arc<RwLock<ShareVirtioFsStandaloneInner>>,
@ -172,4 +173,17 @@ impl ShareFs for ShareVirtioFsStandalone {
async fn get_storages(&self) -> Result<Vec<Storage>> {
Ok(vec![])
}
/// Look up the mounted info stored for `source`.
///
/// Returns a clone of the stored entry, or `None` when no entry exists for
/// that key. The read lock is held only for the duration of the lookup.
async fn get_mounted_info(&self, source: &str) -> Option<MountedInfo> {
    let state = self.inner.read().await;
    state.mounted_info_set.get(source).cloned()
}
/// Store `mounted_info` under `source`, replacing any entry already stored
/// for that key.
///
/// The write lock is held for the duration of the insert. This
/// implementation always returns `Ok(())`.
async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()> {
    let mut inner = self.inner.write().await;
    // `mounted_info` is already owned, so it is moved into the map; cloning it
    // first would only allocate a copy that is dropped immediately.
    inner
        .mounted_info_set
        .insert(source.to_owned(), mounted_info);
    Ok(())
}
}

View File

@ -18,6 +18,7 @@ pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> {
Ok(())
}
/// Bind mount the original path to the runtime directory.
pub(crate) fn share_to_guest(
// absolute path for source
source: &str,
@ -37,7 +38,7 @@ pub(crate) fn share_to_guest(
// to remount the read only dir mount point directly.
if readonly {
let dst = do_get_host_path(target, sid, cid, is_volume, true);
mount::bind_remount_read_only(&dst).context("bind remount readonly")?;
mount::bind_remount(&dst, readonly).context("bind remount readonly")?;
}
Ok(do_get_guest_path(target, cid, is_volume, is_rafs))
@ -114,3 +115,17 @@ pub(crate) fn do_get_host_path(
};
path.to_str().unwrap().to_string()
}
// /// Get the bind mounted path on the host that will be shared to the guest in
// /// **sandbox level**.
// /// The filename is in format of "sandbox-{uuid}-examplename".
// pub(crate) fn do_get_sandbox_level_host_path(sid: &str, filename: &str, readonly: bool) -> String {
// do_get_host_path(filename, sid, "", true, readonly)
// }
// /// Get the bind mounted path on the guest that will be shared from the host in
// /// **sandbox level**.
// /// The filename is in format of "sandbox-{uuid}-examplename".
// pub(crate) fn do_get_sandbox_level_guest_path(filename: &str) -> String {
// do_get_guest_any_path(filename, "", true, false)
// }

View File

@ -7,6 +7,7 @@
use agent::Storage;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use kata_sys_util::mount::bind_remount;
use kata_types::k8s::is_watchable_mount;
use kata_types::mount;
use nix::sys::stat::stat;
@ -19,7 +20,8 @@ const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
use super::{
utils, ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
utils::{self, do_get_host_path},
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
};
@ -166,4 +168,28 @@ impl ShareFsMount for VirtiofsShareMount {
storages: vec![],
})
}
/// Upgrade the shared file/dir `file_name` to readwrite permission.
///
/// Two host paths are derived with `do_get_host_path` — first with its
/// readonly flag set, then with it cleared — and each one is bind-remounted
/// with `readonly = false`, in that order. The first failing remount aborts
/// the sequence and its error is returned with context.
async fn upgrade(&self, file_name: &str) -> Result<()> {
    // (readonly directory?, error context) — order matters: readonly directory first.
    let steps = [
        (
            true,
            "remount readonly directory with readwrite permission",
        ),
        (
            false,
            "remount readwrite directory with readwrite permission",
        ),
    ];
    for (readonly_dir, what) in steps {
        let host_dest = do_get_host_path(file_name, &self.id, "", true, readonly_dir);
        bind_remount(&host_dest, false).context(what)?;
    }
    Ok(())
}
/// Downgrade the shared file/dir `file_name` to readonly permission.
///
/// Two host paths are derived with `do_get_host_path` — first with its
/// readonly flag cleared, then with it set — and each one is bind-remounted
/// with `readonly = true`, in that order. The first failing remount aborts
/// the sequence and its error is returned with context.
async fn downgrade(&self, file_name: &str) -> Result<()> {
    // (readonly directory?, error context) — order matters: readwrite directory first.
    let steps = [
        (
            false,
            "remount readwrite directory with readonly permission",
        ),
        (
            true,
            "remount readonly directory with readonly permission",
        ),
    ];
    for (readonly_dir, what) in steps {
        let host_dest = do_get_host_path(file_name, &self.id, "", true, readonly_dir);
        bind_remount(&host_dest, true).context(what)?;
    }
    Ok(())
}
}

View File

@ -4,12 +4,16 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::{path::Path, sync::Arc};
use std::{
path::{Path, PathBuf},
str::FromStr,
sync::Arc,
};
use anyhow::{anyhow, Context, Result};
use super::Volume;
use crate::share_fs::{ShareFs, ShareFsVolumeConfig};
use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig};
use kata_types::mount;
// copy file to container's rootfs if filesystem sharing is not supported, otherwise
@ -29,10 +33,12 @@ impl ShareFsVolume {
m: &oci::Mount,
cid: &str,
) -> Result<Self> {
// The file_name is in the format of "sandbox-{uuid}-{file_name}"
let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap();
let file_name = generate_mount_path(cid, file_name);
let file_name = generate_mount_path("sandbox", file_name);
let mut volume = Self {
// share_fs: share_fs.as_ref().map(Arc::clone),
mounts: vec![],
storages: vec![],
};
@ -59,20 +65,69 @@ impl ShareFsVolume {
}
}
Some(share_fs) => {
let readonly = m.options.iter().any(|opt| opt == "ro");
let share_fs_mount = share_fs.get_share_fs_mount();
if let Some(mut mounted_info) = share_fs.get_mounted_info(&m.source).await {
// Mounted at least once
let guest_path = mounted_info
.guest_path
.clone()
.as_os_str()
.to_str()
.unwrap()
.to_owned();
if !readonly && mounted_info.readonly() {
// The current mount should be upgraded to readwrite permission
info!(
sl!(),
"The mount will be upgraded, mount = {:?}, cid = {}", m, cid
);
share_fs_mount
.upgrade(&mounted_info.name().context("get name of mounted info")?)
.await
.context("upgrade mount")?;
}
if readonly {
mounted_info.ro_ref_count += 1;
} else {
mounted_info.rw_ref_count += 1;
}
share_fs
.set_mounted_info(&m.source, mounted_info)
.await
.context("set mounted info")?;
volume.mounts.push(oci::Mount {
destination: m.destination.clone(),
r#type: "bind".to_string(),
source: guest_path,
options: m.options.clone(),
})
} else {
// Not mounted ever
let mount_result = share_fs_mount
.share_volume(ShareFsVolumeConfig {
cid: cid.to_string(),
// The scope of shared volume is sandbox
cid: String::from(""),
source: m.source.clone(),
target: file_name,
readonly: m.options.iter().any(|o| *o == "ro"),
target: file_name.clone(),
readonly,
mount_options: m.options.clone(),
mount: m.clone(),
is_rafs: false,
})
.await
.context("share fs volume")?;
.context("mount shared volume")?;
let mounted_info = MountedInfo::new(
PathBuf::from_str(&mount_result.guest_path)
.context("convert guest path")?,
readonly,
);
share_fs
.set_mounted_info(&m.source, mounted_info)
.await
.context("set mounted info")?;
// set storages for the volume
volume.storages = mount_result.storages;
@ -85,6 +140,7 @@ impl ShareFsVolume {
});
}
}
}
Ok(volume)
}
}

View File

@ -180,6 +180,8 @@ impl ContainerInner {
}
})?;
// TODO(justxuewei): clean mount
// close the exit channel to wakeup wait service
// send to notify watchers who are waiting for the process exit
self.init_process.stop().await;