runtime-rs: umount and permission controls at the sandbox level

This commit implements umount and permission controls for sandbox-level
volumes. When a volume is no longer referenced by any container, it is
umounted immediately. When a volume is currently shared readonly and a newly
started container needs readwrite access, the volume is upgraded to
readwrite. Conversely, when no remaining container needs readwrite access to
a readwrite volume, the volume is downgraded to readonly.
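
The decision on container exit can be condensed as follows. This is a
minimal sketch: the field and method names follow MountedInfo in
share_fs/mod.rs, the types are simplified, and the Action enum is purely
illustrative (it is not part of the patch).

struct MountedInfo {
    ro_ref_count: u32,
    rw_ref_count: u32,
}

impl MountedInfo {
    fn ref_count(&self) -> u32 {
        self.ro_ref_count + self.rw_ref_count
    }

    // The mount only has to stay readwrite while at least one container
    // holds a readwrite reference.
    fn readonly(&self) -> bool {
        self.rw_ref_count == 0
    }
}

enum Action {
    Keep,      // other containers still need the current permission
    Downgrade, // remount readonly: the last readwrite reference is gone
    Umount,    // no references remain: remove the bind mount
}

fn on_container_exit(info: &mut MountedInfo, mounted_readwrite: bool) -> Action {
    let was_readwrite = !info.readonly();
    if mounted_readwrite {
        info.rw_ref_count -= 1;
    } else {
        info.ro_ref_count -= 1;
    }
    if info.ref_count() == 0 {
        Action::Umount
    } else if was_readwrite && info.readonly() {
        Action::Downgrade
    } else {
        Action::Keep
    }
}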

Fixes: #5588

Signed-off-by: Xuewei Niu <justxuewei@apache.org>
Xuewei Niu 2022-11-07 20:50:11 +08:00
parent 527b871414
commit 1d823c4f65
13 changed files with 191 additions and 44 deletions

View File

@@ -15,7 +15,7 @@ pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_p
mod virtio_fs_share_mount;
use virtio_fs_share_mount::VirtiofsShareMount;
-use std::{path::PathBuf, sync::Arc};
+use std::{fmt::Debug, path::PathBuf, sync::Arc};
use agent::Storage;
use anyhow::{anyhow, Context, Ok, Result};
@@ -38,18 +38,26 @@ pub const PASSTHROUGH_FS_DIR: &str = "passthrough";
const RAFS_DIR: &str = "rafs";
#[async_trait]
-pub trait ShareFs: Send + Sync {
+pub trait ShareFs: Send + Sync + Debug {
fn get_share_fs_mount(&self) -> Arc<dyn ShareFsMount>;
async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
async fn get_storages(&self) -> Result<Vec<Storage>>;
-/// Get mounted info from ShareFs.
+/// Get a mounted info from ShareFs.
/// The source is an original path on the host (not in the `/run/kata-containers/...`).
async fn get_mounted_info(&self, source: &str) -> Option<MountedInfo>;
-/// Set mounted info to ShareFS.
-/// The source is an original path on the host (not in the `/run/kata-containers/...`).
+/// Set a mounted info to ShareFS.
+/// The source is the same as get_mounted_info's.
async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()>;
+/// Remove a mounted info from ShareFs.
+/// The source is the same as get_mounted_info's.
+async fn rm_mounted_info(&self, source: &str) -> Result<Option<MountedInfo>>;
+/// Get a mounted info by guest path.
+async fn get_mounted_info_by_guest_path(
+&self,
+guest_path: &str,
+) -> Option<(String, MountedInfo)>;
}
#[derive(Debug)]
@@ -109,7 +117,7 @@ impl MountedInfo {
}
// File/dir name in the form of "sandbox-<uuid>-<file/dir name>"
-pub fn name(&self) -> Result<String> {
+pub fn file_name(&self) -> Result<String> {
match self.guest_path.file_name() {
Some(file_name) => match file_name.to_str() {
Some(file_name) => Ok(file_name.to_owned()),
@@ -124,13 +132,15 @@
}
#[async_trait]
-pub trait ShareFsMount: Send + Sync {
+pub trait ShareFsMount: Send + Sync + Debug {
async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result<ShareFsMountResult>;
async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result<ShareFsMountResult>;
/// Upgrade to readwrite permission
async fn upgrade(&self, file_name: &str) -> Result<()>;
/// Downgrade to readonly permission
async fn downgrade(&self, file_name: &str) -> Result<()>;
+/// Umount the volume
+async fn umount(&self, file_name: &str) -> Result<()>;
}
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {

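As an illustration of how this bookkeeping API is driven, here is a
hypothetical helper (not part of this commit) that takes one additional
readwrite reference on an already-shared volume and upgrades the bind mount
when it was readonly so far. It assumes MountedInfo's ref-count fields and
its readonly() helper are accessible, as they are in the cleanup path later
in this diff.

use std::sync::Arc;

use anyhow::{anyhow, Context, Result};

use crate::share_fs::ShareFs;

// Hypothetical caller of the bookkeeping API above. `source` is the
// original host path, as documented on get_mounted_info.
async fn add_rw_reference(share_fs: Arc<dyn ShareFs>, source: &str) -> Result<()> {
    let mut mounted_info = share_fs
        .get_mounted_info(source)
        .await
        .ok_or_else(|| anyhow!("{} is not shared yet", source))?;
    let was_readonly = mounted_info.readonly();
    mounted_info.rw_ref_count += 1;
    if was_readonly {
        // The volume moves from ro to rw: remount the bind mount readwrite.
        share_fs
            .get_share_fs_mount()
            .upgrade(&mounted_info.file_name().context("get file name")?)
            .await
            .context("upgrade mount")?;
    }
    share_fs
        .set_mounted_info(source, mounted_info)
        .await
        .context("update mounted info")
}
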
View File

@@ -30,11 +30,12 @@ pub struct ShareVirtioFsInlineConfig {
pub id: String,
}
-#[derive(Default)]
+#[derive(Default, Debug)]
pub struct ShareVirtioFsInlineInner {
mounted_info_set: HashMap<String, MountedInfo>,
}
+#[derive(Debug)]
pub struct ShareVirtioFsInline {
config: ShareVirtioFsInlineConfig,
share_fs_mount: Arc<dyn ShareFsMount>,
@@ -90,7 +91,7 @@ impl ShareFs for ShareVirtioFsInline {
async fn get_mounted_info(&self, source: &str) -> Option<MountedInfo> {
let inner = self.inner.read().await;
-inner.mounted_info_set.get(source).map(|m| m.clone())
+inner.mounted_info_set.get(source).cloned()
}
async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()> {
@@ -100,4 +101,21 @@ impl ShareFs for ShareVirtioFsInline {
.insert(source.to_owned(), mounted_info.clone());
Ok(())
}
+async fn rm_mounted_info(&self, source: &str) -> Result<Option<MountedInfo>> {
+let mut inner = self.inner.write().await;
+Ok(inner.mounted_info_set.remove(source))
+}
+async fn get_mounted_info_by_guest_path(
+&self,
+guest_path: &str,
+) -> Option<(String, MountedInfo)> {
+let inner = self.inner.read().await;
+inner
+.mounted_info_set
+.iter()
+.find(|m| m.1.guest_path.as_os_str().to_str().unwrap() == guest_path)
+.map(|m| (m.0.to_owned(), m.1.clone()))
+}
}

View File

@@ -38,11 +38,13 @@ pub struct ShareVirtioFsStandaloneConfig {
pub virtio_fs_extra_args: Vec<String>,
}
-#[derive(Default)]
+#[derive(Default, Debug)]
struct ShareVirtioFsStandaloneInner {
pid: Option<u32>,
mounted_info_set: HashMap<String, MountedInfo>,
}
+#[derive(Debug)]
pub(crate) struct ShareVirtioFsStandalone {
inner: Arc<RwLock<ShareVirtioFsStandaloneInner>>,
config: ShareVirtioFsStandaloneConfig,
@@ -176,7 +178,7 @@ impl ShareFs for ShareVirtioFsStandalone {
async fn get_mounted_info(&self, source: &str) -> Option<MountedInfo> {
let inner = self.inner.read().await;
-inner.mounted_info_set.get(source).map(|m| m.clone())
+inner.mounted_info_set.get(source).cloned()
}
async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()> {
@@ -186,4 +188,21 @@ impl ShareFs for ShareVirtioFsStandalone {
.insert(source.to_owned(), mounted_info.clone());
Ok(())
}
+async fn rm_mounted_info(&self, source: &str) -> Result<Option<MountedInfo>> {
+let mut inner = self.inner.write().await;
+Ok(inner.mounted_info_set.remove(source))
+}
+async fn get_mounted_info_by_guest_path(
+&self,
+guest_path: &str,
+) -> Option<(String, MountedInfo)> {
+let inner = self.inner.read().await;
+inner
+.mounted_info_set
+.iter()
+.find(|m| m.1.guest_path.as_os_str().to_str().unwrap() == guest_path)
+.map(|m| (m.0.to_owned(), m.1.clone()))
+}
}

View File

@@ -115,17 +115,3 @@ pub(crate) fn do_get_host_path(
};
path.to_str().unwrap().to_string()
}
-// /// Get the bind mounted path on the host that will be shared to the guest in
-// /// **sandbox level**.
-// /// The filename is in format of "sandbox-{uuid}-examplename".
-// pub(crate) fn do_get_sandbox_level_host_path(sid: &str, filename: &str, readonly: bool) -> String {
-// do_get_host_path(filename, sid, "", true, readonly)
-// }
-// /// Get the bind mounted path on the guest that will be shared from the host in
-// /// **sandbox level**.
-// /// The filename is in format of "sandbox-{uuid}-examplename".
-// pub(crate) fn do_get_sandbox_level_guest_path(filename: &str) -> String {
-// do_get_guest_any_path(filename, "", true, false)
-// }

View File

@@ -7,7 +7,7 @@
use agent::Storage;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
-use kata_sys_util::mount::bind_remount;
+use kata_sys_util::mount::{bind_remount, umount_timeout};
use kata_types::k8s::is_watchable_mount;
use kata_types::mount;
use nix::sys::stat::stat;
@@ -25,6 +25,7 @@ use super::{
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
};
+#[derive(Debug)]
pub struct VirtiofsShareMount {
id: String,
}
@@ -192,4 +193,12 @@ impl ShareFsMount for VirtiofsShareMount {
.context("remount readonly directory with readonly permission")?;
Ok(())
}
+async fn umount(&self, file_name: &str) -> Result<()> {
+let host_dest = do_get_host_path(file_name, &self.id, "", true, false);
+umount_timeout(&host_dest, 0).context("Umount readonly host dest")?;
+let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
+umount_timeout(&host_dest, 0).context("Umount readwrite host dest")?;
+Ok(())
+}
}

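The umount implementation above unmounts both candidate host paths because a
sandbox-level volume may have been bind mounted under the readonly tree, the
readwrite tree, or both over its lifetime. A rough standalone equivalent is
sketched below; the concrete directory layout is an assumption for
illustration, while the commit derives the real paths via do_get_host_path().

use anyhow::{Context, Result};
use kata_sys_util::mount::umount_timeout;

// Sketch only: tear down both the ro and rw bind targets of a
// sandbox-level volume. The path layout is assumed, not taken from
// the commit.
fn umount_sandbox_volume(sid: &str, file_name: &str) -> Result<()> {
    for perm in ["ro", "rw"] {
        let host_dest = format!(
            "/run/kata-containers/shared/sandboxes/{}/{}/passthrough/{}",
            sid, perm, file_name
        );
        // 0 mirrors the timeout value the commit passes to umount_timeout().
        umount_timeout(&host_dest, 0).context("umount host dest")?;
    }
    Ok(())
}
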
View File

@@ -5,9 +5,11 @@
//
use anyhow::Result;
+use async_trait::async_trait;
use super::Volume;
+#[derive(Debug)]
pub(crate) struct BlockVolume {}
/// BlockVolume: block device volume
@@ -17,6 +19,7 @@ impl BlockVolume {
}
}
+#[async_trait]
impl Volume for BlockVolume {
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
todo!()
@@ -26,8 +29,9 @@ impl Volume for BlockVolume {
todo!()
}
-fn cleanup(&self) -> Result<()> {
-todo!()
+async fn cleanup(&self) -> Result<()> {
+warn!(sl!(), "Cleaning up BlockVolume is still unimplemented.");
+Ok(())
}
}

View File

@@ -5,9 +5,11 @@
//
use anyhow::Result;
+use async_trait::async_trait;
use super::Volume;
+#[derive(Debug)]
pub(crate) struct DefaultVolume {
mount: oci::Mount,
}
@@ -21,6 +23,7 @@ impl DefaultVolume {
}
}
+#[async_trait]
impl Volume for DefaultVolume {
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
Ok(vec![self.mount.clone()])
@@ -30,7 +33,8 @@ impl Volume for DefaultVolume {
Ok(vec![])
}
-fn cleanup(&self) -> Result<()> {
-todo!()
+async fn cleanup(&self) -> Result<()> {
+warn!(sl!(), "Cleaning up DefaultVolume is still unimplemented.");
+Ok(())
}
}

View File

@@ -8,6 +8,7 @@ mod block_volume;
mod default_volume;
mod share_fs_volume;
mod shm_volume;
+use async_trait::async_trait;
use std::{sync::Arc, vec::Vec};
@@ -16,10 +17,11 @@ use tokio::sync::RwLock;
use crate::share_fs::ShareFs;
-pub trait Volume: Send + Sync {
+#[async_trait]
+pub trait Volume: Send + Sync + std::fmt::Debug {
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>>;
fn get_storage(&self) -> Result<Vec<agent::Storage>>;
-fn cleanup(&self) -> Result<()>;
+async fn cleanup(&self) -> Result<()>;
}
#[derive(Default)]

View File

@@ -7,10 +7,11 @@
use std::{
path::{Path, PathBuf},
str::FromStr,
-sync::Arc,
+sync::{Arc, Weak},
};
use anyhow::{anyhow, Context, Result};
+use async_trait::async_trait;
use super::Volume;
use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig};
@@ -22,7 +23,9 @@ use kata_types::mount;
// only regular files in /dev. It does not make sense to pass the host
// device nodes to the guest.
// skip the volumes whose source had already set to guest share dir.
+#[derive(Debug)]
pub(crate) struct ShareFsVolume {
+share_fs: Option<Weak<dyn ShareFs>>,
mounts: Vec<oci::Mount>,
storages: Vec<agent::Storage>,
}
@@ -38,7 +41,7 @@ impl ShareFsVolume {
let file_name = generate_mount_path("sandbox", file_name);
let mut volume = Self {
-// share_fs: share_fs.as_ref().map(Arc::clone),
+share_fs: share_fs.as_ref().map(Arc::downgrade),
mounts: vec![],
storages: vec![],
};
@@ -84,7 +87,11 @@
"The mount will be upgraded, mount = {:?}, cid = {}", m, cid
);
share_fs_mount
-.upgrade(&mounted_info.name().context("get name of mounted info")?)
+.upgrade(
+&mounted_info
+.file_name()
+.context("get name of mounted info")?,
+)
.await
.context("upgrade mount")?;
}
@@ -145,6 +152,7 @@
}
}
+#[async_trait]
impl Volume for ShareFsVolume {
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
Ok(self.mounts.clone())
@@ -154,8 +162,76 @@ impl Volume for ShareFsVolume {
Ok(self.storages.clone())
}
-fn cleanup(&self) -> Result<()> {
-todo!()
+async fn cleanup(&self) -> Result<()> {
+if self.share_fs.is_none() {
+return Ok(());
+}
+let share_fs = match self.share_fs.as_ref().unwrap().upgrade() {
+Some(share_fs) => share_fs,
+None => return Err(anyhow!("The share_fs was released unexpectedly")),
+};
+for m in self.mounts.iter() {
+let (host_source, mut mounted_info) =
+match share_fs.get_mounted_info_by_guest_path(&m.source).await {
+Some(entry) => entry,
+None => {
+warn!(
+sl!(),
+"The mounted info for guest path {} not found", m.source
+);
+continue;
+}
+};
+let old_readonly = mounted_info.readonly();
+if m.options.iter().any(|opt| *opt == "ro") {
+mounted_info.ro_ref_count -= 1;
+} else {
+mounted_info.rw_ref_count -= 1;
+}
+debug!(
+sl!(),
+"Ref count for {} was updated to {} due to volume cleanup",
+host_source,
+mounted_info.ref_count()
+);
+let share_fs_mount = share_fs.get_share_fs_mount();
+let file_name = mounted_info.file_name()?;
+if mounted_info.ref_count() > 0 {
+// Downgrade to readonly if no container needs readwrite permission
+if !old_readonly && mounted_info.readonly() {
+info!(sl!(), "Downgrade {} to readonly due to no container that needs readwrite permission", host_source);
+share_fs_mount
+.downgrade(&file_name)
+.await
+.context("Downgrade volume")?;
+}
+share_fs
+.set_mounted_info(&host_source, mounted_info)
+.await
+.context("Update mounted info")?;
+} else {
+info!(
+sl!(),
+"The path will be umounted due to no references, host_source = {}", host_source
+);
+share_fs
+.rm_mounted_info(&host_source)
+.await
+.context("Rm mounted info due to no reference")?;
+// Umount the volume
+share_fs_mount
+.umount(&file_name)
+.await
+.context("Umount volume")?
+}
+}
+Ok(())
}
}

View File

@@ -7,6 +7,7 @@
use std::path::Path;
use anyhow::Result;
+use async_trait::async_trait;
use super::Volume;
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
@@ -19,6 +20,7 @@ pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024;
// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers.
pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral";
+#[derive(Debug)]
pub(crate) struct ShmVolume {
mount: oci::Mount,
storage: Option<agent::Storage>,
@@ -82,6 +84,7 @@ impl ShmVolume {
}
}
+#[async_trait]
impl Volume for ShmVolume {
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
Ok(vec![self.mount.clone()])
@@ -96,8 +99,9 @@ impl Volume for ShmVolume {
Ok(s)
}
-fn cleanup(&self) -> Result<()> {
-todo!()
+async fn cleanup(&self) -> Result<()> {
+warn!(sl!(), "Cleaning up ShmVolume is still unimplemented.");
+Ok(())
}
}

View File

@@ -258,7 +258,7 @@ impl Container {
signal: u32,
all: bool,
) -> Result<()> {
-let inner = self.inner.read().await;
+let mut inner = self.inner.write().await;
inner.signal_process(container_process, signal, all).await
}

View File

@@ -180,8 +180,6 @@ impl ContainerInner {
}
})?;
-// TODO(justxuewei): clean mount
// close the exit channel to wakeup wait service
// send to notify watchers who are waiting for the process exit
self.init_process.stop().await;
@@ -235,7 +233,7 @@
}
pub(crate) async fn signal_process(
-&self,
+&mut self,
process: &ContainerProcess,
signal: u32,
all: bool,
@@ -249,6 +247,9 @@
self.agent
.signal_process(agent::SignalProcessRequest { process_id, signal })
.await?;
+self.clean_volumes().await.context("clean volumes")?;
Ok(())
}
@@ -270,4 +271,18 @@
Ok(())
}
+async fn clean_volumes(&mut self) -> Result<()> {
+let mut unhandled = Vec::new();
+for v in self.volumes.iter() {
+if let Err(err) = v.cleanup().await {
+unhandled.push(Arc::clone(v));
+warn!(sl!(), "Failed to clean volume {:?}, error = {:?}", v, err);
+}
+}
+if !unhandled.is_empty() {
+self.volumes = unhandled;
+}
+Ok(())
+}
}