diff --git a/.gitignore b/.gitignore index 1a149208c0..ace29b4365 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ **/*.rej **/target **/.vscode +**/.idea +**/.fleet pkg/logging/Cargo.lock src/agent/src/version.rs src/agent/kata-agent.service diff --git a/src/libs/kata-sys-util/src/mount.rs b/src/libs/kata-sys-util/src/mount.rs index 2bc8c07a5a..febdf23e60 100644 --- a/src/libs/kata-sys-util/src/mount.rs +++ b/src/libs/kata-sys-util/src/mount.rs @@ -213,11 +213,11 @@ pub fn create_mount_destination, D: AsRef, R: AsRef>( } } -/// Remount a bind mount into readonly mode. +/// Remount a bind mount /// /// # Safety /// Caller needs to ensure safety of the `dst` to avoid possible file path based attacks. -pub fn bind_remount_read_only>(dst: P) -> Result<()> { +pub fn bind_remount>(dst: P, readonly: bool) -> Result<()> { let dst = dst.as_ref(); if dst.is_empty() { return Err(Error::NullMountPointPath); @@ -225,8 +225,8 @@ pub fn bind_remount_read_only>(dst: P) -> Result<()> { let dst = dst .canonicalize() .map_err(|_e| Error::InvalidPath(dst.to_path_buf()))?; - - do_rebind_mount_read_only(dst, MsFlags::empty()) + + do_rebind_mount(dst, readonly, MsFlags::empty()) } /// Bind mount `src` to `dst` in slave mode, optionally in readonly mode if `readonly` is true. @@ -239,7 +239,7 @@ pub fn bind_remount_read_only>(dst: P) -> Result<()> { pub fn bind_mount_unchecked, D: AsRef>( src: S, dst: D, - read_only: bool, + readonly: bool, ) -> Result<()> { fail::fail_point!("bind_mount", |_| { Err(Error::FailureInject( @@ -275,8 +275,8 @@ pub fn bind_mount_unchecked, D: AsRef>( .map_err(|e| Error::Mount(PathBuf::new(), dst.to_path_buf(), e))?; // Optionally rebind into readonly mode. - if read_only { - do_rebind_mount_read_only(dst, MsFlags::empty())?; + if readonly { + do_rebind_mount(dst, readonly, MsFlags::empty())?; } Ok(()) @@ -356,7 +356,7 @@ impl Mounter for kata_types::mount::Mount { // Bind mount readonly. let bro_flag = MsFlags::MS_BIND | MsFlags::MS_RDONLY; if (o_flag & bro_flag) == bro_flag { - do_rebind_mount_read_only(target, o_flag)?; + do_rebind_mount(target, true, o_flag)?; } Ok(()) @@ -364,12 +364,16 @@ impl Mounter for kata_types::mount::Mount { } #[inline] -fn do_rebind_mount_read_only>(path: P, flags: MsFlags) -> Result<()> { +fn do_rebind_mount>(path: P, readonly: bool, flags: MsFlags) -> Result<()> { mount( Some(""), path.as_ref(), Some(""), - flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY, + if readonly { + flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY + } else { + flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT + }, Some(""), ) .map_err(|e| Error::Remount(path.as_ref().to_path_buf(), e)) @@ -820,21 +824,21 @@ mod tests { #[test] #[ignore] - fn test_bind_remount_read_only() { + fn test_bind_remount() { let tmpdir = tempfile::tempdir().unwrap(); let tmpdir2 = tempfile::tempdir().unwrap(); assert!(matches!( - bind_remount_read_only(&PathBuf::from("")), + bind_remount(&PathBuf::from(""), true), Err(Error::NullMountPointPath) )); assert!(matches!( - bind_remount_read_only(&PathBuf::from("../______doesn't____exist____nnn")), + bind_remount(&PathBuf::from("../______doesn't____exist____nnn"), true), Err(Error::InvalidPath(_)) )); bind_mount_unchecked(tmpdir2.path(), tmpdir.path(), true).unwrap(); - bind_remount_read_only(tmpdir.path()).unwrap(); + bind_remount(tmpdir.path(), true).unwrap(); umount_timeout(tmpdir.path().to_str().unwrap(), 0).unwrap(); } diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index a95056d607..5c436ca535 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -249,6 +249,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "bitflags" version = "1.3.2" @@ -1352,6 +1358,8 @@ dependencies = [ name = "kata-types" version = "0.1.0" dependencies = [ + "anyhow", + "base64", "bitmask-enum", "byte-unit", "glob", @@ -2288,6 +2296,7 @@ dependencies = [ "rtnetlink", "scopeguard", "serde", + "serde_json", "slog", "slog-scope", "test-utils", diff --git a/src/runtime-rs/crates/resource/src/share_fs/mod.rs b/src/runtime-rs/crates/resource/src/share_fs/mod.rs index e2d5696010..e044495eab 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/mod.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/mod.rs @@ -15,10 +15,10 @@ pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_p mod virtio_fs_share_mount; use virtio_fs_share_mount::VirtiofsShareMount; -use std::sync::Arc; +use std::{path::PathBuf, sync::Arc}; use agent::Storage; -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, Context, Ok, Result}; use async_trait::async_trait; use hypervisor::Hypervisor; use kata_types::config::hypervisor::SharedFsInfo; @@ -43,8 +43,16 @@ pub trait ShareFs: Send + Sync { async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>; async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>; async fn get_storages(&self) -> Result>; + + /// Get mounted info from ShareFs. + /// The source is an original path on the host (not in the `/run/kata-containers/...`). + async fn get_mounted_info(&self, source: &str) -> Option; + /// Set mounted info to ShareFS. + /// The source is an original path on the host (not in the `/run/kata-containers/...`). + async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()>; } +#[derive(Debug)] pub struct ShareFsRootfsConfig { // TODO: for nydus v5/v6 need to update ShareFsMount pub cid: String, @@ -54,6 +62,7 @@ pub struct ShareFsRootfsConfig { pub is_rafs: bool, } +#[derive(Debug)] pub struct ShareFsVolumeConfig { pub cid: String, pub source: String, @@ -69,10 +78,59 @@ pub struct ShareFsMountResult { pub storages: Vec, } +/// Save mounted info for sandbox-level shared files. +#[derive(Clone, Debug)] +pub struct MountedInfo { + // Guest path + pub guest_path: PathBuf, + // Ref count of containers that uses this volume with read only permission + pub ro_ref_count: usize, + // Ref count of containers that uses this volume with read write permission + pub rw_ref_count: usize, +} + +impl MountedInfo { + pub fn new(guest_path: PathBuf, readonly: bool) -> Self { + Self { + guest_path, + ro_ref_count: if readonly { 1 } else { 0 }, + rw_ref_count: if readonly { 0 } else { 1 }, + } + } + + /// Check if the mount has read only permission + pub fn readonly(&self) -> bool { + self.rw_ref_count == 0 + } + + /// Ref count for all permissions + pub fn ref_count(&self) -> usize { + self.ro_ref_count + self.rw_ref_count + } + + // File/dir name in the form of "sandbox--" + pub fn name(&self) -> Result { + match self.guest_path.file_name() { + Some(file_name) => match file_name.to_str() { + Some(file_name) => Ok(file_name.to_owned()), + None => Err(anyhow!("failed to get string from {:?}", file_name)), + }, + None => Err(anyhow!( + "failed to get file name from the guest_path {:?}", + self.guest_path + )), + } + } +} + #[async_trait] pub trait ShareFsMount: Send + Sync { async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result; async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result; + /// Upgrade to readwrite permission + async fn upgrade(&self, file_name: &str) -> Result<()>; + /// Downgrade to readonly permission + async fn downgrade(&self, file_name: &str) -> Result<()>; } pub fn new(id: &str, config: &SharedFsInfo) -> Result> { diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs index c082a28b70..79a2c8ca2d 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_inline.rs @@ -4,11 +4,14 @@ // SPDX-License-Identifier: Apache-2.0 // +use std::collections::HashMap; + use agent::Storage; use anyhow::{Context, Result}; use async_trait::async_trait; use hypervisor::Hypervisor; use kata_types::config::hypervisor::SharedFsInfo; +use tokio::sync::RwLock; use super::{ share_virtio_fs::{ @@ -27,9 +30,15 @@ pub struct ShareVirtioFsInlineConfig { pub id: String, } +#[derive(Default)] +pub struct ShareVirtioFsInlineInner { + mounted_info_set: HashMap, +} + pub struct ShareVirtioFsInline { config: ShareVirtioFsInlineConfig, share_fs_mount: Arc, + inner: Arc>, } impl ShareVirtioFsInline { @@ -37,6 +46,7 @@ impl ShareVirtioFsInline { Ok(Self { config: ShareVirtioFsInlineConfig { id: id.to_string() }, share_fs_mount: Arc::new(VirtiofsShareMount::new(id)), + inner: Arc::new(RwLock::new(ShareVirtioFsInlineInner::default())), }) } } @@ -77,4 +87,17 @@ impl ShareFs for ShareVirtioFsInline { storages.push(shared_volume); Ok(storages) } + + async fn get_mounted_info(&self, source: &str) -> Option { + let inner = self.inner.read().await; + inner.mounted_info_set.get(source).map(|m| m.clone()) + } + + async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()> { + let mut inner = self.inner.write().await; + inner + .mounted_info_set + .insert(source.to_owned(), mounted_info.clone()); + Ok(()) + } } diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs index a9ef41ee72..d3992ac834 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs_standalone.rs @@ -4,7 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 // -use std::{process::Stdio, sync::Arc}; +use std::{collections::HashMap, process::Stdio, sync::Arc}; use agent::Storage; use anyhow::{anyhow, Context, Result}; @@ -22,7 +22,7 @@ use tokio::{ use super::{ share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path, - virtio_fs_share_mount::VirtiofsShareMount, ShareFs, ShareFsMount, + virtio_fs_share_mount::VirtiofsShareMount, MountedInfo, ShareFs, ShareFsMount, }; #[derive(Debug, Clone)] @@ -41,6 +41,7 @@ pub struct ShareVirtioFsStandaloneConfig { #[derive(Default)] struct ShareVirtioFsStandaloneInner { pid: Option, + mounted_info_set: HashMap, } pub(crate) struct ShareVirtioFsStandalone { inner: Arc>, @@ -172,4 +173,17 @@ impl ShareFs for ShareVirtioFsStandalone { async fn get_storages(&self) -> Result> { Ok(vec![]) } + + async fn get_mounted_info(&self, source: &str) -> Option { + let inner = self.inner.read().await; + inner.mounted_info_set.get(source).map(|m| m.clone()) + } + + async fn set_mounted_info(&self, source: &str, mounted_info: MountedInfo) -> Result<()> { + let mut inner = self.inner.write().await; + inner + .mounted_info_set + .insert(source.to_owned(), mounted_info.clone()); + Ok(()) + } } diff --git a/src/runtime-rs/crates/resource/src/share_fs/utils.rs b/src/runtime-rs/crates/resource/src/share_fs/utils.rs index 5856cfd6c4..4109a0a71a 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/utils.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/utils.rs @@ -18,6 +18,7 @@ pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> { Ok(()) } +/// Bind mount the original path to the runtime directory. pub(crate) fn share_to_guest( // absolute path for source source: &str, @@ -37,7 +38,7 @@ pub(crate) fn share_to_guest( // to remount the read only dir mount point directly. if readonly { let dst = do_get_host_path(target, sid, cid, is_volume, true); - mount::bind_remount_read_only(&dst).context("bind remount readonly")?; + mount::bind_remount(&dst, readonly).context("bind remount readonly")?; } Ok(do_get_guest_path(target, cid, is_volume, is_rafs)) @@ -114,3 +115,17 @@ pub(crate) fn do_get_host_path( }; path.to_str().unwrap().to_string() } + +// /// Get the bind mounted path on the host that will be shared to the guest in +// /// **sandbox level**. +// /// The filename is in format of "sandbox-{uuid}-examplename". +// pub(crate) fn do_get_sandbox_level_host_path(sid: &str, filename: &str, readonly: bool) -> String { +// do_get_host_path(filename, sid, "", true, readonly) +// } + +// /// Get the bind mounted path on the guest that will be shared from the host in +// /// **sandbox level**. +// /// The filename is in format of "sandbox-{uuid}-examplename". +// pub(crate) fn do_get_sandbox_level_guest_path(filename: &str) -> String { +// do_get_guest_any_path(filename, "", true, false) +// } diff --git a/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs b/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs index 541dc7f910..923be39156 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/virtio_fs_share_mount.rs @@ -7,6 +7,7 @@ use agent::Storage; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; +use kata_sys_util::mount::bind_remount; use kata_types::k8s::is_watchable_mount; use kata_types::mount; use nix::sys::stat::stat; @@ -19,7 +20,8 @@ const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind"; const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral"; use super::{ - utils, ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig, + utils::{self, do_get_host_path}, + ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig, KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR, }; @@ -166,4 +168,28 @@ impl ShareFsMount for VirtiofsShareMount { storages: vec![], }) } + + async fn upgrade(&self, file_name: &str) -> Result<()> { + // Remount readonly directory with readwrite permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, true); + bind_remount(&host_dest, false) + .context("remount readonly directory with readwrite permission")?; + // Remount readwrite directory with readwrite permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, false); + bind_remount(&host_dest, false) + .context("remount readwrite directory with readwrite permission")?; + Ok(()) + } + + async fn downgrade(&self, file_name: &str) -> Result<()> { + // Remount readwrite directory with readonly permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, false); + bind_remount(&host_dest, true) + .context("remount readwrite directory with readonly permission")?; + // Remount readonly directory with readonly permission + let host_dest = do_get_host_path(file_name, &self.id, "", true, true); + bind_remount(&host_dest, true) + .context("remount readonly directory with readonly permission")?; + Ok(()) + } } diff --git a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs index 907741091d..5b768c5dac 100644 --- a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs @@ -4,12 +4,16 @@ // SPDX-License-Identifier: Apache-2.0 // -use std::{path::Path, sync::Arc}; +use std::{ + path::{Path, PathBuf}, + str::FromStr, + sync::Arc, +}; use anyhow::{anyhow, Context, Result}; use super::Volume; -use crate::share_fs::{ShareFs, ShareFsVolumeConfig}; +use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig}; use kata_types::mount; // copy file to container's rootfs if filesystem sharing is not supported, otherwise @@ -29,10 +33,12 @@ impl ShareFsVolume { m: &oci::Mount, cid: &str, ) -> Result { + // The file_name is in the format of "sandbox-{uuid}-{file_name}" let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap(); - let file_name = generate_mount_path(cid, file_name); + let file_name = generate_mount_path("sandbox", file_name); let mut volume = Self { + // share_fs: share_fs.as_ref().map(Arc::clone), mounts: vec![], storages: vec![], }; @@ -59,30 +65,80 @@ impl ShareFsVolume { } } Some(share_fs) => { + let readonly = m.options.iter().any(|opt| opt == "ro"); + let share_fs_mount = share_fs.get_share_fs_mount(); - let mount_result = share_fs_mount - .share_volume(ShareFsVolumeConfig { - cid: cid.to_string(), - source: m.source.clone(), - target: file_name, - readonly: m.options.iter().any(|o| *o == "ro"), - mount_options: m.options.clone(), - mount: m.clone(), - is_rafs: false, + if let Some(mut mounted_info) = share_fs.get_mounted_info(&m.source).await { + // Mounted at least once + let guest_path = mounted_info + .guest_path + .clone() + .as_os_str() + .to_str() + .unwrap() + .to_owned(); + if !readonly && mounted_info.readonly() { + // The current mount should be upgraded to readwrite permission + info!( + sl!(), + "The mount will be upgraded, mount = {:?}, cid = {}", m, cid + ); + share_fs_mount + .upgrade(&mounted_info.name().context("get name of mounted info")?) + .await + .context("upgrade mount")?; + } + if readonly { + mounted_info.ro_ref_count += 1; + } else { + mounted_info.rw_ref_count += 1; + } + share_fs + .set_mounted_info(&m.source, mounted_info) + .await + .context("set mounted info")?; + + volume.mounts.push(oci::Mount { + destination: m.destination.clone(), + r#type: "bind".to_string(), + source: guest_path, + options: m.options.clone(), }) - .await - .context("share fs volume")?; + } else { + // Not mounted ever + let mount_result = share_fs_mount + .share_volume(ShareFsVolumeConfig { + // The scope of shared volume is sandbox + cid: String::from(""), + source: m.source.clone(), + target: file_name.clone(), + readonly, + mount_options: m.options.clone(), + mount: m.clone(), + is_rafs: false, + }) + .await + .context("mount shared volume")?; + let mounted_info = MountedInfo::new( + PathBuf::from_str(&mount_result.guest_path) + .context("convert guest path")?, + readonly, + ); + share_fs + .set_mounted_info(&m.source, mounted_info) + .await + .context("set mounted info")?; + // set storages for the volume + volume.storages = mount_result.storages; - // set storages for the volume - volume.storages = mount_result.storages; - - // set mount for the volume - volume.mounts.push(oci::Mount { - destination: m.destination.clone(), - r#type: "bind".to_string(), - source: mount_result.guest_path, - options: m.options.clone(), - }); + // set mount for the volume + volume.mounts.push(oci::Mount { + destination: m.destination.clone(), + r#type: "bind".to_string(), + source: mount_result.guest_path, + options: m.options.clone(), + }); + } } } Ok(volume) diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container_inner.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container_inner.rs index c809213360..88f7d7ab71 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container_inner.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/container_inner.rs @@ -180,6 +180,8 @@ impl ContainerInner { } })?; + // TODO(justxuewei): clean mount + // close the exit channel to wakeup wait service // send to notify watchers who are waiting for the process exit self.init_process.stop().await;