From 6e5f3cbbeb5345fa6ba187f679c2d6c40ca926cf Mon Sep 17 00:00:00 2001 From: Fupan Li Date: Fri, 28 Mar 2025 11:46:38 +0800 Subject: [PATCH] runtime-rs: add the ephemeral memory based volume support For k8s, there are two types of volumes based on ephemeral memory: one is the emptydir volume based on ephemeral memory, and the other one is used for shm devices such as /dev/shm. Thus add a new volume type, the ephemeral volume, to support those two types of volumes and remove the legacy shm volume. Signed-off-by: Fupan Li --- src/libs/kata-sys-util/src/k8s.rs | 38 +++--- src/libs/kata-types/src/mount.rs | 2 +- .../resource/src/volume/ephemeral_volume.rs | 114 +++++++++++++++++ .../crates/resource/src/volume/mod.rs | 7 +- .../crates/resource/src/volume/shm_volume.rs | 119 ------------------ 5 files changed, 141 insertions(+), 139 deletions(-) create mode 100644 src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs delete mode 100644 src/runtime-rs/crates/resource/src/volume/shm_volume.rs diff --git a/src/libs/kata-sys-util/src/k8s.rs b/src/libs/kata-sys-util/src/k8s.rs index 46450accef..9494d8a04e 100644 --- a/src/libs/kata-sys-util/src/k8s.rs +++ b/src/libs/kata-sys-util/src/k8s.rs @@ -10,27 +10,34 @@ //! to detect K8S EmptyDir medium type from `oci::spec::Mount` objects. use kata_types::mount; -use oci_spec::runtime::Spec; +use oci_spec::runtime::{Mount, Spec}; +use std::path::Path; use crate::mount::get_linux_mount_info; pub use kata_types::k8s::is_empty_dir; -/// Check whether the given path is a kubernetes ephemeral volume. +/// Check whether a given volume is an ephemeral volume. /// -/// This method depends on a specific path used by k8s to detect if it's type of ephemeral. -/// As of now, this is a very k8s specific solution that works but in future there should be a -/// better way for this method to determine if the path is for ephemeral volume type. 
-pub fn is_ephemeral_volume(path: &str) -> bool { - if is_empty_dir(path) { - if let Ok(info) = get_linux_mount_info(path) { - if info.fs_type == "tmpfs" { - return true; - } - } - } +/// For k8s, there are generally two types of ephemeral volumes: one is the +/// volume used as /dev/shm of the container, and the other is the +/// emptydir volume based on the memory type. Both types of volumes +/// are based on tmpfs mount volumes, so we classify them as ephemeral +/// volumes and can be setup in the guest; For the other volume based on tmpfs +/// which would contain some initial files we cound't deal them as ephemeral and +/// should be passed using share fs. +pub fn is_ephemeral_volume(mount: &Mount) -> bool { + matches!( + ( + mount.typ().as_deref(), + mount.source().as_deref().and_then(|s| s.to_str()), + mount.destination(), - false + ), + (Some("bind"), Some(source), dest) if get_linux_mount_info(source) + .map_or(false, |info| info.fs_type == "tmpfs") && + (is_empty_dir(source) || dest.as_path() == Path::new("/dev/shm")) + ) } /// Check whether the given path is a kubernetes empty-dir volume of medium "default". @@ -65,7 +72,8 @@ pub fn update_ephemeral_storage_type(oci_spec: &mut Spec) { if let Some(source) = &m.source() { let mnt_src = &source.display().to_string(); - if is_ephemeral_volume(mnt_src) { + //here we only care about the "bind" mount volume. + if is_ephemeral_volume(m) { m.set_typ(Some(String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE))); } else if is_host_empty_dir(mnt_src) { // FIXME support disable_guest_empty_dir diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index b1d530467b..6e4423a16d 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -19,7 +19,7 @@ pub const KATA_GUEST_MOUNT_PREFIX: &str = "kata:guest-mount:"; /// The sharedfs volume is mounted by guest OS before starting the kata-agent. 
pub const KATA_SHAREDFS_GUEST_PREMOUNT_TAG: &str = "kataShared"; -/// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers. +/// KATA_EPHEMERAL_VOLUME_TYPE creates a tmpfs backed volume for sharing files between containers. pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral"; /// KATA_HOST_DIR_TYPE use for host empty dir diff --git a/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs b/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs new file mode 100644 index 0000000000..d09ea372b0 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs @@ -0,0 +1,114 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::{Path, PathBuf}; + +use super::Volume; +use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::device::device_manager::DeviceManager; +use kata_sys_util::mount::{get_mount_path, get_mount_type}; +use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE; +use nix::sys::stat::stat; +use oci_spec::runtime as oci; +use tokio::sync::RwLock; + +#[derive(Debug)] +pub(crate) struct EphemeralVolume { + mount: oci::Mount, + storage: Option, +} + +impl EphemeralVolume { + pub(crate) fn new(m: &oci::Mount) -> Result { + if m.source().is_none() { + return Err(anyhow!(format!( + "got a wrong volume without source: {:?}", + m + ))); + } + + // refer to the golang `handleEphemeralStorage` code at + // https://github.com/kata-containers/kata-containers/blob/9516286f6dd5cfd6b138810e5d7c9e01cf6fc043/src/runtime/virtcontainers/kata_agent.go#L1354 + + let source = &get_mount_path(m.source()); + let file_stat = + stat(Path::new(source)).with_context(|| format!("mount source {}", source))?; + + // if volume's gid isn't root group(default group), this means there's + // an specific fsGroup is set on this local volume, then it 
should pass + // to guest. + let dir_options = if file_stat.st_gid != 0 { + vec![format!("fsgid={}", file_stat.st_gid)] + } else { + vec![] + }; + + let file_name = Path::new(source) + .file_name() + .context(format!("get file name from {:?}", &m.source()))?; + let source = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR) + .join(KATA_EPHEMERAL_VOLUME_TYPE) + .join(file_name) + .into_os_string() + .into_string() + .map_err(|e| anyhow!("failed to get ephemeral path {:?}", e))?; + + // Create a storage struct so that kata agent is able to create + // tmpfs backed volume inside the VM + let ephemeral_storage = agent::Storage { + driver: String::from(KATA_EPHEMERAL_VOLUME_TYPE), + driver_options: Vec::new(), + source: String::from("tmpfs"), + fs_type: String::from("tmpfs"), + fs_group: None, + options: dir_options, + mount_point: source.clone(), + }; + + let mut mount = oci::Mount::default(); + mount.set_destination(m.destination().clone()); + mount.set_typ(Some("bind".to_string())); + mount.set_source(Some(PathBuf::from(&source))); + mount.set_options(Some(vec!["rbind".to_string()])); + + Ok(Self { + mount, + storage: Some(ephemeral_storage), + }) + } +} + +#[async_trait] +impl Volume for EphemeralVolume { + fn get_volume_mount(&self) -> anyhow::Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + let s = if let Some(s) = self.storage.as_ref() { + vec![s.clone()] + } else { + vec![] + }; + Ok(s) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + // TODO: Clean up EphemeralVolume + warn!(sl!(), "Cleaning up EphemeralVolume is still unimplemented."); + Ok(()) + } + + fn get_device_id(&self) -> Result> { + Ok(None) + } +} + +pub(crate) fn is_ephemeral_volume(m: &oci::Mount) -> bool { + get_mount_type(m).as_str() == KATA_EPHEMERAL_VOLUME_TYPE +} diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs index a5fba95c06..fc3afd39c8 100644 --- 
a/src/runtime-rs/crates/resource/src/volume/mod.rs +++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -6,9 +6,9 @@ mod block_volume; mod default_volume; +mod ephemeral_volume; pub mod hugepage; mod share_fs_volume; -mod shm_volume; pub mod utils; pub mod direct_volume; @@ -67,10 +67,9 @@ impl VolumeResource { // handle mounts for m in oci_mounts { let read_only = get_mount_options(m.options()).iter().any(|opt| opt == "ro"); - let volume: Arc = if shm_volume::is_shm_volume(m) { - let shm_size = shm_volume::DEFAULT_SHM_SIZE; + let volume: Arc = if ephemeral_volume::is_ephemeral_volume(m) { Arc::new( - shm_volume::ShmVolume::new(m, shm_size) + ephemeral_volume::EphemeralVolume::new(m) .with_context(|| format!("new shm volume {:?}", m))?, ) } else if is_block_volume(m) { diff --git a/src/runtime-rs/crates/resource/src/volume/shm_volume.rs b/src/runtime-rs/crates/resource/src/volume/shm_volume.rs deleted file mode 100644 index 54b99e5186..0000000000 --- a/src/runtime-rs/crates/resource/src/volume/shm_volume.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2019-2022 Alibaba Cloud -// Copyright (c) 2019-2022 Ant Group -// -// SPDX-License-Identifier: Apache-2.0 -// - -use std::path::{Path, PathBuf}; - -use super::Volume; -use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR; -use anyhow::Result; -use async_trait::async_trait; -use hypervisor::device::device_manager::DeviceManager; -use kata_sys_util::mount::{get_mount_path, get_mount_type}; -use oci_spec::runtime as oci; -use tokio::sync::RwLock; - -pub const SHM_DIR: &str = "shm"; -// DEFAULT_SHM_SIZE is the default shm size to be used in case host -// IPC is used. -pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024; - -// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers. 
-pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral"; - -#[derive(Debug)] -pub(crate) struct ShmVolume { - mount: oci::Mount, - storage: Option, -} - -impl ShmVolume { - pub(crate) fn new(m: &oci::Mount, shm_size: u64) -> Result { - let (storage, mount) = if shm_size > 0 { - // storage - let mount_path = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR); - let mount_path = mount_path.to_str().unwrap(); - let option = format!("size={}", shm_size); - - let options = vec![ - String::from("noexec"), - String::from("nosuid"), - String::from("nodev"), - String::from("mode=1777"), - option, - ]; - - let storage = agent::Storage { - driver: String::from(KATA_EPHEMERAL_DEV_TYPE), - driver_options: Vec::new(), - source: String::from("shm"), - fs_type: String::from("tmpfs"), - fs_group: None, - options, - mount_point: mount_path.to_string(), - }; - - let mut oci_mount = oci::Mount::default(); - oci_mount.set_destination(m.destination().clone()); - oci_mount.set_typ(Some("bind".to_string())); - oci_mount.set_source(Some(PathBuf::from(&mount_path))); - oci_mount.set_options(Some(vec!["rbind".to_string()])); - - (Some(storage), oci_mount) - } else { - let mut oci_mount = oci::Mount::default(); - oci_mount.set_destination(m.destination().clone()); - oci_mount.set_typ(Some("tmpfs".to_string())); - oci_mount.set_source(Some(PathBuf::from("shm"))); - oci_mount.set_options(Some( - [ - "noexec", - "nosuid", - "nodev", - "mode=1777", - &format!("size={}", DEFAULT_SHM_SIZE), - ] - .iter() - .map(|s| s.to_string()) - .collect(), - )); - - (None, oci_mount) - }; - - Ok(Self { storage, mount }) - } -} - -#[async_trait] -impl Volume for ShmVolume { - fn get_volume_mount(&self) -> anyhow::Result> { - Ok(vec![self.mount.clone()]) - } - - fn get_storage(&self) -> Result> { - let s = if let Some(s) = self.storage.as_ref() { - vec![s.clone()] - } else { - vec![] - }; - Ok(s) - } - - async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { - // TODO: Clean up ShmVolume - 
warn!(sl!(), "Cleaning up ShmVolume is still unimplemented."); - Ok(()) - } - - fn get_device_id(&self) -> Result> { - Ok(None) - } -} - -pub(crate) fn is_shm_volume(m: &oci::Mount) -> bool { - get_mount_path(&Some(m.destination().clone())).as_str() == "/dev/shm" - && get_mount_type(m).as_str() != KATA_EPHEMERAL_DEV_TYPE -}