diff --git a/src/libs/kata-sys-util/src/k8s.rs b/src/libs/kata-sys-util/src/k8s.rs index 46450accef..9494d8a04e 100644 --- a/src/libs/kata-sys-util/src/k8s.rs +++ b/src/libs/kata-sys-util/src/k8s.rs @@ -10,27 +10,34 @@ //! to detect K8S EmptyDir medium type from `oci::spec::Mount` objects. use kata_types::mount; -use oci_spec::runtime::Spec; +use oci_spec::runtime::{Mount, Spec}; +use std::path::Path; use crate::mount::get_linux_mount_info; pub use kata_types::k8s::is_empty_dir; -/// Check whether the given path is a kubernetes ephemeral volume. +/// Check whether a given volume is an ephemeral volume. /// -/// This method depends on a specific path used by k8s to detect if it's type of ephemeral. -/// As of now, this is a very k8s specific solution that works but in future there should be a -/// better way for this method to determine if the path is for ephemeral volume type. -pub fn is_ephemeral_volume(path: &str) -> bool { - if is_empty_dir(path) { - if let Ok(info) = get_linux_mount_info(path) { - if info.fs_type == "tmpfs" { - return true; - } - } - } +/// For k8s, there are generally two types of ephemeral volumes: one is the +/// volume used as /dev/shm of the container, and the other is the +/// emptydir volume based on the memory type. Both types of volumes +/// are based on tmpfs mount volumes, so we classify them as ephemeral +/// volumes that can be set up in the guest. Other tmpfs-based volumes, which +/// may contain some initial files, cannot be treated as ephemeral and +/// should be passed using share fs. 
+pub fn is_ephemeral_volume(mount: &Mount) -> bool { + matches!( + ( + mount.typ().as_deref(), + mount.source().as_deref().and_then(|s| s.to_str()), + mount.destination(), - false + ), + (Some("bind"), Some(source), dest) if get_linux_mount_info(source) + .map_or(false, |info| info.fs_type == "tmpfs") && + (is_empty_dir(source) || dest.as_path() == Path::new("/dev/shm")) + ) } /// Check whether the given path is a kubernetes empty-dir volume of medium "default". @@ -65,7 +72,8 @@ pub fn update_ephemeral_storage_type(oci_spec: &mut Spec) { if let Some(source) = &m.source() { let mnt_src = &source.display().to_string(); - if is_ephemeral_volume(mnt_src) { + // Here we only care about "bind" mount volumes. + if is_ephemeral_volume(m) { m.set_typ(Some(String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE))); } else if is_host_empty_dir(mnt_src) { // FIXME support disable_guest_empty_dir diff --git a/src/libs/kata-types/src/mount.rs b/src/libs/kata-types/src/mount.rs index b1d530467b..6e4423a16d 100644 --- a/src/libs/kata-types/src/mount.rs +++ b/src/libs/kata-types/src/mount.rs @@ -19,7 +19,7 @@ pub const KATA_GUEST_MOUNT_PREFIX: &str = "kata:guest-mount:"; /// The sharedfs volume is mounted by guest OS before starting the kata-agent. pub const KATA_SHAREDFS_GUEST_PREMOUNT_TAG: &str = "kataShared"; -/// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers. +/// KATA_EPHEMERAL_VOLUME_TYPE creates a tmpfs backed volume for sharing files between containers. 
pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral"; /// KATA_HOST_DIR_TYPE use for host empty dir diff --git a/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs b/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs new file mode 100644 index 0000000000..d09ea372b0 --- /dev/null +++ b/src/runtime-rs/crates/resource/src/volume/ephemeral_volume.rs @@ -0,0 +1,114 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::path::{Path, PathBuf}; + +use super::Volume; +use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR; +use anyhow::{anyhow, Context, Result}; +use async_trait::async_trait; +use hypervisor::device::device_manager::DeviceManager; +use kata_sys_util::mount::{get_mount_path, get_mount_type}; +use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE; +use nix::sys::stat::stat; +use oci_spec::runtime as oci; +use tokio::sync::RwLock; + +#[derive(Debug)] +pub(crate) struct EphemeralVolume { + mount: oci::Mount, + storage: Option, +} + +impl EphemeralVolume { + pub(crate) fn new(m: &oci::Mount) -> Result { + if m.source().is_none() { + return Err(anyhow!(format!( + "got a wrong volume without source: {:?}", + m + ))); + } + + // refer to the golang `handleEphemeralStorage` code at + // https://github.com/kata-containers/kata-containers/blob/9516286f6dd5cfd6b138810e5d7c9e01cf6fc043/src/runtime/virtcontainers/kata_agent.go#L1354 + + let source = &get_mount_path(m.source()); + let file_stat = + stat(Path::new(source)).with_context(|| format!("mount source {}", source))?; + + // If the volume's gid isn't the root group (the default group), a + // specific fsGroup has been set on this local volume, so it should be + // passed to the guest. 
+ let dir_options = if file_stat.st_gid != 0 { + vec![format!("fsgid={}", file_stat.st_gid)] + } else { + vec![] + }; + + let file_name = Path::new(source) + .file_name() + .context(format!("get file name from {:?}", &m.source()))?; + let source = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR) + .join(KATA_EPHEMERAL_VOLUME_TYPE) + .join(file_name) + .into_os_string() + .into_string() + .map_err(|e| anyhow!("failed to get ephemeral path {:?}", e))?; + + // Create a storage struct so that kata agent is able to create + // tmpfs backed volume inside the VM + let ephemeral_storage = agent::Storage { + driver: String::from(KATA_EPHEMERAL_VOLUME_TYPE), + driver_options: Vec::new(), + source: String::from("tmpfs"), + fs_type: String::from("tmpfs"), + fs_group: None, + options: dir_options, + mount_point: source.clone(), + }; + + let mut mount = oci::Mount::default(); + mount.set_destination(m.destination().clone()); + mount.set_typ(Some("bind".to_string())); + mount.set_source(Some(PathBuf::from(&source))); + mount.set_options(Some(vec!["rbind".to_string()])); + + Ok(Self { + mount, + storage: Some(ephemeral_storage), + }) + } +} + +#[async_trait] +impl Volume for EphemeralVolume { + fn get_volume_mount(&self) -> anyhow::Result> { + Ok(vec![self.mount.clone()]) + } + + fn get_storage(&self) -> Result> { + let s = if let Some(s) = self.storage.as_ref() { + vec![s.clone()] + } else { + vec![] + }; + Ok(s) + } + + async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { + // TODO: Clean up EphemeralVolume + warn!(sl!(), "Cleaning up EphemeralVolume is still unimplemented."); + Ok(()) + } + + fn get_device_id(&self) -> Result> { + Ok(None) + } +} + +pub(crate) fn is_ephemeral_volume(m: &oci::Mount) -> bool { + get_mount_type(m).as_str() == KATA_EPHEMERAL_VOLUME_TYPE +} diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs index a5fba95c06..fc3afd39c8 100644 --- a/src/runtime-rs/crates/resource/src/volume/mod.rs 
+++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -6,9 +6,9 @@ mod block_volume; mod default_volume; +mod ephemeral_volume; pub mod hugepage; mod share_fs_volume; -mod shm_volume; pub mod utils; pub mod direct_volume; @@ -67,10 +67,9 @@ impl VolumeResource { // handle mounts for m in oci_mounts { let read_only = get_mount_options(m.options()).iter().any(|opt| opt == "ro"); - let volume: Arc = if shm_volume::is_shm_volume(m) { - let shm_size = shm_volume::DEFAULT_SHM_SIZE; + let volume: Arc = if ephemeral_volume::is_ephemeral_volume(m) { Arc::new( - shm_volume::ShmVolume::new(m, shm_size) + ephemeral_volume::EphemeralVolume::new(m) .with_context(|| format!("new shm volume {:?}", m))?, ) } else if is_block_volume(m) { diff --git a/src/runtime-rs/crates/resource/src/volume/shm_volume.rs b/src/runtime-rs/crates/resource/src/volume/shm_volume.rs deleted file mode 100644 index 54b99e5186..0000000000 --- a/src/runtime-rs/crates/resource/src/volume/shm_volume.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2019-2022 Alibaba Cloud -// Copyright (c) 2019-2022 Ant Group -// -// SPDX-License-Identifier: Apache-2.0 -// - -use std::path::{Path, PathBuf}; - -use super::Volume; -use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR; -use anyhow::Result; -use async_trait::async_trait; -use hypervisor::device::device_manager::DeviceManager; -use kata_sys_util::mount::{get_mount_path, get_mount_type}; -use oci_spec::runtime as oci; -use tokio::sync::RwLock; - -pub const SHM_DIR: &str = "shm"; -// DEFAULT_SHM_SIZE is the default shm size to be used in case host -// IPC is used. -pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024; - -// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers. 
-pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral"; - -#[derive(Debug)] -pub(crate) struct ShmVolume { - mount: oci::Mount, - storage: Option, -} - -impl ShmVolume { - pub(crate) fn new(m: &oci::Mount, shm_size: u64) -> Result { - let (storage, mount) = if shm_size > 0 { - // storage - let mount_path = Path::new(DEFAULT_KATA_GUEST_SANDBOX_DIR).join(SHM_DIR); - let mount_path = mount_path.to_str().unwrap(); - let option = format!("size={}", shm_size); - - let options = vec![ - String::from("noexec"), - String::from("nosuid"), - String::from("nodev"), - String::from("mode=1777"), - option, - ]; - - let storage = agent::Storage { - driver: String::from(KATA_EPHEMERAL_DEV_TYPE), - driver_options: Vec::new(), - source: String::from("shm"), - fs_type: String::from("tmpfs"), - fs_group: None, - options, - mount_point: mount_path.to_string(), - }; - - let mut oci_mount = oci::Mount::default(); - oci_mount.set_destination(m.destination().clone()); - oci_mount.set_typ(Some("bind".to_string())); - oci_mount.set_source(Some(PathBuf::from(&mount_path))); - oci_mount.set_options(Some(vec!["rbind".to_string()])); - - (Some(storage), oci_mount) - } else { - let mut oci_mount = oci::Mount::default(); - oci_mount.set_destination(m.destination().clone()); - oci_mount.set_typ(Some("tmpfs".to_string())); - oci_mount.set_source(Some(PathBuf::from("shm"))); - oci_mount.set_options(Some( - [ - "noexec", - "nosuid", - "nodev", - "mode=1777", - &format!("size={}", DEFAULT_SHM_SIZE), - ] - .iter() - .map(|s| s.to_string()) - .collect(), - )); - - (None, oci_mount) - }; - - Ok(Self { storage, mount }) - } -} - -#[async_trait] -impl Volume for ShmVolume { - fn get_volume_mount(&self) -> anyhow::Result> { - Ok(vec![self.mount.clone()]) - } - - fn get_storage(&self) -> Result> { - let s = if let Some(s) = self.storage.as_ref() { - vec![s.clone()] - } else { - vec![] - }; - Ok(s) - } - - async fn cleanup(&self, _device_manager: &RwLock) -> Result<()> { - // TODO: Clean up ShmVolume - 
warn!(sl!(), "Cleaning up ShmVolume is still unimplemented."); - Ok(()) - } - - fn get_device_id(&self) -> Result> { - Ok(None) - } -} - -pub(crate) fn is_shm_volume(m: &oci::Mount) -> bool { - get_mount_path(&Some(m.destination().clone())).as_str() == "/dev/shm" - && get_mount_type(m).as_str() != KATA_EPHEMERAL_DEV_TYPE -}