diff --git a/src/runtime-rs/config/configuration-remote.toml.in b/src/runtime-rs/config/configuration-remote.toml.in index 0fd48a667b..d4ed869815 100644 --- a/src/runtime-rs/config/configuration-remote.toml.in +++ b/src/runtime-rs/config/configuration-remote.toml.in @@ -268,11 +268,6 @@ static_sandbox_resource_mgmt = true # vfio_mode = "@DEFVFIOMODE@" -# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will -# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest. -# Note: remote hypervisor has no sharing of emptydir mounts from host to guest -disable_guest_empty_dir = false - # Enabled experimental feature list, format: ["a", "b"]. # Experimental features are features not stable enough for production, # they may break compatibility, and are prepared for a big version bump. diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index bc085b8bac..8b573cff70 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -494,6 +494,7 @@ impl ResourceManagerInner { sid: &self.sid, agent: self.agent.clone(), emptydir_mode: &self.toml_config.runtime.emptydir_mode, + fs_sharing_supported: self.hypervisor.capabilities().await?.is_fs_sharing_supported(), }; self.volume_resource.handler_volumes(&ctx, cid, spec).await } diff --git a/src/runtime-rs/crates/resource/src/volume/local_volume.rs b/src/runtime-rs/crates/resource/src/volume/local_volume.rs index d0f33d8428..7e1b72d485 100644 --- a/src/runtime-rs/crates/resource/src/volume/local_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/local_volume.rs @@ -11,7 +11,7 @@ use super::Volume; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; use hypervisor::device::device_manager::DeviceManager; -use kata_sys_util::mount::{get_mount_path, 
get_mount_type}; +use kata_sys_util::mount::get_mount_path; use kata_types::mount::KATA_K8S_LOCAL_STORAGE_TYPE; use nix::sys::stat::stat; use oci_spec::runtime as oci; @@ -121,7 +121,3 @@ impl Volume for LocalStorage { Ok(None) } } - -pub(crate) fn is_local_volume(m: &oci::Mount) -> bool { - get_mount_type(m).as_str() == KATA_K8S_LOCAL_STORAGE_TYPE -} diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs index bf40ed208c..abe429fed0 100644 --- a/src/runtime-rs/crates/resource/src/volume/mod.rs +++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -26,7 +26,8 @@ use agent::Agent; use anyhow::{Context, Result}; use async_trait::async_trait; use hypervisor::device::device_manager::DeviceManager; -use kata_sys_util::mount::get_mount_options; +use kata_sys_util::{k8s::is_disk_empty_dir, mount::get_mount_options}; +use kata_types::config::EMPTYDIR_MODE_BLOCK_ENCRYPTED; use oci_spec::runtime as oci; use tokio::sync::RwLock; @@ -38,6 +39,7 @@ pub struct VolumeContext<'a> { pub sid: &'a str, pub agent: Arc, pub emptydir_mode: &'a str, + pub fs_sharing_supported: bool, } #[async_trait] @@ -82,6 +84,7 @@ impl VolumeResource { let d = ctx.d; let sid = ctx.sid; let emptydir_mode = ctx.emptydir_mode; + let fs_sharing_supported = ctx.fs_sharing_supported; let mut volumes: Vec> = vec![]; let oci_mounts = &spec.mounts().clone().unwrap_or_default(); info!(sl!(), " oci mount is : {:?}", oci_mounts.clone()); @@ -93,11 +96,6 @@ impl VolumeResource { shm_volume::ShmVolume::new(m) .with_context(|| format!("new shm volume {m:?}"))?, ) - } else if local_volume::is_local_volume(m) { - Arc::new( - local_volume::LocalStorage::new(m, sid, cid) - .with_context(|| format!("new local volume {m:?}"))?, - ) } else if ephemeral_volume::is_ephemeral_volume(m) { Arc::new( ephemeral_volume::EphemeralVolume::new(m) @@ -112,6 +110,17 @@ impl VolumeResource { inner.ephemeral_disks.push(vol.disk_info); drop(inner); vol_arc + } else if 
need_local_volume(m, fs_sharing_supported, emptydir_mode) { + // This branch comes after the is_encrypted_emptydir_volume() branch + // to ensure encrypted handling takes precedence. + warn!( + sl!(), + "handling emptyDir as guest-local volume because fs sharing is unsupported; Kubelet cannot enforce sizeLimit-based eviction", + ); + Arc::new( + local_volume::LocalStorage::new(m, sid, cid) + .with_context(|| format!("new local volume {m:?}"))?, + ) } else if is_block_volume(m) { // handle block volume Arc::new( @@ -204,6 +213,23 @@ impl VolumeResource { } } +/// Indicates whether a mount needs to be a local volume, i.e. created +/// inside the guest instead of being shared from the host. +/// +/// Returns true only when the hypervisor doesn't support fs sharing +/// (e.g. peer pods), `emptydir_mode` is not block-encrypted, and the +/// mount has a source that is a disk-backed emptyDir. +/// +/// Limitation: Local volumes cannot be managed by Kubelet and hence may +/// starve the host storage. +fn need_local_volume(m: &oci::Mount, fs_sharing_supported: bool, emptydir_mode: &str) -> bool { + !fs_sharing_supported + && emptydir_mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED + && m.source() + .as_ref() + .is_some_and(|src| is_disk_empty_dir(&src.display().to_string())) +} + fn is_skip_volume(_m: &oci::Mount) -> bool { // TODO: support volume check false