runtime-rs: special case emptyDirs with peer pods

Peer pods don't support fs sharing, so we need to be careful about removing
disable_guest_empty_dir for them (it is set to false for peer pods today, which
I missed in my previous PR).

So we preserve the disable_guest_empty_dir=false behavior for peer pods only
(i.e. using guest-local mounts), but we detect the need for guest-local mounts
directly in code instead of using a config flag.

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
This commit is contained in:
Aurélien Bombo
2026-06-25 15:14:19 -05:00
parent b20f974ddd
commit b1e6b9449d
4 changed files with 34 additions and 16 deletions

View File

@@ -268,11 +268,6 @@ static_sandbox_resource_mgmt = true
#
vfio_mode = "@DEFVFIOMODE@"
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
# Note: the remote hypervisor does not support sharing emptyDir mounts from host to guest.
disable_guest_empty_dir = false
# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production,
# they may break compatibility, and are prepared for a big version bump.

View File

@@ -494,6 +494,7 @@ impl ResourceManagerInner {
sid: &self.sid,
agent: self.agent.clone(),
emptydir_mode: &self.toml_config.runtime.emptydir_mode,
fs_sharing_supported: self.hypervisor.capabilities().await?.is_fs_sharing_supported(),
};
self.volume_resource.handler_volumes(&ctx, cid, spec).await
}

View File

@@ -11,7 +11,7 @@ use super::Volume;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::{get_mount_path, get_mount_type};
use kata_sys_util::mount::get_mount_path;
use kata_types::mount::KATA_K8S_LOCAL_STORAGE_TYPE;
use nix::sys::stat::stat;
use oci_spec::runtime as oci;
@@ -121,7 +121,3 @@ impl Volume for LocalStorage {
Ok(None)
}
}
/// Reports whether the mount `m` is a local-storage volume.
///
/// The mount's type is obtained through `get_mount_type` and compared
/// against `KATA_K8S_LOCAL_STORAGE_TYPE`; the result is `true` only on an
/// exact match.
pub(crate) fn is_local_volume(m: &oci::Mount) -> bool {
    let mount_type = get_mount_type(m);
    mount_type.as_str() == KATA_K8S_LOCAL_STORAGE_TYPE
}

View File

@@ -26,7 +26,8 @@ use agent::Agent;
use anyhow::{Context, Result};
use async_trait::async_trait;
use hypervisor::device::device_manager::DeviceManager;
use kata_sys_util::mount::get_mount_options;
use kata_sys_util::{k8s::is_disk_empty_dir, mount::get_mount_options};
use kata_types::config::EMPTYDIR_MODE_BLOCK_ENCRYPTED;
use oci_spec::runtime as oci;
use tokio::sync::RwLock;
@@ -38,6 +39,7 @@ pub struct VolumeContext<'a> {
pub sid: &'a str,
pub agent: Arc<dyn Agent>,
pub emptydir_mode: &'a str,
pub fs_sharing_supported: bool,
}
#[async_trait]
@@ -82,6 +84,7 @@ impl VolumeResource {
let d = ctx.d;
let sid = ctx.sid;
let emptydir_mode = ctx.emptydir_mode;
let fs_sharing_supported = ctx.fs_sharing_supported;
let mut volumes: Vec<Arc<dyn Volume>> = vec![];
let oci_mounts = &spec.mounts().clone().unwrap_or_default();
info!(sl!(), " oci mount is : {:?}", oci_mounts.clone());
@@ -93,11 +96,6 @@ impl VolumeResource {
shm_volume::ShmVolume::new(m)
.with_context(|| format!("new shm volume {m:?}"))?,
)
} else if local_volume::is_local_volume(m) {
Arc::new(
local_volume::LocalStorage::new(m, sid, cid)
.with_context(|| format!("new local volume {m:?}"))?,
)
} else if ephemeral_volume::is_ephemeral_volume(m) {
Arc::new(
ephemeral_volume::EphemeralVolume::new(m)
@@ -112,6 +110,17 @@ impl VolumeResource {
inner.ephemeral_disks.push(vol.disk_info);
drop(inner);
vol_arc
} else if need_local_volume(m, fs_sharing_supported, emptydir_mode) {
// This branch comes after the is_encrypted_emptydir_volume() branch
// to ensure encrypted handling takes precedence.
warn!(
sl!(),
"handling emptyDir as guest-local volume because fs sharing is unsupported; Kubelet cannot enforce sizeLimit-based eviction",
);
Arc::new(
local_volume::LocalStorage::new(m, sid, cid)
.with_context(|| format!("new local volume {m:?}"))?,
)
} else if is_block_volume(m) {
// handle block volume
Arc::new(
@@ -204,6 +213,23 @@ impl VolumeResource {
}
}
/// Decides whether the mount `m` has to be set up as a local volume, meaning
/// it is created inside the guest rather than shared from the host.
///
/// The answer is `true` only when all of the following hold:
/// - the hypervisor does not support fs sharing (e.g. peer pods),
/// - `emptydir_mode` is not `EMPTYDIR_MODE_BLOCK_ENCRYPTED`, and
/// - the mount has a source path that `is_disk_empty_dir` accepts as a
///   disk-backed emptyDir.
///
/// Limitation: Local volumes cannot be managed by Kubelet and hence may
/// starve the host storage.
fn need_local_volume(m: &oci::Mount, fs_sharing_supported: bool, emptydir_mode: &str) -> bool {
    // When the host can share the filesystem, a guest-local volume is never
    // required.
    if fs_sharing_supported {
        return false;
    }

    // The block-encrypted emptyDir mode is excluded from guest-local handling.
    if emptydir_mode == EMPTYDIR_MODE_BLOCK_ENCRYPTED {
        return false;
    }

    // A mount without a source cannot be a disk-backed emptyDir.
    match m.source() {
        Some(source) => is_disk_empty_dir(&source.display().to_string()),
        None => false,
    }
}
fn is_skip_volume(_m: &oci::Mount) -> bool {
// TODO: support volume check
false