runtime-go/rs: Reconcile hugepage emptyDirs and disable_guest_empty_dir

This addresses an issue where the disable_guest_empty_dir=true code paths did
not take into account that hugepage-backed emptyDirs should always be recreated
in the guest (using guest hugepages).

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
This commit is contained in:
Aurélien Bombo
2026-06-24 04:58:52 -05:00
parent a3e91d9ed2
commit e191c5b716
7 changed files with 88 additions and 54 deletions

View File

@@ -16,16 +16,8 @@ use crate::mount::get_linux_mount_info;
pub use kata_types::k8s::is_empty_dir;
/// Check whether a given volume is an ephemeral volume.
///
/// For k8s, there are generally two types of ephemeral volumes: one is the
/// volume used as /dev/shm of the container, and the other is the
/// emptydir volume based on the memory type. Both types of volumes
/// are based on tmpfs mount volumes, so we classify them as ephemeral
/// volumes and can be setup in the guest; For the other volume based on tmpfs
/// which would contain some initial files we cound't deal them as ephemeral and
/// should be passed using share fs.
pub fn is_ephemeral_volume(mount: &Mount) -> bool {
/// Returns true for tmpfs-backed emptyDirs (medium: Memory).
pub fn is_tmpfs_empty_dir(mount: &Mount) -> bool {
matches!(
(
mount.typ().as_deref(),
@@ -33,15 +25,18 @@ pub fn is_ephemeral_volume(mount: &Mount) -> bool {
mount.destination(),
),
(Some("bind"), Some(source), _dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") &&
is_empty_dir(source))
(
Some("bind"),
Some(source),
_dest,
)
if is_empty_dir(source) && get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs")
)
}
/// Check whether the given path is a kubernetes empty-dir volume of medium "default".
///
/// K8s `EmptyDir` volumes are directories on the host. If the fs type is tmpfs, it's a ephemeral
/// volume instead of a `EmptyDir` volume.
pub fn is_host_empty_dir(path: &str) -> bool {
/// Returns true for non-tmpfs-backed emptyDirs.
/// This includes disk-backed (medium: "", default) and hugepage-backed (medium: HugePages).
pub fn is_non_tmpfs_empty_dir(path: &str) -> bool {
if !is_empty_dir(path) {
return false;
}
@@ -53,6 +48,16 @@ pub fn is_host_empty_dir(path: &str) -> bool {
}
}
/// Returns true for hugepage-backed emptyDirs (medium: HugePages).
///
/// A path qualifies when `is_empty_dir` accepts it and the mount information
/// looked up for it reports the `hugetlbfs` filesystem type. If the mount
/// information cannot be retrieved, the path is not considered
/// hugepage-backed and `false` is returned.
pub fn is_hugepage_empty_dir(path: &str) -> bool {
    // Not an emptyDir at all: skip the mount lookup entirely.
    if !is_empty_dir(path) {
        return false;
    }

    matches!(get_linux_mount_info(path), Ok(info) if info.fs_type == "hugetlbfs")
}
/// Returns true for disk-backed emptyDirs (medium: "", default).
///
/// A disk-backed emptyDir is a non-tmpfs emptyDir that is not backed by
/// hugepages, i.e. what remains of `is_non_tmpfs_empty_dir` once the
/// `is_hugepage_empty_dir` case has been excluded.
pub fn is_disk_empty_dir(path: &str) -> bool {
    // Anything that is not a non-tmpfs emptyDir cannot be disk-backed.
    if !is_non_tmpfs_empty_dir(path) {
        return false;
    }

    // Among non-tmpfs emptyDirs, only the hugepage-backed ones are excluded.
    !is_hugepage_empty_dir(path)
}
// update_ephemeral_storage_type sets the mount type to 'ephemeral'
// if the mount source path is provisioned by k8s for ephemeral storage.
// For the given pod ephemeral volume is created only once
@@ -75,17 +80,31 @@ pub fn update_ephemeral_storage_type(
if let Some(source) = &m.source() {
let mnt_src = &source.display().to_string();
if is_ephemeral_volume(m) {
if is_tmpfs_empty_dir(m) {
m.set_typ(Some(String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE)));
}
// When block-encrypted mode is active, host emptyDirs must
// stay as "bind" so the EncryptedEmptyDirVolume handler can
// intercept them in the volume dispatch chain.
if is_host_empty_dir(mnt_src)
&& !disable_guest_empty_dir
&& emptydir_mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED
{
m.set_typ(Some(mount::KATA_K8S_LOCAL_STORAGE_TYPE.to_string()));
} else if is_non_tmpfs_empty_dir(mnt_src) {
// Among non-tmpfs emptyDirs:
// * For hugepage-backed emptyDirs, do nothing here
// and offload to the later HugePage handler.
// Contrary to runtime-go, adding the LOCAL type
// here would wrongly circumvent the HugePage
// handler.
// * For disk-backed emptyDirs, instead of adding
// the LOCAL type here, we'll do this down the
// line:
// - disable_guest_empty_dir=true: FS sharing.
// - emptyDirMode=block-encrypted: Leverage the
// EncryptedEmptyDirVolume handler.
if is_hugepage_empty_dir(mnt_src) {
// No-op as explained above. Keeping this branch
// for now for clarity and easier comparison
// with runtime-go.
} else if !disable_guest_empty_dir
&& emptydir_mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED
{
// This is a disk-backed emptyDir.
m.set_typ(Some(String::from(mount::KATA_K8S_LOCAL_STORAGE_TYPE)));
}
}
}
}

View File

@@ -18,7 +18,7 @@ use hypervisor::{
},
BlockConfigModern, BlockDeviceAio,
};
use kata_sys_util::k8s::is_host_empty_dir;
use kata_sys_util::k8s::is_disk_empty_dir;
use kata_types::config::EMPTYDIR_MODE_BLOCK_ENCRYPTED;
use kata_types::mount::DEFAULT_KATA_GUEST_SANDBOX_DIR;
use kata_types::mount::{add_volume_mount_info, is_volume_mounted, DirectVolumeMountInfo};
@@ -206,7 +206,7 @@ pub(crate) fn is_encrypted_emptydir_volume(m: &oci::Mount, emptydir_mode: &str)
return false;
}
match m.source() {
Some(src) => is_host_empty_dir(&src.display().to_string()),
Some(src) => is_disk_empty_dir(&src.display().to_string()),
None => false,
}
}

View File

@@ -101,11 +101,19 @@ func HandleFactory(ctx context.Context, vci vc.VC, runtimeConfig *oci.RuntimeCon
// of the same pod the already existing volume is reused.
func SetEphemeralStorageType(ociSpec specs.Spec, disableGuestEmptyDir bool, emptyDirMode string) specs.Spec {
for idx, mnt := range ociSpec.Mounts {
if vc.IsEphemeralStorage(mnt.Source) {
if vc.IsTmpFSEmptyDir(mnt.Source) {
ociSpec.Mounts[idx].Type = vc.KataEphemeralDevType
}
if vc.Isk8sHostEmptyDir(mnt.Source) && !disableGuestEmptyDir && emptyDirMode != vc.EmptyDirModeVirtioBlkEncrypted {
ociSpec.Mounts[idx].Type = vc.KataLocalDevType
} else if vc.IsNonTmpFSEmptyDir(mnt.Source) {
// Among non-tmpfs emptyDirs:
// * Only hugepage-backed emptyDirs should always be
// local to the guest and recreated inside it.
// * For disk-backed emptyDirs, that decision is driven by
// disableGuestEmptyDir and emptyDirMode.
if vc.IsHugePageEmptyDir(mnt.Source) {
ociSpec.Mounts[idx].Type = vc.KataLocalDevType
} else if !disableGuestEmptyDir && emptyDirMode != vc.EmptyDirModeVirtioBlkEncrypted {
ociSpec.Mounts[idx].Type = vc.KataLocalDevType
}
}
}
return ociSpec

View File

@@ -624,7 +624,7 @@ func (c *Container) createBlockDevices(ctx context.Context) error {
// If block devices are disabled, we selectively only hotplug if
// the mount is an encrypted block-based emptyDir, to avoid
// cases that could regress 20ca4d2.
if !c.checkBlockDeviceSupport(ctx) && (c.sandbox.config.EmptyDirMode != EmptyDirModeVirtioBlkEncrypted || !Isk8sHostEmptyDir(c.mounts[i].Source)) {
if !c.checkBlockDeviceSupport(ctx) && (c.sandbox.config.EmptyDirMode != EmptyDirModeVirtioBlkEncrypted || !IsNonTmpFSEmptyDir(c.mounts[i].Source)) {
c.Logger().Warn("Block device not supported")
continue
}
@@ -883,7 +883,7 @@ func (c *Container) createEphemeralDisks() error {
}
for i := range c.mounts {
if !Isk8sHostEmptyDir(c.mounts[i].Source) {
if !IsNonTmpFSEmptyDir(c.mounts[i].Source) {
continue
}

View File

@@ -322,7 +322,7 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount)
randHex := hex.EncodeToString(randBytes)
caps := f.sandbox.hypervisor.Capabilities(ctx)
mustCopyEmptyDir := !caps.IsFsSharingSupported() && Isk8sHostEmptyDir(m.Source)
mustCopyEmptyDir := !caps.IsFsSharingSupported() && IsDiskEmptyDir(m.Source)
filename := shareFileName(c.id, m.Source, m.Destination, randHex, mustCopyEmptyDir)
guestPath := filepath.Join(kataGuestSharedDir(), filename)

View File

@@ -312,16 +312,8 @@ const (
K8sSecret = "kubernetes.io~secret"
)
// IsEphemeralStorage returns true if the given path
// to the storage belongs to kubernetes ephemeral storage
//
// This method depends on a specific path used by k8s
// to detect if it's of type ephemeral. As of now,
// this is a very k8s specific solution that works
// but in future there should be a better way for this
// method to determine if the path is for ephemeral
// volume type
func IsEphemeralStorage(path string) bool {
// IsTmpFSEmptyDir returns true for tmpfs-backed emptyDirs (medium: Memory).
func IsTmpFSEmptyDir(path string) bool {
if !isEmptyDir(path) {
return false
}
@@ -333,10 +325,9 @@ func IsEphemeralStorage(path string) bool {
return false
}
// Isk8sHostEmptyDir returns true if the given path
// to the storage belongs to kubernetes empty-dir of medium "default"
// i.e volumes that are directories on the host.
func Isk8sHostEmptyDir(path string) bool {
// IsNonTmpFSEmptyDir returns true for non-tmpfs-backed emptyDirs.
// This includes disk-backed (medium: "", default) and hugepage-backed (medium: HugePages).
func IsNonTmpFSEmptyDir(path string) bool {
if !isEmptyDir(path) {
return false
}
@@ -347,6 +338,22 @@ func Isk8sHostEmptyDir(path string) bool {
return false
}
// IsHugePageEmptyDir returns true for hugepage-backed emptyDirs (medium: HugePages).
//
// The path must be recognized by isEmptyDir, and the filesystem type reported
// for it by utils.GetDevicePathAndFsTypeOptions must be "hugetlbfs".
func IsHugePageEmptyDir(path string) bool {
	if !isEmptyDir(path) {
		return false
	}

	// Only the filesystem type matters here; the device path, the mount
	// options and the error value are deliberately discarded.
	// NOTE(review): presumably fsType is empty when the lookup fails, which
	// makes this return false - confirm against the helper.
	_, fsType, _, _ := utils.GetDevicePathAndFsTypeOptions(path)

	return fsType == "hugetlbfs"
}
// IsDiskEmptyDir returns true for disk-backed emptyDirs (medium: "", default).
//
// A disk-backed emptyDir is a non-tmpfs emptyDir (IsNonTmpFSEmptyDir) that is
// not hugepage-backed (IsHugePageEmptyDir).
func IsDiskEmptyDir(path string) bool {
	// Anything that is not a non-tmpfs emptyDir cannot be disk-backed.
	if !IsNonTmpFSEmptyDir(path) {
		return false
	}

	// Among non-tmpfs emptyDirs, only the hugepage-backed ones are excluded.
	return !IsHugePageEmptyDir(path)
}
func checkKubernetesVolume(path, volumeType string) bool {
splitSourceSlice := strings.Split(path, "/")
if len(splitSourceSlice) > 1 {

View File

@@ -40,17 +40,17 @@ func TestIsEphemeralStorage(t *testing.T) {
assert.NoError(err)
defer syscall.Unmount(sampleEphePath, 0)
isEphe := IsEphemeralStorage(sampleEphePath)
isEphe := IsTmpFSEmptyDir(sampleEphePath)
assert.True(isEphe)
isHostEmptyDir := Isk8sHostEmptyDir(sampleEphePath)
isHostEmptyDir := IsNonTmpFSEmptyDir(sampleEphePath)
assert.False(isHostEmptyDir)
sampleEphePath = "/var/lib/kubelet/pods/366c3a75-4869-11e8-b479-507b9ddd5ce4/volumes/cache-volume"
isEphe = IsEphemeralStorage(sampleEphePath)
isEphe = IsTmpFSEmptyDir(sampleEphePath)
assert.False(isEphe)
isHostEmptyDir = Isk8sHostEmptyDir(sampleEphePath)
isHostEmptyDir = IsNonTmpFSEmptyDir(sampleEphePath)
assert.False(isHostEmptyDir)
}