From e191c5b716a1e1289b8eb86edd8726db166e1df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Bombo?= Date: Wed, 24 Jun 2026 04:58:52 -0500 Subject: [PATCH] runtime-go/rs: Reconcile hugepage emptyDirs and disable_guest_empty_dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This addresses an issue where the disable_guest_empty_dir=true code paths did not take into account that hugepage-backed emptyDirs should always be recreated in the guest (using guest hugepages). Signed-off-by: Aurélien Bombo --- src/libs/kata-sys-util/src/k8s.rs | 73 ++++++++++++------- .../src/volume/encrypted_emptydir_volume.rs | 4 +- src/runtime/pkg/katautils/create.go | 16 +++- src/runtime/virtcontainers/container.go | 4 +- src/runtime/virtcontainers/fs_share_linux.go | 2 +- src/runtime/virtcontainers/mount.go | 35 +++++---- .../virtcontainers/mount_linux_test.go | 8 +- 7 files changed, 88 insertions(+), 54 deletions(-) diff --git a/src/libs/kata-sys-util/src/k8s.rs b/src/libs/kata-sys-util/src/k8s.rs index 085a622c6f..100e021b3f 100644 --- a/src/libs/kata-sys-util/src/k8s.rs +++ b/src/libs/kata-sys-util/src/k8s.rs @@ -16,16 +16,8 @@ use crate::mount::get_linux_mount_info; pub use kata_types::k8s::is_empty_dir; -/// Check whether a given volume is an ephemeral volume. -/// -/// For k8s, there are generally two types of ephemeral volumes: one is the -/// volume used as /dev/shm of the container, and the other is the -/// emptydir volume based on the memory type. Both types of volumes -/// are based on tmpfs mount volumes, so we classify them as ephemeral -/// volumes and can be setup in the guest; For the other volume based on tmpfs -/// which would contain some initial files we cound't deal them as ephemeral and -/// should be passed using share fs. -pub fn is_ephemeral_volume(mount: &Mount) -> bool { +/// Returns true for tmpfs-backed emptyDirs (medium: Memory). +pub fn is_tmpfs_empty_dir(mount: &Mount) -> bool { matches!( ( mount.typ().as_deref(), @@ -33,15 +25,18 @@ pub fn is_ephemeral_volume(mount: &Mount) -> bool { mount.destination(), ), - (Some("bind"), Some(source), _dest) if get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") && - is_empty_dir(source)) + ( + Some("bind"), + Some(source), + _dest, + ) + if is_empty_dir(source) && get_linux_mount_info(source).is_ok_and(|info| info.fs_type == "tmpfs") + ) } -/// Check whether the given path is a kubernetes empty-dir volume of medium "default". -/// -/// K8s `EmptyDir` volumes are directories on the host. If the fs type is tmpfs, it's a ephemeral -/// volume instead of a `EmptyDir` volume. -pub fn is_host_empty_dir(path: &str) -> bool { +/// Returns true for non-tmpfs-backed emptyDirs. +/// This includes disk-backed (medium: "", default) and hugepage-backed (medium: HugePages). +pub fn is_non_tmpfs_empty_dir(path: &str) -> bool { if !is_empty_dir(path) { return false; } @@ -53,6 +48,16 @@ pub fn is_host_empty_dir(path: &str) -> bool { } } +/// Returns true for hugepage-backed emptyDirs (medium: HugePages). +pub fn is_hugepage_empty_dir(path: &str) -> bool { + is_empty_dir(path) && get_linux_mount_info(path).is_ok_and(|info| info.fs_type == "hugetlbfs") +} + +/// Returns true for disk-backed emptyDirs (medium: "", default). +pub fn is_disk_empty_dir(path: &str) -> bool { + is_non_tmpfs_empty_dir(path) && !is_hugepage_empty_dir(path) +} + // update_ephemeral_storage_type sets the mount type to 'ephemeral' // if the mount source path is provisioned by k8s for ephemeral storage. // For the given pod ephemeral volume is created only once @@ -75,17 +80,31 @@ pub fn update_ephemeral_storage_type( if let Some(source) = &m.source() { let mnt_src = &source.display().to_string(); - if is_ephemeral_volume(m) { + if is_tmpfs_empty_dir(m) { m.set_typ(Some(String::from(mount::KATA_EPHEMERAL_VOLUME_TYPE))); - } - // When block-encrypted mode is active, host emptyDirs must - // stay as "bind" so the EncryptedEmptyDirVolume handler can - // intercept them in the volume dispatch chain. - if is_host_empty_dir(mnt_src) - && !disable_guest_empty_dir - && emptydir_mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED - { - m.set_typ(Some(mount::KATA_K8S_LOCAL_STORAGE_TYPE.to_string())); + } else if is_non_tmpfs_empty_dir(mnt_src) { + // Among non-tmpfs emptyDirs: + // * For hugepage-backed emptyDirs, do nothing here + // and offload to the later HugePage handler. + // Contrary to runtime-go, adding the LOCAL type + // here would wrongly circumvent the HugePage + // handler. + // * For disk-backed emptyDirs, instead of adding + // the LOCAL type here, we'll do this down the + // line: + // - disable_guest_empty_dir=true: FS sharing. + // - emptyDirMode=block-encrypted: Leverage the + // EncryptedEmptyDirVolume handler. + if is_hugepage_empty_dir(mnt_src) { + // No-op as explained above. Keeping this branch + // for now for clarity and easier comparison + // with runtime-go. + } else if !disable_guest_empty_dir + && emptydir_mode != EMPTYDIR_MODE_BLOCK_ENCRYPTED + { + // This is a disk-backed emptyDir. + m.set_typ(Some(String::from(mount::KATA_K8S_LOCAL_STORAGE_TYPE))); + } } } } diff --git a/src/runtime-rs/crates/resource/src/volume/encrypted_emptydir_volume.rs b/src/runtime-rs/crates/resource/src/volume/encrypted_emptydir_volume.rs index 7189220f46..6004b7b711 100644 --- a/src/runtime-rs/crates/resource/src/volume/encrypted_emptydir_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/encrypted_emptydir_volume.rs @@ -18,7 +18,7 @@ use hypervisor::{ }, BlockConfigModern, BlockDeviceAio, }; -use kata_sys_util::k8s::is_host_empty_dir; +use kata_sys_util::k8s::is_disk_empty_dir; use kata_types::config::EMPTYDIR_MODE_BLOCK_ENCRYPTED; use kata_types::mount::DEFAULT_KATA_GUEST_SANDBOX_DIR; use kata_types::mount::{add_volume_mount_info, is_volume_mounted, DirectVolumeMountInfo}; @@ -206,7 +206,7 @@ pub(crate) fn is_encrypted_emptydir_volume(m: &oci::Mount, emptydir_mode: &str) return false; } match m.source() { - Some(src) => is_host_empty_dir(&src.display().to_string()), + Some(src) => is_disk_empty_dir(&src.display().to_string()), None => false, } } diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index b60b0bee14..f002983778 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -101,11 +101,19 @@ func HandleFactory(ctx context.Context, vci vc.VC, runtimeConfig *oci.RuntimeCon // of the same pod the already existing volume is reused. func SetEphemeralStorageType(ociSpec specs.Spec, disableGuestEmptyDir bool, emptyDirMode string) specs.Spec { for idx, mnt := range ociSpec.Mounts { - if vc.IsEphemeralStorage(mnt.Source) { + if vc.IsTmpFSEmptyDir(mnt.Source) { ociSpec.Mounts[idx].Type = vc.KataEphemeralDevType - } - if vc.Isk8sHostEmptyDir(mnt.Source) && !disableGuestEmptyDir && emptyDirMode != vc.EmptyDirModeVirtioBlkEncrypted { - ociSpec.Mounts[idx].Type = vc.KataLocalDevType + } else if vc.IsNonTmpFSEmptyDir(mnt.Source) { + // Among non-tmpfs emptyDirs: + // * Only hugepage-backed emptyDirs should always be + // local to the guest and recreated inside it. + // * For disk-backed emptyDirs, that decision is driven by + // disableGuestEmptyDir and emptyDirMode. + if vc.IsHugePageEmptyDir(mnt.Source) { + ociSpec.Mounts[idx].Type = vc.KataLocalDevType + } else if !disableGuestEmptyDir && emptyDirMode != vc.EmptyDirModeVirtioBlkEncrypted { + ociSpec.Mounts[idx].Type = vc.KataLocalDevType + } } } return ociSpec diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 0513293cb6..0dc274abe7 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -624,7 +624,7 @@ func (c *Container) createBlockDevices(ctx context.Context) error { // If block devices are disabled, we selectively only hotplug if // the mount is an encrypted block-based emptyDir, to avoid // cases that could regress 20ca4d2. - if !c.checkBlockDeviceSupport(ctx) && (c.sandbox.config.EmptyDirMode != EmptyDirModeVirtioBlkEncrypted || !Isk8sHostEmptyDir(c.mounts[i].Source)) { + if !c.checkBlockDeviceSupport(ctx) && (c.sandbox.config.EmptyDirMode != EmptyDirModeVirtioBlkEncrypted || !IsNonTmpFSEmptyDir(c.mounts[i].Source)) { c.Logger().Warn("Block device not supported") continue } @@ -883,7 +883,7 @@ func (c *Container) createEphemeralDisks() error { } for i := range c.mounts { - if !Isk8sHostEmptyDir(c.mounts[i].Source) { + if !IsNonTmpFSEmptyDir(c.mounts[i].Source) { continue } diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index fe7dd7f3fb..6ad9737fe3 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -322,7 +322,7 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) randHex := hex.EncodeToString(randBytes) caps := f.sandbox.hypervisor.Capabilities(ctx) - mustCopyEmptyDir := !caps.IsFsSharingSupported() && Isk8sHostEmptyDir(m.Source) + mustCopyEmptyDir := !caps.IsFsSharingSupported() && IsDiskEmptyDir(m.Source) filename := shareFileName(c.id, m.Source, m.Destination, randHex, mustCopyEmptyDir) guestPath := filepath.Join(kataGuestSharedDir(), filename) diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index 645369d78b..b974ce6039 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -312,16 +312,8 @@ const ( K8sSecret = "kubernetes.io~secret" ) -// IsEphemeralStorage returns true if the given path -// to the storage belongs to kubernetes ephemeral storage -// -// This method depends on a specific path used by k8s -// to detect if it's of type ephemeral. As of now, -// this is a very k8s specific solution that works -// but in future there should be a better way for this -// method to determine if the path is for ephemeral -// volume type -func IsEphemeralStorage(path string) bool { +// IsTmpFSEmptyDir returns true for tmpfs-backed emptyDirs (medium: Memory). +func IsTmpFSEmptyDir(path string) bool { if !isEmptyDir(path) { return false } @@ -333,10 +325,9 @@ func IsEphemeralStorage(path string) bool { return false } -// Isk8sHostEmptyDir returns true if the given path -// to the storage belongs to kubernetes empty-dir of medium "default" -// i.e volumes that are directories on the host. -func Isk8sHostEmptyDir(path string) bool { +// IsNonTmpFSEmptyDir returns true for non-tmpfs-backed emptyDirs. +// This includes disk-backed (medium: "", default) and hugepage-backed (medium: HugePages). +func IsNonTmpFSEmptyDir(path string) bool { if !isEmptyDir(path) { return false } @@ -347,6 +338,22 @@ func Isk8sHostEmptyDir(path string) bool { return false } +// IsHugePageEmptyDir returns true for hugepage-backed emptyDirs (medium: HugePages). +func IsHugePageEmptyDir(path string) bool { + if !isEmptyDir(path) { + return false + } + if _, fsType, _, _ := utils.GetDevicePathAndFsTypeOptions(path); fsType == "hugetlbfs" { + return true + } + return false +} + +// IsDiskEmptyDir returns true for disk-backed emptyDirs (medium: "", default). +func IsDiskEmptyDir(path string) bool { + return IsNonTmpFSEmptyDir(path) && !IsHugePageEmptyDir(path) +} + func checkKubernetesVolume(path, volumeType string) bool { splitSourceSlice := strings.Split(path, "/") if len(splitSourceSlice) > 1 { diff --git a/src/runtime/virtcontainers/mount_linux_test.go b/src/runtime/virtcontainers/mount_linux_test.go index 2eb15e2929..92e41ca12d 100644 --- a/src/runtime/virtcontainers/mount_linux_test.go +++ b/src/runtime/virtcontainers/mount_linux_test.go @@ -40,17 +40,17 @@ func TestIsEphemeralStorage(t *testing.T) { assert.NoError(err) defer syscall.Unmount(sampleEphePath, 0) - isEphe := IsEphemeralStorage(sampleEphePath) + isEphe := IsTmpFSEmptyDir(sampleEphePath) assert.True(isEphe) - isHostEmptyDir := Isk8sHostEmptyDir(sampleEphePath) + isHostEmptyDir := IsNonTmpFSEmptyDir(sampleEphePath) assert.False(isHostEmptyDir) sampleEphePath = "/var/lib/kubelet/pods/366c3a75-4869-11e8-b479-507b9ddd5ce4/volumes/cache-volume" - isEphe = IsEphemeralStorage(sampleEphePath) + isEphe = IsTmpFSEmptyDir(sampleEphePath) assert.False(isEphe) - isHostEmptyDir = Isk8sHostEmptyDir(sampleEphePath) + isHostEmptyDir = IsNonTmpFSEmptyDir(sampleEphePath) assert.False(isHostEmptyDir) }