runtime-go/rs: Set disable_guest_empty_dir = true by default

This makes the runtime share the host Kubelet emptyDir folder with the guest
instead of the agent creating an empty folder in the container rootfs. Doing so
enables the Kubelet to track emptyDir usage and evict greedy pods.

In other words, with virtio-fs the container rootfs uses host storage whether
this is true or false; however, when it is true, Kata uses the k8s emptyDir
folder so the sizeLimit is properly enforced by k8s.

Addresses the ephemeral storage part of #12203.

History:

 * Initially, emptyDirs were slow because they were shared from the host with 9p.
   https://github.com/kata-containers/runtime/issues/1472

 * To address the above, emptyDirs were hardcoded to be created by the agent in
   the pause container's rootfs, potentially leveraging devicemapper and
   improving perf.
   https://github.com/kata-containers/runtime/pull/1485

 * The previous PR regressed an (interesting?) use case where emptyDirs were
   used to share data from the host to the guest, so the behavior was made
   configurable and `disable_guest_empty_dir = false` was introduced, defaulting
   to the behavior of the previous PR.
   https://github.com/kata-containers/kata-containers/pull/2056

 * Another resource accounting regression remained, which is addressed in this PR.

Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
This commit is contained in:
Aurélien Bombo
2026-01-20 14:58:55 -06:00
parent 49ce886f20
commit a3e91d9ed2
4 changed files with 74 additions and 7 deletions

View File

@@ -179,7 +179,7 @@ DEFNETQUEUES := 1
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\"]
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
DEFDISABLEGUESTSECCOMP := true
DEFDISABLEGUESTEMPTYDIR := false
DEFDISABLEGUESTEMPTYDIR := true
DEFEMPTYDIRMODE := shared-fs
DEFEMPTYDIRMODE_COCO := block-encrypted
##VAR DEFAULTEXPFEATURES=[features] Default experimental features enabled

View File

@@ -228,7 +228,7 @@ DEFBRIDGES := 1
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\"]
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
DEFDISABLEGUESTSECCOMP := true
DEFDISABLEGUESTEMPTYDIR := false
DEFDISABLEGUESTEMPTYDIR := true
DEFEMPTYDIRMODE := shared-fs
DEFEMPTYDIRMODE_COCO := block-encrypted
#Default experimental features enabled

View File

@@ -84,9 +84,13 @@ type FilesystemShare struct {
configVolRegex *regexp.Regexp
// Regex to match only the timestamped directory inside the k8's volume mount
timestampDirRegex *regexp.Regexp
// The same volume mount can be shared by multiple containers in the same sandbox (pod)
srcDstMap map[string][]string
srcDstMapLock sync.Mutex
// srcDstMap tracks file-level source to destination mappings for configmap/secret watching
srcDstMap map[string][]string
srcDstMapLock sync.Mutex
// srcGuestMap caches volume source path to guest path, enabling multiple containers
// in the same pod to share the same volume mount
srcGuestMap map[string]string
srcGuestMapLock sync.Mutex
eventLoopStarted bool
eventLoopStartedLock sync.Mutex
watcherDoneChannel chan bool
@@ -114,6 +118,7 @@ func NewFilesystemShare(s *Sandbox) (*FilesystemShare, error) {
sandbox: s,
watcherDoneChannel: make(chan bool),
srcDstMap: make(map[string][]string),
srcGuestMap: make(map[string]string),
watcher: watcher,
configVolRegex: configVolRegex,
timestampDirRegex: timestampDirRegex,
@@ -302,19 +307,38 @@ func (f *FilesystemShare) Cleanup(ctx context.Context) error {
return nil
}
// shareFileName builds the name under which a mount is exposed in the guest
// shared directory.
//
// When isSandboxScoped is false the name is "<containerID>-<randHex>-<base>",
// where <base> is the last path element of destination. When it is true the
// name is "sandbox-<randHex>-<base>", where <base> is the last path element of
// the cleaned source path; containerID and destination are not used in that
// case.
func shareFileName(containerID, source, destination, randHex string, isSandboxScoped bool) string {
	if !isSandboxScoped {
		leaf := filepath.Base(destination)
		return fmt.Sprintf("%s-%s-%s", containerID, randHex, leaf)
	}

	// Clean first so the base name is taken from the normalized source path.
	leaf := filepath.Base(filepath.Clean(source))
	return fmt.Sprintf("sandbox-%s-%s", randHex, leaf)
}
func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) (*SharedFile, error) {
randBytes, err := utils.GenerateRandomBytes(8)
if err != nil {
return nil, err
}
filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination))
randHex := hex.EncodeToString(randBytes)
caps := f.sandbox.hypervisor.Capabilities(ctx)
mustCopyEmptyDir := !caps.IsFsSharingSupported() && Isk8sHostEmptyDir(m.Source)
filename := shareFileName(c.id, m.Source, m.Destination, randHex, mustCopyEmptyDir)
guestPath := filepath.Join(kataGuestSharedDir(), filename)
// copy file to container's rootfs if filesystem sharing is not supported, otherwise
// bind mount it in the shared directory.
caps := f.sandbox.hypervisor.Capabilities(ctx)
if !caps.IsFsSharingSupported() {
if mustCopyEmptyDir {
f.srcGuestMapLock.Lock()
if guestPath, ok := f.srcGuestMap[m.Source]; ok {
f.srcGuestMapLock.Unlock()
return &SharedFile{guestPath: guestPath}, nil
}
f.srcGuestMapLock.Unlock()
}
f.Logger().Debug("filesystem sharing is not supported, files will be copied")
var ignored bool
@@ -393,6 +417,13 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount)
return nil, nil
}
if mustCopyEmptyDir {
// Cache the host emptyDir guestPath so other containers in the pod
// share the same copied writable directory.
f.srcGuestMapLock.Lock()
f.srcGuestMap[m.Source] = guestPath
f.srcGuestMapLock.Unlock()
}
} else {
// These mounts are created in the shared dir
mountDest := filepath.Join(getMountPath(f.sandbox.ID()), filename)
@@ -449,6 +480,9 @@ func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Moun
}
}
// Not deleting from f.srcGuestMap since this function is not
// called for mounts without HostPath.
return nil
}

View File

@@ -182,3 +182,36 @@ func TestShareRootFilesystem(t *testing.T) {
})
}
}
func TestShareFileName(t *testing.T) {
testCases := map[string]struct {
containerID string
source string
destination string
randHex string
sandboxScoped bool
expectedResult string
}{
"container scoped": {
containerID: "container-id-abc",
source: "/var/lib/kubelet/pods/poduid/volumes/kubernetes.io~empty-dir/cache",
destination: "/mnt/cache",
randHex: "0011223344556677",
expectedResult: "container-id-abc-0011223344556677-cache",
},
"sandbox scoped source basename": {
containerID: "container-id-abc",
source: "/var/lib/kubelet/pods/poduid/volumes/kubernetes.io~empty-dir/cache/",
destination: "/mnt/different-cache-name",
randHex: "0011223344556677",
sandboxScoped: true,
expectedResult: "sandbox-0011223344556677-cache",
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
assert.Equal(t, tc.expectedResult, shareFileName(tc.containerID, tc.source, tc.destination, tc.randHex, tc.sandboxScoped))
})
}
}