mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-06-30 22:21:05 +00:00
runtime-go/rs: Set disable_guest_empty_dir = true by default
This makes the runtime share the host Kubelet emptyDir folder with the guest instead of the agent creating an empty folder in the container rootfs. Doing so enables the Kubelet to track emptyDir usage and evict greedy pods. In other words, with virtio-fs the container rootfs uses host storage whether this is true or false; however, with true, Kata uses the k8s emptyDir folder so the sizeLimit is properly enforced by k8s. Addresses the ephemeral storage part of #12203. History: * Initially, emptyDirs were slow because they were shared from the host with 9p. https://github.com/kata-containers/runtime/issues/1472 * To address the above, emptyDirs were hardcoded to be created by the agent in the pause container's rootfs, potentially leveraging devicemapper and improving perf. https://github.com/kata-containers/runtime/pull/1485 * The previous PR regressed an (interesting?) use case where emptyDirs were used to share data from the host to the guest, so the behavior was made configurable and `disable_guest_empty_dir = false` was introduced, defaulting to the behavior of the previous PR. https://github.com/kata-containers/kata-containers/pull/2056 * Another resource accounting regression remains, which is addressed in this PR. Signed-off-by: Aurélien Bombo <abombo@microsoft.com>
This commit is contained in:
@@ -179,7 +179,7 @@ DEFNETQUEUES := 1
|
||||
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\"]
|
||||
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
|
||||
DEFDISABLEGUESTSECCOMP := true
|
||||
DEFDISABLEGUESTEMPTYDIR := false
|
||||
DEFDISABLEGUESTEMPTYDIR := true
|
||||
DEFEMPTYDIRMODE := shared-fs
|
||||
DEFEMPTYDIRMODE_COCO := block-encrypted
|
||||
##VAR DEFAULTEXPFEATURES=[features] Default experimental features enabled
|
||||
|
||||
@@ -228,7 +228,7 @@ DEFBRIDGES := 1
|
||||
DEFENABLEANNOTATIONS := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\"]
|
||||
DEFENABLEANNOTATIONS_COCO := [\"enable_iommu\", \"kernel_params\", \"kernel_verity_params\", \"default_vcpus\", \"default_memory\", \"cc_init_data\"]
|
||||
DEFDISABLEGUESTSECCOMP := true
|
||||
DEFDISABLEGUESTEMPTYDIR := false
|
||||
DEFDISABLEGUESTEMPTYDIR := true
|
||||
DEFEMPTYDIRMODE := shared-fs
|
||||
DEFEMPTYDIRMODE_COCO := block-encrypted
|
||||
#Default experimental features enabled
|
||||
|
||||
@@ -84,9 +84,13 @@ type FilesystemShare struct {
|
||||
configVolRegex *regexp.Regexp
|
||||
// Regex to match only the timestamped directory inside the k8's volume mount
|
||||
timestampDirRegex *regexp.Regexp
|
||||
// The same volume mount can be shared by multiple containers in the same sandbox (pod)
|
||||
srcDstMap map[string][]string
|
||||
srcDstMapLock sync.Mutex
|
||||
// srcDstMap tracks file-level source to destination mappings for configmap/secret watching
|
||||
srcDstMap map[string][]string
|
||||
srcDstMapLock sync.Mutex
|
||||
// srcGuestMap caches volume source path to guest path, enabling multiple containers
|
||||
// in the same pod to share the same volume mount
|
||||
srcGuestMap map[string]string
|
||||
srcGuestMapLock sync.Mutex
|
||||
eventLoopStarted bool
|
||||
eventLoopStartedLock sync.Mutex
|
||||
watcherDoneChannel chan bool
|
||||
@@ -114,6 +118,7 @@ func NewFilesystemShare(s *Sandbox) (*FilesystemShare, error) {
|
||||
sandbox: s,
|
||||
watcherDoneChannel: make(chan bool),
|
||||
srcDstMap: make(map[string][]string),
|
||||
srcGuestMap: make(map[string]string),
|
||||
watcher: watcher,
|
||||
configVolRegex: configVolRegex,
|
||||
timestampDirRegex: timestampDirRegex,
|
||||
@@ -302,19 +307,38 @@ func (f *FilesystemShare) Cleanup(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// shareFileName returns the file name used for a shared mount.
//
// The result always has the form "<scope>-<randHex>-<leaf>":
//   - when isSandboxScoped is true, scope is the literal "sandbox" and leaf is
//     the last element of the cleaned source path, so the name does not depend
//     on the container or on where the volume is mounted inside it;
//   - otherwise, scope is containerID and leaf is the last element of
//     destination.
func shareFileName(containerID, source, destination, randHex string, isSandboxScoped bool) string {
	scope, leaf := containerID, filepath.Base(destination)
	if isSandboxScoped {
		// Clean first so that trailing "/." or "/.." components are resolved
		// before the last path element is taken.
		scope, leaf = "sandbox", filepath.Base(filepath.Clean(source))
	}

	return fmt.Sprintf("%s-%s-%s", scope, randHex, leaf)
}
|
||||
|
||||
func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) (*SharedFile, error) {
|
||||
randBytes, err := utils.GenerateRandomBytes(8)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination))
|
||||
randHex := hex.EncodeToString(randBytes)
|
||||
caps := f.sandbox.hypervisor.Capabilities(ctx)
|
||||
mustCopyEmptyDir := !caps.IsFsSharingSupported() && Isk8sHostEmptyDir(m.Source)
|
||||
|
||||
filename := shareFileName(c.id, m.Source, m.Destination, randHex, mustCopyEmptyDir)
|
||||
guestPath := filepath.Join(kataGuestSharedDir(), filename)
|
||||
|
||||
// copy file to container's rootfs if filesystem sharing is not supported, otherwise
|
||||
// bind mount it in the shared directory.
|
||||
caps := f.sandbox.hypervisor.Capabilities(ctx)
|
||||
if !caps.IsFsSharingSupported() {
|
||||
if mustCopyEmptyDir {
|
||||
f.srcGuestMapLock.Lock()
|
||||
if guestPath, ok := f.srcGuestMap[m.Source]; ok {
|
||||
f.srcGuestMapLock.Unlock()
|
||||
return &SharedFile{guestPath: guestPath}, nil
|
||||
}
|
||||
f.srcGuestMapLock.Unlock()
|
||||
}
|
||||
|
||||
f.Logger().Debug("filesystem sharing is not supported, files will be copied")
|
||||
|
||||
var ignored bool
|
||||
@@ -393,6 +417,13 @@ func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if mustCopyEmptyDir {
|
||||
// Cache the host emptyDir guestPath so other containers in the pod
|
||||
// share the same copied writable directory.
|
||||
f.srcGuestMapLock.Lock()
|
||||
f.srcGuestMap[m.Source] = guestPath
|
||||
f.srcGuestMapLock.Unlock()
|
||||
}
|
||||
} else {
|
||||
// These mounts are created in the shared dir
|
||||
mountDest := filepath.Join(getMountPath(f.sandbox.ID()), filename)
|
||||
@@ -449,6 +480,9 @@ func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Moun
|
||||
}
|
||||
}
|
||||
|
||||
// Not deleting from f.srcGuestMap since this function is not
|
||||
// called for mounts without HostPath.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -182,3 +182,36 @@ func TestShareRootFilesystem(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestShareFileName(t *testing.T) {
|
||||
testCases := map[string]struct {
|
||||
containerID string
|
||||
source string
|
||||
destination string
|
||||
randHex string
|
||||
sandboxScoped bool
|
||||
expectedResult string
|
||||
}{
|
||||
"container scoped": {
|
||||
containerID: "container-id-abc",
|
||||
source: "/var/lib/kubelet/pods/poduid/volumes/kubernetes.io~empty-dir/cache",
|
||||
destination: "/mnt/cache",
|
||||
randHex: "0011223344556677",
|
||||
expectedResult: "container-id-abc-0011223344556677-cache",
|
||||
},
|
||||
"sandbox scoped source basename": {
|
||||
containerID: "container-id-abc",
|
||||
source: "/var/lib/kubelet/pods/poduid/volumes/kubernetes.io~empty-dir/cache/",
|
||||
destination: "/mnt/different-cache-name",
|
||||
randHex: "0011223344556677",
|
||||
sandboxScoped: true,
|
||||
expectedResult: "sandbox-0011223344556677-cache",
|
||||
},
|
||||
}
|
||||
|
||||
for name, tc := range testCases {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.expectedResult, shareFileName(tc.containerID, tc.source, tc.destination, tc.randHex, tc.sandboxScoped))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user