From f390c122f065cfca3c4be057f6560544d3cf650d Mon Sep 17 00:00:00 2001
From: Eric Ernst
Date: Tue, 9 Aug 2022 15:36:09 -0700
Subject: [PATCH] sandbox: don't hotplug too much memory at once

If we're using ACPI hotplug for memory, there's a limitation on the
amount of memory which can be hotplugged at a single time. During
hotplug, the guest kernel allocates memmap entries for each added page,
at a cost of 64 bytes per 4 KiB page. As an example, hotplugging 12 GiB
of memory requires ~192 MiB of *free* memory, which is about the limit
we should expect for an idle 256 MiB guest (using a conservative
heuristic that 75% of provided memory is free).

From experimentation, at pod creation time we can reliably hotplug 48
times the memory already provided to the guest (a factor of 48 results
in the memmap consuming 75% of provided memory). Using the prior
example of a guest with 256 MiB of RAM: 256 MiB * 48 = 12 GiB, which is
the upper end of what we should expect can be hotplugged successfully
into the guest.

Note: it isn't expected that we'll need to hotplug large amounts of RAM
after workloads have already started -- container additions are
expected to occur first in the pod lifecycle. Based on this, we expect
the provided memory to be freely available for hotplug.

If virtio-mem is being utilized, there isn't such a limitation: we can
hotplug the maximum allowed memory in a single operation.

Fixes: #4847

Signed-off-by: Eric Ernst
---
 src/runtime/virtcontainers/sandbox.go | 64 +++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go
index e691ea1dee..e4a16983ed 100644
--- a/src/runtime/virtcontainers/sandbox.go
+++ b/src/runtime/virtcontainers/sandbox.go
@@ -77,6 +77,14 @@ const (
 
 	// Restricted permission for shared directory managed by virtiofs
 	sharedDirMode = os.FileMode(0700) | os.ModeDir
+
+	// acpiMemoryHotplugFactor indicates how much memory can be hotplugged relative to the amount of
+	// RAM provided to the guest. This is a conservative heuristic based on needing 64 bytes of free
+	// guest memory per 4 KiB page of hotplugged memory.
+	//
+	// As an example: 12 GiB hotplugged -> 3 Mi pages -> 192 MiB of overhead (3 Mi pages x 64 B).
+	// This is approximately what should be free in a relatively unloaded 256 MiB guest (75% of available memory). So, 256 MiB x 48 => 12 GiB.
+	acpiMemoryHotplugFactor = 48
 )
 
 var (
@@ -2012,9 +2020,60 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
 	}
 	s.Logger().Debugf("Sandbox CPUs: %d", newCPUs)
 
-	// Update Memory
-	s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory")
+	// Update Memory --
+	// If we're using ACPI hotplug for memory, there's a limitation on the amount of memory which can be hotplugged at a single time.
+	// We must have enough free memory in the guest kernel to cover 64 bytes per 4 KiB page of memory added for the memmap.
+	// See https://github.com/kata-containers/kata-containers/issues/4847 for more details.
+	// For a typical pod lifecycle, we expect each container to be added when we start the workloads. Based on this, we assume that the majority
+	// of the guest memory is readily available. From experimentation, we see that we can add approximately 48 times what is already provided to
+	// the guest workload. For example, a 256 MiB guest should be able to accommodate hotplugging 12 GiB of memory.
+	//
+	// If virtio-mem is being used, there isn't such a limitation - we can hotplug the maximum allowed memory at a single time.
+	//
 	newMemoryMB := uint32(sandboxMemoryByte >> utils.MibToBytesShift)
+	finalMemoryMB := newMemoryMB
+
+	hconfig := s.hypervisor.HypervisorConfig()
+
+	for {
+		currentMemoryMB := s.hypervisor.GetTotalMemoryMB(ctx)
+
+		maxHotPluggableMemoryMB := currentMemoryMB * acpiMemoryHotplugFactor
+
+		// In the case of virtio-mem, we don't have a restriction on how much can be hotplugged at
+		// a single time. As a result, the max hotpluggable is only limited by the maximum memory size
+		// of the guest.
+		if hconfig.VirtioMem {
+			maxHotPluggableMemoryMB = uint32(hconfig.DefaultMaxMemorySize) - currentMemoryMB
+		}
+
+		deltaMB := int32(finalMemoryMB - currentMemoryMB)
+
+		if deltaMB > int32(maxHotPluggableMemoryMB) {
+			s.Logger().Warnf("Large hotplug. Adding %d MB of %d MB requested", maxHotPluggableMemoryMB, deltaMB)
+			newMemoryMB = currentMemoryMB + maxHotPluggableMemoryMB
+		} else {
+			newMemoryMB = finalMemoryMB
+		}
+
+		// Add the memory to the guest and online the memory:
+		if err := s.updateMemory(ctx, newMemoryMB); err != nil {
+			return err
+		}
+
+		if newMemoryMB == finalMemoryMB {
+			break
+		}
+
+	}
+
+	return nil
+
+}
+
+func (s *Sandbox) updateMemory(ctx context.Context, newMemoryMB uint32) error {
+	// Add the memory to the guest and online it:
+	s.Logger().WithField("memory-sandbox-size-mb", newMemoryMB).Debugf("Request to hypervisor to update memory")
 	newMemory, updatedMemoryDevice, err := s.hypervisor.ResizeMemory(ctx, newMemoryMB, s.state.GuestMemoryBlockSizeMB, s.state.GuestMemoryHotplugProbe)
 	if err != nil {
 		if err == noGuestMemHotplugErr {
@@ -2034,7 +2093,6 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
 
 	if err := s.agent.onlineCPUMem(ctx, 0, false); err != nil {
 		return err
 	}
-
 	return nil
 }
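For reviewers: below is a standalone sketch, not part of the patch, of the arithmetic behind acpiMemoryHotplugFactor. The constant names and the main() harness are hypothetical illustration only; the runtime code above just multiplies by the factor.

package main

import "fmt"

const (
	pageSizeBytes      = 4096 // 4 KiB guest pages assumed by the heuristic
	memmapBytesPerPage = 64   // memmap (struct page) cost per hotplugged page
	hotplugFactor      = 48   // acpiMemoryHotplugFactor from the patch
)

func main() {
	guestMiB := uint64(256)

	// Maximum ACPI hotplug per the heuristic: 48x current guest memory.
	maxHotplugMiB := guestMiB * hotplugFactor

	// memmap overhead: one 64-byte entry per 4 KiB page, i.e. hotplugged/64.
	pages := maxHotplugMiB * 1024 * 1024 / pageSizeBytes
	overheadMiB := pages * memmapBytesPerPage / (1024 * 1024)

	fmt.Printf("guest=%d MiB, max hotplug=%d MiB, memmap overhead=%d MiB (%.0f%% of guest RAM)\n",
		guestMiB, maxHotplugMiB, overheadMiB,
		100*float64(overheadMiB)/float64(guestMiB))
	// Prints: guest=256 MiB, max hotplug=12288 MiB, memmap overhead=192 MiB (75% of guest RAM)
}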
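And a second sketch, again not part of the patch, showing how the hotplug loop converges on a resize larger than the per-step cap by chunking. nextMemoryMB is a hypothetical stand-in for the per-iteration decision in updateResources (ACPI case), and it assumes growth (final >= current) for simplicity; the real loop calls s.updateMemory() each iteration.

package main

import "fmt"

const acpiMemoryHotplugFactor = 48

// nextMemoryMB mirrors the per-iteration decision for the ACPI case:
// never grow by more than 48x what the guest currently has.
func nextMemoryMB(currentMB, finalMB uint32) uint32 {
	maxHotPluggableMB := currentMB * acpiMemoryHotplugFactor
	if finalMB-currentMB > maxHotPluggableMB {
		return currentMB + maxHotPluggableMB
	}
	return finalMB
}

func main() {
	current, final := uint32(256), uint32(20480) // 256 MiB guest resized to 20 GiB

	for current != final {
		current = nextMemoryMB(current, final)
		fmt.Printf("guest memory now %d MiB\n", current)
	}
	// Prints:
	//   guest memory now 12544 MiB  (first chunk capped at 48 x 256 MiB)
	//   guest memory now 20480 MiB  (remainder fits under the new, larger cap)
}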