sandbox: don't hotplug too much memory at once

If we're using ACPI hotplug for memory, there's a limitation on the
amount of memory which can be hotplugged at a single time.

During hotplug, we'll allocate memory for the memmap for each page,
resulting in a 64-byte allocation per 4 KiB page of hotplugged memory.
As an example, hotplugging 12 GiB of memory requires ~192 MiB of *free*
memory, which is about the most we should expect to be available in an
idle 256 MiB guest (a conservative heuristic of 75% of provided memory).

From experimentation, at pod creation time we can reliably add 48 times
the memory provided to the guest (a factor of 48 results in using 75% of
the provided memory for hotplug). Using the prior example of a guest with
256 MiB of RAM: 256 MiB * 48 = 12 GiB, and 12 GiB is the upper end of what
we should expect can be hotplugged successfully into the guest.

Note: It isn't expected that we'll need to hotplug large amounts of RAM
after workloads have already started -- container additions are expected
to occur first in the pod lifecycle. Based on this, we expect that the
provided memory should be freely available for hotplug.

If virtio-mem is being used, there isn't such a limitation - we can
hotplug the maximum allowed memory in a single operation.

Fixes: #4847

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
This commit is contained in:
Eric Ernst 2022-08-09 15:36:09 -07:00
parent e0142db24f
commit f390c122f0

View File

@ -77,6 +77,14 @@ const (
// Restricted permission for shared directory managed by virtiofs
sharedDirMode = os.FileMode(0700) | os.ModeDir
// hotplug factor indicates how much memory can be hotplugged relative to the amount of
// RAM provided to the guest. This is a conservative heuristic based on needing 64 bytes per
// 4KiB page of hotplugged memory.
//
// As an example: 12 GiB hotplugged -> 3 Mi pages -> 192 MiBytes overhead (3Mi x 64B).
// This is approximately what should be free in a relatively unloaded 256 MiB guest (75% of available memory). So, 256 Mi x 48 => 12 Gi
acpiMemoryHotplugFactor = 48
)
var (
@ -2012,9 +2020,60 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
}
s.Logger().Debugf("Sandbox CPUs: %d", newCPUs)
// Update Memory
s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory")
// Update Memory --
// If we're using ACPI hotplug for memory, there's a limitation on the amount of memory which can be hotplugged at a single time.
// We must have enough free memory in the guest kernel to cover 64bytes per (4KiB) page of memory added for mem_map.
// See https://github.com/kata-containers/kata-containers/issues/4847 for more details.
// For a typical pod lifecycle, we expect that each container is added when we start the workloads. Based on this, we'll "assume" that majority
// of the guest memory is readily available. From experimentation, we see that we can add approximately 48 times what is already provided to
// the guest workload. For example, a 256 MiB guest should be able to accommodate hotplugging 12 GiB of memory.
//
// If virtio-mem is being used, there isn't such a limitation - we can hotplug the maximum allowed memory at a single time.
//
newMemoryMB := uint32(sandboxMemoryByte >> utils.MibToBytesShift)
finalMemoryMB := newMemoryMB
hconfig := s.hypervisor.HypervisorConfig()
for {
currentMemoryMB := s.hypervisor.GetTotalMemoryMB(ctx)
maxhotPluggableMemoryMB := currentMemoryMB * acpiMemoryHotplugFactor
// In the case of virtio-mem, we don't have a restriction on how much can be hotplugged at
// a single time. As a result, the max hotpluggable is only limited by the maximum memory size
// of the guest.
if hconfig.VirtioMem {
maxhotPluggableMemoryMB = uint32(hconfig.DefaultMaxMemorySize) - currentMemoryMB
}
deltaMB := int32(finalMemoryMB - currentMemoryMB)
if deltaMB > int32(maxhotPluggableMemoryMB) {
s.Logger().Warnf("Large hotplug. Adding %d MB of %d total memory", maxhotPluggableMemoryMB, deltaMB)
newMemoryMB = currentMemoryMB + maxhotPluggableMemoryMB
} else {
newMemoryMB = finalMemoryMB
}
// Add the memory to the guest and online the memory:
if err := s.updateMemory(ctx, newMemoryMB); err != nil {
return err
}
if newMemoryMB == finalMemoryMB {
break
}
}
return nil
}
func (s *Sandbox) updateMemory(ctx context.Context, newMemoryMB uint32) error {
// online the memory:
s.Logger().WithField("memory-sandbox-size-mb", newMemoryMB).Debugf("Request to hypervisor to update memory")
newMemory, updatedMemoryDevice, err := s.hypervisor.ResizeMemory(ctx, newMemoryMB, s.state.GuestMemoryBlockSizeMB, s.state.GuestMemoryHotplugProbe)
if err != nil {
if err == noGuestMemHotplugErr {
@ -2034,7 +2093,6 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
if err := s.agent.onlineCPUMem(ctx, 0, false); err != nil {
return err
}
return nil
}