diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 1c3ebc147..008b3bd97 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -667,6 +667,10 @@ func (a *Acrn) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) { return VcpuThreadIDs{}, nil } +func (a *Acrn) GetTotalMemoryMB(ctx context.Context) uint32 { + return a.config.MemorySize +} + func (a *Acrn) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) { return 0, MemoryDevice{}, nil } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 3a02a645a..d93ceed67 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1598,6 +1598,16 @@ func (clh *cloudHypervisor) cleanupVM(force bool) error { return nil } +func (clh *cloudHypervisor) GetTotalMemoryMB(ctx context.Context) uint32 { + vminfo, err := clh.vmInfo() + if err != nil { + clh.Logger().WithError(err).Error("failed to get vminfo") + return 0 + } + + return uint32(vminfo.GetMemoryActualSize() >> utils.MibToBytesShift) +} + // vmInfo ask to hypervisor for current VM status func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) { cl := clh.client() diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 703e6e88b..f81cc319c 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -1165,6 +1165,10 @@ func (fc *firecracker) HypervisorConfig() HypervisorConfig { return fc.config } +func (fc *firecracker) GetTotalMemoryMB(ctx context.Context) uint32 { + return fc.config.MemorySize +} + func (fc *firecracker) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) { return 0, MemoryDevice{}, nil } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 49b658db3..0e7b4785b 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -922,6 +922,7 @@ type Hypervisor interface { HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error) + GetTotalMemoryMB(ctx context.Context) uint32 GetVMConsole(ctx context.Context, sandboxID string) (string, string, error) Disconnect(ctx context.Context) Capabilities(ctx context.Context) types.Capabilities diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index f4a0b934e..7d6da561f 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ b/src/runtime/virtcontainers/mock_hypervisor.go @@ -17,6 +17,7 @@ import ( var MockHybridVSockPath = "/tmp/kata-mock-hybrid-vsock.socket" type mockHypervisor struct { + config HypervisorConfig mockPid int } @@ -27,10 +28,11 @@ func (m *mockHypervisor) Capabilities(ctx context.Context) types.Capabilities { } func (m *mockHypervisor) HypervisorConfig() HypervisorConfig { - return HypervisorConfig{} + return m.config } func (m *mockHypervisor) setConfig(config *HypervisorConfig) error { + m.config = *config return nil } @@ -38,7 +40,7 @@ func (m *mockHypervisor) CreateVM(ctx context.Context, id string, network Networ if err := m.setConfig(hypervisorConfig); err != nil { return err } - + m.config.MemSlots = 0 
return nil } @@ -92,12 +94,20 @@ func (m *mockHypervisor) GetVMConsole(ctx context.Context, sandboxID string) (st } func (m *mockHypervisor) ResizeMemory(ctx context.Context, memMB uint32, memorySectionSizeMB uint32, probe bool) (uint32, MemoryDevice, error) { + if m.config.MemorySize != memMB { + // For testing, we'll use MemSlots to track how many times we resized memory + m.config.MemSlots++ + m.config.MemorySize = memMB + } return 0, MemoryDevice{}, nil } func (m *mockHypervisor) ResizeVCPUs(ctx context.Context, cpus uint32) (uint32, uint32, error) { return 0, 0, nil } +func (m *mockHypervisor) GetTotalMemoryMB(ctx context.Context) uint32 { + return m.config.MemorySize +} func (m *mockHypervisor) Disconnect(ctx context.Context) { } diff --git a/src/runtime/virtcontainers/mock_hypervisor_test.go b/src/runtime/virtcontainers/mock_hypervisor_test.go index 0159a993d..ba4435f13 100644 --- a/src/runtime/virtcontainers/mock_hypervisor_test.go +++ b/src/runtime/virtcontainers/mock_hypervisor_test.go @@ -14,7 +14,7 @@ import ( ) func TestMockHypervisorCreateVM(t *testing.T) { - var m *mockHypervisor + m := &mockHypervisor{} assert := assert.New(t) sandbox := &Sandbox{ diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 56bd5c389..6ef2310f4 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -2166,6 +2166,10 @@ func (q *qemu) Disconnect(ctx context.Context) { q.qmpShutdown() } +func (q *qemu) GetTotalMemoryMB(ctx context.Context) uint32 { + return q.config.MemorySize + uint32(q.state.HotpluggedMemory) +} + // ResizeMemory gets a request to update the VM memory to reqMemMB // Memory update is managed with two approaches // Add memory to VM: @@ -2179,7 +2183,7 @@ // A longer term solution is evaluate solutions like virtio-mem func (q *qemu) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) { - currentMemory := q.config.MemorySize + uint32(q.state.HotpluggedMemory) + currentMemory := q.GetTotalMemoryMB(ctx) if err := q.qmpSetup(); err != nil { return 0, MemoryDevice{}, err } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index e691ea1de..e4a16983e 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -77,6 +77,14 @@ const ( // Restricted permission for shared directory managed by virtiofs sharedDirMode = os.FileMode(0700) | os.ModeDir + + // acpiMemoryHotplugFactor indicates how much memory can be hotplugged relative to the amount of + // RAM provided to the guest. This is a conservative heuristic based on needing 64 bytes per + // 4 KiB page of hotplugged memory. + // + // As an example: 12 GiB hotplugged -> 3 Mi pages -> 192 MiB overhead (3 Mi x 64 B). + // 192 MiB is approximately what should be free in a relatively unloaded 256 MiB guest (75% of its memory), so 256 MiB x 48 => 12 GiB. + acpiMemoryHotplugFactor = 48 ) var ( @@ -2012,9 +2020,60 @@ func (s *Sandbox) updateResources(ctx context.Context) error { } s.Logger().Debugf("Sandbox CPUs: %d", newCPUs) - // Update Memory - s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory") + // Update Memory -- + // If we're using ACPI hotplug for memory, there's a limit on the amount of memory which can be hotplugged at a single time. 
+ // We must have enough free memory in the guest kernel to cover 64 bytes per 4 KiB page of memory added for mem_map. + // See https://github.com/kata-containers/kata-containers/issues/4847 for more details. + // For a typical pod lifecycle, we expect that each container is added when we start the workloads. Based on this, we assume that the majority + // of the guest memory is readily available. From experimentation, we see that we can add approximately 48 times what is already provided to + // the guest workload. For example, a 256 MiB guest should be able to accommodate hotplugging 12 GiB of memory. + // + // If virtio-mem is being used, there isn't such a limitation: we can hotplug the maximum allowed memory at a single time. + // + newMemoryMB := uint32(sandboxMemoryByte >> utils.MibToBytesShift) + finalMemoryMB := newMemoryMB + + hconfig := s.hypervisor.HypervisorConfig() + + for { + currentMemoryMB := s.hypervisor.GetTotalMemoryMB(ctx) + + maxHotPluggableMemoryMB := currentMemoryMB * acpiMemoryHotplugFactor + + // In the case of virtio-mem, we don't have a restriction on how much can be hotplugged at + // a single time. As a result, the maximum hotpluggable amount is limited only by the maximum + // memory size of the guest. + if hconfig.VirtioMem { + maxHotPluggableMemoryMB = uint32(hconfig.DefaultMaxMemorySize) - currentMemoryMB + } + + deltaMB := int32(finalMemoryMB - currentMemoryMB) + + if deltaMB > int32(maxHotPluggableMemoryMB) { + s.Logger().Warnf("Large hotplug. Adding %d MB of %d total memory", maxHotPluggableMemoryMB, deltaMB) + newMemoryMB = currentMemoryMB + maxHotPluggableMemoryMB + } else { + newMemoryMB = finalMemoryMB + } + + // Add the memory to the guest and online it: + if err := s.updateMemory(ctx, newMemoryMB); err != nil { + return err + } + + if newMemoryMB == finalMemoryMB { + break + } + + } + + return nil + +} + +func (s *Sandbox) updateMemory(ctx context.Context, newMemoryMB uint32) error { + // Resize the guest memory: + s.Logger().WithField("memory-sandbox-size-mb", newMemoryMB).Debugf("Request to hypervisor to update memory") newMemory, updatedMemoryDevice, err := s.hypervisor.ResizeMemory(ctx, newMemoryMB, s.state.GuestMemoryBlockSizeMB, s.state.GuestMemoryHotplugProbe) if err != nil { if err == noGuestMemHotplugErr { @@ -2034,7 +2093,6 @@ func (s *Sandbox) updateResources(ctx context.Context) error { if err := s.agent.onlineCPUMem(ctx, 0, false); err != nil { return err } - return nil } diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index 331094ee3..59ed24c1a 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -41,6 +41,7 @@ func newHypervisorConfig(kernelParams []Param, hParams []Param) HypervisorConfig HypervisorPath: filepath.Join(testDir, testHypervisor), KernelParams: kernelParams, HypervisorParams: hParams, + MemorySize: 1, } } @@ -1360,7 +1361,6 @@ func TestSandboxUpdateResources(t *testing.T) { contConfig1 := newTestContainerConfigNoop("cont-00001") contConfig2 := newTestContainerConfigNoop("cont-00002") hConfig := newHypervisorConfig(nil, nil) - defer cleanUp() // create a sandbox s, err := testCreateSandbox(t, @@ -1370,28 +1370,37 @@ NetworkConfig{}, []ContainerConfig{contConfig1, contConfig2}, nil) - assert.NoError(t, err) + err = s.updateResources(context.Background()) assert.NoError(t, err) - containerMemLimit := int64(1000) + // For the mock hypervisor, we expect MemSlots to be 0 since the memory 
wasn't changed. + assert.Equal(t, s.hypervisor.HypervisorConfig().MemSlots, uint32(0)) + + containerMemLimit := int64(4 * 1024 * 1024 * 1024) containerCPUPeriod := uint64(1000) containerCPUQouta := int64(5) - for _, c := range s.config.Containers { - c.Resources.Memory = &specs.LinuxMemory{ + for idx := range s.config.Containers { + s.config.Containers[idx].Resources.Memory = &specs.LinuxMemory{ Limit: new(int64), } - c.Resources.CPU = &specs.LinuxCPU{ + s.config.Containers[idx].Resources.CPU = &specs.LinuxCPU{ Period: new(uint64), Quota: new(int64), } - c.Resources.Memory.Limit = &containerMemLimit - c.Resources.CPU.Period = &containerCPUPeriod - c.Resources.CPU.Quota = &containerCPUQouta + s.config.Containers[idx].Resources.Memory.Limit = &containerMemLimit + s.config.Containers[idx].Resources.CPU.Period = &containerCPUPeriod + s.config.Containers[idx].Resources.CPU.Quota = &containerCPUQouta } err = s.updateResources(context.Background()) assert.NoError(t, err) + + // Since we start with 1 MB of memory, we expect it to take 3 hotplugs to add 4 GiB of memory when using ACPI hotplug: + // +48 MB + // +2352 MB + // +the remainder + assert.Equal(t, s.hypervisor.HypervisorConfig().MemSlots, uint32(3)) } func TestSandboxExperimentalFeature(t *testing.T) {
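
Reviewer note, not part of the patch: the arithmetic behind acpiMemoryHotplugFactor can be checked with a few lines of Go. This is a minimal standalone sketch; the 64-byte mem_map cost per page and the 4 KiB page size are the assumptions stated in the sandbox.go comment, not values read from a running kernel.

package main

import "fmt"

func main() {
	const pageSize = 4 * 1024     // 4 KiB guest pages, per the comment's assumption
	const memMapBytesPerPage = 64 // assumed mem_map (struct page) cost per page
	const hotplugBytes = 12 << 30 // 12 GiB of hotplugged memory

	pages := hotplugBytes / pageSize                // 3 Mi pages
	overheadMiB := pages * memMapBytesPerPage >> 20 // 192 MiB of mem_map overhead

	// 192 MiB is ~75% of a 256 MiB guest, hence the factor: 256 MiB * 48 = 12 GiB.
	fmt.Printf("pages=%d overhead=%dMiB factor=%d\n", pages, overheadMiB, (12<<10)/256)
}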
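
A second sketch, of the chunking behavior the new updateResources loop produces, which is what the MemSlots assertion in sandbox_test.go counts. The hotplugSteps helper and the 4097 MB target are illustrative stand-ins under the same assumptions, not code or values taken from the patch:

package main

import "fmt"

const acpiMemoryHotplugFactor = 48 // same heuristic constant as sandbox.go

// hotplugSteps mimics the updateResources loop: each pass may add at most
// 48x the memory currently present, so large requests land in several chunks.
func hotplugSteps(currentMB, finalMB uint32) []uint32 {
	var steps []uint32
	for currentMB < finalMB {
		add := finalMB - currentMB
		if maxAdd := currentMB * acpiMemoryHotplugFactor; add > maxAdd {
			add = maxAdd
		}
		steps = append(steps, add)
		currentMB += add
	}
	return steps
}

func main() {
	// Roughly the unit test's scenario: a 1 MB guest growing by ~4 GiB.
	fmt.Println(hotplugSteps(1, 4097)) // [48 2352 1696] -> three hotplugs
}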