Merge pull request #4862 from egernst/memory-hotplug-limitation
Address Memory hotplug limitation
Commit: b5786361e9
@@ -667,6 +667,10 @@ func (a *Acrn) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) {
 	return VcpuThreadIDs{}, nil
 }
 
+func (a *Acrn) GetTotalMemoryMB(ctx context.Context) uint32 {
+	return a.config.MemorySize
+}
+
 func (a *Acrn) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) {
 	return 0, MemoryDevice{}, nil
 }
@@ -1598,6 +1598,16 @@ func (clh *cloudHypervisor) cleanupVM(force bool) error {
 	return nil
 }
 
+func (clh *cloudHypervisor) GetTotalMemoryMB(ctx context.Context) uint32 {
+	vminfo, err := clh.vmInfo()
+	if err != nil {
+		clh.Logger().WithError(err).Error("failed to get vminfo")
+		return 0
+	}
+
+	return uint32(vminfo.GetMemoryActualSize() >> utils.MibToBytesShift)
+}
+
 // vmInfo ask to hypervisor for current VM status
 func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) {
 	cl := clh.client()
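Cloud Hypervisor reports the guest's actual memory size in bytes, so the new GetTotalMemoryMB converts it to MiB with a right shift. Below is a tiny standalone sketch of that conversion, assuming utils.MibToBytesShift is the usual 20-bit shift (1 MiB = 2^20 bytes); the constant name and value here are local assumptions, not the runtime's own declaration.

package main

import "fmt"

func main() {
	// Assumption: utils.MibToBytesShift == 20, i.e. 1 MiB = 1 << 20 bytes.
	const mibToBytesShift = 20

	actualSizeBytes := int64(2 << 30) // e.g. a 2 GiB guest as reported by the hypervisor
	fmt.Println(uint32(actualSizeBytes>>mibToBytesShift), "MiB") // prints: 2048 MiB
}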
@@ -1165,6 +1165,10 @@ func (fc *firecracker) HypervisorConfig() HypervisorConfig {
 	return fc.config
 }
 
+func (fc *firecracker) GetTotalMemoryMB(ctx context.Context) uint32 {
+	return fc.config.MemorySize
+}
+
 func (fc *firecracker) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) {
 	return 0, MemoryDevice{}, nil
 }
@@ -922,6 +922,7 @@ type Hypervisor interface {
 	HotplugRemoveDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error)
 	ResizeMemory(ctx context.Context, memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error)
 	ResizeVCPUs(ctx context.Context, vcpus uint32) (uint32, uint32, error)
+	GetTotalMemoryMB(ctx context.Context) uint32
 	GetVMConsole(ctx context.Context, sandboxID string) (string, string, error)
 	Disconnect(ctx context.Context)
 	Capabilities(ctx context.Context) types.Capabilities
@@ -17,6 +17,7 @@ import (
 var MockHybridVSockPath = "/tmp/kata-mock-hybrid-vsock.socket"
 
 type mockHypervisor struct {
+	config  HypervisorConfig
 	mockPid int
 }
 
@@ -27,10 +28,11 @@ func (m *mockHypervisor) Capabilities(ctx context.Context) types.Capabilities {
 }
 
 func (m *mockHypervisor) HypervisorConfig() HypervisorConfig {
-	return HypervisorConfig{}
+	return m.config
 }
 
 func (m *mockHypervisor) setConfig(config *HypervisorConfig) error {
+	m.config = *config
 	return nil
 }
 
@@ -38,7 +40,7 @@ func (m *mockHypervisor) CreateVM(ctx context.Context, id string, network Networ
 	if err := m.setConfig(hypervisorConfig); err != nil {
 		return err
 	}
-
+	m.config.MemSlots = 0
 	return nil
 }
 
@@ -92,12 +94,20 @@ func (m *mockHypervisor) GetVMConsole(ctx context.Context, sandboxID string) (st
 }
 
 func (m *mockHypervisor) ResizeMemory(ctx context.Context, memMB uint32, memorySectionSizeMB uint32, probe bool) (uint32, MemoryDevice, error) {
+	if m.config.MemorySize != memMB {
+		// For testing, we'll use MemSlots to track how many times we resized memory
+		m.config.MemSlots += 1
+		m.config.MemorySize = memMB
+	}
 	return 0, MemoryDevice{}, nil
 }
 func (m *mockHypervisor) ResizeVCPUs(ctx context.Context, cpus uint32) (uint32, uint32, error) {
 	return 0, 0, nil
 }
 
+func (m *mockHypervisor) GetTotalMemoryMB(ctx context.Context) uint32 {
+	return m.config.MemorySize
+}
 func (m *mockHypervisor) Disconnect(ctx context.Context) {
 }
 
@@ -14,7 +14,7 @@ import (
 )
 
 func TestMockHypervisorCreateVM(t *testing.T) {
-	var m *mockHypervisor
+	m := &mockHypervisor{}
 	assert := assert.New(t)
 
 	sandbox := &Sandbox{
@@ -2166,6 +2166,10 @@ func (q *qemu) Disconnect(ctx context.Context) {
 	q.qmpShutdown()
 }
 
+func (q *qemu) GetTotalMemoryMB(ctx context.Context) uint32 {
+	return q.config.MemorySize + uint32(q.state.HotpluggedMemory)
+}
+
 // ResizeMemory gets a request to update the VM memory to reqMemMB
 // Memory update is managed with two approaches
 // Add memory to VM:
@@ -2179,7 +2183,7 @@ func (q *qemu) Disconnect(ctx context.Context) {
 // A longer term solution is evaluate solutions like virtio-mem
 func (q *qemu) ResizeMemory(ctx context.Context, reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, MemoryDevice, error) {
 
-	currentMemory := q.config.MemorySize + uint32(q.state.HotpluggedMemory)
+	currentMemory := q.GetTotalMemoryMB(ctx)
 	if err := q.qmpSetup(); err != nil {
 		return 0, MemoryDevice{}, err
 	}
@@ -77,6 +77,14 @@ const (
 
 	// Restricted permission for shared directory managed by virtiofs
 	sharedDirMode = os.FileMode(0700) | os.ModeDir
+
+	// hotplug factor indicates how much memory can be hotplugged relative to the amount of
+	// RAM provided to the guest. This is a conservative heuristic based on needing 64 bytes per
+	// 4KiB page of hotplugged memory.
+	//
+	// As an example: 12 GiB hotplugged -> 3 Mi pages -> 192 MiBytes overhead (3Mi x 64B).
+	// This is approximately what should be free in a relatively unloaded 256 MiB guest (75% of available memory). So, 256 Mi x 48 => 12 Gi
+	acpiMemoryHotplugFactor = 48
 )
 
 var (
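To make the factor of 48 concrete, the arithmetic from the comment above can be checked with a short standalone sketch (not part of the change): hotplugging memory costs roughly 64 bytes of mem_map per 4 KiB page, so a mostly idle 256 MiB guest can absorb about 48 times its current size in one hotplug. The constants below are taken from that comment, not from the runtime code.

package main

import "fmt"

func main() {
	const (
		guestMiB      = 256 // example guest size from the comment above
		hotplugFactor = 48  // acpiMemoryHotplugFactor
		bytesPerPage  = 64  // mem_map overhead per hotplugged page
		pageSizeKiB   = 4
	)

	hotplugMiB := guestMiB * hotplugFactor              // 12288 MiB = 12 GiB
	pages := hotplugMiB * 1024 / pageSizeKiB            // 3145728 pages (3 Mi)
	overheadMiB := pages * bytesPerPage / (1024 * 1024) // 192 MiB

	fmt.Printf("hotplug %d MiB -> %d pages -> %d MiB of mem_map (%d%% of the %d MiB guest)\n",
		hotplugMiB, pages, overheadMiB, 100*overheadMiB/guestMiB, guestMiB)
}

This prints 12288 MiB -> 3145728 pages -> 192 MiB, i.e. about 75% of a 256 MiB guest, which is where the conservative factor of 48 comes from.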
@@ -2012,9 +2020,60 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
 	}
 	s.Logger().Debugf("Sandbox CPUs: %d", newCPUs)
 
-	// Update Memory
-	s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory")
+	// Update Memory --
+	// If we're using ACPI hotplug for memory, there's a limitation on the amount of memory which can be hotplugged at a single time.
+	// We must have enough free memory in the guest kernel to cover 64bytes per (4KiB) page of memory added for mem_map.
+	// See https://github.com/kata-containers/kata-containers/issues/4847 for more details.
+	// For a typical pod lifecycle, we expect that each container is added when we start the workloads. Based on this, we'll "assume" that majority
+	// of the guest memory is readily available. From experimentation, we see that we can add approximately 48 times what is already provided to
+	// the guest workload. For example, a 256 MiB guest should be able to accommodate hotplugging 12 GiB of memory.
+	//
+	// If virtio-mem is being used, there isn't such a limitation - we can hotplug the maximum allowed memory at a single time.
+	//
 	newMemoryMB := uint32(sandboxMemoryByte >> utils.MibToBytesShift)
+	finalMemoryMB := newMemoryMB
+
+	hconfig := s.hypervisor.HypervisorConfig()
+
+	for {
+		currentMemoryMB := s.hypervisor.GetTotalMemoryMB(ctx)
+
+		maxhotPluggableMemoryMB := currentMemoryMB * acpiMemoryHotplugFactor
+
+		// In the case of virtio-mem, we don't have a restriction on how much can be hotplugged at
+		// a single time. As a result, the max hotpluggable is only limited by the maximum memory size
+		// of the guest.
+		if hconfig.VirtioMem {
+			maxhotPluggableMemoryMB = uint32(hconfig.DefaultMaxMemorySize) - currentMemoryMB
+		}
+
+		deltaMB := int32(finalMemoryMB - currentMemoryMB)
+
+		if deltaMB > int32(maxhotPluggableMemoryMB) {
+			s.Logger().Warnf("Large hotplug. Adding %d MB of %d total memory", maxhotPluggableMemoryMB, deltaMB)
+			newMemoryMB = currentMemoryMB + maxhotPluggableMemoryMB
+		} else {
+			newMemoryMB = finalMemoryMB
+		}
+
+		// Add the memory to the guest and online the memory:
+		if err := s.updateMemory(ctx, newMemoryMB); err != nil {
+			return err
+		}
+
+		if newMemoryMB == finalMemoryMB {
+			break
+		}
+	}
+
+	return nil
+}
+
+func (s *Sandbox) updateMemory(ctx context.Context, newMemoryMB uint32) error {
+	// online the memory:
+	s.Logger().WithField("memory-sandbox-size-mb", newMemoryMB).Debugf("Request to hypervisor to update memory")
 	newMemory, updatedMemoryDevice, err := s.hypervisor.ResizeMemory(ctx, newMemoryMB, s.state.GuestMemoryBlockSizeMB, s.state.GuestMemoryHotplugProbe)
 	if err != nil {
 		if err == noGuestMemHotplugErr {
@@ -2034,7 +2093,6 @@ func (s *Sandbox) updateResources(ctx context.Context) error {
 	if err := s.agent.onlineCPUMem(ctx, 0, false); err != nil {
 		return err
 	}
-
 	return nil
 }
 
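Since the hunk above replaces the single resize call with a loop that grows the guest by at most currentMemory x 48 per iteration, it helps to trace one pass by hand. The following is a minimal standalone simulation under assumed numbers (a guest starting at 1 MiB that must reach 4097 MiB, mirroring the test change below); it is not the runtime code itself, only an illustration of why exactly three hotplugs are expected (+48 MiB, +2352 MiB, then the remainder).

package main

import "fmt"

func main() {
	const acpiMemoryHotplugFactor = 48

	currentMiB := uint32(1)  // assumed starting guest memory
	finalMiB := uint32(4097) // assumed target after adding ~4 GiB of container limits

	for hotplugs := 1; ; hotplugs++ {
		maxHotpluggableMiB := currentMiB * acpiMemoryHotplugFactor

		next := finalMiB
		if finalMiB-currentMiB > maxHotpluggableMiB {
			next = currentMiB + maxHotpluggableMiB
		}

		fmt.Printf("hotplug #%d: %d MiB -> %d MiB (+%d MiB)\n", hotplugs, currentMiB, next, next-currentMiB)
		currentMiB = next

		if currentMiB == finalMiB {
			break
		}
	}
}

With these numbers the loop prints three steps, 1 -> 49, 49 -> 2401, 2401 -> 4097, matching the MemSlots == 3 assertion added to the test below.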
@@ -41,6 +41,7 @@ func newHypervisorConfig(kernelParams []Param, hParams []Param) HypervisorConfig
 		HypervisorPath: filepath.Join(testDir, testHypervisor),
 		KernelParams: kernelParams,
 		HypervisorParams: hParams,
+		MemorySize: 1,
 	}
 
 }
@@ -1360,7 +1361,6 @@ func TestSandboxUpdateResources(t *testing.T) {
 	contConfig1 := newTestContainerConfigNoop("cont-00001")
 	contConfig2 := newTestContainerConfigNoop("cont-00002")
 	hConfig := newHypervisorConfig(nil, nil)
-
 	defer cleanUp()
 	// create a sandbox
 	s, err := testCreateSandbox(t,
@@ -1370,28 +1370,37 @@ func TestSandboxUpdateResources(t *testing.T) {
 		NetworkConfig{},
 		[]ContainerConfig{contConfig1, contConfig2},
 		nil)
 
 	assert.NoError(t, err)
 
 	err = s.updateResources(context.Background())
 	assert.NoError(t, err)
 
-	containerMemLimit := int64(1000)
+	// For the mock hypervisor, we expect MemSlots to be 0 since the memory wasn't changed.
+	assert.Equal(t, s.hypervisor.HypervisorConfig().MemSlots, uint32(0))
+
+	containerMemLimit := int64(4 * 1024 * 1024 * 1024)
 	containerCPUPeriod := uint64(1000)
 	containerCPUQouta := int64(5)
-	for _, c := range s.config.Containers {
-		c.Resources.Memory = &specs.LinuxMemory{
+	for idx := range s.config.Containers {
+		s.config.Containers[idx].Resources.Memory = &specs.LinuxMemory{
 			Limit: new(int64),
 		}
-		c.Resources.CPU = &specs.LinuxCPU{
+		s.config.Containers[idx].Resources.CPU = &specs.LinuxCPU{
 			Period: new(uint64),
 			Quota: new(int64),
 		}
-		c.Resources.Memory.Limit = &containerMemLimit
-		c.Resources.CPU.Period = &containerCPUPeriod
-		c.Resources.CPU.Quota = &containerCPUQouta
+		s.config.Containers[idx].Resources.Memory.Limit = &containerMemLimit
+		s.config.Containers[idx].Resources.CPU.Period = &containerCPUPeriod
+		s.config.Containers[idx].Resources.CPU.Quota = &containerCPUQouta
 	}
 	err = s.updateResources(context.Background())
 	assert.NoError(t, err)
+
+	// Since we're starting with a memory of 1 MB, we expect it to take 3 hotplugs to add 4GiB of memory when using ACPI hotplug:
+	// +48MB
+	// +2352MB
+	// +the remaining
+	assert.Equal(t, s.hypervisor.HypervisorConfig().MemSlots, uint32(3))
 }
 
 func TestSandboxExperimentalFeature(t *testing.T) {