Merge pull request #296 from devimc/cpu/fixMemFootprint
virtcontainers/qemu: reduce memory footprint
@@ -92,6 +92,8 @@ PROXYPATH := $(PKGLIBEXECDIR)/$(PROXYCMD)

 # Default number of vCPUs
 DEFVCPUS := 1
+# Default maximum number of vCPUs
+DEFMAXVCPUS := 0
 # Default memory size in MiB
 DEFMEMSZ := 2048
 #Default number of bridges
@@ -169,6 +171,7 @@ USER_VARS += SHAREDIR
 USER_VARS += SHIMPATH
 USER_VARS += SYSCONFDIR
 USER_VARS += DEFVCPUS
+USER_VARS += DEFMAXVCPUS
 USER_VARS += DEFMEMSZ
 USER_VARS += DEFBRIDGES
 USER_VARS += DEFNETWORKMODEL
@@ -262,6 +265,7 @@ const defaultMachineType = "$(MACHINETYPE)"
 const defaultRootDirectory = "$(PKGRUNDIR)"

 const defaultVCPUCount uint32 = $(DEFVCPUS)
+const defaultMaxVCPUCount uint32 = $(DEFMAXVCPUS)
 const defaultMemSize uint32 = $(DEFMEMSZ) // MiB
 const defaultBridgesCount uint32 = $(DEFBRIDGES)
 const defaultInterNetworkingModel = "$(DEFNETWORKMODEL)"
@@ -347,6 +351,7 @@ $(GENERATED_FILES): %: %.in Makefile VERSION
     -e "s|@MACHINETYPE@|$(MACHINETYPE)|g" \
     -e "s|@SHIMPATH@|$(SHIMPATH)|g" \
     -e "s|@DEFVCPUS@|$(DEFVCPUS)|g" \
+    -e "s|@DEFMAXVCPUS@|$(DEFMAXVCPUS)|g" \
     -e "s|@DEFMEMSZ@|$(DEFMEMSZ)|g" \
     -e "s|@DEFBRIDGES@|$(DEFBRIDGES)|g" \
     -e "s|@DEFNETWORKMODEL@|$(DEFNETWORKMODEL)|g" \
@@ -74,6 +74,7 @@ type hypervisor struct {
    KernelParams string `toml:"kernel_params"`
    MachineType string `toml:"machine_type"`
    DefaultVCPUs int32 `toml:"default_vcpus"`
+   DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
    DefaultMemSz uint32 `toml:"default_memory"`
    DefaultBridges uint32 `toml:"default_bridges"`
    Msize9p uint32 `toml:"msize_9p"`
@@ -202,6 +203,25 @@ func (h hypervisor) defaultVCPUs() uint32 {
    return uint32(h.DefaultVCPUs)
 }

+func (h hypervisor) defaultMaxVCPUs() uint32 {
+   numcpus := uint32(goruntime.NumCPU())
+   maxvcpus := vc.MaxQemuVCPUs()
+   reqVCPUs := h.DefaultMaxVCPUs
+
+   // Don't exceed the number of physical CPUs. If a default is not provided,
+   // use the number of physical CPUs.
+   if reqVCPUs >= numcpus || reqVCPUs == 0 {
+       reqVCPUs = numcpus
+   }
+
+   // Don't exceed the maximum number of vCPUs supported by the hypervisor.
+   if reqVCPUs > maxvcpus {
+       return maxvcpus
+   }
+
+   return reqVCPUs
+}
+
 func (h hypervisor) defaultMemSz() uint32 {
    if h.DefaultMemSz < 8 {
        return defaultMemSize // MiB
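For reference, the clamping rule introduced by `defaultMaxVCPUs()` can be shown in isolation. The sketch below is a hypothetical, self-contained version of the same logic; `physicalCPUs` and `hypervisorMax` are illustrative stand-ins for `goruntime.NumCPU()` and `vc.MaxQemuVCPUs()`.

```go
package main

import "fmt"

// clampMaxVCPUs mirrors defaultMaxVCPUs(): a requested value of 0 (or one
// above the physical CPU count) falls back to the physical CPU count, and the
// result is never allowed to exceed the hypervisor's own limit.
func clampMaxVCPUs(requested, physicalCPUs, hypervisorMax uint32) uint32 {
	if requested >= physicalCPUs || requested == 0 {
		requested = physicalCPUs
	}
	if requested > hypervisorMax {
		return hypervisorMax
	}
	return requested
}

func main() {
	// e.g. default_maxvcpus = 0 on an 8-core host with a 240 vCPU QEMU limit
	fmt.Println(clampMaxVCPUs(0, 8, 240))   // 8
	fmt.Println(clampMaxVCPUs(4, 8, 240))   // 4
	fmt.Println(clampMaxVCPUs(500, 8, 240)) // 8
}
```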
@@ -313,6 +333,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
        KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
        HypervisorMachineType: machineType,
        DefaultVCPUs: h.defaultVCPUs(),
+       DefaultMaxVCPUs: h.defaultMaxVCPUs(),
        DefaultMemSz: h.defaultMemSz(),
        DefaultBridges: h.defaultBridges(),
        DisableBlockDeviceUse: h.DisableBlockDeviceUse,
@@ -418,6 +439,7 @@ func loadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
        MachineAccelerators: defaultMachineAccelerators,
        HypervisorMachineType: defaultMachineType,
        DefaultVCPUs: defaultVCPUCount,
+       DefaultMaxVCPUs: defaultMaxVCPUCount,
        DefaultMemSz: defaultMemSize,
        DefaultBridges: defaultBridgesCount,
        MemPrealloc: defaultEnableMemPrealloc,
@@ -45,6 +45,21 @@ machine_accelerators="@MACHINEACCELERATORS@"
 # > number of physical cores --> will be set to the actual number of physical cores
 default_vcpus = 1

+# Default maximum number of vCPUs per SB/VM:
+# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# > 0 <= number of physical cores --> will be set to the specified number
+# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
+# of vCPUs supported by KVM if that number is exceeded
+# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when
+# the actual number of physical cores is greater than it.
+# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
+# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs
+# can be added to a SB/VM, but the memory footprint will be big. Another example, with
+# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
+# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
+# unless you know what you are doing.
+default_maxvcpus = @DEFMAXVCPUS@

 # Bridges can be used to hot plug devices.
 # Limitations:
@@ -44,6 +44,7 @@ func makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath
    image = "` + imagePath + `"
    machine_type = "` + machineType + `"
    default_vcpus = ` + strconv.FormatUint(uint64(defaultVCPUCount), 10) + `
+   default_maxvcpus = ` + strconv.FormatUint(uint64(defaultMaxVCPUCount), 10) + `
    default_memory = ` + strconv.FormatUint(uint64(defaultMemSize), 10) + `
    disable_block_device_use = ` + strconv.FormatBool(disableBlock) + `
    enable_iothreads = ` + strconv.FormatBool(enableIOThreads) + `
@@ -129,6 +130,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
        KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
        HypervisorMachineType: machineType,
        DefaultVCPUs: defaultVCPUCount,
+       DefaultMaxVCPUs: uint32(goruntime.NumCPU()),
        DefaultMemSz: defaultMemSize,
        DisableBlockDeviceUse: disableBlockDevice,
        BlockDeviceDriver: defaultBlockDeviceDriver,
@@ -513,6 +515,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
        InitrdPath: defaultInitrdPath,
        HypervisorMachineType: defaultMachineType,
        DefaultVCPUs: defaultVCPUCount,
+       DefaultMaxVCPUs: defaultMaxVCPUCount,
        DefaultMemSz: defaultMemSize,
        DisableBlockDeviceUse: defaultDisableBlockDeviceUse,
        DefaultBridges: defaultBridgesCount,
@@ -658,10 +661,13 @@ func TestNewShimConfig(t *testing.T) {
 func TestHypervisorDefaults(t *testing.T) {
    assert := assert.New(t)

+   numCPUs := goruntime.NumCPU()
+
    h := hypervisor{}

    assert.Equal(h.machineType(), defaultMachineType, "default hypervisor machine type wrong")
    assert.Equal(h.defaultVCPUs(), defaultVCPUCount, "default vCPU number is wrong")
+   assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong")
    assert.Equal(h.defaultMemSz(), defaultMemSize, "default memory size is wrong")

    machineType := "foo"
@@ -670,15 +676,24 @@ func TestHypervisorDefaults(t *testing.T) {

    // auto inferring
    h.DefaultVCPUs = -1
-   assert.Equal(h.defaultVCPUs(), uint32(goruntime.NumCPU()), "default vCPU number is wrong")
+   assert.Equal(h.defaultVCPUs(), uint32(numCPUs), "default vCPU number is wrong")

    h.DefaultVCPUs = 2
    assert.Equal(h.defaultVCPUs(), uint32(2), "default vCPU number is wrong")

-   numCPUs := goruntime.NumCPU()
    h.DefaultVCPUs = int32(numCPUs) + 1
    assert.Equal(h.defaultVCPUs(), uint32(numCPUs), "default vCPU number is wrong")

+   h.DefaultMaxVCPUs = 2
+   assert.Equal(h.defaultMaxVCPUs(), uint32(h.DefaultMaxVCPUs), "default max vCPU number is wrong")
+
+   h.DefaultMaxVCPUs = uint32(numCPUs) + 1
+   assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong")
+
+   maxvcpus := vc.MaxQemuVCPUs()
+   h.DefaultMaxVCPUs = uint32(maxvcpus) + 1
+   assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong")
+
    h.DefaultMemSz = 1024
    assert.Equal(h.defaultMemSz(), uint32(1024), "default memory size is wrong")
 }
@@ -57,15 +57,11 @@ type ContainerStatus struct {

 // ContainerResources describes container resources
 type ContainerResources struct {
-   // CPUQuota specifies the total amount of time in microseconds
-   // The number of microseconds per CPUPeriod that the container is guaranteed CPU access
-   CPUQuota int64
-
-   // CPUPeriod specifies the CPU CFS scheduler period of time in microseconds
-   CPUPeriod uint64
-
-   // CPUShares specifies container's weight vs. other containers
-   CPUShares uint64
+   // VCPUs are the number of vCPUs that are being used by the container
+   VCPUs uint32
+
+   // Mem is the memory that is being used by the container
+   Mem uint32
 }

 // ContainerConfig describes one container runtime configuration.
@@ -804,8 +800,7 @@ func (c *Container) update(resources specs.LinuxResources) error {
    }

    newResources := ContainerResources{
-       CPUPeriod: *resources.CPU.Period,
-       CPUQuota: *resources.CPU.Quota,
+       VCPUs: uint32(utils.ConstraintsToVCPUs(*resources.CPU.Quota, *resources.CPU.Period)),
    }

    if err := c.updateResources(currentConfig.Resources, newResources); err != nil {
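The patch derives `VCPUs` from the CFS quota and period via `utils.ConstraintsToVCPUs`. Below is a plausible, self-contained sketch of that conversion, assuming it rounds the quota/period ratio up so that a fractional CPU reservation still gets a whole vCPU (the test data elsewhere in this patch, quota 5000 with period 1000 yielding 5 vCPUs, is consistent with this); the real helper in `virtcontainers/utils` may differ in detail.

```go
package main

import "fmt"

// constraintsToVCPUs is an illustrative quota/period to vCPU conversion:
// quota is the CPU time (µs) allowed per period (µs), so the ratio is the
// number of CPUs worth of time, rounded up to a whole vCPU.
func constraintsToVCPUs(quota int64, period uint64) uint {
	if quota <= 0 || period == 0 {
		return 0
	}
	return uint((uint64(quota) + period - 1) / period)
}

func main() {
	fmt.Println(constraintsToVCPUs(5000, 1000))     // 5
	fmt.Println(constraintsToVCPUs(150000, 100000)) // 2 (1.5 CPUs rounds up)
	fmt.Println(constraintsToVCPUs(0, 100000))      // 0 (no constraint)
}
```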
@@ -866,7 +861,7 @@ func (c *Container) hotplugDrive() error {
        Index: driveIndex,
    }

-   if err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil {
+   if _, err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil {
        return err
    }
@@ -903,7 +898,7 @@ func (c *Container) removeDrive() (err error) {
    l := c.Logger().WithField("device-id", devID)
    l.Info("Unplugging block device")

-   if err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil {
+   if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil {
        l.WithError(err).Info("Failed to unplug block device")
        return err
    }
@@ -938,14 +933,31 @@ func (c *Container) addResources() error {
        return nil
    }

-   vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod)
+   // Container is being created, try to add the number of vCPUs specified
+   vCPUs := c.config.Resources.VCPUs
    if vCPUs != 0 {
        virtLog.Debugf("hot adding %d vCPUs", vCPUs)
-       if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil {
+       data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev)
+       if err != nil {
            return err
        }

-       return c.sandbox.agent.onlineCPUMem(uint32(vCPUs))
+       vcpusAdded, ok := data.(uint32)
+       if !ok {
+           return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data)
+       }
+
+       // A different number of vCPUs was added; update the resources
+       // so that we don't remove vCPUs used by other containers.
+       if vcpusAdded != vCPUs {
+           // Set and save container's config
+           c.config.Resources.VCPUs = vcpusAdded
+           if err := c.storeContainer(); err != nil {
+               return err
+           }
+       }
+
+       return c.sandbox.agent.onlineCPUMem(vcpusAdded)
    }

    return nil
@@ -957,10 +969,18 @@ func (c *Container) removeResources() error {
        return nil
    }

-   vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod)
+   // In order not to remove vCPUs used by other containers, remove
+   // only the vCPUs assigned to this container.
+   config, err := c.sandbox.storage.fetchContainerConfig(c.sandbox.id, c.id)
+   if err != nil {
+       // don't fail, let's use the default configuration
+       config = *c.config
+   }
+
+   vCPUs := config.Resources.VCPUs
    if vCPUs != 0 {
        virtLog.Debugf("hot removing %d vCPUs", vCPUs)
-       if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil {
+       if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev); err != nil {
            return err
        }
    }
@@ -970,9 +990,9 @@

 func (c *Container) updateResources(oldResources, newResources ContainerResources) error {
    //TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578
-   var vCPUs uint
-   oldVCPUs := utils.ConstraintsToVCPUs(oldResources.CPUQuota, oldResources.CPUPeriod)
-   newVCPUs := utils.ConstraintsToVCPUs(newResources.CPUQuota, newResources.CPUPeriod)
+   var vCPUs uint32
+   oldVCPUs := oldResources.VCPUs
+   newVCPUs := newResources.VCPUs

    // Update vCPUs is not possible if period and/or quota are not set or
    // oldVCPUs and newVCPUs are equal.
@@ -989,23 +1009,36 @@ func (c *Container) updateResources(oldResources, newResources ContainerResource
        // hot add vCPUs
        vCPUs = newVCPUs - oldVCPUs
        virtLog.Debugf("hot adding %d vCPUs", vCPUs)
-       if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil {
+       data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev)
+       if err != nil {
            return err
        }
+       vcpusAdded, ok := data.(uint32)
+       if !ok {
+           return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data)
+       }
+       // recalculate the actual number of vCPUs if a different number of vCPUs was added
+       newResources.VCPUs = oldVCPUs + vcpusAdded
+       if err := c.sandbox.agent.onlineCPUMem(vcpusAdded); err != nil {
+           return err
+       }
    } else {
        // hot remove vCPUs
        vCPUs = oldVCPUs - newVCPUs
        virtLog.Debugf("hot removing %d vCPUs", vCPUs)
-       if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil {
+       data, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev)
+       if err != nil {
            return err
        }
+       vcpusRemoved, ok := data.(uint32)
+       if !ok {
+           return fmt.Errorf("Could not get the number of vCPUs removed, got %+v", data)
+       }
+       // recalculate the actual number of vCPUs if a different number of vCPUs was removed
+       newResources.VCPUs = oldVCPUs - vcpusRemoved
    }

    // Set and save container's config
    c.config.Resources = newResources
-   if err := c.storeContainer(); err != nil {
-       return err
-   }
-
-   return c.sandbox.agent.onlineCPUMem(uint32(vCPUs))
+   return c.storeContainer()
 }
@@ -284,7 +284,11 @@ func TestCheckSandboxRunningSuccessful(t *testing.T) {
 func TestContainerAddResources(t *testing.T) {
    assert := assert.New(t)

-   c := &Container{}
+   c := &Container{
+       sandbox: &Sandbox{
+           storage: &filesystem{},
+       },
+   }
    err := c.addResources()
    assert.Nil(err)

@@ -297,13 +301,16 @@
    err = c.addResources()
    assert.Nil(err)

+   vCPUs := uint32(5)
    c.config.Resources = ContainerResources{
-       CPUQuota: 5000,
-       CPUPeriod: 1000,
+       VCPUs: vCPUs,
    }
    c.sandbox = &Sandbox{
-       hypervisor: &mockHypervisor{},
-       agent: &noopAgent{},
+       hypervisor: &mockHypervisor{
+           vCPUs: vCPUs,
+       },
+       agent: &noopAgent{},
+       storage: &filesystem{},
    }
    err = c.addResources()
    assert.Nil(err)
@@ -312,7 +319,12 @@
 func TestContainerRemoveResources(t *testing.T) {
    assert := assert.New(t)

-   c := &Container{}
+   c := &Container{
+       sandbox: &Sandbox{
+           storage: &filesystem{},
+       },
+   }

    err := c.addResources()
    assert.Nil(err)

@@ -325,11 +337,18 @@
    err = c.removeResources()
    assert.Nil(err)

+   vCPUs := uint32(5)
    c.config.Resources = ContainerResources{
-       CPUQuota: 5000,
-       CPUPeriod: 1000,
+       VCPUs: vCPUs,
    }
-   c.sandbox = &Sandbox{hypervisor: &mockHypervisor{}}

+   c.sandbox = &Sandbox{
+       hypervisor: &mockHypervisor{
+           vCPUs: vCPUs,
+       },
+       storage: &filesystem{},
+   }

    err = c.removeResources()
    assert.Nil(err)
 }
@@ -430,17 +430,6 @@ func (h *hyper) startOneContainer(sandbox *Sandbox, c *Container) error {
        Process: process,
    }

-   if c.config.Resources.CPUQuota != 0 && c.config.Resources.CPUPeriod != 0 {
-       container.Constraints = hyperstart.Constraints{
-           CPUQuota: c.config.Resources.CPUQuota,
-           CPUPeriod: c.config.Resources.CPUPeriod,
-       }
-   }
-
-   if c.config.Resources.CPUShares != 0 {
-       container.Constraints.CPUShares = c.config.Resources.CPUShares
-   }
-
    container.SystemMountsInfo.BindMountDev = c.systemMountsInfo.BindMountDev

    if c.state.Fstype != "" {
@@ -41,7 +41,7 @@ const (
 )

 // In some architectures the maximum number of vCPUs depends on the number of physical cores.
-var defaultMaxQemuVCPUs = maxQemuVCPUs()
+var defaultMaxQemuVCPUs = MaxQemuVCPUs()

 // deviceType describes a virtualized device type.
 type deviceType int
@@ -499,8 +499,8 @@ type hypervisor interface {
    pauseSandbox() error
    resumeSandbox() error
    addDevice(devInfo interface{}, devType deviceType) error
-   hotplugAddDevice(devInfo interface{}, devType deviceType) error
-   hotplugRemoveDevice(devInfo interface{}, devType deviceType) error
+   hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error)
+   hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error)
    getSandboxConsole(sandboxID string) (string, error)
    capabilities() capabilities
 }
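With the interface change above, a hotplug call now hands back an `interface{}` describing what was actually plugged; for `cpuDev` it is the number of vCPUs added or removed. The hedged, self-contained sketch below shows the caller-side pattern the rest of this patch follows (type-assert the opaque value, error out if it is not what was expected). The `cpuHotplugger` interface, `cpuDev` constant and `fakeHypervisor` type are illustrative stand-ins, not part of the patch.

```go
package main

import "fmt"

// cpuHotplugger captures just the shape of hotplugAddDevice after this patch.
type cpuHotplugger interface {
	hotplugAddDevice(devInfo interface{}, devType int) (interface{}, error)
}

const cpuDev = 1 // illustrative device-type constant

type fakeHypervisor struct{ vCPUs uint32 }

func (f *fakeHypervisor) hotplugAddDevice(devInfo interface{}, devType int) (interface{}, error) {
	if devType == cpuDev {
		return f.vCPUs, nil // report how many vCPUs were actually added
	}
	return nil, nil
}

// addVCPUs requests `want` vCPUs and reports how many were actually added,
// which may be fewer if the maximum vCPU count was reached.
func addVCPUs(hv cpuHotplugger, want uint32) (uint32, error) {
	data, err := hv.hotplugAddDevice(want, cpuDev)
	if err != nil {
		return 0, err
	}
	added, ok := data.(uint32)
	if !ok {
		return 0, fmt.Errorf("could not get the number of vCPUs added, got %+v", data)
	}
	return added, nil
}

func main() {
	added, err := addVCPUs(&fakeHypervisor{vCPUs: 4}, 8)
	fmt.Println(added, err) // 4 <nil>: fewer vCPUs than requested were added
}
```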
@@ -6,6 +6,7 @@
 package virtcontainers

 type mockHypervisor struct {
+   vCPUs uint32
 }

 func (m *mockHypervisor) init(sandbox *Sandbox) error {
@@ -49,12 +50,20 @@ func (m *mockHypervisor) addDevice(devInfo interface{}, devType deviceType) erro
    return nil
 }

-func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) error {
-   return nil
+func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
+   switch devType {
+   case cpuDev:
+       return m.vCPUs, nil
+   }
+   return nil, nil
 }

-func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error {
-   return nil
+func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
+   switch devType {
+   case cpuDev:
+       return m.vCPUs, nil
+   }
+   return nil, nil
 }

 func (m *mockHypervisor) getSandboxConsole(sandboxID string) (string, error) {
@@ -23,6 +23,7 @@ import (
    "github.com/kata-containers/runtime/virtcontainers/device/config"
    vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
    dockershimAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations/dockershim"
+   "github.com/kata-containers/runtime/virtcontainers/utils"
 )

 type annotationContainerType struct {
@@ -562,11 +563,7 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det
    if ocispec.Linux.Resources.CPU != nil {
        if ocispec.Linux.Resources.CPU.Quota != nil &&
            ocispec.Linux.Resources.CPU.Period != nil {
-           resources.CPUQuota = *ocispec.Linux.Resources.CPU.Quota
-           resources.CPUPeriod = *ocispec.Linux.Resources.CPU.Period
-       }
-       if ocispec.Linux.Resources.CPU.Shares != nil {
-           resources.CPUShares = *ocispec.Linux.Resources.CPU.Shares
+           resources.VCPUs = uint32(utils.ConstraintsToVCPUs(*ocispec.Linux.Resources.CPU.Quota, *ocispec.Linux.Resources.CPU.Period))
        }
    }

@@ -122,6 +122,9 @@ func (q *qemu) kernelParameters() string {
    // use default parameters
    params = append(params, defaultKernelParameters...)

+   // set the maximum number of vCPUs
+   params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
+
    // add the params specified by the provided config. As the kernel
    // honours the last parameter value set and since the config-provided
    // params are added here, they will take priority over the defaults.
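The `nr_cpus=<DefaultMaxVCPUs>` parameter added above caps the number of possible CPUs the guest kernel reserves resources for, matching the hotplug ceiling configured for QEMU and so trimming the guest's memory footprint. The sketch below is only an illustration of how such a `Param` list ends up as a kernel command line; the real serialization lives elsewhere in the runtime and may differ.

```go
package main

import (
	"fmt"
	"strings"
)

// Param mirrors the simple key/value pair appended in kernelParameters().
type Param struct {
	Key   string
	Value string
}

// serializeParams joins parameters into the "key=value" form the guest kernel
// expects; keys without a value are emitted bare (e.g. "initcall_debug").
func serializeParams(params []Param) string {
	var parts []string
	for _, p := range params {
		if p.Value == "" {
			parts = append(parts, p.Key)
			continue
		}
		parts = append(parts, fmt.Sprintf("%s=%s", p.Key, p.Value))
	}
	return strings.Join(parts, " ")
}

func main() {
	defaultMaxVCPUs := uint32(8) // assumed value of q.config.DefaultMaxVCPUs
	params := []Param{
		{"panic", "1"},
		{"initcall_debug", ""},
		{"nr_cpus", fmt.Sprintf("%d", defaultMaxVCPUs)},
	}
	fmt.Println(serializeParams(params)) // panic=1 initcall_debug nr_cpus=8
}
```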
@@ -197,7 +200,7 @@ func (q *qemu) init(sandbox *Sandbox) error {
 }

 func (q *qemu) cpuTopology() govmmQemu.SMP {
-   return q.arch.cpuTopology(q.config.DefaultVCPUs)
+   return q.arch.cpuTopology(q.config.DefaultVCPUs, q.config.DefaultMaxVCPUs)
 }

 func (q *qemu) memoryTopology(sandboxConfig SandboxConfig) (govmmQemu.Memory, error) {
@@ -737,44 +740,46 @@ func (q *qemu) hotplugVFIODevice(device deviceDrivers.VFIODevice, op operation)
    return nil
 }

-func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) error {
+func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) (interface{}, error) {
    switch devType {
    case blockDev:
        // TODO: find a way to remove dependency of deviceDrivers lib @weizhang555
        drive := devInfo.(*deviceDrivers.Drive)
-       return q.hotplugBlockDevice(drive, op)
+       return nil, q.hotplugBlockDevice(drive, op)
    case cpuDev:
        vcpus := devInfo.(uint32)
        return q.hotplugCPUs(vcpus, op)
    case vfioDev:
        // TODO: find a way to remove dependency of deviceDrivers lib @weizhang555
        device := devInfo.(deviceDrivers.VFIODevice)
-       return q.hotplugVFIODevice(device, op)
+       return nil, q.hotplugVFIODevice(device, op)
    default:
-       return fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType)
+       return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType)
    }
 }

-func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) error {
-   if err := q.hotplugDevice(devInfo, devType, addDevice); err != nil {
-       return err
+func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
+   data, err := q.hotplugDevice(devInfo, devType, addDevice)
+   if err != nil {
+       return data, err
    }

-   return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
+   return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
 }

-func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error {
-   if err := q.hotplugDevice(devInfo, devType, removeDevice); err != nil {
-       return err
+func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
+   data, err := q.hotplugDevice(devInfo, devType, removeDevice)
+   if err != nil {
+       return data, err
    }

-   return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
+   return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
 }

-func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error {
+func (q *qemu) hotplugCPUs(vcpus uint32, op operation) (uint32, error) {
    if vcpus == 0 {
        q.Logger().Warnf("cannot hotplug 0 vCPUs")
-       return nil
+       return 0, nil
    }

    defer func(qemu *qemu) {
@@ -785,7 +790,7 @@

    qmp, err := q.qmpSetup()
    if err != nil {
-       return err
+       return 0, err
    }

    q.qmpMonitorCh.qmp = qmp
@@ -797,19 +802,28 @@
    return q.hotplugRemoveCPUs(vcpus)
 }

-func (q *qemu) hotplugAddCPUs(amount uint32) error {
+// try to hot add an amount of vCPUs; returns the number of vCPUs added
+func (q *qemu) hotplugAddCPUs(amount uint32) (uint32, error) {
    currentVCPUs := q.qemuConfig.SMP.CPUs + uint32(len(q.state.HotpluggedVCPUs))

-   // Don't exceed the maximum amount of vCPUs
+   // Don't fail if the maximum number of vCPUs would be exceeded: log a warning
+   // and hot add only the vCPUs needed to reach the maximum.
    if currentVCPUs+amount > q.config.DefaultMaxVCPUs {
-       return fmt.Errorf("Unable to hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d",
+       q.Logger().Warnf("Cannot hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d",
            amount, currentVCPUs, q.config.DefaultMaxVCPUs)
+       amount = q.config.DefaultMaxVCPUs - currentVCPUs
    }

+   if amount == 0 {
+       // Don't fail if no more vCPUs can be added, since cgroups still can be updated
+       q.Logger().Warnf("maximum number of vCPUs '%d' has been reached", q.config.DefaultMaxVCPUs)
+       return 0, nil
+   }
+
    // get the list of hotpluggable CPUs
    hotpluggableVCPUs, err := q.qmpMonitorCh.qmp.ExecuteQueryHotpluggableCPUs(q.qmpMonitorCh.ctx)
    if err != nil {
-       return fmt.Errorf("failed to query hotpluggable CPUs: %v", err)
+       return 0, fmt.Errorf("failed to query hotpluggable CPUs: %v", err)
    }

    var hotpluggedVCPUs uint32
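The reworked `hotplugAddCPUs` above no longer fails when a request would exceed `DefaultMaxVCPUs`; it clamps the request to the remaining headroom and reports how many vCPUs were actually added. The arithmetic is small enough to show on its own; the sketch below is illustrative, with `current` standing for boot vCPUs plus already hotplugged ones.

```go
package main

import "fmt"

// clampHotplug returns how many vCPUs can actually be hot added, given the
// vCPUs already present and the configured maximum: the request is trimmed
// rather than rejected, and 0 means the ceiling has been reached.
func clampHotplug(requested, current, max uint32) uint32 {
	if current+requested > max {
		requested = max - current
	}
	return requested
}

func main() {
	// 1 boot vCPU + 3 hotplugged, maximum of 8: asking for 10 yields only 4.
	fmt.Println(clampHotplug(10, 4, 8)) // 4
	// Already at the maximum: nothing more can be added.
	fmt.Println(clampHotplug(2, 8, 8)) // 0
}
```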
@@ -835,7 +849,7 @@
            hotpluggedVCPUs++
            if hotpluggedVCPUs == amount {
                // All vCPUs were hotplugged
-               return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
+               return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
            }
        }
@@ -844,29 +858,31 @@
        q.Logger().Errorf("failed to save hypervisor state after hotplug %d vCPUs: %v", hotpluggedVCPUs, err)
    }

-   return fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount)
+   return hotpluggedVCPUs, fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount)
 }

-func (q *qemu) hotplugRemoveCPUs(amount uint32) error {
+// try to hot remove an amount of vCPUs; returns the number of vCPUs removed
+func (q *qemu) hotplugRemoveCPUs(amount uint32) (uint32, error) {
    hotpluggedVCPUs := uint32(len(q.state.HotpluggedVCPUs))

    // we can only remove hotplugged vCPUs
    if amount > hotpluggedVCPUs {
-       return fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs)
+       return 0, fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs)
    }

    for i := uint32(0); i < amount; i++ {
        // get the last vCPU and try to remove it
        cpu := q.state.HotpluggedVCPUs[len(q.state.HotpluggedVCPUs)-1]
        if err := q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, cpu.ID); err != nil {
-           return fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err)
+           _ = q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
+           return i, fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err)
        }

        // remove from the list the vCPU hotunplugged
        q.state.HotpluggedVCPUs = q.state.HotpluggedVCPUs[:len(q.state.HotpluggedVCPUs)-1]
    }

-   return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
+   return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
 }

 func (q *qemu) pauseSandbox() error {
@@ -71,8 +71,8 @@ var supportedQemuMachines = []govmmQemu.Machine{
    },
 }

-// returns the maximum number of vCPUs supported
-func maxQemuVCPUs() uint32 {
+// MaxQemuVCPUs returns the maximum number of vCPUs supported
+func MaxQemuVCPUs() uint32 {
    return uint32(240)
 }

@@ -42,7 +42,7 @@ type qemuArch interface {
    bridges(number uint32) []Bridge

    // cpuTopology returns the CPU topology for the given amount of vcpus
-   cpuTopology(vcpus uint32) govmmQemu.SMP
+   cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP

    // cpuModel returns the CPU model for the machine type
    cpuModel() string
@@ -219,13 +219,13 @@ func (q *qemuArchBase) bridges(number uint32) []Bridge {
    return bridges
 }

-func (q *qemuArchBase) cpuTopology(vcpus uint32) govmmQemu.SMP {
+func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP {
    smp := govmmQemu.SMP{
        CPUs: vcpus,
        Sockets: vcpus,
        Cores: defaultCores,
        Threads: defaultThreads,
-       MaxCPUs: defaultMaxQemuVCPUs,
+       MaxCPUs: maxvcpus,
    }

    return smp
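Passing `maxvcpus` through to the SMP topology is what decouples the boot CPU count from the hotplug ceiling: QEMU starts with only `CPUs` vCPUs but reserves slots up to `MaxCPUs`, so memory is not spent on vCPUs that may never be added. A minimal sketch of the resulting topology follows, assuming the SMP struct maps to QEMU's `-smp <cpus>,...,maxcpus=<max>` option as govmm does; the `SMP` type and single core/thread defaults here are illustrative.

```go
package main

import "fmt"

// SMP mirrors the fields set by cpuTopology(): boot with `CPUs` vCPUs while
// allowing hot plug up to `MaxCPUs`.
type SMP struct {
	CPUs    uint32
	Sockets uint32
	Cores   uint32
	Threads uint32
	MaxCPUs uint32
}

// cpuTopology builds the same shape as qemuArchBase.cpuTopology, with one
// socket per boot vCPU and a single core/thread each (illustrative defaults).
func cpuTopology(vcpus, maxvcpus uint32) SMP {
	return SMP{
		CPUs:    vcpus,
		Sockets: vcpus,
		Cores:   1,
		Threads: 1,
		MaxCPUs: maxvcpus,
	}
}

func main() {
	smp := cpuTopology(1, 8)
	// Roughly equivalent to: -smp 1,sockets=1,cores=1,threads=1,maxcpus=8
	fmt.Printf("%+v\n", smp)
}
```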
@@ -169,7 +169,7 @@ func TestQemuArchBaseCPUTopology(t *testing.T) {
        MaxCPUs: defaultMaxQemuVCPUs,
    }

-   smp := qemuArchBase.cpuTopology(vcpus)
+   smp := qemuArchBase.cpuTopology(vcpus, defaultMaxQemuVCPUs)
    assert.Equal(expectedSMP, smp)
 }

@@ -42,8 +42,8 @@ var supportedQemuMachines = []govmmQemu.Machine{
    },
 }

-// returns the maximum number of vCPUs supported
-func maxQemuVCPUs() uint32 {
+// MaxQemuVCPUs returns the maximum number of vCPUs supported
+func MaxQemuVCPUs() uint32 {
    return uint32(runtime.NumCPU())
 }

@@ -52,7 +52,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin
 }

 func TestQemuKernelParameters(t *testing.T) {
-   expectedOut := "panic=1 initcall_debug foo=foo bar=bar"
+   expectedOut := fmt.Sprintf("panic=1 initcall_debug nr_cpus=%d foo=foo bar=bar", MaxQemuVCPUs())
    params := []Param{
        {
            Key: "foo",
@@ -128,7 +128,8 @@ func TestQemuCPUTopology(t *testing.T) {
    q := &qemu{
        arch: &qemuArchBase{},
        config: HypervisorConfig{
-           DefaultVCPUs: uint32(vcpus),
+           DefaultVCPUs: uint32(vcpus),
+           DefaultMaxVCPUs: uint32(vcpus),
        },
    }

@@ -137,7 +138,7 @@
        Sockets: uint32(vcpus),
        Cores: defaultCores,
        Threads: defaultThreads,
-       MaxCPUs: defaultMaxQemuVCPUs,
+       MaxCPUs: uint32(vcpus),
    }

    smp := q.cpuTopology()
@@ -1338,13 +1338,15 @@ func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType)
        if !ok {
            return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
        }
-       return s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev)
+       _, err := s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev)
+       return err
    case config.DeviceBlock:
        blockDevice, ok := device.(*drivers.BlockDevice)
        if !ok {
            return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
        }
-       return s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev)
+       _, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev)
+       return err
    case config.DeviceGeneric:
        // TODO: what?
        return nil
@@ -1361,13 +1363,15 @@ func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceTy
        if !ok {
            return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
        }
-       return s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev)
+       _, err := s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev)
+       return err
    case config.DeviceBlock:
        blockDevice, ok := device.(*drivers.BlockDevice)
        if !ok {
            return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
        }
-       return s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev)
+       _, err := s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev)
+       return err
    case config.DeviceGeneric:
        // TODO: what?
        return nil