From 07db945b0976cac912c78d57efe563672267c1f2 Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Wed, 9 May 2018 13:34:20 -0500 Subject: [PATCH 1/2] virtcontainers/qemu: reduce memory footprint There is a relation between the maximum number of vCPUs and the memory footprint, if QEMU maxcpus option and kernel nr_cpus cmdline argument are big, then memory footprint is big, this issue only occurs if CPU hotplug support is enabled in the kernel, might be because the kernel needs to allocate resources to watch all sockets waiting for a CPU to be connected (ACPI event). For example ``` +---------------+-------------------------+ | | Memory Footprint (KB) | +---------------+-------------------------+ | NR_CPUS=240 | 186501 | +---------------+-------------------------+ | NR_CPUS=8 | 110684 | +---------------+-------------------------+ ``` In order not to affect CPU hotplug and to allow users to have containers with the same number of physical CPUs, this patch tries to mitigate the big memory footprint by using the actual number of physical CPUs as the maximum number of vCPUs for each container if `default_maxvcpus` is <= 0 in the runtime configuration file, otherwise `default_maxvcpus` is used as the maximum number of vCPUs. 
Before this patch a container with 256MB of RAM ``` total used free shared buff/cache available Mem: 195M 40M 113M 26M 41M 112M Swap: 0B 0B 0B ``` With this patch ``` total used free shared buff/cache available Mem: 236M 11M 188M 26M 36M 186M Swap: 0B 0B 0B ``` fixes #295 Signed-off-by: Julio Montes --- Makefile | 5 +++++ cli/config.go | 22 ++++++++++++++++++++++ cli/config/configuration.toml.in | 15 +++++++++++++++ cli/config_test.go | 19 +++++++++++++++++-- virtcontainers/hypervisor.go | 2 +- virtcontainers/qemu.go | 5 ++++- virtcontainers/qemu_amd64.go | 4 ++-- virtcontainers/qemu_arch_base.go | 6 +++--- virtcontainers/qemu_arch_base_test.go | 2 +- virtcontainers/qemu_arm64.go | 4 ++-- virtcontainers/qemu_test.go | 7 ++++--- 11 files changed, 76 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index a9055ec236..a27ec3127c 100644 --- a/Makefile +++ b/Makefile @@ -92,6 +92,8 @@ PROXYPATH := $(PKGLIBEXECDIR)/$(PROXYCMD) # Default number of vCPUs DEFVCPUS := 1 +# Default maximum number of vCPUs +DEFMAXVCPUS := 0 # Default memory size in MiB DEFMEMSZ := 2048 #Default number of bridges @@ -169,6 +171,7 @@ USER_VARS += SHAREDIR USER_VARS += SHIMPATH USER_VARS += SYSCONFDIR USER_VARS += DEFVCPUS +USER_VARS += DEFMAXVCPUS USER_VARS += DEFMEMSZ USER_VARS += DEFBRIDGES USER_VARS += DEFNETWORKMODEL @@ -262,6 +265,7 @@ const defaultMachineType = "$(MACHINETYPE)" const defaultRootDirectory = "$(PKGRUNDIR)" const defaultVCPUCount uint32 = $(DEFVCPUS) +const defaultMaxVCPUCount uint32 = $(DEFMAXVCPUS) const defaultMemSize uint32 = $(DEFMEMSZ) // MiB const defaultBridgesCount uint32 = $(DEFBRIDGES) const defaultInterNetworkingModel = "$(DEFNETWORKMODEL)" @@ -347,6 +351,7 @@ $(GENERATED_FILES): %: %.in Makefile VERSION -e "s|@MACHINETYPE@|$(MACHINETYPE)|g" \ -e "s|@SHIMPATH@|$(SHIMPATH)|g" \ -e "s|@DEFVCPUS@|$(DEFVCPUS)|g" \ + -e "s|@DEFMAXVCPUS@|$(DEFMAXVCPUS)|g" \ -e "s|@DEFMEMSZ@|$(DEFMEMSZ)|g" \ -e "s|@DEFBRIDGES@|$(DEFBRIDGES)|g" \ -e 
"s|@DEFNETWORKMODEL@|$(DEFNETWORKMODEL)|g" \ diff --git a/cli/config.go b/cli/config.go index 9950d0ae40..40d64a788c 100644 --- a/cli/config.go +++ b/cli/config.go @@ -74,6 +74,7 @@ type hypervisor struct { KernelParams string `toml:"kernel_params"` MachineType string `toml:"machine_type"` DefaultVCPUs int32 `toml:"default_vcpus"` + DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"` DefaultMemSz uint32 `toml:"default_memory"` DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` @@ -202,6 +203,25 @@ func (h hypervisor) defaultVCPUs() uint32 { return uint32(h.DefaultVCPUs) } +func (h hypervisor) defaultMaxVCPUs() uint32 { + numcpus := uint32(goruntime.NumCPU()) + maxvcpus := vc.MaxQemuVCPUs() + reqVCPUs := h.DefaultMaxVCPUs + + //don't exceed the number of physical CPUs. If a default is not provided, use the + // numbers of physical CPUs + if reqVCPUs >= numcpus || reqVCPUs == 0 { + reqVCPUs = numcpus + } + + // Don't exceed the maximum number of vCPUs supported by hypervisor + if reqVCPUs > maxvcpus { + return maxvcpus + } + + return reqVCPUs +} + func (h hypervisor) defaultMemSz() uint32 { if h.DefaultMemSz < 8 { return defaultMemSize // MiB @@ -313,6 +333,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), HypervisorMachineType: machineType, DefaultVCPUs: h.defaultVCPUs(), + DefaultMaxVCPUs: h.defaultMaxVCPUs(), DefaultMemSz: h.defaultMemSz(), DefaultBridges: h.defaultBridges(), DisableBlockDeviceUse: h.DisableBlockDeviceUse, @@ -418,6 +439,7 @@ func loadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat MachineAccelerators: defaultMachineAccelerators, HypervisorMachineType: defaultMachineType, DefaultVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: defaultMaxVCPUCount, DefaultMemSz: defaultMemSize, DefaultBridges: defaultBridgesCount, MemPrealloc: defaultEnableMemPrealloc, diff --git a/cli/config/configuration.toml.in 
b/cli/config/configuration.toml.in index 4b310aa208..b6e91a31ac 100644 --- a/cli/config/configuration.toml.in +++ b/cli/config/configuration.toml.in @@ -45,6 +45,21 @@ machine_accelerators="@MACHINEACCELERATORS@" # > number of physical cores --> will be set to the actual number of physical cores default_vcpus = 1 +# Default maximum number of vCPUs per SB/VM: +# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# > 0 <= number of physical cores --> will be set to the specified number +# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number +# of vCPUs supported by KVM if that number is exceeded +# WARNING: Depending on the architecture, the maximum number of vCPUs supported by KVM is used when +# the actual number of physical cores is greater than it. +# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU +# hotplug functionality. For example, `default_maxvcpus = 240` specifies that up to 240 vCPUs +# can be added to a SB/VM, but the memory footprint will be big. Another example, with +# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of +# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable, +# unless you know what you are doing. +default_maxvcpus = @DEFMAXVCPUS@ # Bridges can be used to hot plug devices. 
# Limitations: diff --git a/cli/config_test.go b/cli/config_test.go index fc24137a42..5dbdf7524d 100644 --- a/cli/config_test.go +++ b/cli/config_test.go @@ -44,6 +44,7 @@ func makeRuntimeConfigFileData(hypervisor, hypervisorPath, kernelPath, imagePath image = "` + imagePath + `" machine_type = "` + machineType + `" default_vcpus = ` + strconv.FormatUint(uint64(defaultVCPUCount), 10) + ` + default_maxvcpus = ` + strconv.FormatUint(uint64(defaultMaxVCPUCount), 10) + ` default_memory = ` + strconv.FormatUint(uint64(defaultMemSize), 10) + ` disable_block_device_use = ` + strconv.FormatBool(disableBlock) + ` enable_iothreads = ` + strconv.FormatBool(enableIOThreads) + ` @@ -129,6 +130,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), HypervisorMachineType: machineType, DefaultVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: uint32(goruntime.NumCPU()), DefaultMemSz: defaultMemSize, DisableBlockDeviceUse: disableBlockDevice, BlockDeviceDriver: defaultBlockDeviceDriver, @@ -513,6 +515,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { InitrdPath: defaultInitrdPath, HypervisorMachineType: defaultMachineType, DefaultVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: defaultMaxVCPUCount, DefaultMemSz: defaultMemSize, DisableBlockDeviceUse: defaultDisableBlockDeviceUse, DefaultBridges: defaultBridgesCount, @@ -658,10 +661,13 @@ func TestNewShimConfig(t *testing.T) { func TestHypervisorDefaults(t *testing.T) { assert := assert.New(t) + numCPUs := goruntime.NumCPU() + h := hypervisor{} assert.Equal(h.machineType(), defaultMachineType, "default hypervisor machine type wrong") assert.Equal(h.defaultVCPUs(), defaultVCPUCount, "default vCPU number is wrong") + assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong") assert.Equal(h.defaultMemSz(), defaultMemSize, "default memory size is wrong") machineType := "foo" @@ -670,15 +676,24 @@ func 
TestHypervisorDefaults(t *testing.T) { // auto inferring h.DefaultVCPUs = -1 - assert.Equal(h.defaultVCPUs(), uint32(goruntime.NumCPU()), "default vCPU number is wrong") + assert.Equal(h.defaultVCPUs(), uint32(numCPUs), "default vCPU number is wrong") h.DefaultVCPUs = 2 assert.Equal(h.defaultVCPUs(), uint32(2), "default vCPU number is wrong") - numCPUs := goruntime.NumCPU() h.DefaultVCPUs = int32(numCPUs) + 1 assert.Equal(h.defaultVCPUs(), uint32(numCPUs), "default vCPU number is wrong") + h.DefaultMaxVCPUs = 2 + assert.Equal(h.defaultMaxVCPUs(), uint32(h.DefaultMaxVCPUs), "default max vCPU number is wrong") + + h.DefaultMaxVCPUs = uint32(numCPUs) + 1 + assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong") + + maxvcpus := vc.MaxQemuVCPUs() + h.DefaultMaxVCPUs = uint32(maxvcpus) + 1 + assert.Equal(h.defaultMaxVCPUs(), uint32(numCPUs), "default max vCPU number is wrong") + h.DefaultMemSz = 1024 assert.Equal(h.defaultMemSz(), uint32(1024), "default memory size is wrong") } diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index 2e0907ca88..0d6631192f 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -41,7 +41,7 @@ const ( ) // In some architectures the maximum number of vCPUs depends on the number of physical cores. -var defaultMaxQemuVCPUs = maxQemuVCPUs() +var defaultMaxQemuVCPUs = MaxQemuVCPUs() // deviceType describes a virtualized device type. type deviceType int diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 95ae5c8260..b233f0925b 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -122,6 +122,9 @@ func (q *qemu) kernelParameters() string { // use default parameters params = append(params, defaultKernelParameters...) + // set the maximum number of vCPUs + params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)}) + // add the params specified by the provided config. 
As the kernel // honours the last parameter value set and since the config-provided // params are added here, they will take priority over the defaults. @@ -197,7 +200,7 @@ func (q *qemu) init(sandbox *Sandbox) error { } func (q *qemu) cpuTopology() govmmQemu.SMP { - return q.arch.cpuTopology(q.config.DefaultVCPUs) + return q.arch.cpuTopology(q.config.DefaultVCPUs, q.config.DefaultMaxVCPUs) } func (q *qemu) memoryTopology(sandboxConfig SandboxConfig) (govmmQemu.Memory, error) { diff --git a/virtcontainers/qemu_amd64.go b/virtcontainers/qemu_amd64.go index 2e3406a921..6099ade1e7 100644 --- a/virtcontainers/qemu_amd64.go +++ b/virtcontainers/qemu_amd64.go @@ -71,8 +71,8 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } -// returns the maximum number of vCPUs supported -func maxQemuVCPUs() uint32 { +// MaxQemuVCPUs returns the maximum number of vCPUs supported +func MaxQemuVCPUs() uint32 { return uint32(240) } diff --git a/virtcontainers/qemu_arch_base.go b/virtcontainers/qemu_arch_base.go index 28a06d2a00..795c63cc5f 100644 --- a/virtcontainers/qemu_arch_base.go +++ b/virtcontainers/qemu_arch_base.go @@ -42,7 +42,7 @@ type qemuArch interface { bridges(number uint32) []Bridge // cpuTopology returns the CPU topology for the given amount of vcpus - cpuTopology(vcpus uint32) govmmQemu.SMP + cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP // cpuModel returns the CPU model for the machine type cpuModel() string @@ -219,13 +219,13 @@ func (q *qemuArchBase) bridges(number uint32) []Bridge { return bridges } -func (q *qemuArchBase) cpuTopology(vcpus uint32) govmmQemu.SMP { +func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP { smp := govmmQemu.SMP{ CPUs: vcpus, Sockets: vcpus, Cores: defaultCores, Threads: defaultThreads, - MaxCPUs: defaultMaxQemuVCPUs, + MaxCPUs: maxvcpus, } return smp diff --git a/virtcontainers/qemu_arch_base_test.go b/virtcontainers/qemu_arch_base_test.go index 24100d1e84..906d354cc6 100644 --- 
a/virtcontainers/qemu_arch_base_test.go +++ b/virtcontainers/qemu_arch_base_test.go @@ -169,7 +169,7 @@ func TestQemuArchBaseCPUTopology(t *testing.T) { MaxCPUs: defaultMaxQemuVCPUs, } - smp := qemuArchBase.cpuTopology(vcpus) + smp := qemuArchBase.cpuTopology(vcpus, defaultMaxQemuVCPUs) assert.Equal(expectedSMP, smp) } diff --git a/virtcontainers/qemu_arm64.go b/virtcontainers/qemu_arm64.go index 2b80440b5f..42ae767189 100644 --- a/virtcontainers/qemu_arm64.go +++ b/virtcontainers/qemu_arm64.go @@ -42,8 +42,8 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } -// returns the maximum number of vCPUs supported -func maxQemuVCPUs() uint32 { +// MaxQemuVCPUs returns the maximum number of vCPUs supported +func MaxQemuVCPUs() uint32 { return uint32(runtime.NumCPU()) } diff --git a/virtcontainers/qemu_test.go b/virtcontainers/qemu_test.go index 14751db5e1..4470e8b3ac 100644 --- a/virtcontainers/qemu_test.go +++ b/virtcontainers/qemu_test.go @@ -52,7 +52,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := "panic=1 initcall_debug foo=foo bar=bar" + expectedOut := fmt.Sprintf("panic=1 initcall_debug nr_cpus=%d foo=foo bar=bar", MaxQemuVCPUs()) params := []Param{ { Key: "foo", @@ -128,7 +128,8 @@ func TestQemuCPUTopology(t *testing.T) { q := &qemu{ arch: &qemuArchBase{}, config: HypervisorConfig{ - DefaultVCPUs: uint32(vcpus), + DefaultVCPUs: uint32(vcpus), + DefaultMaxVCPUs: uint32(vcpus), }, } @@ -137,7 +138,7 @@ func TestQemuCPUTopology(t *testing.T) { Sockets: uint32(vcpus), Cores: defaultCores, Threads: defaultThreads, - MaxCPUs: defaultMaxQemuVCPUs, + MaxCPUs: uint32(vcpus), } smp := q.cpuTopology() From 4527a8066a3f3541148bc421b16ec5cdd20371d2 Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Fri, 11 May 2018 15:46:56 -0500 Subject: [PATCH 2/2] virtcontainers/qemu: honour CPU constrains Don't fail if a new container with a CPU constraint was added to a POD and no 
more vCPUs are available, instead apply the constraint and let kernel balance the resources. Signed-off-by: Julio Montes --- virtcontainers/container.go | 87 ++++++++++++++++++++---------- virtcontainers/container_test.go | 37 +++++++++---- virtcontainers/hyperstart_agent.go | 11 ---- virtcontainers/hypervisor.go | 4 +- virtcontainers/mock_hypervisor.go | 17 ++++-- virtcontainers/pkg/oci/utils.go | 7 +-- virtcontainers/qemu.go | 63 +++++++++++++--------- virtcontainers/sandbox.go | 12 +++-- 8 files changed, 151 insertions(+), 87 deletions(-) diff --git a/virtcontainers/container.go b/virtcontainers/container.go index f96cc61d37..0d1411e9a8 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -57,15 +57,11 @@ type ContainerStatus struct { // ContainerResources describes container resources type ContainerResources struct { - // CPUQuota specifies the total amount of time in microseconds - // The number of microseconds per CPUPeriod that the container is guaranteed CPU access - CPUQuota int64 + // VCPUs are the number of vCPUs that are being used by the container + VCPUs uint32 - // CPUPeriod specifies the CPU CFS scheduler period of time in microseconds - CPUPeriod uint64 - - // CPUShares specifies container's weight vs. other containers - CPUShares uint64 + // Mem is the memory that is being used by the container + Mem uint32 } // ContainerConfig describes one container runtime configuration. 
@@ -804,8 +800,7 @@ func (c *Container) update(resources specs.LinuxResources) error { } newResources := ContainerResources{ - CPUPeriod: *resources.CPU.Period, - CPUQuota: *resources.CPU.Quota, + VCPUs: uint32(utils.ConstraintsToVCPUs(*resources.CPU.Quota, *resources.CPU.Period)), } if err := c.updateResources(currentConfig.Resources, newResources); err != nil { @@ -866,7 +861,7 @@ func (c *Container) hotplugDrive() error { Index: driveIndex, } - if err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil { + if _, err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil { return err } @@ -903,7 +898,7 @@ func (c *Container) removeDrive() (err error) { l := c.Logger().WithField("device-id", devID) l.Info("Unplugging block device") - if err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil { + if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil { l.WithError(err).Info("Failed to unplug block device") return err } @@ -938,14 +933,31 @@ func (c *Container) addResources() error { return nil } - vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod) + // Container is being created, try to add the number of vCPUs specified + vCPUs := c.config.Resources.VCPUs if vCPUs != 0 { virtLog.Debugf("hot adding %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil { + data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev) + if err != nil { return err } - return c.sandbox.agent.onlineCPUMem(uint32(vCPUs)) + vcpusAdded, ok := data.(uint32) + if !ok { + return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data) + } + + // A different number of vCPUs was added, we have to update + // the resources in order to don't remove vCPUs used by other containers. 
+ if vcpusAdded != vCPUs { + // Set and save container's config + c.config.Resources.VCPUs = vcpusAdded + if err := c.storeContainer(); err != nil { + return err + } + } + + return c.sandbox.agent.onlineCPUMem(vcpusAdded) } return nil @@ -957,10 +969,18 @@ func (c *Container) removeResources() error { return nil } - vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod) + // In order to don't remove vCPUs used by other containers, we have to remove + // only the vCPUs assigned to the container + config, err := c.sandbox.storage.fetchContainerConfig(c.sandbox.id, c.id) + if err != nil { + // don't fail, let's use the default configuration + config = *c.config + } + + vCPUs := config.Resources.VCPUs if vCPUs != 0 { virtLog.Debugf("hot removing %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil { + if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev); err != nil { return err } } @@ -970,9 +990,9 @@ func (c *Container) removeResources() error { func (c *Container) updateResources(oldResources, newResources ContainerResources) error { //TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578 - var vCPUs uint - oldVCPUs := utils.ConstraintsToVCPUs(oldResources.CPUQuota, oldResources.CPUPeriod) - newVCPUs := utils.ConstraintsToVCPUs(newResources.CPUQuota, newResources.CPUPeriod) + var vCPUs uint32 + oldVCPUs := oldResources.VCPUs + newVCPUs := newResources.VCPUs // Update vCPUs is not possible if period and/or quota are not set or // oldVCPUs and newVCPUs are equal. 
@@ -989,23 +1009,36 @@ func (c *Container) updateResources(oldResources, newResources ContainerResource // hot add vCPUs vCPUs = newVCPUs - oldVCPUs virtLog.Debugf("hot adding %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil { + data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev) + if err != nil { + return err + } + vcpusAdded, ok := data.(uint32) + if !ok { + return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data) + } + // recalculate the actual number of vCPUs if a different number of vCPUs was added + newResources.VCPUs = oldVCPUs + vcpusAdded + if err := c.sandbox.agent.onlineCPUMem(vcpusAdded); err != nil { return err } } else { // hot remove vCPUs vCPUs = oldVCPUs - newVCPUs virtLog.Debugf("hot removing %d vCPUs", vCPUs) - if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil { + data, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev) + if err != nil { return err } + vcpusRemoved, ok := data.(uint32) + if !ok { + return fmt.Errorf("Could not get the number of vCPUs removed, got %+v", data) + } + // recalculate the actual number of vCPUs if a different number of vCPUs was removed + newResources.VCPUs = oldVCPUs - vcpusRemoved } // Set and save container's config c.config.Resources = newResources - if err := c.storeContainer(); err != nil { - return err - } - - return c.sandbox.agent.onlineCPUMem(uint32(vCPUs)) + return c.storeContainer() } diff --git a/virtcontainers/container_test.go b/virtcontainers/container_test.go index bab57911ce..cf26686527 100644 --- a/virtcontainers/container_test.go +++ b/virtcontainers/container_test.go @@ -284,7 +284,11 @@ func TestCheckSandboxRunningSuccessful(t *testing.T) { func TestContainerAddResources(t *testing.T) { assert := assert.New(t) - c := &Container{} + c := &Container{ + sandbox: &Sandbox{ + storage: &filesystem{}, + }, + } err := c.addResources() assert.Nil(err) @@ -297,13 +301,16 @@ 
func TestContainerAddResources(t *testing.T) { err = c.addResources() assert.Nil(err) + vCPUs := uint32(5) c.config.Resources = ContainerResources{ - CPUQuota: 5000, - CPUPeriod: 1000, + VCPUs: vCPUs, } c.sandbox = &Sandbox{ - hypervisor: &mockHypervisor{}, - agent: &noopAgent{}, + hypervisor: &mockHypervisor{ + vCPUs: vCPUs, + }, + agent: &noopAgent{}, + storage: &filesystem{}, } err = c.addResources() assert.Nil(err) @@ -312,7 +319,12 @@ func TestContainerAddResources(t *testing.T) { func TestContainerRemoveResources(t *testing.T) { assert := assert.New(t) - c := &Container{} + c := &Container{ + sandbox: &Sandbox{ + storage: &filesystem{}, + }, + } + err := c.addResources() assert.Nil(err) @@ -325,11 +337,18 @@ func TestContainerRemoveResources(t *testing.T) { err = c.removeResources() assert.Nil(err) + vCPUs := uint32(5) c.config.Resources = ContainerResources{ - CPUQuota: 5000, - CPUPeriod: 1000, + VCPUs: vCPUs, } - c.sandbox = &Sandbox{hypervisor: &mockHypervisor{}} + + c.sandbox = &Sandbox{ + hypervisor: &mockHypervisor{ + vCPUs: vCPUs, + }, + storage: &filesystem{}, + } + err = c.removeResources() assert.Nil(err) } diff --git a/virtcontainers/hyperstart_agent.go b/virtcontainers/hyperstart_agent.go index 648acd506e..aab1d341ad 100644 --- a/virtcontainers/hyperstart_agent.go +++ b/virtcontainers/hyperstart_agent.go @@ -430,17 +430,6 @@ func (h *hyper) startOneContainer(sandbox *Sandbox, c *Container) error { Process: process, } - if c.config.Resources.CPUQuota != 0 && c.config.Resources.CPUPeriod != 0 { - container.Constraints = hyperstart.Constraints{ - CPUQuota: c.config.Resources.CPUQuota, - CPUPeriod: c.config.Resources.CPUPeriod, - } - } - - if c.config.Resources.CPUShares != 0 { - container.Constraints.CPUShares = c.config.Resources.CPUShares - } - container.SystemMountsInfo.BindMountDev = c.systemMountsInfo.BindMountDev if c.state.Fstype != "" { diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index 0d6631192f..8b501a3f5a 
100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -499,8 +499,8 @@ type hypervisor interface { pauseSandbox() error resumeSandbox() error addDevice(devInfo interface{}, devType deviceType) error - hotplugAddDevice(devInfo interface{}, devType deviceType) error - hotplugRemoveDevice(devInfo interface{}, devType deviceType) error + hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) + hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) getSandboxConsole(sandboxID string) (string, error) capabilities() capabilities } diff --git a/virtcontainers/mock_hypervisor.go b/virtcontainers/mock_hypervisor.go index 563a395ca5..89824cdfef 100644 --- a/virtcontainers/mock_hypervisor.go +++ b/virtcontainers/mock_hypervisor.go @@ -6,6 +6,7 @@ package virtcontainers type mockHypervisor struct { + vCPUs uint32 } func (m *mockHypervisor) init(sandbox *Sandbox) error { @@ -49,12 +50,20 @@ func (m *mockHypervisor) addDevice(devInfo interface{}, devType deviceType) erro return nil } -func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) error { - return nil +func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + switch devType { + case cpuDev: + return m.vCPUs, nil + } + return nil, nil } -func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error { - return nil +func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + switch devType { + case cpuDev: + return m.vCPUs, nil + } + return nil, nil } func (m *mockHypervisor) getSandboxConsole(sandboxID string) (string, error) { diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go index f82fd7c38d..c886befbda 100644 --- a/virtcontainers/pkg/oci/utils.go +++ b/virtcontainers/pkg/oci/utils.go @@ -23,6 +23,7 @@ import ( 
"github.com/kata-containers/runtime/virtcontainers/device/config" vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" dockershimAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations/dockershim" + "github.com/kata-containers/runtime/virtcontainers/utils" ) type annotationContainerType struct { @@ -562,11 +563,7 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det if ocispec.Linux.Resources.CPU != nil { if ocispec.Linux.Resources.CPU.Quota != nil && ocispec.Linux.Resources.CPU.Period != nil { - resources.CPUQuota = *ocispec.Linux.Resources.CPU.Quota - resources.CPUPeriod = *ocispec.Linux.Resources.CPU.Period - } - if ocispec.Linux.Resources.CPU.Shares != nil { - resources.CPUShares = *ocispec.Linux.Resources.CPU.Shares + resources.VCPUs = uint32(utils.ConstraintsToVCPUs(*ocispec.Linux.Resources.CPU.Quota, *ocispec.Linux.Resources.CPU.Period)) } } diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index b233f0925b..e329623ac0 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -740,44 +740,46 @@ func (q *qemu) hotplugVFIODevice(device deviceDrivers.VFIODevice, op operation) return nil } -func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) error { +func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) (interface{}, error) { switch devType { case blockDev: // TODO: find a way to remove dependency of deviceDrivers lib @weizhang555 drive := devInfo.(*deviceDrivers.Drive) - return q.hotplugBlockDevice(drive, op) + return nil, q.hotplugBlockDevice(drive, op) case cpuDev: vcpus := devInfo.(uint32) return q.hotplugCPUs(vcpus, op) case vfioDev: // TODO: find a way to remove dependency of deviceDrivers lib @weizhang555 device := devInfo.(deviceDrivers.VFIODevice) - return q.hotplugVFIODevice(device, op) + return nil, q.hotplugVFIODevice(device, op) default: - return fmt.Errorf("cannot hotplug device: 
unsupported device type '%v'", devType) + return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) } } -func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) error { - if err := q.hotplugDevice(devInfo, devType, addDevice); err != nil { - return err +func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + data, err := q.hotplugDevice(devInfo, devType, addDevice) + if err != nil { + return data, err } - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } -func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error { - if err := q.hotplugDevice(devInfo, devType, removeDevice); err != nil { - return err +func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) { + data, err := q.hotplugDevice(devInfo, devType, removeDevice) + if err != nil { + return data, err } - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } -func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error { +func (q *qemu) hotplugCPUs(vcpus uint32, op operation) (uint32, error) { if vcpus == 0 { q.Logger().Warnf("cannot hotplug 0 vCPUs") - return nil + return 0, nil } defer func(qemu *qemu) { @@ -788,7 +790,7 @@ func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error { qmp, err := q.qmpSetup() if err != nil { - return err + return 0, err } q.qmpMonitorCh.qmp = qmp @@ -800,19 +802,28 @@ func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error { return q.hotplugRemoveCPUs(vcpus) } -func (q *qemu) hotplugAddCPUs(amount uint32) error { +// try to hot add an amount of vCPUs, returns the number of vCPUs added +func (q *qemu) hotplugAddCPUs(amount uint32) (uint32, error) { currentVCPUs := q.qemuConfig.SMP.CPUs + uint32(len(q.state.HotpluggedVCPUs)) - // 
Don't exceed the maximum amount of vCPUs + // Don't fail if the number of max vCPUs is exceeded, log a warning and hot add the vCPUs needed + // to reach out max vCPUs if currentVCPUs+amount > q.config.DefaultMaxVCPUs { - return fmt.Errorf("Unable to hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d", + q.Logger().Warnf("Cannot hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d", amount, currentVCPUs, q.config.DefaultMaxVCPUs) + amount = q.config.DefaultMaxVCPUs - currentVCPUs + } + + if amount == 0 { + // Don't fail if no more vCPUs can be added, since cgroups still can be updated + q.Logger().Warnf("maximum number of vCPUs '%d' has been reached", q.config.DefaultMaxVCPUs) + return 0, nil } // get the list of hotpluggable CPUs hotpluggableVCPUs, err := q.qmpMonitorCh.qmp.ExecuteQueryHotpluggableCPUs(q.qmpMonitorCh.ctx) if err != nil { - return fmt.Errorf("failed to query hotpluggable CPUs: %v", err) + return 0, fmt.Errorf("failed to query hotpluggable CPUs: %v", err) } var hotpluggedVCPUs uint32 @@ -838,7 +849,7 @@ func (q *qemu) hotplugAddCPUs(amount uint32) error { hotpluggedVCPUs++ if hotpluggedVCPUs == amount { // All vCPUs were hotplugged - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } } @@ -847,29 +858,31 @@ func (q *qemu) hotplugAddCPUs(amount uint32) error { q.Logger().Errorf("failed to save hypervisor state after hotplug %d vCPUs: %v", hotpluggedVCPUs, err) } - return fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount) + return hotpluggedVCPUs, fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount) } -func (q *qemu) hotplugRemoveCPUs(amount uint32) error { +// try to hot remove an amount of vCPUs, returns the number of vCPUs removed +func (q *qemu) hotplugRemoveCPUs(amount uint32) (uint32, error) { 
hotpluggedVCPUs := uint32(len(q.state.HotpluggedVCPUs)) // we can only remove hotplugged vCPUs if amount > hotpluggedVCPUs { - return fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs) + return 0, fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs) } for i := uint32(0); i < amount; i++ { // get the last vCPUs and try to remove it cpu := q.state.HotpluggedVCPUs[len(q.state.HotpluggedVCPUs)-1] if err := q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, cpu.ID); err != nil { - return fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err) + _ = q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return i, fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err) } // remove from the list the vCPU hotunplugged q.state.HotpluggedVCPUs = q.state.HotpluggedVCPUs[:len(q.state.HotpluggedVCPUs)-1] } - return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) + return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state) } func (q *qemu) pauseSandbox() error { diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 823f4bbe4b..cd4c17019d 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1338,13 +1338,15 @@ func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType) if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev) + _, err := s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev) + return err case config.DeviceBlock: blockDevice, ok := device.(*drivers.BlockDevice) if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev) + _, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev) + return 
err case config.DeviceGeneric: // TODO: what? return nil @@ -1361,13 +1363,15 @@ func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceTy if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev) + _, err := s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev) + return err case config.DeviceBlock: blockDevice, ok := device.(*drivers.BlockDevice) if !ok { return fmt.Errorf("device type mismatch, expect device type to be %s", devType) } - return s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev) + _, err := s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev) + return err case config.DeviceGeneric: // TODO: what? return nil