virtcontainers/qemu: honour CPU constraints

Don't fail if a new container with a CPU constraint was added to
a POD and no more vCPUs are available; instead, apply the constraint
and let the kernel balance the resources.

Signed-off-by: Julio Montes <julio.montes@intel.com>
This commit is contained in:
Julio Montes 2018-05-11 15:46:56 -05:00
parent 07db945b09
commit 4527a8066a
8 changed files with 151 additions and 87 deletions

View File

@ -57,15 +57,11 @@ type ContainerStatus struct {
// ContainerResources describes container resources
type ContainerResources struct {
// CPUQuota specifies the total amount of time in microseconds
// The number of microseconds per CPUPeriod that the container is guaranteed CPU access
CPUQuota int64
// VCPUs are the number of vCPUs that are being used by the container
VCPUs uint32
// CPUPeriod specifies the CPU CFS scheduler period of time in microseconds
CPUPeriod uint64
// CPUShares specifies container's weight vs. other containers
CPUShares uint64
// Mem is the memory that is being used by the container
Mem uint32
}
// ContainerConfig describes one container runtime configuration.
@ -804,8 +800,7 @@ func (c *Container) update(resources specs.LinuxResources) error {
}
newResources := ContainerResources{
CPUPeriod: *resources.CPU.Period,
CPUQuota: *resources.CPU.Quota,
VCPUs: uint32(utils.ConstraintsToVCPUs(*resources.CPU.Quota, *resources.CPU.Period)),
}
if err := c.updateResources(currentConfig.Resources, newResources); err != nil {
@ -866,7 +861,7 @@ func (c *Container) hotplugDrive() error {
Index: driveIndex,
}
if err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil {
if _, err := c.sandbox.hypervisor.hotplugAddDevice(&drive, blockDev); err != nil {
return err
}
@ -903,7 +898,7 @@ func (c *Container) removeDrive() (err error) {
l := c.Logger().WithField("device-id", devID)
l.Info("Unplugging block device")
if err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil {
if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(drive, blockDev); err != nil {
l.WithError(err).Info("Failed to unplug block device")
return err
}
@ -938,14 +933,31 @@ func (c *Container) addResources() error {
return nil
}
vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod)
// Container is being created, try to add the number of vCPUs specified
vCPUs := c.config.Resources.VCPUs
if vCPUs != 0 {
virtLog.Debugf("hot adding %d vCPUs", vCPUs)
if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil {
data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev)
if err != nil {
return err
}
return c.sandbox.agent.onlineCPUMem(uint32(vCPUs))
vcpusAdded, ok := data.(uint32)
if !ok {
return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data)
}
// A different number of vCPUs was added; we have to update
// the resources so that vCPUs used by other containers are not removed later.
if vcpusAdded != vCPUs {
// Set and save container's config
c.config.Resources.VCPUs = vcpusAdded
if err := c.storeContainer(); err != nil {
return err
}
}
return c.sandbox.agent.onlineCPUMem(vcpusAdded)
}
return nil
@ -957,10 +969,18 @@ func (c *Container) removeResources() error {
return nil
}
vCPUs := utils.ConstraintsToVCPUs(c.config.Resources.CPUQuota, c.config.Resources.CPUPeriod)
// To avoid removing vCPUs used by other containers, we have to remove
// only the vCPUs assigned to this container
config, err := c.sandbox.storage.fetchContainerConfig(c.sandbox.id, c.id)
if err != nil {
// don't fail, let's use the default configuration
config = *c.config
}
vCPUs := config.Resources.VCPUs
if vCPUs != 0 {
virtLog.Debugf("hot removing %d vCPUs", vCPUs)
if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil {
if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev); err != nil {
return err
}
}
@ -970,9 +990,9 @@ func (c *Container) removeResources() error {
func (c *Container) updateResources(oldResources, newResources ContainerResources) error {
//TODO add support for memory, Issue: https://github.com/containers/virtcontainers/issues/578
var vCPUs uint
oldVCPUs := utils.ConstraintsToVCPUs(oldResources.CPUQuota, oldResources.CPUPeriod)
newVCPUs := utils.ConstraintsToVCPUs(newResources.CPUQuota, newResources.CPUPeriod)
var vCPUs uint32
oldVCPUs := oldResources.VCPUs
newVCPUs := newResources.VCPUs
// Updating vCPUs is not possible if period and/or quota are not set, or if
// oldVCPUs and newVCPUs are equal.
@ -989,23 +1009,36 @@ func (c *Container) updateResources(oldResources, newResources ContainerResource
// hot add vCPUs
vCPUs = newVCPUs - oldVCPUs
virtLog.Debugf("hot adding %d vCPUs", vCPUs)
if err := c.sandbox.hypervisor.hotplugAddDevice(uint32(vCPUs), cpuDev); err != nil {
data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev)
if err != nil {
return err
}
vcpusAdded, ok := data.(uint32)
if !ok {
return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data)
}
// recalculate the actual number of vCPUs if a different number of vCPUs was added
newResources.VCPUs = oldVCPUs + vcpusAdded
if err := c.sandbox.agent.onlineCPUMem(vcpusAdded); err != nil {
return err
}
} else {
// hot remove vCPUs
vCPUs = oldVCPUs - newVCPUs
virtLog.Debugf("hot removing %d vCPUs", vCPUs)
if err := c.sandbox.hypervisor.hotplugRemoveDevice(uint32(vCPUs), cpuDev); err != nil {
data, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev)
if err != nil {
return err
}
vcpusRemoved, ok := data.(uint32)
if !ok {
return fmt.Errorf("Could not get the number of vCPUs removed, got %+v", data)
}
// recalculate the actual number of vCPUs if a different number of vCPUs was removed
newResources.VCPUs = oldVCPUs - vcpusRemoved
}
// Set and save container's config
c.config.Resources = newResources
if err := c.storeContainer(); err != nil {
return err
}
return c.sandbox.agent.onlineCPUMem(uint32(vCPUs))
return c.storeContainer()
}

View File

@ -284,7 +284,11 @@ func TestCheckSandboxRunningSuccessful(t *testing.T) {
func TestContainerAddResources(t *testing.T) {
assert := assert.New(t)
c := &Container{}
c := &Container{
sandbox: &Sandbox{
storage: &filesystem{},
},
}
err := c.addResources()
assert.Nil(err)
@ -297,13 +301,16 @@ func TestContainerAddResources(t *testing.T) {
err = c.addResources()
assert.Nil(err)
vCPUs := uint32(5)
c.config.Resources = ContainerResources{
CPUQuota: 5000,
CPUPeriod: 1000,
VCPUs: vCPUs,
}
c.sandbox = &Sandbox{
hypervisor: &mockHypervisor{},
agent: &noopAgent{},
hypervisor: &mockHypervisor{
vCPUs: vCPUs,
},
agent: &noopAgent{},
storage: &filesystem{},
}
err = c.addResources()
assert.Nil(err)
@ -312,7 +319,12 @@ func TestContainerAddResources(t *testing.T) {
func TestContainerRemoveResources(t *testing.T) {
assert := assert.New(t)
c := &Container{}
c := &Container{
sandbox: &Sandbox{
storage: &filesystem{},
},
}
err := c.addResources()
assert.Nil(err)
@ -325,11 +337,18 @@ func TestContainerRemoveResources(t *testing.T) {
err = c.removeResources()
assert.Nil(err)
vCPUs := uint32(5)
c.config.Resources = ContainerResources{
CPUQuota: 5000,
CPUPeriod: 1000,
VCPUs: vCPUs,
}
c.sandbox = &Sandbox{hypervisor: &mockHypervisor{}}
c.sandbox = &Sandbox{
hypervisor: &mockHypervisor{
vCPUs: vCPUs,
},
storage: &filesystem{},
}
err = c.removeResources()
assert.Nil(err)
}

View File

@ -430,17 +430,6 @@ func (h *hyper) startOneContainer(sandbox *Sandbox, c *Container) error {
Process: process,
}
if c.config.Resources.CPUQuota != 0 && c.config.Resources.CPUPeriod != 0 {
container.Constraints = hyperstart.Constraints{
CPUQuota: c.config.Resources.CPUQuota,
CPUPeriod: c.config.Resources.CPUPeriod,
}
}
if c.config.Resources.CPUShares != 0 {
container.Constraints.CPUShares = c.config.Resources.CPUShares
}
container.SystemMountsInfo.BindMountDev = c.systemMountsInfo.BindMountDev
if c.state.Fstype != "" {

View File

@ -499,8 +499,8 @@ type hypervisor interface {
pauseSandbox() error
resumeSandbox() error
addDevice(devInfo interface{}, devType deviceType) error
hotplugAddDevice(devInfo interface{}, devType deviceType) error
hotplugRemoveDevice(devInfo interface{}, devType deviceType) error
hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error)
hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error)
getSandboxConsole(sandboxID string) (string, error)
capabilities() capabilities
}

View File

@ -6,6 +6,7 @@
package virtcontainers
// mockHypervisor is a fake hypervisor implementation used by unit tests.
type mockHypervisor struct {
	// vCPUs is the value returned by hotplugAddDevice/hotplugRemoveDevice
	// for cpuDev operations, i.e. the number of vCPUs the mock pretends
	// were hotplugged or hot removed.
	vCPUs uint32
}
func (m *mockHypervisor) init(sandbox *Sandbox) error {
@ -49,12 +50,20 @@ func (m *mockHypervisor) addDevice(devInfo interface{}, devType deviceType) erro
return nil
}
func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) error {
return nil
// hotplugAddDevice simulates hotplugging a device. For CPU devices it
// reports the mock's configured vCPU count as the number of vCPUs added;
// any other device type is accepted as a no-op.
func (m *mockHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
	if devType == cpuDev {
		return m.vCPUs, nil
	}
	return nil, nil
}
func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error {
return nil
// hotplugRemoveDevice simulates hot removing a device. For CPU devices it
// reports the mock's configured vCPU count as the number of vCPUs removed;
// any other device type is accepted as a no-op.
func (m *mockHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
	if devType == cpuDev {
		return m.vCPUs, nil
	}
	return nil, nil
}
func (m *mockHypervisor) getSandboxConsole(sandboxID string) (string, error) {

View File

@ -23,6 +23,7 @@ import (
"github.com/kata-containers/runtime/virtcontainers/device/config"
vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
dockershimAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations/dockershim"
"github.com/kata-containers/runtime/virtcontainers/utils"
)
type annotationContainerType struct {
@ -562,11 +563,7 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det
if ocispec.Linux.Resources.CPU != nil {
if ocispec.Linux.Resources.CPU.Quota != nil &&
ocispec.Linux.Resources.CPU.Period != nil {
resources.CPUQuota = *ocispec.Linux.Resources.CPU.Quota
resources.CPUPeriod = *ocispec.Linux.Resources.CPU.Period
}
if ocispec.Linux.Resources.CPU.Shares != nil {
resources.CPUShares = *ocispec.Linux.Resources.CPU.Shares
resources.VCPUs = uint32(utils.ConstraintsToVCPUs(*ocispec.Linux.Resources.CPU.Quota, *ocispec.Linux.Resources.CPU.Period))
}
}

View File

@ -740,44 +740,46 @@ func (q *qemu) hotplugVFIODevice(device deviceDrivers.VFIODevice, op operation)
return nil
}
func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) error {
func (q *qemu) hotplugDevice(devInfo interface{}, devType deviceType, op operation) (interface{}, error) {
switch devType {
case blockDev:
// TODO: find a way to remove dependency of deviceDrivers lib @weizhang555
drive := devInfo.(*deviceDrivers.Drive)
return q.hotplugBlockDevice(drive, op)
return nil, q.hotplugBlockDevice(drive, op)
case cpuDev:
vcpus := devInfo.(uint32)
return q.hotplugCPUs(vcpus, op)
case vfioDev:
// TODO: find a way to remove dependency of deviceDrivers lib @weizhang555
device := devInfo.(deviceDrivers.VFIODevice)
return q.hotplugVFIODevice(device, op)
return nil, q.hotplugVFIODevice(device, op)
default:
return fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType)
return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType)
}
}
func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) error {
if err := q.hotplugDevice(devInfo, devType, addDevice); err != nil {
return err
func (q *qemu) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
data, err := q.hotplugDevice(devInfo, devType, addDevice)
if err != nil {
return data, err
}
return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
}
func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) error {
if err := q.hotplugDevice(devInfo, devType, removeDevice); err != nil {
return err
func (q *qemu) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
data, err := q.hotplugDevice(devInfo, devType, removeDevice)
if err != nil {
return data, err
}
return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
return data, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
}
func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error {
func (q *qemu) hotplugCPUs(vcpus uint32, op operation) (uint32, error) {
if vcpus == 0 {
q.Logger().Warnf("cannot hotplug 0 vCPUs")
return nil
return 0, nil
}
defer func(qemu *qemu) {
@ -788,7 +790,7 @@ func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error {
qmp, err := q.qmpSetup()
if err != nil {
return err
return 0, err
}
q.qmpMonitorCh.qmp = qmp
@ -800,19 +802,28 @@ func (q *qemu) hotplugCPUs(vcpus uint32, op operation) error {
return q.hotplugRemoveCPUs(vcpus)
}
func (q *qemu) hotplugAddCPUs(amount uint32) error {
// try to hot add an amount of vCPUs, returns the number of vCPUs added
func (q *qemu) hotplugAddCPUs(amount uint32) (uint32, error) {
currentVCPUs := q.qemuConfig.SMP.CPUs + uint32(len(q.state.HotpluggedVCPUs))
// Don't exceed the maximum amount of vCPUs
// Don't fail if the maximum number of vCPUs is exceeded; log a warning and hot add
// only the vCPUs needed to reach the maximum
if currentVCPUs+amount > q.config.DefaultMaxVCPUs {
return fmt.Errorf("Unable to hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d",
q.Logger().Warnf("Cannot hotplug %d CPUs, currently this SB has %d CPUs and the maximum amount of CPUs is %d",
amount, currentVCPUs, q.config.DefaultMaxVCPUs)
amount = q.config.DefaultMaxVCPUs - currentVCPUs
}
if amount == 0 {
// Don't fail if no more vCPUs can be added, since cgroups still can be updated
q.Logger().Warnf("maximum number of vCPUs '%d' has been reached", q.config.DefaultMaxVCPUs)
return 0, nil
}
// get the list of hotpluggable CPUs
hotpluggableVCPUs, err := q.qmpMonitorCh.qmp.ExecuteQueryHotpluggableCPUs(q.qmpMonitorCh.ctx)
if err != nil {
return fmt.Errorf("failed to query hotpluggable CPUs: %v", err)
return 0, fmt.Errorf("failed to query hotpluggable CPUs: %v", err)
}
var hotpluggedVCPUs uint32
@ -838,7 +849,7 @@ func (q *qemu) hotplugAddCPUs(amount uint32) error {
hotpluggedVCPUs++
if hotpluggedVCPUs == amount {
// All vCPUs were hotplugged
return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
}
}
@ -847,29 +858,31 @@ func (q *qemu) hotplugAddCPUs(amount uint32) error {
q.Logger().Errorf("failed to save hypervisor state after hotplug %d vCPUs: %v", hotpluggedVCPUs, err)
}
return fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount)
return hotpluggedVCPUs, fmt.Errorf("failed to hot add vCPUs: only %d vCPUs of %d were added", hotpluggedVCPUs, amount)
}
func (q *qemu) hotplugRemoveCPUs(amount uint32) error {
// try to hot remove an amount of vCPUs, returns the number of vCPUs removed
func (q *qemu) hotplugRemoveCPUs(amount uint32) (uint32, error) {
hotpluggedVCPUs := uint32(len(q.state.HotpluggedVCPUs))
// we can only remove hotplugged vCPUs
if amount > hotpluggedVCPUs {
return fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs)
return 0, fmt.Errorf("Unable to remove %d CPUs, currently there are only %d hotplugged CPUs", amount, hotpluggedVCPUs)
}
for i := uint32(0); i < amount; i++ {
// get the last vCPUs and try to remove it
cpu := q.state.HotpluggedVCPUs[len(q.state.HotpluggedVCPUs)-1]
if err := q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, cpu.ID); err != nil {
return fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err)
_ = q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
return i, fmt.Errorf("failed to hotunplug CPUs, only %d CPUs were hotunplugged: %v", i, err)
}
// remove from the list the vCPU hotunplugged
q.state.HotpluggedVCPUs = q.state.HotpluggedVCPUs[:len(q.state.HotpluggedVCPUs)-1]
}
return q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
return amount, q.sandbox.storage.storeHypervisorState(q.sandbox.id, q.state)
}
func (q *qemu) pauseSandbox() error {

View File

@ -1338,13 +1338,15 @@ func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType)
if !ok {
return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
}
return s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev)
_, err := s.hypervisor.hotplugAddDevice(*vfioDevice, vfioDev)
return err
case config.DeviceBlock:
blockDevice, ok := device.(*drivers.BlockDevice)
if !ok {
return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
}
return s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev)
_, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev)
return err
case config.DeviceGeneric:
// TODO: what?
return nil
@ -1361,13 +1363,15 @@ func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceTy
if !ok {
return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
}
return s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev)
_, err := s.hypervisor.hotplugRemoveDevice(*vfioDevice, vfioDev)
return err
case config.DeviceBlock:
blockDevice, ok := device.(*drivers.BlockDevice)
if !ok {
return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
}
return s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev)
_, err := s.hypervisor.hotplugRemoveDevice(blockDevice.BlockDrive, blockDev)
return err
case config.DeviceGeneric:
// TODO: what?
return nil