diff --git a/virtcontainers/container.go b/virtcontainers/container.go
index 0930069b8..fe7474718 100644
--- a/virtcontainers/container.go
+++ b/virtcontainers/container.go
@@ -11,7 +11,6 @@ import (
 	"encoding/hex"
 	"fmt"
 	"io"
-	"math"
 	"os"
 	"path/filepath"
 	"syscall"
@@ -224,7 +223,7 @@ type ContainerConfig struct {
 	DeviceInfos []config.DeviceInfo
 
 	// Resources container resources
-	Resources ContainerResources
+	Resources specs.LinuxResources
 }
 
 // valid checks that the container configuration is valid.
@@ -658,9 +657,6 @@ func newContainer(sandbox *Sandbox, contConfig ContainerConfig) (*Container, err
 // - Unplug CPU and memory resources from the VM.
 // - Unplug devices from the VM.
 func (c *Container) rollbackFailingContainerCreation() {
-	if err := c.removeResources(); err != nil {
-		c.Logger().WithError(err).Error("rollback failed removeResources()")
-	}
 	if err := c.detachDevices(); err != nil {
 		c.Logger().WithError(err).Error("rollback failed detachDevices()")
 	}
@@ -684,15 +680,7 @@ func (c *Container) checkBlockDeviceSupport() bool {
 
 // createContainer creates and start a container inside a Sandbox. It has to be
 // called only when a new container, not known by the sandbox, has to be created.
-func createContainer(sandbox *Sandbox, contConfig ContainerConfig) (c *Container, err error) {
-	if sandbox == nil {
-		return nil, errNeedSandbox
-	}
-
-	c, err = newContainer(sandbox, contConfig)
-	if err != nil {
-		return
-	}
+func (c *Container) create() (err error) {
 
 	if err = c.createContainersDirs(); err != nil {
 		return
@@ -717,10 +705,6 @@ func createContainer(sandbox *Sandbox, contConfig ContainerConfig) (c *Container
 		return
 	}
 
-	if err = c.addResources(); err != nil {
-		return
-	}
-
 	// Deduce additional system mount info that should be handled by the agent
 	// inside the VM
 	c.getSystemMountInfo()
@@ -729,16 +713,16 @@ func createContainer(sandbox *Sandbox, contConfig ContainerConfig) (c *Container
 		return
 	}
 
-	process, err := sandbox.agent.createContainer(c.sandbox, c)
+	process, err := c.sandbox.agent.createContainer(c.sandbox, c)
 	if err != nil {
-		return c, err
+		return err
 	}
 	c.process = *process
 
 	// If this is a sandbox container, store the pid for sandbox
 	ann := c.GetAnnotations()
 	if ann[annotations.ContainerTypeKey] == string(PodSandbox) {
-		sandbox.setSandboxPid(c.process.Pid)
+		c.sandbox.setSandboxPid(c.process.Pid)
 	}
 
 	// Store the container process returned by the agent.
@@ -750,7 +734,7 @@ func createContainer(sandbox *Sandbox, contConfig ContainerConfig) (c *Container
 		return
 	}
 
-	return c, nil
+	return nil
 }
 
 func (c *Container) delete() error {
@@ -904,10 +888,6 @@ func (c *Container) stop() error {
 		return err
 	}
 
-	if err := c.removeResources(); err != nil {
-		return err
-	}
-
 	if err := c.detachDevices(); err != nil {
 		return err
 	}
@@ -1010,24 +990,28 @@ func (c *Container) update(resources specs.LinuxResources) error {
 		return fmt.Errorf("Container not running, impossible to update")
 	}
 
-	// fetch current configuration
-	currentConfig, err := c.sandbox.storage.fetchContainerConfig(c.sandbox.id, c.id)
-	if err != nil {
-		return err
+	if c.config.Resources.CPU == nil {
+		c.config.Resources.CPU = &specs.LinuxCPU{}
 	}
 
-	newResources := ContainerResources{
-		VCPUs: uint32(utils.ConstraintsToVCPUs(*resources.CPU.Quota, *resources.CPU.Period)),
-		// do page align to memory, as cgroup memory.limit_in_bytes will be aligned to page when effect.
-		// TODO use GetGuestDetails to get the guest OS page size.
-		MemByte: (*resources.Memory.Limit >> 12) << 12,
+	if cpu := resources.CPU; cpu != nil {
+		if p := cpu.Period; p != nil && *p != 0 {
+			c.config.Resources.CPU.Period = p
+		}
+		if q := cpu.Quota; q != nil && *q != 0 {
+			c.config.Resources.CPU.Quota = q
+		}
 	}
 
-	if err := c.updateResources(currentConfig.Resources, newResources); err != nil {
-		return err
+	if c.config.Resources.Memory == nil {
+		c.config.Resources.Memory = &specs.LinuxMemory{}
 	}
 
-	if err := c.storeContainer(); err != nil {
+	if mem := resources.Memory; mem != nil && mem.Limit != nil {
+		c.config.Resources.Memory.Limit = mem.Limit
+	}
+
+	if err := c.sandbox.updateResources(); err != nil {
 		return err
 	}
 
@@ -1219,191 +1203,3 @@ func (c *Container) detachDevices() error {
 	}
 	return nil
 }
-
-func (c *Container) addResources() error {
-	if c.config == nil {
-		return nil
-	}
-
-	addResources := ContainerResources{
-		VCPUs:   c.config.Resources.VCPUs,
-		MemByte: c.config.Resources.MemByte,
-	}
-
-	if err := c.updateResources(ContainerResources{0, 0}, addResources); err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func (c *Container) removeResources() error {
-	if c.config == nil {
-		return nil
-	}
-
-	// In order to don't remove vCPUs used by other containers, we have to remove
-	// only the vCPUs assigned to the container
-	config, err := c.sandbox.storage.fetchContainerConfig(c.sandbox.id, c.id)
-	if err != nil {
-		// don't fail, let's use the default configuration
-		config = *c.config
-	}
-
-	vCPUs := config.Resources.VCPUs
-	if vCPUs != 0 {
-		virtLog.Debugf("hot removing %d vCPUs", vCPUs)
-		if _, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev); err != nil {
-			return err
-		}
-	}
-	// hot remove memory unsupported
-
-	return nil
-}
-
-func (c *Container) updateVCPUResources(oldResources ContainerResources, newResources *ContainerResources) error {
-	var vCPUs uint32
-	oldVCPUs := oldResources.VCPUs
-	newVCPUs := newResources.VCPUs
-
-	// Update vCPUs is not possible if oldVCPUs and newVCPUs are equal.
-	// Don't fail, the constraint still can be applied in the cgroup.
-	if oldVCPUs == newVCPUs {
-		c.Logger().WithFields(logrus.Fields{
-			"old-vcpus": fmt.Sprintf("%d", oldVCPUs),
-			"new-vcpus": fmt.Sprintf("%d", newVCPUs),
-		}).Debug("the actual number of vCPUs will not be modified")
-		return nil
-	} else if oldVCPUs < newVCPUs {
-		// hot add vCPUs
-		vCPUs = newVCPUs - oldVCPUs
-		virtLog.Debugf("hot adding %d vCPUs", vCPUs)
-		data, err := c.sandbox.hypervisor.hotplugAddDevice(vCPUs, cpuDev)
-		if err != nil {
-			return err
-		}
-		vcpusAdded, ok := data.(uint32)
-		if !ok {
-			return fmt.Errorf("Could not get the number of vCPUs added, got %+v", data)
-		}
-		// recalculate the actual number of vCPUs if a different number of vCPUs was added
-		newResources.VCPUs = oldVCPUs + vcpusAdded
-		if err := c.sandbox.agent.onlineCPUMem(vcpusAdded, true); err != nil {
-			return err
-		}
-	} else {
-		// hot remove vCPUs
-		vCPUs = oldVCPUs - newVCPUs
-		virtLog.Debugf("hot removing %d vCPUs", vCPUs)
-		data, err := c.sandbox.hypervisor.hotplugRemoveDevice(vCPUs, cpuDev)
-		if err != nil {
-			return err
-		}
-		vcpusRemoved, ok := data.(uint32)
-		if !ok {
-			return fmt.Errorf("Could not get the number of vCPUs removed, got %+v", data)
-		}
-		// recalculate the actual number of vCPUs if a different number of vCPUs was removed
-		newResources.VCPUs = oldVCPUs - vcpusRemoved
-	}
-
-	return nil
-}
-
-// calculate hotplug memory size with memory block size of guestos
-func (c *Container) calcHotplugMemMiBSize(memByte int64) (uint32, error) {
-	memoryBlockSize := int64(c.sandbox.state.GuestMemoryBlockSizeMB)
-	if memoryBlockSize == 0 {
-		return uint32(memByte >> 20), nil
-	}
-
-	// TODO: hot add memory aligned to memory section should be more properly. See https://github.com/kata-containers/runtime/pull/624#issuecomment-419656853
-	return uint32(int64(math.Ceil(float64(memByte)/float64(memoryBlockSize<<20))) * memoryBlockSize), nil
-}
-
-func (c *Container) updateMemoryResources(oldResources ContainerResources, newResources *ContainerResources) error {
-	oldMemByte := oldResources.MemByte
-	newMemByte := newResources.MemByte
-	c.Logger().WithFields(logrus.Fields{
-		"old-mem": fmt.Sprintf("%dByte", oldMemByte),
-		"new-mem": fmt.Sprintf("%dByte", newMemByte),
-	}).Debug("Request update memory")
-
-	if oldMemByte == newMemByte {
-		c.Logger().WithFields(logrus.Fields{
-			"old-mem": fmt.Sprintf("%dByte", oldMemByte),
-			"new-mem": fmt.Sprintf("%dByte", newMemByte),
-		}).Debug("the actual number of Mem will not be modified")
-		return nil
-	} else if oldMemByte < newMemByte {
-		// hot add memory
-		addMemByte := newMemByte - oldMemByte
-		memHotplugMB, err := c.calcHotplugMemMiBSize(addMemByte)
-		if err != nil {
-			return err
-		}
-
-		virtLog.Debugf("hotplug %dMB mem", memHotplugMB)
-		addMemDevice := &memoryDevice{
-			sizeMB: int(memHotplugMB),
-		}
-		data, err := c.sandbox.hypervisor.hotplugAddDevice(addMemDevice, memoryDev)
-		if err != nil {
-			return err
-		}
-		memoryAdded, ok := data.(int)
-		if !ok {
-			return fmt.Errorf("Could not get the memory added, got %+v", data)
-		}
-		newResources.MemByte = oldMemByte + int64(memoryAdded)<<20
-		if err := c.sandbox.agent.onlineCPUMem(0, false); err != nil {
-			return err
-		}
-	} else {
-		// Try to remove a memory device with the difference
-		// from new memory and old memory
-		removeMem := &memoryDevice{
-			sizeMB: int((oldMemByte - newMemByte) >> 20),
-		}
-
-		data, err := c.sandbox.hypervisor.hotplugRemoveDevice(removeMem, memoryDev)
-		if err != nil {
-			return err
-		}
-		memoryRemoved, ok := data.(int)
-		if !ok {
-			return fmt.Errorf("Could not get the memory added, got %+v", data)
-		}
-		newResources.MemByte = oldMemByte - int64(memoryRemoved)<<20
-	}
-
-	return nil
-}
-
-func (c *Container) updateResources(oldResources, newResources ContainerResources) error {
-	// initialize with oldResources
-	c.config.Resources.VCPUs = oldResources.VCPUs
-	c.config.Resources.MemByte = oldResources.MemByte
-
-	// Cpu is not updated if period and/or quota not set
-	if newResources.VCPUs != 0 {
-		if err := c.updateVCPUResources(oldResources, &newResources); err != nil {
-			return err
-		}
-		// Set container's config VCPUs field only
-		c.config.Resources.VCPUs = newResources.VCPUs
-	}
-
-	// Memory is not updated if memory limit not set
-	if newResources.MemByte != 0 {
-		if err := c.updateMemoryResources(oldResources, &newResources); err != nil {
-			return err
-		}
-
-		// Set container's config MemByte field only
-		c.config.Resources.MemByte = newResources.MemByte
-	}
-
-	return nil
-}
diff --git a/virtcontainers/container_test.go b/virtcontainers/container_test.go
index a2326ef47..e9bb57bea 100644
--- a/virtcontainers/container_test.go
+++ b/virtcontainers/container_test.go
@@ -20,7 +20,6 @@ import (
 	"github.com/kata-containers/runtime/virtcontainers/device/config"
 	"github.com/kata-containers/runtime/virtcontainers/device/drivers"
 	"github.com/kata-containers/runtime/virtcontainers/device/manager"
-	vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
 	"github.com/stretchr/testify/assert"
 )
 
@@ -299,156 +298,6 @@ func TestCheckSandboxRunningSuccessful(t *testing.T) {
 	assert.Nil(t, err, "%v", err)
 }
 
-func TestContainerAddResources(t *testing.T) {
-	assert := assert.New(t)
-
-	c := &Container{
-		sandbox: &Sandbox{
-			storage: &filesystem{},
-		},
-	}
-	err := c.addResources()
-	assert.Nil(err)
-
-	c.config = &ContainerConfig{Annotations: make(map[string]string)}
-	c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodSandbox)
-	err = c.addResources()
-	assert.Nil(err)
-
-	c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodContainer)
-	err = c.addResources()
-	assert.Nil(err)
-
-	vCPUs := uint32(5)
-	memByte := int64(104857600)
-	c.config.Resources = ContainerResources{
-		VCPUs:   vCPUs,
-		MemByte: memByte,
-	}
-	c.sandbox = &Sandbox{
-		hypervisor: &mockHypervisor{},
-		agent:      &noopAgent{},
-		storage:    &filesystem{},
-	}
-	err = c.addResources()
-	assert.Nil(err)
-}
-
-func TestContainerRemoveResources(t *testing.T) {
-	assert := assert.New(t)
-
-	c := &Container{
-		sandbox: &Sandbox{
-			storage: &filesystem{},
-		},
-	}
-
-	err := c.addResources()
-	assert.Nil(err)
-
-	c.config = &ContainerConfig{Annotations: make(map[string]string)}
-	c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodSandbox)
-	err = c.removeResources()
-	assert.Nil(err)
-
-	c.config.Annotations[vcAnnotations.ContainerTypeKey] = string(PodContainer)
-	err = c.removeResources()
-	assert.Nil(err)
-
-	vCPUs := uint32(5)
-	c.config.Resources = ContainerResources{
-		VCPUs: vCPUs,
-	}
-
-	c.sandbox = &Sandbox{
-		hypervisor: &mockHypervisor{},
-		storage:    &filesystem{},
-	}
-
-	err = c.removeResources()
-	assert.Nil(err)
-}
-
-func TestContainerUpdateResources(t *testing.T) {
-	assert := assert.New(t)
-
-	sandbox := &Sandbox{
-		hypervisor: &mockHypervisor{},
-		agent:      &noopAgent{},
-		storage:    &filesystem{},
-	}
-
-	c := &Container{
-		sandbox: sandbox,
-	}
-	c.config = &ContainerConfig{Annotations: make(map[string]string)}
-
-	// VCPUs is equal to zero
-	oldResource := ContainerResources{
-		VCPUs:   0,
-		MemByte: 0,
-	}
-
-	newResource := ContainerResources{
-		VCPUs:   0,
-		MemByte: 104857600,
-	}
-
-	err := c.updateResources(oldResource, newResource)
-	assert.Nil(err)
-
-	// MemByte is equal to zero
-	newResource = ContainerResources{
-		VCPUs:   5,
-		MemByte: 0,
-	}
-
-	err = c.updateResources(oldResource, newResource)
-	assert.Nil(err)
-
-	// oldResource is equal to newResource
-	oldResource = ContainerResources{
-		VCPUs:   5,
-		MemByte: 104857600,
-	}
-
-	newResource = ContainerResources{
-		VCPUs:   5,
-		MemByte: 104857600,
-	}
-
-	err = c.updateResources(oldResource, newResource)
-	assert.Nil(err)
-
-	// memory hotplug and cpu hotplug
-	oldResource = ContainerResources{
-		VCPUs:   5,
-		MemByte: 104857600,
-	}
-
-	newResource = ContainerResources{
-		VCPUs:   10,
-		MemByte: 209715200,
-	}
-
-	err = c.updateResources(oldResource, newResource)
-	assert.Nil(err)
-
-	// memory hot remove and cpu hot remove
-	oldResource = ContainerResources{
-		VCPUs:   10,
-		MemByte: 209715200,
-	}
-
-	newResource = ContainerResources{
-		VCPUs:   5,
-		MemByte: 104857600,
-	}
-
-	err = c.updateResources(oldResource, newResource)
-	assert.Nil(err)
-}
-
 func TestContainerEnterErrorsOnContainerStates(t *testing.T) {
 	assert := assert.New(t)
 	c := &Container{
diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go
index 7cad8be30..8387674a7 100644
--- a/virtcontainers/hypervisor.go
+++ b/virtcontainers/hypervisor.go
@@ -585,6 +585,8 @@ type hypervisor interface {
 	addDevice(devInfo interface{}, devType deviceType) error
 	hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error)
 	hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error)
+	resizeMemory(memMB uint32, memoryBlockSizeMB uint32) (uint32, error)
+	resizeVCPUs(vcpus uint32) (uint32, uint32, error)
 	getSandboxConsole(sandboxID string) (string, error)
 	disconnect()
 	capabilities() capabilities
diff --git a/virtcontainers/mock_hypervisor.go b/virtcontainers/mock_hypervisor.go
index de492f003..025527870 100644
--- a/virtcontainers/mock_hypervisor.go
+++ b/virtcontainers/mock_hypervisor.go
@@ -87,6 +87,13 @@ func (m *mockHypervisor) getSandboxConsole(sandboxID string) (string, error) {
 	return "", nil
 }
 
+func (m *mockHypervisor) resizeMemory(memMB uint32, memorySectionSizeMB uint32) (uint32, error) {
+	return 0, nil
+}
+func (m *mockHypervisor) resizeVCPUs(cpus uint32) (uint32, uint32, error) {
+	return 0, 0, nil
+}
+
 func (m *mockHypervisor) disconnect() {
 }
 
diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go
index e0f431bb4..4fe588353 100644
--- a/virtcontainers/pkg/oci/utils.go
+++ b/virtcontainers/pkg/oci/utils.go
@@ -25,7 +25,6 @@ import (
 	"github.com/kata-containers/runtime/virtcontainers/device/config"
 	vcAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
 	dockershimAnnotations "github.com/kata-containers/runtime/virtcontainers/pkg/annotations/dockershim"
-	"github.com/kata-containers/runtime/virtcontainers/utils"
 )
 
 type annotationContainerType struct {
@@ -542,21 +541,6 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det
 		cmd.Capabilities = caps
 	}
 
-	var resources vc.ContainerResources
-	if ocispec.Linux.Resources.CPU != nil {
-		if ocispec.Linux.Resources.CPU.Quota != nil &&
-			ocispec.Linux.Resources.CPU.Period != nil {
-			resources.VCPUs = uint32(utils.ConstraintsToVCPUs(*ocispec.Linux.Resources.CPU.Quota, *ocispec.Linux.Resources.CPU.Period))
-		}
-	}
-	if ocispec.Linux.Resources.Memory != nil {
-		if ocispec.Linux.Resources.Memory.Limit != nil {
-			// do page align to memory, as cgroup memory.limit_in_bytes will be aligned to page when effect
-			// TODO use GetGuestDetails to get the guest OS page size.
-			resources.MemByte = (*ocispec.Linux.Resources.Memory.Limit >> 12) << 12
-		}
-	}
-
 	containerConfig := vc.ContainerConfig{
 		ID:     cid,
 		RootFs: rootfs,
@@ -568,7 +552,7 @@ func ContainerConfig(ocispec CompatOCISpec, bundlePath, cid, console string, det
 		},
 		Mounts:      containerMounts(ocispec),
 		DeviceInfos: deviceInfos,
-		Resources:   resources,
+		Resources:   *ocispec.Linux.Resources,
 	}
 
 	cType, err := ocispec.ContainerType()
diff --git a/virtcontainers/pkg/oci/utils_test.go b/virtcontainers/pkg/oci/utils_test.go
index 721d827c0..968fd5c69 100644
--- a/virtcontainers/pkg/oci/utils_test.go
+++ b/virtcontainers/pkg/oci/utils_test.go
@@ -209,6 +209,9 @@ func TestMinimalSandboxConfig(t *testing.T) {
 		},
 		Mounts:      expectedMounts,
 		DeviceInfos: expectedDeviceInfo,
+		Resources: specs.LinuxResources{Devices: []specs.LinuxDeviceCgroup{
+			{Allow: false, Type: "", Major: (*int64)(nil), Minor: (*int64)(nil), Access: "rwm"},
+		}},
 	}
 
 	expectedNetworkConfig := vc.NetworkConfig{}
diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go
index 8027304fb..051db4cb2 100644
--- a/virtcontainers/qemu.go
+++ b/virtcontainers/qemu.go
@@ -8,6 +8,7 @@ package virtcontainers
 import (
 	"context"
 	"fmt"
+	"math"
 	"os"
 	"path/filepath"
 	"strconv"
@@ -1078,34 +1079,52 @@ func (q *qemu) hotplugMemory(memDev *memoryDevice, op operation) (int, error) {
 	if memDev.sizeMB < 0 {
 		return 0, fmt.Errorf("cannot hotplug negative size (%d) memory", memDev.sizeMB)
 	}
+	memLog := q.Logger().WithField("hotplug", "memory")
 
-	// We do not support memory hot unplug.
-	if op == removeDevice {
-		// Dont fail for now, until we fix it.
-		// We return that we only unplugged 0
-		q.Logger().Warn("hot-remove VM memory not supported")
-		return 0, nil
-	}
-
+	memLog.WithField("hotplug-memory-mb", memDev.sizeMB).Debug("requested memory hotplug")
 	err := q.qmpSetup()
 	if err != nil {
 		return 0, err
 	}
 
-	maxMem, err := q.hostMemMB()
-	if err != nil {
-		return 0, err
-	}
-
-	// calculate current memory
 	currentMemory := int(q.config.MemorySize) + q.state.HotpluggedMemory
 
-	// Don't exceed the maximum amount of memory
-	if currentMemory+memDev.sizeMB > int(maxMem) {
-		return 0, fmt.Errorf("Unable to hotplug %d MiB memory, the SB has %d MiB and the maximum amount is %d MiB",
-			memDev.sizeMB, currentMemory, q.config.MemorySize)
+	if memDev.sizeMB == 0 {
+		memLog.Debug("hotplug is not required")
+		return 0, nil
 	}
 
+	switch op {
+	case removeDevice:
+		memLog.WithField("operation", "remove").Debugf("Requested to remove memory: %d MB", memDev.sizeMB)
+		// Don't fail but warn that this is not supported.
+		memLog.Warn("hot-remove VM memory not supported")
+		return 0, nil
+	case addDevice:
+		memLog.WithField("operation", "add").Debugf("Requested to add memory: %d MB", memDev.sizeMB)
+		maxMem, err := q.hostMemMB()
+		if err != nil {
+			return 0, err
+		}
+
+		// Don't exceed the maximum amount of memory
+		if currentMemory+memDev.sizeMB > int(maxMem) {
+			// FIXME: return a typed error
+			return 0, fmt.Errorf("Unable to hotplug %d MiB memory, the SB has %d MiB and the maximum amount is %d MiB",
+				memDev.sizeMB, currentMemory, q.config.MemorySize)
+		}
+		memoryAdded, err := q.hotplugAddMemory(memDev)
+		if err != nil {
+			return memoryAdded, err
+		}
+		return memoryAdded, nil
+	default:
+		return 0, fmt.Errorf("invalid operation %v", op)
+	}
+
+}
+
+func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
 	memoryDevices, err := q.qmpMonitorCh.qmp.ExecQueryMemoryDevices(q.qmpMonitorCh.ctx)
 	if err != nil {
 		return 0, fmt.Errorf("failed to query memory devices: %v", err)
@@ -1114,12 +1133,7 @@ func (q *qemu) hotplugMemory(memDev *memoryDevice, op operation) (int, error) {
 	if len(memoryDevices) != 0 {
 		memDev.slot = memoryDevices[len(memoryDevices)-1].Data.Slot + 1
 	}
-
-	return q.hotplugAddMemory(memDev)
-}
-
-func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
-	err := q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, "memory-backend-ram", "mem"+strconv.Itoa(memDev.slot), "", memDev.sizeMB)
+	err = q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, "memory-backend-ram", "mem"+strconv.Itoa(memDev.slot), "", memDev.sizeMB)
 	if err != nil {
 		q.Logger().WithError(err).Error("hotplug memory")
 		return 0, err
@@ -1243,6 +1257,77 @@ func (q *qemu) disconnect() {
 	q.qmpShutdown()
 }
 
+// resizeMemory handles a request to update the VM memory to reqMemMB.
+// Memory updates are managed with two approaches:
+// Add memory to VM:
+// When more memory is required, we hotplug it.
+// Remove memory from VM / return memory to the host:
+//
+// Memory unplug can be slow and cannot be guaranteed. Additionally, the
+// unplug granularity is not small: the memory to remove has to be at
+// least the size of one slot.
+// To return memory back, we resize the VM memory balloon instead.
+// A longer term solution is to evaluate solutions like virtio-mem.
+func (q *qemu) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32) (uint32, error) {
+
+	currentMemory := q.config.MemorySize + uint32(q.state.HotpluggedMemory)
+	err := q.qmpSetup()
+	if err != nil {
+		return 0, err
+	}
+	switch {
+	case currentMemory < reqMemMB:
+		// hotplug
+		addMemMB := reqMemMB - currentMemory
+		memHotplugMB, err := calcHotplugMemMiBSize(addMemMB, memoryBlockSizeMB)
+		if err != nil {
+			return currentMemory, err
+		}
+
+		addMemDevice := &memoryDevice{
+			sizeMB: int(memHotplugMB),
+		}
+		data, err := q.hotplugAddDevice(addMemDevice, memoryDev)
+		if err != nil {
+			return currentMemory, err
+		}
+		memoryAdded, ok := data.(int)
+		if !ok {
+			return currentMemory, fmt.Errorf("Could not get the memory added, got %+v", data)
+		}
+		currentMemory += uint32(memoryAdded)
+	case currentMemory > reqMemMB:
+		// hot unplug
+		addMemMB := currentMemory - reqMemMB
+		memHotunplugMB, err := calcHotplugMemMiBSize(addMemMB, memoryBlockSizeMB)
+		if err != nil {
+			return currentMemory, err
+		}
+
+		addMemDevice := &memoryDevice{
+			sizeMB: int(memHotunplugMB),
+		}
+		data, err := q.hotplugRemoveDevice(addMemDevice, memoryDev)
+		if err != nil {
+			return currentMemory, err
+		}
+		memoryRemoved, ok := data.(int)
+		if !ok {
+			return currentMemory, fmt.Errorf("Could not get the memory removed, got %+v", data)
+		}
+		// FIXME: this checks that hotplugRemoveDevice reported 0, as memory hot unplug is not supported.
+		// In the future, if it is implemented, this validation should be removed.
+		if memoryRemoved != 0 {
+			return currentMemory, fmt.Errorf("memory hot unplug is not supported, something went wrong")
+		}
+		currentMemory -= uint32(memoryRemoved)
+	}
+
+	// currentMemory is the updated memory of the VM; return it to the caller
+	// so the current VM memory state can be verified.
+	return currentMemory, nil
+}
+
 // genericAppendBridges appends to devices the given bridges
 func genericAppendBridges(devices []govmmQemu.Device, bridges []Bridge, machineType string) []govmmQemu.Device {
 	bus := defaultPCBridgeBus
@@ -1349,3 +1434,45 @@ func (q *qemu) getThreadIDs() (*threadIDs, error) {
 	}
 	return &tid, nil
 }
+
+func calcHotplugMemMiBSize(mem uint32, memorySectionSizeMB uint32) (uint32, error) {
+	if memorySectionSizeMB == 0 {
+		return mem, nil
+	}
+
+	// TODO: hot adding memory aligned to the memory section size would be more proper. See https://github.com/kata-containers/runtime/pull/624#issuecomment-419656853
+	return uint32(math.Ceil(float64(mem)/float64(memorySectionSizeMB))) * memorySectionSizeMB, nil
+}
+
+func (q *qemu) resizeVCPUs(reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) {
+
+	currentVCPUs = q.config.NumVCPUs + uint32(len(q.state.HotpluggedVCPUs))
+	newVCPUs = currentVCPUs
+	switch {
+	case currentVCPUs < reqVCPUs:
+		// hotplug
+		addCPUs := reqVCPUs - currentVCPUs
+		data, err := q.hotplugAddDevice(addCPUs, cpuDev)
+		if err != nil {
+			return currentVCPUs, newVCPUs, err
+		}
+		vCPUsAdded, ok := data.(uint32)
+		if !ok {
+			return currentVCPUs, newVCPUs, fmt.Errorf("Could not get the vCPUs added, got %+v", data)
+		}
+		newVCPUs += vCPUsAdded
+	case currentVCPUs > reqVCPUs:
+		// hot unplug
+		removeCPUs := currentVCPUs - reqVCPUs
+		data, err := q.hotplugRemoveDevice(removeCPUs, cpuDev)
+		if err != nil {
+			return currentVCPUs, newVCPUs, err
+		}
+		vCPUsRemoved, ok := data.(uint32)
+		if !ok {
+			return currentVCPUs, newVCPUs, fmt.Errorf("Could not get the vCPUs removed, got %+v", data)
+		}
+		newVCPUs -= vCPUsRemoved
+	}
+	return currentVCPUs, newVCPUs, nil
+}
diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go
index c6cf59699..e1cdaed01 100644
--- a/virtcontainers/sandbox.go
+++ b/virtcontainers/sandbox.go
@@ -27,6 +27,7 @@ import (
 	"github.com/kata-containers/runtime/virtcontainers/device/drivers"
 	deviceManager "github.com/kata-containers/runtime/virtcontainers/device/manager"
 	"github.com/kata-containers/runtime/virtcontainers/pkg/types"
+	"github.com/kata-containers/runtime/virtcontainers/utils"
 	"github.com/vishvananda/netlink"
 )
 
@@ -342,6 +343,7 @@ type SandboxConfig struct {
 	// Containers describe the list of containers within a Sandbox.
 	// This list can be empty and populated by adding containers
 	// to the Sandbox a posteriori.
+	// TODO: this should be a map to avoid duplicated containers
 	Containers []ContainerConfig
 
 	// Annotations keys must be unique strings and must be name-spaced
@@ -1271,9 +1273,24 @@ func (s *Sandbox) newContainers() error {
 }
 
 // CreateContainer creates a new container in the sandbox
+// This should be called only when the sandbox is already created.
+// It will add the new container config to sandbox.config.Containers.
 func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, error) {
 	// Create the container.
-	c, err := createContainer(s, contConfig)
+	c, err := newContainer(s, contConfig)
+	if err != nil {
+		return nil, err
+	}
+
+	// Update sandbox config.
+	s.config.Containers = append(s.config.Containers, contConfig)
+
+	// The sandbox is responsible for updating the VM resources needed by its containers.
+	if err := s.updateResources(); err != nil {
+		return nil, err
+	}
+
+	err = c.create()
 	if err != nil {
 		return nil, err
 	}
@@ -1289,8 +1307,6 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro
 		return nil, err
 	}
 
-	// Update sandbox config.
-	s.config.Containers = append(s.config.Containers, contConfig)
-
 	err = s.storage.storeSandboxResource(s.id, configFileType, *(s.config))
 	if err != nil {
 		return nil, err
@@ -1316,6 +1332,7 @@ func (s *Sandbox) StartContainer(containerID string) (VCContainer, error) {
 	if err != nil {
 		return nil, err
 	}
+	// FIXME: when a container is deleted from the sandbox, the resources need to be updated
 
 	return c, nil
 }
@@ -1445,7 +1462,12 @@ func (s *Sandbox) UpdateContainer(containerID string, resources specs.LinuxResou
 		return err
 	}
 
-	return c.update(resources)
+	err = c.update(resources)
+	if err != nil {
+		return err
+	}
+
+	return c.storeContainer()
 }
 
 // StatsContainer return the stats of a running container
@@ -1493,13 +1515,21 @@ func (s *Sandbox) createContainers() error {
 	span, _ := s.trace("createContainers")
 	defer span.Finish()
 
+	if err := s.updateResources(); err != nil {
+		return err
+	}
+
 	for _, contConfig := range s.config.Containers {
+
-		newContainer, err := createContainer(s, contConfig)
+		c, err := newContainer(s, contConfig)
 		if err != nil {
 			return err
 		}
+		if err := c.create(); err != nil {
+			return err
+		}
 
-		if err := s.addContainer(newContainer); err != nil {
+		if err := s.addContainer(c); err != nil {
 			return err
 		}
 	}
@@ -1868,3 +1898,63 @@ func (s *Sandbox) AddDevice(info config.DeviceInfo) (api.Device, error) {
 
 	return b, nil
 }
+
+func (s *Sandbox) updateResources() error {
+	// hypervisor.MemorySize is the amount of memory reserved for
+	// the VM and for containers without a memory limit
+
+	sumResources := specs.LinuxResources{
+		Memory: &specs.LinuxMemory{
+			Limit: new(int64),
+		},
+		CPU: &specs.LinuxCPU{
+			Period: new(uint64),
+			Quota:  new(int64),
+		},
+	}
+
+	for _, c := range s.config.Containers {
+		if m := c.Resources.Memory; m != nil && m.Limit != nil {
+			*sumResources.Memory.Limit += *m.Limit
+		}
+		if cpu := c.Resources.CPU; cpu != nil {
+			if cpu.Period != nil && cpu.Quota != nil {
+				*sumResources.CPU.Period += *cpu.Period
+				*sumResources.CPU.Quota += *cpu.Quota
+			}
+		}
+	}
+
+	sandboxVCPUs := uint32(utils.ConstraintsToVCPUs(*sumResources.CPU.Quota, *sumResources.CPU.Period))
+	sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
+
+	sandboxMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
+	sandboxMemoryByte += *sumResources.Memory.Limit
+
+	// Update vCPUs
+	s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
+	oldCPUs, newCPUs, err := s.hypervisor.resizeVCPUs(sandboxVCPUs)
+	if err != nil {
+		return err
+	}
+	// The CPUs were increased, ask the agent to online them
+	if oldCPUs < newCPUs {
+		vcpusAdded := newCPUs - oldCPUs
+		if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil {
+			return err
+		}
+	}
+	s.Logger().Debugf("Sandbox CPUs: %d", newCPUs)
+
+	// Update memory
+	s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory")
+	newMemory, err := s.hypervisor.resizeMemory(uint32(sandboxMemoryByte>>utils.MibToBytesShift), s.state.GuestMemoryBlockSizeMB)
+	if err != nil {
+		return err
+	}
+	s.Logger().Debugf("Sandbox memory size: %d Byte", newMemory)
+	if err := s.agent.onlineCPUMem(0, false); err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/virtcontainers/sandbox_test.go b/virtcontainers/sandbox_test.go
index 88b31de39..ce4daad4c 100644
--- a/virtcontainers/sandbox_test.go
+++ b/virtcontainers/sandbox_test.go
@@ -903,12 +903,12 @@ func TestSandboxGetContainer(t *testing.T) {
 	contID := "999"
 	contConfig := newTestContainerConfigNoop(contID)
 
-	newContainer, err := createContainer(p, contConfig)
+	nc, err := newContainer(p, contConfig)
 	if err != nil {
 		t.Fatalf("Failed to create container %+v in sandbox %+v: %v", contConfig, p, err)
 	}
 
-	if err := p.addContainer(newContainer); err != nil {
+	if err := p.addContainer(nc); err != nil {
 		t.Fatalf("Could not add container to sandbox %v", err)
 	}
 
diff --git a/virtcontainers/utils/utils.go b/virtcontainers/utils/utils.go
index 8c2814b64..3d7d75c93 100644
--- a/virtcontainers/utils/utils.go
+++ b/virtcontainers/utils/utils.go
@@ -18,6 +18,9 @@ const cpBinaryName = "cp"
 
 const fileMode0755 = os.FileMode(0755)
 
+// MibToBytesShift is the number of bits to shift in order to convert MiB to bytes
+const MibToBytesShift = 20
+
 // MaxSocketPathLen is the effective maximum Unix domain socket length.
 //
 // See unix(7).
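
A note on the sizing math that `Sandbox.updateResources` now centralizes: vCPUs come from summing every container's CFS quota/period and rounding up, and the memory target is the hypervisor's base memory (MiB, shifted by `MibToBytesShift`) plus the summed container limits. The sketch below re-derives that math standalone; `constraintsToVCPUs` is a local stand-in for the assumed semantics of `utils.ConstraintsToVCPUs`, and all values are example inputs, not code from this patch.

```go
package main

import "fmt"

// constraintsToVCPUs is a stand-in mirroring the assumed semantics of
// utils.ConstraintsToVCPUs: round an OCI CFS quota/period pair up to the
// next whole vCPU (e.g. 1.5 CPUs of quota -> 2 vCPUs).
func constraintsToVCPUs(quota int64, period uint64) uint {
	if quota > 0 && period > 0 {
		return uint((uint64(quota) + period - 1) / period)
	}
	return 0
}

func main() {
	// One container with quota=150000us per period=100000us (1.5 CPUs).
	fmt.Println(constraintsToVCPUs(150000, 100000)) // 2

	// Sandbox memory target, as in Sandbox.updateResources: base VM
	// memory in MiB shifted to bytes, plus summed container limits.
	const mibToBytesShift = 20          // mirrors utils.MibToBytesShift
	baseMemMiB := int64(2048)           // example hypervisorConfig().MemorySize
	sumLimits := int64(2 * (256 << 20)) // two containers, 256 MiB limit each
	sandboxMemoryByte := baseMemMiB<<mibToBytesShift + sumLimits
	fmt.Println(sandboxMemoryByte >> mibToBytesShift) // 2560 MiB passed to resizeMemory
}
```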
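Why `calcHotplugMemMiBSize` exists: the guest can generally only online hotplugged memory in multiples of its memory block/section size, so requests are rounded up to the next block boundary. The snippet below reuses the function as added in qemu.go above; the 128 MiB block size is only an example value (the real one comes from `s.state.GuestMemoryBlockSizeMB`).

```go
package main

import (
	"fmt"
	"math"
)

// calcHotplugMemMiBSize, as added in qemu.go: align a hotplug request (MiB)
// up to the guest memory section size when one is known; 0 means unknown.
func calcHotplugMemMiBSize(mem uint32, memorySectionSizeMB uint32) (uint32, error) {
	if memorySectionSizeMB == 0 {
		return mem, nil
	}
	return uint32(math.Ceil(float64(mem)/float64(memorySectionSizeMB))) * memorySectionSizeMB, nil
}

func main() {
	// With a 128 MiB block size, 100 MiB needs one full block and
	// 300 MiB needs three.
	for _, req := range []uint32{100, 300} {
		aligned, _ := calcHotplugMemMiBSize(req, 128)
		fmt.Printf("request=%d MiB -> hotplug=%d MiB\n", req, aligned) // 128, 384
	}
}
```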
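`Container.update` no longer converts to `ContainerResources`; it merges the requested `specs.LinuxResources` into the stored config and then delegates sizing to `Sandbox.updateResources`. The standalone sketch below isolates those merge rules (the `mergeResources` helper is hypothetical, written only for illustration): only a non-nil, non-zero CPU period/quota and a non-nil memory limit overwrite what is stored.

```go
package main

import (
	"fmt"

	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// mergeResources isolates the merge rules Container.update applies to
// c.config.Resources. Hypothetical helper, not part of this patch.
func mergeResources(cur *specs.LinuxResources, req specs.LinuxResources) {
	if cur.CPU == nil {
		cur.CPU = &specs.LinuxCPU{}
	}
	if cpu := req.CPU; cpu != nil {
		if p := cpu.Period; p != nil && *p != 0 {
			cur.CPU.Period = p
		}
		if q := cpu.Quota; q != nil && *q != 0 {
			cur.CPU.Quota = q
		}
	}
	if cur.Memory == nil {
		cur.Memory = &specs.LinuxMemory{}
	}
	if mem := req.Memory; mem != nil && mem.Limit != nil {
		cur.Memory.Limit = mem.Limit
	}
}

func main() {
	period := uint64(100000)
	quota := int64(200000)
	limit := int64(256 << 20)

	var cur specs.LinuxResources
	mergeResources(&cur, specs.LinuxResources{
		CPU:    &specs.LinuxCPU{Period: &period, Quota: &quota},
		Memory: &specs.LinuxMemory{Limit: &limit},
	})
	fmt.Println(*cur.CPU.Period, *cur.CPU.Quota, *cur.Memory.Limit) // 100000 200000 268435456
}
```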
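Finally, the `resizeVCPUs` contract added to the hypervisor interface returns both the pre- and post-resize counts so that `Sandbox.updateResources` can online only the delta through the agent. A toy illustration, where `fakeVM` is a hypothetical stand-in rather than a real hypervisor implementation:

```go
package main

import "fmt"

// fakeVM illustrates the resizeVCPUs contract from hypervisor.go:
// report the vCPU count before and after the resize.
type fakeVM struct {
	vcpus uint32
}

func (v *fakeVM) resizeVCPUs(req uint32) (oldVCPUs, newVCPUs uint32, err error) {
	oldVCPUs = v.vcpus
	v.vcpus = req // a real hypervisor would hotplug or hot-unplug here
	return oldVCPUs, v.vcpus, nil
}

func main() {
	vm := &fakeVM{vcpus: 1}
	oldCPUs, newCPUs, _ := vm.resizeVCPUs(3)
	// Mirrors Sandbox.updateResources: only newly added CPUs are onlined.
	if oldCPUs < newCPUs {
		fmt.Printf("online %d new vCPUs\n", newCPUs-oldCPUs) // online 2 new vCPUs
	}
}
```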