mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-29 04:51:34 +00:00
Merge pull request #2153 from egernst/cgroup-fixups
sandbox/cgroups: don't constrain if using SandboxCgroupsOnly
This commit is contained in:
commit
764ba9f83d
@ -36,7 +36,7 @@ const cgroupKataPrefix = "kata"
|
|||||||
var cgroupsLoadFunc = cgroups.Load
|
var cgroupsLoadFunc = cgroups.Load
|
||||||
var cgroupsNewFunc = cgroups.New
|
var cgroupsNewFunc = cgroups.New
|
||||||
|
|
||||||
// V1Constraints returns the cgroups that are compatible with th VC architecture
|
// V1Constraints returns the cgroups that are compatible with the VC architecture
|
||||||
// and hypervisor, constraints can be applied to these cgroups.
|
// and hypervisor, constraints can be applied to these cgroups.
|
||||||
func V1Constraints() ([]cgroups.Subsystem, error) {
|
func V1Constraints() ([]cgroups.Subsystem, error) {
|
||||||
root, err := cgroupV1MountPoint()
|
root, err := cgroupV1MountPoint()
|
||||||
@ -51,7 +51,7 @@ func V1Constraints() ([]cgroups.Subsystem, error) {
|
|||||||
return cgroupsSubsystems(subsystems)
|
return cgroupsSubsystems(subsystems)
|
||||||
}
|
}
|
||||||
|
|
||||||
// V1NoConstraints returns the cgroups that are *not* compatible with th VC
|
// V1NoConstraints returns the cgroups that are *not* compatible with the VC
|
||||||
// architecture and hypervisor, constraints MUST NOT be applied to these cgroups.
|
// architecture and hypervisor, constraints MUST NOT be applied to these cgroups.
|
||||||
func V1NoConstraints() ([]cgroups.Subsystem, error) {
|
func V1NoConstraints() ([]cgroups.Subsystem, error) {
|
||||||
root, err := cgroupV1MountPoint()
|
root, err := cgroupV1MountPoint()
|
||||||
|
@ -130,6 +130,7 @@ func TestUpdateCgroups(t *testing.T) {
|
|||||||
state: types.SandboxState{
|
state: types.SandboxState{
|
||||||
CgroupPath: "",
|
CgroupPath: "",
|
||||||
},
|
},
|
||||||
|
config: &SandboxConfig{SandboxCgroupOnly: false},
|
||||||
}
|
}
|
||||||
|
|
||||||
// empty path
|
// empty path
|
||||||
|
@ -1852,10 +1852,12 @@ func (s *Sandbox) AddDevice(info config.DeviceInfo) (api.Device, error) {
|
|||||||
return b, nil
|
return b, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// updateResources will calculate the resources required for the virtual machine, and
|
||||||
|
// adjust the virtual machine sizing accordingly. For a given sandbox, it will calculate the
|
||||||
|
// number of vCPUs required based on the sum of container requests, plus default CPUs for the VM.
|
||||||
|
// The same is done for memory. If changes in memory or CPU are made, the VM will be updated and
|
||||||
|
// the agent will online the applicable CPU and memory.
|
||||||
func (s *Sandbox) updateResources() error {
|
func (s *Sandbox) updateResources() error {
|
||||||
// the hypervisor.MemorySize is the amount of memory reserved for
|
|
||||||
// the VM and contaniners without memory limit
|
|
||||||
|
|
||||||
if s == nil {
|
if s == nil {
|
||||||
return errors.New("sandbox is nil")
|
return errors.New("sandbox is nil")
|
||||||
}
|
}
|
||||||
@ -1868,8 +1870,9 @@ func (s *Sandbox) updateResources() error {
|
|||||||
// Add default vcpus for sandbox
|
// Add default vcpus for sandbox
|
||||||
sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
|
sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
|
||||||
|
|
||||||
sandboxMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
|
sandboxMemoryByte := s.calculateSandboxMemory()
|
||||||
sandboxMemoryByte += s.calculateSandboxMemory()
|
// Add default / reserved memory for sandbox.
|
||||||
|
sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
|
||||||
|
|
||||||
// Update VCPUs
|
// Update VCPUs
|
||||||
s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
|
s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
|
||||||
@ -1877,7 +1880,8 @@ func (s *Sandbox) updateResources() error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// The CPUs were increased, ask agent to online them
|
|
||||||
|
// If the CPUs were increased, ask agent to online them
|
||||||
if oldCPUs < newCPUs {
|
if oldCPUs < newCPUs {
|
||||||
vcpusAdded := newCPUs - oldCPUs
|
vcpusAdded := newCPUs - oldCPUs
|
||||||
if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil {
|
if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil {
|
||||||
@ -1894,7 +1898,7 @@ func (s *Sandbox) updateResources() error {
|
|||||||
}
|
}
|
||||||
s.Logger().Debugf("Sandbox memory size: %d MB", newMemory)
|
s.Logger().Debugf("Sandbox memory size: %d MB", newMemory)
|
||||||
if s.state.GuestMemoryHotplugProbe && updatedMemoryDevice.addr != 0 {
|
if s.state.GuestMemoryHotplugProbe && updatedMemoryDevice.addr != 0 {
|
||||||
//notify the guest kernel about memory hot-add event, before onlining them
|
// notify the guest kernel about memory hot-add event, before onlining them
|
||||||
s.Logger().Debugf("notify guest kernel memory hot-add event via probe interface, memory device located at 0x%x", updatedMemoryDevice.addr)
|
s.Logger().Debugf("notify guest kernel memory hot-add event via probe interface, memory device located at 0x%x", updatedMemoryDevice.addr)
|
||||||
if err := s.agent.memHotplugByProbe(updatedMemoryDevice.addr, uint32(updatedMemoryDevice.sizeMB), s.state.GuestMemoryBlockSizeMB); err != nil {
|
if err := s.agent.memHotplugByProbe(updatedMemoryDevice.addr, uint32(updatedMemoryDevice.sizeMB), s.state.GuestMemoryBlockSizeMB); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -1936,7 +1940,19 @@ func (s *Sandbox) GetHypervisorType() string {
|
|||||||
return string(s.config.HypervisorType)
|
return string(s.config.HypervisorType)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cgroupsUpdate will:
|
||||||
|
// 1) get the v1constraints cgroup associated with the stored cgroup path
|
||||||
|
// 2) (re-)add hypervisor vCPU threads to the appropriate cgroup
|
||||||
|
// 3) If we are managing sandbox cgroup, update the v1constraints cgroup size
|
||||||
func (s *Sandbox) cgroupsUpdate() error {
|
func (s *Sandbox) cgroupsUpdate() error {
|
||||||
|
|
||||||
|
// If Kata is configured for SandboxCgroupOnly, the VMM and its processes are already
|
||||||
|
// in the Kata sandbox cgroup (inherited). No need to move threads/processes, and we should
|
||||||
|
// rely on parent's cgroup CPU/memory values
|
||||||
|
if s.config.SandboxCgroupOnly {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
if s.state.CgroupPath == "" {
|
if s.state.CgroupPath == "" {
|
||||||
s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
|
s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
|
||||||
return nil
|
return nil
|
||||||
@ -1947,7 +1963,7 @@ func (s *Sandbox) cgroupsUpdate() error {
|
|||||||
return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
|
return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.constrainHypervisorVCPUs(cgroup); err != nil {
|
if err := s.constrainHypervisor(cgroup); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1968,6 +1984,8 @@ func (s *Sandbox) cgroupsUpdate() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cgroupsDelete will move the running processes in the sandbox cgroup
|
||||||
|
// to the parent and then delete the sandbox cgroup
|
||||||
func (s *Sandbox) cgroupsDelete() error {
|
func (s *Sandbox) cgroupsDelete() error {
|
||||||
s.Logger().Debug("Deleting sandbox cgroup")
|
s.Logger().Debug("Deleting sandbox cgroup")
|
||||||
if s.state.CgroupPath == "" {
|
if s.state.CgroupPath == "" {
|
||||||
@ -1976,19 +1994,19 @@ func (s *Sandbox) cgroupsDelete() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var path string
|
var path string
|
||||||
cgroupSubystems := V1NoConstraints
|
var cgroupSubsystems cgroups.Hierarchy
|
||||||
|
|
||||||
if s.config.SandboxCgroupOnly {
|
if s.config.SandboxCgroupOnly {
|
||||||
// Override V1NoConstraints, if SandboxCgroupOnly is enabled
|
cgroupSubsystems = cgroups.V1
|
||||||
cgroupSubystems = cgroups.V1
|
|
||||||
path = s.state.CgroupPath
|
path = s.state.CgroupPath
|
||||||
s.Logger().WithField("path", path).Debug("Deleting sandbox cgroups (all subsystems)")
|
s.Logger().WithField("path", path).Debug("Deleting sandbox cgroups (all subsystems)")
|
||||||
} else {
|
} else {
|
||||||
|
cgroupSubsystems = V1NoConstraints
|
||||||
path = cgroupNoConstraintsPath(s.state.CgroupPath)
|
path = cgroupNoConstraintsPath(s.state.CgroupPath)
|
||||||
s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
|
s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
|
||||||
}
|
}
|
||||||
|
|
||||||
sandboxCgroups, err := cgroupsLoadFunc(cgroupSubystems, cgroups.StaticPath(path))
|
sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
|
||||||
if err == cgroups.ErrCgroupDeleted {
|
if err == cgroups.ErrCgroupDeleted {
|
||||||
// cgroup already deleted
|
// cgroup already deleted
|
||||||
s.Logger().Warnf("cgroup already deleted: '%s'", err)
|
s.Logger().Warnf("cgroup already deleted: '%s'", err)
|
||||||
@ -2000,7 +2018,7 @@ func (s *Sandbox) cgroupsDelete() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// move running process here, that way cgroup can be removed
|
// move running process here, that way cgroup can be removed
|
||||||
parent, err := parentCgroup(cgroupSubystems, path)
|
parent, err := parentCgroup(cgroupSubsystems, path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// parent cgroup doesn't exist, that means there are no processes running
|
// parent cgroup doesn't exist, that means there are no processes running
|
||||||
// and the no constraints cgroup was removed.
|
// and the no constraints cgroup was removed.
|
||||||
@ -2016,35 +2034,50 @@ func (s *Sandbox) cgroupsDelete() error {
|
|||||||
return sandboxCgroups.Delete()
|
return sandboxCgroups.Delete()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Sandbox) constrainHypervisorVCPUs(cgroup cgroups.Cgroup) error {
|
// constrainHypervisor will place the VMM and vCPU threads into cgroups.
|
||||||
|
func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error {
|
||||||
|
// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
|
||||||
|
// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
|
||||||
|
// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
|
||||||
|
// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained
|
||||||
|
// cgroup
|
||||||
|
if s.config.SandboxCgroupOnly {
|
||||||
|
// Kata components were moved into the sandbox-cgroup already, so VMM
|
||||||
|
// will already land there as well. No need to take action
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
pids := s.hypervisor.getPids()
|
pids := s.hypervisor.getPids()
|
||||||
if len(pids) == 0 || pids[0] == 0 {
|
if len(pids) == 0 || pids[0] == 0 {
|
||||||
return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
|
return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move hypervisor into cgroups without constraints,
|
// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
|
||||||
// those cgroups are not yet supported.
|
// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
|
||||||
|
// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
|
||||||
|
// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained
|
||||||
|
// cgroup
|
||||||
|
// Move the VMM into cgroups without constraints, those cgroups are not yet supported.
|
||||||
resources := &specs.LinuxResources{}
|
resources := &specs.LinuxResources{}
|
||||||
path := cgroupNoConstraintsPath(s.state.CgroupPath)
|
path := cgroupNoConstraintsPath(s.state.CgroupPath)
|
||||||
noConstraintsCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
|
vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Could not create cgroup %v: %v", path, err)
|
return fmt.Errorf("Could not create cgroup %v: %v", path, err)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, pid := range pids {
|
for _, pid := range pids {
|
||||||
if pid <= 0 {
|
if pid <= 0 {
|
||||||
s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
|
s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := noConstraintsCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
|
if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
|
||||||
return fmt.Errorf("Could not add hypervisor PID %d to cgroup %v: %v", pid, path, err)
|
return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// when new container joins, new CPU could be hotplugged, so we
|
// when new container joins, new CPU could be hotplugged, so we
|
||||||
// have to query fresh vcpu info from hypervisor for every time.
|
// have to query fresh vcpu info from hypervisor every time.
|
||||||
tids, err := s.hypervisor.getThreadIDs()
|
tids, err := s.hypervisor.getThreadIDs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
|
return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
|
||||||
@ -2056,9 +2089,9 @@ func (s *Sandbox) constrainHypervisorVCPUs(cgroup cgroups.Cgroup) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// We are about to move just the vcpus (threads) into cgroups with constraints.
|
// Move vcpus (threads) into cgroups with constraints.
|
||||||
// Move whole hypervisor process whould be easier but the IO/network performance
|
// Move whole hypervisor process would be easier but the IO/network performance
|
||||||
// whould be impacted.
|
// would be over-constrained.
|
||||||
for _, i := range tids.vcpus {
|
for _, i := range tids.vcpus {
|
||||||
// In contrast, AddTask will write thread id to `tasks`
|
// In contrast, AddTask will write thread id to `tasks`
|
||||||
// After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
|
// After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
|
||||||
|
Loading…
Reference in New Issue
Block a user