mirror of https://github.com/kata-containers/kata-containers.git
synced 2025-10-22 04:18:53 +00:00
In case the hypervisor implementation does not return any thread ID, this should not issue any error since there is simply nothing to constrain.

Fixes #1062

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
198 lines
5.3 KiB
Go
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package virtcontainers

import (
	"encoding/json"
	"fmt"

	"github.com/containerd/cgroups"
	"github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

const (
	vcpuGroupName       = "vcpu"
	defaultCgroupParent = "/kata"
)
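
// sandboxCgroups tracks the per-sandbox cgroup hierarchy: a common parent
// shared by all kata-containers sandboxes, a per-sandbox sub-cgroup, and a
// "vcpu" sub-cgroup holding only the hypervisor's vcpu threads.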
type sandboxCgroups struct {
	commonParent cgroups.Cgroup
	sandboxSub   cgroups.Cgroup
	vcpuSub      cgroups.Cgroup
}
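
// newCgroups creates the cgroup hierarchy for this sandbox, e.g. under the
// cpu controller: /sys/fs/cgroup/cpu/kata/<sandbox>/vcpu.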
func (s *Sandbox) newCgroups() error {
	// Note: cgroups.New still succeeds when the cgroup already exists.
	// Create the common parent for all kata-containers,
	// e.g. /sys/fs/cgroup/cpu/kata
	parent, err := cgroups.New(cgroups.V1,
		cgroups.StaticPath(defaultCgroupParent), &specs.LinuxResources{})
	if err != nil {
		return fmt.Errorf("failed to create cgroup for %q: %v", defaultCgroupParent, err)
	}

	// Create a sub-cgroup for this sandbox,
	// e.g. /sys/fs/cgroup/cpu/kata/<sandbox>
	sandboxSub, err := parent.New(s.id, &specs.LinuxResources{})
	if err != nil {
		return fmt.Errorf("failed to create cgroup for %s/%s: %v", defaultCgroupParent, s.id, err)
	}

	// Create a sub-cgroup for the vcpu threads.
	vcpuSub, err := sandboxSub.New(vcpuGroupName, &specs.LinuxResources{})
	if err != nil {
		return fmt.Errorf("failed to create cgroup for %s/%s/%s: %v", defaultCgroupParent, s.id, vcpuGroupName, err)
	}

	s.cgroup = &sandboxCgroups{
		commonParent: parent,
		sandboxSub:   sandboxSub,
		vcpuSub:      vcpuSub,
	}
	return nil
}
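
// destroyCgroups deletes the sandbox cgroup, first moving any processes it
// still contains back to the common parent so that deletion is not blocked.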
func (s *Sandbox) destroyCgroups() error {
	if s.cgroup == nil {
		s.Logger().Warningf("cgroup is not initialized, no need to destroy")
		return nil
	}

	// First move all processes in the sub-cgroup to the parent, in case a
	// live process blocks deletion of the cgroup.
	if err := s.cgroup.sandboxSub.MoveTo(s.cgroup.commonParent); err != nil {
		return fmt.Errorf("failed to clear cgroup processes: %v", err)
	}

	return s.cgroup.sandboxSub.Delete()
}
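
// setupCgroups merges the CPU constraints from all containers in the sandbox
// and applies the result to the sandbox cgroup.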
func (s *Sandbox) setupCgroups() error {
	if s.cgroup == nil {
		return fmt.Errorf("failed to setup uninitialized cgroup for sandbox")
	}

	resource, err := s.mergeSpecResource()
	if err != nil {
		return err
	}

	if err := s.applyCPUCgroup(resource); err != nil {
		return err
	}
	return nil
}
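
// applyCPUCgroup applies the merged CPU constraint to the vcpu sub-cgroup and
// moves the hypervisor's vcpu threads into it.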
func (s *Sandbox) applyCPUCgroup(rc *specs.LinuxResources) error {
	if s.cgroup == nil {
		return fmt.Errorf("failed to setup uninitialized cgroup for sandbox")
	}

	// Apply the CPU constraint to the vcpu cgroup.
	if err := s.cgroup.vcpuSub.Update(rc); err != nil {
		return err
	}

	// When a new container joins, new CPUs could be hotplugged, so we
	// have to query fresh vcpu info from the hypervisor every time.
	tids, err := s.hypervisor.getThreadIDs()
	if err != nil {
		return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
	}
	if tids == nil {
		// If there is no tid returned from the hypervisor, this is not
		// a bug. It simply means there is nothing to constrain, hence
		// let's return without any error from here.
		return nil
	}

	// Use Add() to add a vcpu thread to s.cgroup: it writes the thread ID
	// to `cgroup.procs`, which by default immediately moves all threads of
	// the qemu process into this cgroup.
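	// A single tid is therefore enough to pull the whole hypervisor
	// process in, which is why only the first vcpu tid is added here.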
	if len(tids.vcpus) > 0 {
		if err := s.cgroup.sandboxSub.Add(cgroups.Process{
			Pid: tids.vcpus[0],
		}); err != nil {
			return err
		}
	}

	for _, i := range tids.vcpus {
		if i <= 0 {
			continue
		}

		// In contrast, AddTask() writes the thread ID to `tasks`.
		// After this, the vcpu threads are in the "vcpu" sub-cgroup,
		// while other qemu threads are left untouched in the parent
		// cgroup.
		if err := s.cgroup.vcpuSub.AddTask(cgroups.Process{
			Pid: i,
		}); err != nil {
			return err
		}
	}

	return nil
}
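
// mergeSpecResource folds the CPU constraints of every container spec found
// in the sandbox annotations into a single LinuxResources for the sandbox.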
func (s *Sandbox) mergeSpecResource() (*specs.LinuxResources, error) {
	if s.config == nil {
		return nil, fmt.Errorf("sandbox config is nil")
	}

	resource := &specs.LinuxResources{
		CPU: &specs.LinuxCPU{},
	}

	for _, c := range s.config.Containers {
		config, ok := c.Annotations[annotations.ConfigJSONKey]
		if !ok {
			s.Logger().WithField("container", c.ID).Warningf("failed to find config from container annotations")
			continue
		}

		var spec specs.Spec
		if err := json.Unmarshal([]byte(config), &spec); err != nil {
			return nil, err
		}

		// TODO: how to handle empty/unlimited resource?
		// maybe we should add a default CPU/Memory delta when no
		// resource limit is given. -- @WeiZhang555
		if spec.Linux == nil || spec.Linux.Resources == nil {
			continue
		}
		// calculate cpu quota and period
		s.mergeCPUResource(resource, spec.Linux.Resources)
	}
	return resource, nil
}
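
// mergeCPUResource adds rc's CPU bandwidth to orig, rescaling rc's quota to
// orig's period so that quotas declared against different periods can be
// summed.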
func (s *Sandbox) mergeCPUResource(orig, rc *specs.LinuxResources) {
	if orig.CPU == nil {
		orig.CPU = &specs.LinuxCPU{}
	}

	if rc.CPU != nil && rc.CPU.Quota != nil && rc.CPU.Period != nil &&
		*rc.CPU.Quota > 0 && *rc.CPU.Period > 0 {
		if orig.CPU.Period == nil {
			orig.CPU.Period = rc.CPU.Period
			orig.CPU.Quota = rc.CPU.Quota
		} else {
			// An example to show how this works:
			// container A and `orig` have quota 5000 and period 10000.
			// Here comes container B with quota 40 and period 100, so
			// using the previous period 10000 as a baseline, container B
			// has a proportional quota of 4000 over period 10000:
			// delta := 40 / 100 * 10000 = 4000
			// and the final *orig.CPU.Quota = 5000 + 4000 = 9000.
			delta := float64(*rc.CPU.Quota) / float64(*rc.CPU.Period) * float64(*orig.CPU.Period)
			*orig.CPU.Quota += int64(delta)
		}
	}
}