Merge pull request #1880 from jcvenegas/pod-cgroup-only
cgroups: Use only pod cgroup
This commit is contained in: commit 282d85899e

Makefile (5 additions)
@@ -183,6 +183,9 @@ DEFDISABLENESTINGCHECKS := false
DEFMSIZE9P := 8192
DEFHOTPLUGVFIOONROOTBUS := false

# Default cgroup model
DEFSANDBOXCGROUPONLY ?= false

SED = sed

CLI_DIR = cli
@@ -424,6 +427,7 @@ USER_VARS += DEFDISABLENESTINGCHECKS
USER_VARS += DEFMSIZE9P
USER_VARS += DEFHOTPLUGVFIOONROOTBUS
USER_VARS += DEFENTROPYSOURCE
USER_VARS += DEFSANDBOXCGROUPONLY
USER_VARS += BUILDFLAGS

@@ -579,6 +583,7 @@ $(GENERATED_FILES): %: %.in $(MAKEFILE_LIST) VERSION .git-commit
    -e "s|@DEFMSIZE9P@|$(DEFMSIZE9P)|g" \
    -e "s|@DEFHOTPLUGONROOTBUS@|$(DEFHOTPLUGVFIOONROOTBUS)|g" \
    -e "s|@DEFENTROPYSOURCE@|$(DEFENTROPYSOURCE)|g" \
    -e "s|@DEFSANDBOXCGROUPONLY@|$(DEFSANDBOXCGROUPONLY)|g" \
    $< > $@

generate-config: $(CONFIGS)
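Note (not part of the change): because DEFSANDBOXCGROUPONLY is declared with `?=`, exported through USER_VARS and substituted into the generated configuration files by the sed rule above, the shipped default can be overridden at build time (for example, `make DEFSANDBOXCGROUPONLY=true`) or later by editing `sandbox_cgroup_only` in the configuration TOML.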
@@ -228,6 +228,14 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: false)
#disable_new_netns = true

# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime.
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production;
# they may break compatibility, and are prepared for a big version bump.
@@ -330,6 +330,14 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: false)
#disable_new_netns = true

# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime.
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production;
# they may break compatibility, and are prepared for a big version bump.
@@ -404,6 +404,12 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: false)
#disable_new_netns = true

# If enabled, the runtime uses the parent cgroup of a container's PodSandbox. This
# should be enabled for users where the caller sets up the parent cgroup of the
# containers running in a sandbox, so all the resources of the Kata container run
# in the same cgroup and performance isolation is more accurate.
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production;
# they may break compatibility, and are prepared for a big version bump.
@@ -412,6 +412,14 @@ disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
# (default: false)
#disable_new_netns = true

# If enabled, the runtime will add all the kata processes inside one dedicated cgroup.
# The container cgroups in the host are not created, just one single cgroup per sandbox.
# The sandbox cgroup is not constrained by the runtime.
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
# See: https://godoc.org/github.com/kata-containers/runtime/virtcontainers#ContainerType
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@

# Enabled experimental feature list, format: ["a", "b"].
# Experimental features are features not stable enough for production;
# they may break compatibility, and are prepared for a big version bump.
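For illustration only (not part of the change): the configuration comments above say the sandbox cgroup path is a sibling of the cgroup of the container carrying the PodSandbox annotation, i.e. it lives under that container's parent cgroup. A minimal sketch of that path derivation, using hypothetical values and a hypothetical helper name, assuming the "kata" prefix used by the runtime:

package main

import (
    "fmt"
    "path/filepath"
)

// kataCgroupPath derives the Kata sandbox cgroup from the cgroup path of the
// PodSandbox ("pause") container: keep its parent directory and append a
// "kata_<sandboxID>" leaf, so every Kata process for the pod lives in one cgroup.
func kataCgroupPath(podContainerCgroup, sandboxID string) string {
    return filepath.Join(filepath.Dir(podContainerCgroup), "kata_"+sandboxID)
}

func main() {
    // Hypothetical values for illustration only.
    podContainerCgroup := "/kubepods/besteffort/pod1234/pause-container-id"
    fmt.Println(kataCgroupPath(podContainerCgroup, "mysandbox"))
    // Output: /kubepods/besteffort/pod1234/kata_mysandbox
}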
@@ -69,6 +69,7 @@ type RuntimeInfo struct {
    Trace               bool
    DisableGuestSeccomp bool
    DisableNewNetNs     bool
    SandboxCgroupOnly   bool
    Experimental        []exp.Feature
    Path                string
}
@@ -187,6 +188,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo {
    Config:              runtimeConfig,
    Path:                runtimePath,
    DisableNewNetNs:     config.DisableNewNetNs,
    SandboxCgroupOnly:   config.SandboxCgroupOnly,
    Experimental:        config.Experimental,
    DisableGuestSeccomp: config.DisableGuestSeccomp,
}
@@ -133,6 +133,7 @@ type runtime struct {
    Tracing             bool     `toml:"enable_tracing"`
    DisableNewNetNs     bool     `toml:"disable_new_netns"`
    DisableGuestSeccomp bool     `toml:"disable_guest_seccomp"`
    SandboxCgroupOnly   bool     `toml:"sandbox_cgroup_only"`
    Experimental        []string `toml:"experimental"`
    InterNetworkModel   string   `toml:"internetworking_model"`
}
@@ -1054,6 +1055,7 @@ func LoadConfiguration(configPath string, ignoreLogging, builtIn bool) (resolved
        config.ProxyConfig = vc.ProxyConfig{Debug: config.Debug}
    }

    config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
    config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
    for _, f := range tomlConf.Runtime.Experimental {
        feature := exp.Get(f)
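The new `toml:"sandbox_cgroup_only"` tag is what ties the configuration key to the runtime struct above. A minimal, standalone sketch of that mapping (not the runtime's actual loader), assuming the github.com/BurntSushi/toml package:

package main

import (
    "fmt"

    "github.com/BurntSushi/toml"
)

// runtimeSection mirrors only the field relevant here; the real struct has many more.
type runtimeSection struct {
    SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
}

type config struct {
    Runtime runtimeSection `toml:"runtime"`
}

func main() {
    data := `
[runtime]
sandbox_cgroup_only = true
`
    var cfg config
    // Decode parses the TOML document into the tagged struct fields.
    if _, err := toml.Decode(data, &cfg); err != nil {
        panic(err)
    }
    fmt.Println("sandbox_cgroup_only:", cfg.Runtime.SandboxCgroupOnly) // true
}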
@@ -75,6 +75,13 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
        return nil, err
    }

    // Move the runtime to the sandbox cgroup so that all processes are created there.
    if s.config.SandboxCgroupOnly {
        if err := s.setupSandboxCgroup(); err != nil {
            return nil, err
        }
    }

    // cleanup sandbox resources in case of any failure
    defer func() {
        if err != nil {
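The reason the runtime joins the sandbox cgroup this early is that cgroup membership is inherited: any process it forks afterwards (hypervisor, proxy, shim) starts in the same cgroup. A minimal illustrative sketch of that pattern with github.com/containerd/cgroups, using a hypothetical cgroup path (setupSandboxCgroup, further down in this diff, does the equivalent work in the runtime):

package main

import (
    "os"

    "github.com/containerd/cgroups"
    specs "github.com/opencontainers/runtime-spec/specs-go"
)

func main() {
    // Hypothetical sandbox cgroup path, for illustration only.
    path := "/kubepods/pod1234/kata_mysandbox"

    // Create (or reuse) the cgroup with no constraints attached.
    cg, err := cgroups.New(cgroups.V1, cgroups.StaticPath(path), &specs.LinuxResources{})
    if err != nil {
        panic(err)
    }

    // Add the current process; children forked after this point inherit the cgroup.
    if err := cg.Add(cgroups.Process{Pid: os.Getpid()}); err != nil {
        panic(err)
    }
}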
@@ -9,13 +9,11 @@ package virtcontainers
import (
    "bufio"
    "fmt"
    "math"
    "os"
    "path/filepath"
    "strings"

    "github.com/containerd/cgroups"
    "github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
    specs "github.com/opencontainers/runtime-spec/specs-go"
)
@@ -144,194 +142,6 @@ func parentCgroup(hierarchy cgroups.Hierarchy, path string) (cgroups.Cgroup, err
    return parentCgroup, nil
}

func (s *Sandbox) updateCgroups() error {
    if s.state.CgroupPath == "" {
        s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
        return nil
    }

    cgroup, err := cgroupsLoadFunc(V1Constraints, cgroups.StaticPath(s.state.CgroupPath))
    if err != nil {
        return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
    }

    if err := s.constrainHypervisor(cgroup); err != nil {
        return err
    }

    if len(s.containers) <= 1 {
        // nothing to update
        return nil
    }

    resources, err := s.resources()
    if err != nil {
        return err
    }

    if err := cgroup.Update(&resources); err != nil {
        return fmt.Errorf("Could not update cgroup %v: %v", s.state.CgroupPath, err)
    }

    return nil
}

func (s *Sandbox) deleteCgroups() error {
    s.Logger().Debug("Deleting sandbox cgroup")

    path := cgroupNoConstraintsPath(s.state.CgroupPath)
    s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
    noConstraintsCgroup, err := cgroupsLoadFunc(V1NoConstraints, cgroups.StaticPath(path))
    if err == cgroups.ErrCgroupDeleted {
        // cgroup already deleted
        return nil
    }

    if err != nil {
        return fmt.Errorf("Could not load cgroup without constraints %v: %v", path, err)
    }

    // move running process here, that way cgroup can be removed
    parent, err := parentCgroup(V1NoConstraints, path)
    if err != nil {
        // parent cgroup doesn't exist, that means there are no process running
        // and the no constraints cgroup was removed.
        s.Logger().WithError(err).Warn("Parent cgroup doesn't exist")
        return nil
    }

    if err := noConstraintsCgroup.MoveTo(parent); err != nil {
        // Don't fail, cgroup can be deleted
        s.Logger().WithError(err).Warn("Could not move process from no constraints to parent cgroup")
    }

    return noConstraintsCgroup.Delete()
}

func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error {
    pids := s.hypervisor.getPids()
    if len(pids) == 0 || pids[0] == 0 {
        return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
    }

    // Move hypervisor into cgroups without constraints,
    // those cgroups are not yet supported.
    resources := &specs.LinuxResources{}
    path := cgroupNoConstraintsPath(s.state.CgroupPath)
    noConstraintsCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
    if err != nil {
        return fmt.Errorf("Could not create cgroup %v: %v", path, err)
    }
    for _, pid := range pids {
        if pid <= 0 {
            s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
            continue
        }
        if err := noConstraintsCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
            return fmt.Errorf("Could not add hypervisor PID %d to cgroup %v: %v", pid, path, err)
        }
    }

    // when new container joins, new CPU could be hotplugged, so we
    // have to query fresh vcpu info from hypervisor for every time.
    tids, err := s.hypervisor.getThreadIDs()
    if err != nil {
        return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
    }
    if len(tids.vcpus) == 0 {
        // If there's no tid returned from the hypervisor, this is not
        // a bug. It simply means there is nothing to constrain, hence
        // let's return without any error from here.
        return nil
    }

    // We are about to move just the vcpus (threads) into cgroups with constraints.
    // Move whole hypervisor process whould be easier but the IO/network performance
    // whould be impacted.
    for _, i := range tids.vcpus {
        // In contrast, AddTask will write thread id to `tasks`
        // After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
        // qemu will be left in parent cgroup untouched.
        if err := cgroup.AddTask(cgroups.Process{
            Pid: i,
        }); err != nil {
            return err
        }
    }

    return nil
}

func (s *Sandbox) resources() (specs.LinuxResources, error) {
    resources := specs.LinuxResources{
        CPU: s.cpuResources(),
    }

    return resources, nil
}

func (s *Sandbox) cpuResources() *specs.LinuxCPU {
    // Use default period and quota if they are not specified.
    // Container will inherit the constraints from its parent.
    quota := int64(0)
    period := uint64(0)
    shares := uint64(0)
    realtimePeriod := uint64(0)
    realtimeRuntime := int64(0)

    cpu := &specs.LinuxCPU{
        Quota:           &quota,
        Period:          &period,
        Shares:          &shares,
        RealtimePeriod:  &realtimePeriod,
        RealtimeRuntime: &realtimeRuntime,
    }

    for _, c := range s.containers {
        ann := c.GetAnnotations()
        if ann[annotations.ContainerTypeKey] == string(PodSandbox) {
            // skip sandbox container
            continue
        }

        if c.config.Resources.CPU == nil {
            continue
        }

        if c.config.Resources.CPU.Shares != nil {
            shares = uint64(math.Max(float64(*c.config.Resources.CPU.Shares), float64(shares)))
        }

        if c.config.Resources.CPU.Quota != nil {
            quota += *c.config.Resources.CPU.Quota
        }

        if c.config.Resources.CPU.Period != nil {
            period = uint64(math.Max(float64(*c.config.Resources.CPU.Period), float64(period)))
        }

        if c.config.Resources.CPU.Cpus != "" {
            cpu.Cpus += c.config.Resources.CPU.Cpus + ","
        }

        if c.config.Resources.CPU.RealtimeRuntime != nil {
            realtimeRuntime += *c.config.Resources.CPU.RealtimeRuntime
        }

        if c.config.Resources.CPU.RealtimePeriod != nil {
            realtimePeriod += *c.config.Resources.CPU.RealtimePeriod
        }

        if c.config.Resources.CPU.Mems != "" {
            cpu.Mems += c.config.Resources.CPU.Mems + ","
        }
    }

    cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,")

    return validCPUResources(cpu)
}

// validCPUResources checks CPU resources coherency
func validCPUResources(cpuSpec *specs.LinuxCPU) *specs.LinuxCPU {
    if cpuSpec == nil {
@@ -133,12 +133,12 @@ func TestUpdateCgroups(t *testing.T) {
    }

    // empty path
    err := s.updateCgroups()
    err := s.cgroupsUpdate()
    assert.NoError(err)

    // path doesn't exist
    s.state.CgroupPath = "/abc/123/rgb"
    err = s.updateCgroups()
    err = s.cgroupsUpdate()
    assert.Error(err)

    if os.Getuid() != 0 {
@@ -152,7 +152,7 @@ func TestUpdateCgroups(t *testing.T) {
    s.hypervisor = &mockHypervisor{mockPid: 0}

    // bad pid
    err = s.updateCgroups()
    err = s.cgroupsUpdate()
    assert.Error(err)

    // fake workload
@@ -161,7 +161,7 @@ func TestUpdateCgroups(t *testing.T) {
    s.hypervisor = &mockHypervisor{mockPid: cmd.Process.Pid}

    // no containers
    err = s.updateCgroups()
    err = s.cgroupsUpdate()
    assert.NoError(err)

    s.config = &SandboxConfig{}
@@ -186,11 +186,11 @@ func TestUpdateCgroups(t *testing.T) {
        },
    }

    err = s.updateCgroups()
    err = s.cgroupsUpdate()
    assert.NoError(err)

    // cleanup
    assert.NoError(cmd.Process.Kill())
    err = s.deleteCgroups()
    err = s.cgroupsDelete()
    assert.NoError(err)
}
@@ -887,8 +887,10 @@ func (c *Container) create() (err error) {
    }
    c.process = *process

    if err = c.newCgroups(); err != nil {
        return
    if !c.sandbox.config.SandboxCgroupOnly {
        if err = c.cgroupsCreate(); err != nil {
            return
        }
    }

    if !c.sandbox.supportNewStore() {
@@ -916,8 +918,10 @@ func (c *Container) delete() error {
        return err
    }

    if err := c.deleteCgroups(); err != nil {
        return err
    if !c.sandbox.config.SandboxCgroupOnly {
        if err := c.cgroupsDelete(); err != nil {
            return err
        }
    }

    return c.store.Delete()
@@ -1208,8 +1212,10 @@ func (c *Container) update(resources specs.LinuxResources) error {
        return err
    }

    if err := c.updateCgroups(resources); err != nil {
        return err
    if !c.sandbox.config.SandboxCgroupOnly {
        if err := c.cgroupsUpdate(resources); err != nil {
            return err
        }
    }

    return c.sandbox.agent.updateContainer(c.sandbox, *c, resources)
@@ -1430,8 +1436,8 @@ func (c *Container) detachDevices() error {
    return nil
}

// creates a new cgroup and return the cgroups path
func (c *Container) newCgroups() (err error) {
// cgroupsCreate creates cgroups on the host for the associated container
func (c *Container) cgroupsCreate() (err error) {
    ann := c.GetAnnotations()

    config, ok := ann[annotations.ConfigJSONKey]
@@ -1477,7 +1483,14 @@ func (c *Container) newCgroups() (err error) {
    return nil
}

func (c *Container) deleteCgroups() error {
// cgroupsDelete deletes the cgroups on the host for the associated container
func (c *Container) cgroupsDelete() error {

    if c.state.CgroupPath == "" {
        c.Logger().Debug("container does not have host cgroups: nothing to update")
        return nil
    }

    cgroup, err := cgroupsLoadFunc(cgroups.V1,
        cgroups.StaticPath(c.state.CgroupPath))

@@ -1505,13 +1518,19 @@ func (c *Container) deleteCgroups() error {
    }

    if err := cgroup.Delete(); err != nil {
        return fmt.Errorf("Could not delete container cgroup %v: %v", c.state.CgroupPath, err)
        return fmt.Errorf("Could not delete container cgroup path='%v': error='%v'", c.state.CgroupPath, err)
    }

    return nil
}

func (c *Container) updateCgroups(resources specs.LinuxResources) error {
// cgroupsUpdate updates cgroups on the host for the associated container
func (c *Container) cgroupsUpdate(resources specs.LinuxResources) error {

    if c.state.CgroupPath == "" {
        c.Logger().Debug("container does not have host cgroups: nothing to update")
        return nil
    }
    cgroup, err := cgroupsLoadFunc(cgroups.V1,
        cgroups.StaticPath(c.state.CgroupPath))
    if err != nil {
@@ -1525,7 +1544,7 @@ func (c *Container) updateCgroups(resources specs.LinuxResources) error {

    // update cgroup
    if err := cgroup.Update(&r); err != nil {
        return fmt.Errorf("Could not update cgroup %v: %v", c.state.CgroupPath, err)
        return fmt.Errorf("Could not update container cgroup path='%v': error='%v'", c.state.CgroupPath, err)
    }

    // store new resources
@@ -141,6 +141,9 @@ type RuntimeConfig struct {
    //Determines whether to create a netns for the hypervisor process
    DisableNewNetNs bool

    //Determines if kata processes are managed only in the sandbox cgroup
    SandboxCgroupOnly bool

    //Experimental features enabled
    Experimental []exp.Feature
}
@@ -516,6 +519,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid, c

        SystemdCgroup: systemdCgroup,

        SandboxCgroupOnly: runtime.SandboxCgroupOnly,

        DisableGuestSeccomp: runtime.DisableGuestSeccomp,

        Experimental: runtime.Experimental,
@@ -7,13 +7,18 @@ package virtcontainers

import (
    "context"
    "encoding/json"
    "fmt"
    "io"
    "math"
    "net"
    "os"
    "path/filepath"
    "strings"
    "sync"
    "syscall"

    "github.com/containerd/cgroups"
    "github.com/containernetworking/plugins/pkg/ns"
    "github.com/kata-containers/agent/protocols/grpc"
    "github.com/kata-containers/runtime/virtcontainers/device/api"
@@ -101,6 +106,9 @@ type SandboxConfig struct {
    // SystemdCgroup enables systemd cgroup support
    SystemdCgroup bool

    // SandboxCgroupOnly enables cgroup only at pod level in the host
    SandboxCgroupOnly bool

    DisableGuestSeccomp bool

    // Experimental features enabled
@@ -755,7 +763,7 @@ func (s *Sandbox) Delete() error {
        }
    }

    if err := s.deleteCgroups(); err != nil {
    if err := s.cgroupsDelete(); err != nil {
        return err
    }

@@ -1073,14 +1081,6 @@ func (s *Sandbox) addContainer(c *Container) error {
    }
    s.containers[c.id] = c

    ann := c.GetAnnotations()
    if ann[annotations.ContainerTypeKey] == string(PodSandbox) {
        s.state.CgroupPath = c.state.CgroupPath
        if !s.supportNewStore() {
            return s.store.Store(store.State, s.state)
        }
    }

    return nil
}
@@ -1153,7 +1153,7 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro
        return nil, err
    }

    if err = s.updateCgroups(); err != nil {
    if err = s.cgroupsUpdate(); err != nil {
        return nil, err
    }

@@ -1330,7 +1330,7 @@ func (s *Sandbox) UpdateContainer(containerID string, resources specs.LinuxResou
        return err
    }

    if err := s.updateCgroups(); err != nil {
    if err := s.cgroupsUpdate(); err != nil {
        return err
    }

@@ -1421,7 +1421,7 @@ func (s *Sandbox) createContainers() error {
        }
    }

    if err := s.updateCgroups(); err != nil {
    if err := s.cgroupsUpdate(); err != nil {
        return err
    }
    if err := s.storeSandbox(); err != nil {
@@ -1880,3 +1880,266 @@ func (s *Sandbox) calculateSandboxCPUs() uint32 {
func (s *Sandbox) GetHypervisorType() string {
    return string(s.config.HypervisorType)
}

func (s *Sandbox) cgroupsUpdate() error {
    if s.state.CgroupPath == "" {
        s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
        return nil
    }

    cgroup, err := cgroupsLoadFunc(V1Constraints, cgroups.StaticPath(s.state.CgroupPath))
    if err != nil {
        return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
    }

    if err := s.constrainHypervisorVCPUs(cgroup); err != nil {
        return err
    }

    if len(s.containers) <= 1 {
        // nothing to update
        return nil
    }

    resources, err := s.resources()
    if err != nil {
        return err
    }

    if err := cgroup.Update(&resources); err != nil {
        return fmt.Errorf("Could not update sandbox cgroup path='%v' error='%v'", s.state.CgroupPath, err)
    }

    return nil
}
func (s *Sandbox) cgroupsDelete() error {
    s.Logger().Debug("Deleting sandbox cgroup")
    if s.state.CgroupPath == "" {
        s.Logger().Warnf("sandbox cgroups path is empty")
        return nil
    }

    var path string
    cgroupSubsystems := V1NoConstraints

    if s.config.SandboxCgroupOnly {
        // Override V1NoConstraints, if SandboxCgroupOnly is enabled
        cgroupSubsystems = cgroups.V1
        path = s.state.CgroupPath
        s.Logger().WithField("path", path).Debug("Deleting sandbox cgroups (all subsystems)")
    } else {
        path = cgroupNoConstraintsPath(s.state.CgroupPath)
        s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
    }

    sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
    if err == cgroups.ErrCgroupDeleted {
        // cgroup already deleted
        s.Logger().Warnf("cgroup already deleted: '%s'", err)
        return nil
    }

    if err != nil {
        return fmt.Errorf("Could not load cgroups %v: %v", path, err)
    }

    // move running processes here, that way the cgroup can be removed
    parent, err := parentCgroup(cgroupSubsystems, path)
    if err != nil {
        // parent cgroup doesn't exist, that means there are no processes running
        // and the cgroup was removed.
        s.Logger().WithError(err).Warn("Parent cgroup doesn't exist")
        return nil
    }

    if err := sandboxCgroups.MoveTo(parent); err != nil {
        // Don't fail, the cgroup can be deleted
        s.Logger().WithError(err).Warnf("Could not move process from %s to parent cgroup", path)
    }

    return sandboxCgroups.Delete()
}
func (s *Sandbox) constrainHypervisorVCPUs(cgroup cgroups.Cgroup) error {
    pids := s.hypervisor.getPids()
    if len(pids) == 0 || pids[0] == 0 {
        return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
    }

    // Move the hypervisor into cgroups without constraints;
    // those cgroups are not yet supported.
    resources := &specs.LinuxResources{}
    path := cgroupNoConstraintsPath(s.state.CgroupPath)
    noConstraintsCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
    if err != nil {
        return fmt.Errorf("Could not create cgroup %v: %v", path, err)
    }
    for _, pid := range pids {
        if pid <= 0 {
            s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
            continue
        }

        if err := noConstraintsCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
            return fmt.Errorf("Could not add hypervisor PID %d to cgroup %v: %v", pid, path, err)
        }
    }

    // When a new container joins, a new CPU could be hotplugged, so we
    // have to query fresh vcpu info from the hypervisor every time.
    tids, err := s.hypervisor.getThreadIDs()
    if err != nil {
        return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
    }
    if len(tids.vcpus) == 0 {
        // If there's no tid returned from the hypervisor, this is not
        // a bug. It simply means there is nothing to constrain, hence
        // let's return without any error from here.
        return nil
    }

    // We are about to move just the vcpus (threads) into cgroups with constraints.
    // Moving the whole hypervisor process would be easier, but the IO/network
    // performance would be impacted.
    for _, i := range tids.vcpus {
        // In contrast to Add (which writes the pid to cgroup.procs), AddTask
        // writes the thread id to `tasks`. After this, vcpu threads are in the
        // constrained cgroup; other threads in qemu are left in the parent
        // cgroup untouched.
        if err := cgroup.AddTask(cgroups.Process{
            Pid: i,
        }); err != nil {
            return err
        }
    }

    return nil
}
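Background for the Add vs AddTask distinction used above (illustrative, not part of the change): in cgroup v1 both operations come down to writing an ID into a control file, cgroup.procs for a whole process and tasks for a single thread. A minimal sketch, assuming a hypothetical cgroup directory:

package main

import (
    "fmt"
    "os"
    "path/filepath"
    "strconv"
)

// writePid places an entire process (all of its threads) into the cgroup.
func writePid(cgroupDir string, pid int) error {
    return os.WriteFile(filepath.Join(cgroupDir, "cgroup.procs"),
        []byte(strconv.Itoa(pid)), 0o644)
}

// writeTid places a single thread into the cgroup, leaving the process's
// other threads where they are.
func writeTid(cgroupDir string, tid int) error {
    return os.WriteFile(filepath.Join(cgroupDir, "tasks"),
        []byte(strconv.Itoa(tid)), 0o644)
}

func main() {
    // Hypothetical cgroup v1 directory, for illustration only.
    dir := "/sys/fs/cgroup/cpu/kata_mysandbox"
    fmt.Println(writePid(dir, os.Getpid())) // analogous to cgroups.Cgroup.Add
    fmt.Println(writeTid(dir, os.Getpid())) // analogous to cgroups.Cgroup.AddTask
}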
func (s *Sandbox) resources() (specs.LinuxResources, error) {
    resources := specs.LinuxResources{
        CPU: s.cpuResources(),
    }

    return resources, nil
}

func (s *Sandbox) cpuResources() *specs.LinuxCPU {
    // Use default period and quota if they are not specified.
    // Container will inherit the constraints from its parent.
    quota := int64(0)
    period := uint64(0)
    shares := uint64(0)
    realtimePeriod := uint64(0)
    realtimeRuntime := int64(0)

    cpu := &specs.LinuxCPU{
        Quota:           &quota,
        Period:          &period,
        Shares:          &shares,
        RealtimePeriod:  &realtimePeriod,
        RealtimeRuntime: &realtimeRuntime,
    }

    for _, c := range s.containers {
        ann := c.GetAnnotations()
        if ann[annotations.ContainerTypeKey] == string(PodSandbox) {
            // skip sandbox container
            continue
        }

        if c.config.Resources.CPU == nil {
            continue
        }

        if c.config.Resources.CPU.Shares != nil {
            shares = uint64(math.Max(float64(*c.config.Resources.CPU.Shares), float64(shares)))
        }

        if c.config.Resources.CPU.Quota != nil {
            quota += *c.config.Resources.CPU.Quota
        }

        if c.config.Resources.CPU.Period != nil {
            period = uint64(math.Max(float64(*c.config.Resources.CPU.Period), float64(period)))
        }

        if c.config.Resources.CPU.Cpus != "" {
            cpu.Cpus += c.config.Resources.CPU.Cpus + ","
        }

        if c.config.Resources.CPU.RealtimeRuntime != nil {
            realtimeRuntime += *c.config.Resources.CPU.RealtimeRuntime
        }

        if c.config.Resources.CPU.RealtimePeriod != nil {
            realtimePeriod += *c.config.Resources.CPU.RealtimePeriod
        }

        if c.config.Resources.CPU.Mems != "" {
            cpu.Mems += c.config.Resources.CPU.Mems + ","
        }
    }

    cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,")

    return validCPUResources(cpu)
}
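To make the aggregation above concrete: quotas (and realtime values) are summed across containers, periods and shares take the maximum, and cpusets/memsets are concatenated. A small self-contained sketch of the quota/period/shares part with made-up numbers (not the runtime's code):

package main

import (
    "fmt"
    "math"
)

type cpuLimits struct {
    quota  int64  // microseconds of CPU time per period
    period uint64 // microseconds
    shares uint64
}

// aggregate mirrors the merging rules shown above: sum the quotas, keep the
// largest period and the largest shares value.
func aggregate(containers []cpuLimits) cpuLimits {
    var out cpuLimits
    for _, c := range containers {
        out.quota += c.quota
        out.period = uint64(math.Max(float64(c.period), float64(out.period)))
        out.shares = uint64(math.Max(float64(c.shares), float64(out.shares)))
    }
    return out
}

func main() {
    // Hypothetical pod with two containers: 0.5 CPU and 1 CPU.
    pod := []cpuLimits{
        {quota: 50000, period: 100000, shares: 512},
        {quota: 100000, period: 100000, shares: 1024},
    }
    fmt.Printf("%+v\n", aggregate(pod))
    // {quota:150000 period:100000 shares:1024} -> the sandbox cgroup gets 1.5 CPUs
}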
// setupSandboxCgroup creates and joins sandbox cgroups for the sandbox config
func (s *Sandbox) setupSandboxCgroup() error {
    var podSandboxConfig *ContainerConfig

    if s.config == nil {
        return fmt.Errorf("Sandbox config is nil")
    }

    // get the container associated with the PodSandbox annotation. In Kubernetes, this
    // represents the pause container. In Docker, this is the container. We derive the
    // cgroup path from this container.
    for _, cConfig := range s.config.Containers {
        if cConfig.Annotations[annotations.ContainerTypeKey] == string(PodSandbox) {
            podSandboxConfig = &cConfig
            break
        }
    }

    if podSandboxConfig == nil {
        return fmt.Errorf("Failed to find cgroup path for sandbox: Container of type '%s' not found", PodSandbox)
    }

    configJSON, ok := podSandboxConfig.Annotations[annotations.ConfigJSONKey]
    if !ok {
        return fmt.Errorf("Could not find json config in annotations for container '%s'", podSandboxConfig.ID)
    }

    var spec specs.Spec
    if err := json.Unmarshal([]byte(configJSON), &spec); err != nil {
        return err
    }

    if spec.Linux == nil {
        // Cgroup path is optional, though expected. If not defined, skip the setup
        s.Logger().WithField("sandboxid", podSandboxConfig.ID).Warning("no cgroup path provided for pod sandbox, not creating sandbox cgroup")
        return nil
    }
    validContainerCgroup := utils.ValidCgroupPath(spec.Linux.CgroupsPath)

    // Create a Kata sandbox cgroup with the cgroup of the sandbox container as the parent
    s.state.CgroupPath = filepath.Join(filepath.Dir(validContainerCgroup), cgroupKataPrefix+"_"+podSandboxConfig.ID)
    cgroup, err := cgroupsNewFunc(cgroups.V1, cgroups.StaticPath(s.state.CgroupPath), &specs.LinuxResources{})
    if err != nil {
        return fmt.Errorf("Could not create sandbox cgroup in %v: %v", s.state.CgroupPath, err)
    }

    // Add the runtime to the Kata sandbox cgroup
    runtimePid := os.Getpid()
    if err := cgroup.Add(cgroups.Process{Pid: runtimePid}); err != nil {
        return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err)
    }

    return nil
}
@@ -1516,3 +1516,99 @@ func TestSandboxExperimentalFeature(t *testing.T) {
    assert.NotNil(t, exp.Get(testFeature.Name))
    assert.True(t, sconfig.valid())
}

/*
func TestSandbox_joinSandboxCgroup(t *testing.T) {

    mockValidCgroup := &Sandbox{}
    mockValidCgroup.state.CgroupPath = "/my/cgroup"

    tests := []struct {
        name    string
        s       *Sandbox
        wantErr bool
    }{
        {"New Config", &Sandbox{}, false},
        {"Mock cgroup path", mockValidCgroup, false},
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            if err := tt.s.joinSandboxCgroup(); (err != nil) != tt.wantErr {
                t.Errorf("Sandbox.joinSandboxCgroup() error = %v, wantErr %v", err, tt.wantErr)
            }
        })
    }
}
*/
func TestSandbox_SetupSandboxCgroup(t *testing.T) {
    sandboxContainer := ContainerConfig{}
    sandboxContainer.Annotations = make(map[string]string)
    sandboxContainer.Annotations[annotations.ContainerTypeKey] = string(PodSandbox)

    emptyJSONLinux := ContainerConfig{}
    emptyJSONLinux.Annotations = make(map[string]string)
    emptyJSONLinux.Annotations[annotations.ContainerTypeKey] = string(PodSandbox)
    emptyJSONLinux.Annotations[annotations.ConfigJSONKey] = "{}"

    successfulContainer := ContainerConfig{}
    successfulContainer.Annotations = make(map[string]string)
    successfulContainer.Annotations[annotations.ContainerTypeKey] = string(PodSandbox)
    successfulContainer.Annotations[annotations.ConfigJSONKey] = "{\"linux\": { \"cgroupsPath\": \"/myRuntime/myContainer\" }}"

    tests := []struct {
        name    string
        s       *Sandbox
        wantErr bool
    }{
        {
            "New sandbox",
            &Sandbox{},
            true,
        },
        {
            "New sandbox, new config",
            &Sandbox{config: &SandboxConfig{}},
            true,
        },
        {
            "sandbox, container no sandbox type",
            &Sandbox{
                config: &SandboxConfig{Containers: []ContainerConfig{
                    {},
                }}},
            true,
        },
        {
            "sandbox, container sandbox type",
            &Sandbox{
                config: &SandboxConfig{Containers: []ContainerConfig{
                    sandboxContainer,
                }}},
            true,
        },
        {
            "sandbox, empty linux json",
            &Sandbox{
                config: &SandboxConfig{Containers: []ContainerConfig{
                    emptyJSONLinux,
                }}},
            false,
        },
        {
            "sandbox, successful config",
            &Sandbox{
                config: &SandboxConfig{Containers: []ContainerConfig{
                    successfulContainer,
                }}},
            false,
        },
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            if err := tt.s.setupSandboxCgroup(); (err != nil) != tt.wantErr {
                t.Errorf("Sandbox.SetupSandboxCgroupOnly() error = %v, wantErr %v", err, tt.wantErr)
            }
        })
    }
}