mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-06-24 06:27:39 +00:00
virtcontainers/pkg/cgroups: implement cgroup manager
The cgroup manager is in charge of creating and setting up cgroups for virtual containers; for example, it adds /dev/kvm and /dev/vhost-net to the list of cgroup devices in order to have virtual containers working. fixes #2438 fixes #2419 Signed-off-by: Julio Montes <julio.montes@intel.com>
This commit is contained in:
parent
03cdf6c4a9
commit
ea82922a54
321
virtcontainers/pkg/cgroups/manager.go
Normal file
321
virtcontainers/pkg/cgroups/manager.go
Normal file
@ -0,0 +1,321 @@
|
||||
// Copyright (c) 2020 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
|
||||
libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
libcontcgroupssystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Config contains the information needed to create or restore a cgroup
// Manager for a container.
type Config struct {
	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
	// placed into to limit the resources the container has available.
	// If nil, New() will create one.
	Cgroups *configs.Cgroup

	// CgroupPaths contains paths to all the cgroups setup for a container. Key is cgroup subsystem name
	// with the value as the path.
	CgroupPaths map[string]string

	// Resources represents the runtime resource constraints
	Resources specs.LinuxResources

	// CgroupPath is the OCI spec cgroup path
	CgroupPath string
}
|
||||
|
||||
// Manager wraps a libcontainer cgroup manager and serializes access to it.
type Manager struct {
	sync.Mutex // protects mgr against concurrent use

	// mgr is the underlying libcontainer cgroup manager
	// (fs-based or systemd-based, chosen by New).
	mgr libcontcgroups.Manager
}
|
||||
|
||||
const (
	// cgroupProcs is the file in a cgroup directory that contains the
	// pids of the processes attached to that cgroup.
	cgroupProcs = "cgroup.procs"
)
|
||||
|
||||
var (
	// systemdCgroup, if set to true, expects cgroupsPath to be of form
	// "slice:prefix:name", otherwise cgroups creation will fail.
	// nil means "not configured", which UseSystemdCgroup treats as false.
	systemdCgroup *bool

	// cgroupsLogger is the package-level logger.
	cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups")
)
|
||||
|
||||
func EnableSystemdCgroup() {
|
||||
systemd := true
|
||||
systemdCgroup = &systemd
|
||||
}
|
||||
|
||||
func UseSystemdCgroup() bool {
|
||||
if systemdCgroup != nil {
|
||||
return *systemdCgroup
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// returns the list of devices that a hypervisor may need
|
||||
func hypervisorDevices() []specs.LinuxDeviceCgroup {
|
||||
wildcard := int64(-1)
|
||||
devicemapperMajor := int64(253)
|
||||
|
||||
devices := []specs.LinuxDeviceCgroup{}
|
||||
|
||||
devices = append(devices,
|
||||
// hypervisor needs access to all devicemapper devices,
|
||||
// since they can be hotplugged in the VM.
|
||||
specs.LinuxDeviceCgroup{
|
||||
Allow: true,
|
||||
Type: "b",
|
||||
Major: &devicemapperMajor,
|
||||
Minor: &wildcard,
|
||||
Access: "rwm",
|
||||
})
|
||||
|
||||
// Processes running in a device-cgroup are constrained, they have acccess
|
||||
// only to the devices listed in the devices.list file.
|
||||
// In order to run Virtual Machines and create virtqueues, hypervisors
|
||||
// need access to certain character devices in the host, like kvm and vhost-net.
|
||||
hypervisorDevices := []string{
|
||||
"/dev/kvm", // To run virtual machines
|
||||
"/dev/vhost-net", // To create virtqueues
|
||||
}
|
||||
|
||||
for _, device := range hypervisorDevices {
|
||||
var st unix.Stat_t
|
||||
linuxDevice := specs.LinuxDeviceCgroup{
|
||||
Allow: true,
|
||||
Access: "rwm",
|
||||
}
|
||||
|
||||
if err := unix.Stat(device, &st); err != nil {
|
||||
cgroupsLogger.WithError(err).WithField("device", device).Warn("Could not get device information")
|
||||
continue
|
||||
}
|
||||
|
||||
switch st.Mode & unix.S_IFMT {
|
||||
case unix.S_IFCHR:
|
||||
linuxDevice.Type = "c"
|
||||
case unix.S_IFBLK:
|
||||
linuxDevice.Type = "b"
|
||||
}
|
||||
|
||||
major := int64(unix.Major(st.Rdev))
|
||||
minor := int64(unix.Minor(st.Rdev))
|
||||
linuxDevice.Major = &major
|
||||
linuxDevice.Minor = &minor
|
||||
|
||||
devices = append(devices, linuxDevice)
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
// New creates a new CgroupManager
|
||||
func New(config *Config) (*Manager, error) {
|
||||
var err error
|
||||
useSystemdCgroup := UseSystemdCgroup()
|
||||
|
||||
devices := []specs.LinuxDeviceCgroup{}
|
||||
copy(devices, config.Resources.Devices)
|
||||
devices = append(devices, hypervisorDevices()...)
|
||||
// Do not modify original devices
|
||||
config.Resources.Devices = devices
|
||||
|
||||
newSpec := specs.Spec{
|
||||
Linux: &specs.Linux{
|
||||
Resources: &config.Resources,
|
||||
},
|
||||
}
|
||||
|
||||
rootless := rootless.IsRootless()
|
||||
|
||||
cgroups := config.Cgroups
|
||||
cgroupPaths := config.CgroupPaths
|
||||
|
||||
// Create a new cgroup if the current one is nil
|
||||
// this cgroups must be saved later
|
||||
if cgroups == nil {
|
||||
if config.CgroupPath == "" && !rootless {
|
||||
cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty")
|
||||
}
|
||||
|
||||
newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Invalid cgroup path: %v", err)
|
||||
}
|
||||
|
||||
if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{
|
||||
// cgroup name is taken from spec
|
||||
CgroupName: "",
|
||||
UseSystemdCgroup: useSystemdCgroup,
|
||||
Spec: &newSpec,
|
||||
RootlessCgroups: rootless,
|
||||
}); err != nil {
|
||||
return nil, fmt.Errorf("Could not create cgroup config: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Set cgroupPaths to nil when the map is empty, it can and will be
|
||||
// populated by `Manager.Apply()` when the runtime or any other process
|
||||
// is moved to the cgroup.
|
||||
if len(cgroupPaths) == 0 {
|
||||
cgroupPaths = nil
|
||||
}
|
||||
|
||||
if useSystemdCgroup {
|
||||
systemdCgroupFunc, err := libcontcgroupssystemd.NewSystemdCgroupsManager()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err)
|
||||
}
|
||||
libcontcgroupssystemd.UseSystemd()
|
||||
return &Manager{
|
||||
mgr: systemdCgroupFunc(cgroups, cgroupPaths),
|
||||
}, nil
|
||||
}
|
||||
|
||||
return &Manager{
|
||||
mgr: &libcontcgroupsfs.Manager{
|
||||
Cgroups: cgroups,
|
||||
Rootless: rootless,
|
||||
Paths: cgroupPaths,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// read all the pids in cgroupPath
|
||||
func readPids(cgroupPath string) ([]int, error) {
|
||||
pids := []int{}
|
||||
f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
buf := bufio.NewScanner(f)
|
||||
|
||||
for buf.Scan() {
|
||||
if t := buf.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pids = append(pids, pid)
|
||||
}
|
||||
}
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
// write the pids into cgroup.procs
|
||||
func writePids(pids []int, cgroupPath string) error {
|
||||
cgroupProcsPath := filepath.Join(cgroupPath, cgroupProcs)
|
||||
for _, pid := range pids {
|
||||
if err := ioutil.WriteFile(cgroupProcsPath,
|
||||
[]byte(strconv.Itoa(pid)),
|
||||
os.FileMode(0),
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// logger returns the logger used by the cgroup manager.
// Note: it overrides the "source" field already set on the package-level
// cgroupsLogger with "cgroup-manager".
func (m *Manager) logger() *logrus.Entry {
	return cgroupsLogger.WithField("source", "cgroup-manager")
}
|
||||
|
||||
// move all the processes in the current cgroup to the parent
|
||||
func (m *Manager) moveToParent() error {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
for _, cgroupPath := range m.mgr.GetPaths() {
|
||||
pids, err := readPids(cgroupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(pids) == 0 {
|
||||
// no pids in this cgroup
|
||||
continue
|
||||
}
|
||||
|
||||
cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath))
|
||||
if err = writePids(pids, cgroupParentPath); err != nil {
|
||||
if !strings.Contains(err.Error(), "no such process") {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add pid to cgroups
|
||||
func (m *Manager) Add(pid int) error {
|
||||
if rootless.IsRootless() {
|
||||
m.logger().Debug("Unable to setup add pids to cgroup: running rootless")
|
||||
return nil
|
||||
}
|
||||
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
return m.mgr.Apply(pid)
|
||||
}
|
||||
|
||||
// Apply constraints
|
||||
func (m *Manager) Apply() error {
|
||||
if rootless.IsRootless() {
|
||||
m.logger().Debug("Unable to apply constraints: running rootless")
|
||||
return nil
|
||||
}
|
||||
|
||||
cgroups, err := m.GetCgroups()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
return m.mgr.Set(&configs.Config{
|
||||
Cgroups: cgroups,
|
||||
})
|
||||
}
|
||||
|
||||
// GetCgroups returns the cgroup configuration of the underlying manager.
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
	m.Lock()
	defer m.Unlock()
	return m.mgr.GetCgroups()
}
|
||||
|
||||
// GetPaths returns the subsystem-name to cgroup-path map of the
// underlying manager.
func (m *Manager) GetPaths() map[string]string {
	m.Lock()
	defer m.Unlock()
	return m.mgr.GetPaths()
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
// cgroup can't be destroyed if it contains running processes
|
||||
if err := m.moveToParent(); err != nil {
|
||||
return fmt.Errorf("Could not move processes into parent cgroup: %v", err)
|
||||
}
|
||||
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
return m.mgr.Destroy()
|
||||
}
|
55
virtcontainers/pkg/cgroups/manager_test.go
Normal file
55
virtcontainers/pkg/cgroups/manager_test.go
Normal file
@ -0,0 +1,55 @@
|
||||
// Copyright (c) 2020 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestEnableSystemdCgroup(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
orgSystemdCgroup := systemdCgroup
|
||||
defer func() {
|
||||
systemdCgroup = orgSystemdCgroup
|
||||
}()
|
||||
|
||||
useSystemdCgroup := UseSystemdCgroup()
|
||||
if systemdCgroup != nil {
|
||||
assert.Equal(*systemdCgroup, useSystemdCgroup)
|
||||
} else {
|
||||
assert.False(useSystemdCgroup)
|
||||
}
|
||||
|
||||
EnableSystemdCgroup()
|
||||
assert.True(UseSystemdCgroup())
|
||||
}
|
||||
|
||||
// TestNew checks cgroup Manager creation with and without systemd cgroups.
func TestNew(t *testing.T) {
	assert := assert.New(t)
	useSystemdCgroup := false
	// Restore the package state once the test is done.
	orgSystemdCgroup := systemdCgroup
	defer func() {
		systemdCgroup = orgSystemdCgroup
	}()
	systemdCgroup = &useSystemdCgroup

	c := &Config{
		Cgroups:    nil,
		CgroupPath: "",
	}

	// fs-based manager: creation should always succeed.
	mgr, err := New(c)
	assert.NoError(err)
	assert.NotNil(mgr.mgr)

	// systemd-based manager: expected to fail here.
	// NOTE(review): this assumes the test environment cannot create a
	// systemd cgroup manager (e.g. no valid slice:prefix:name path) —
	// confirm this holds wherever the test runs.
	useSystemdCgroup = true
	mgr, err = New(c)
	assert.Error(err)
	assert.Nil(mgr)
}
|
@ -34,6 +34,7 @@ import (
|
||||
"github.com/kata-containers/runtime/virtcontainers/persist"
|
||||
persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
|
||||
"github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
|
||||
vccgroups "github.com/kata-containers/runtime/virtcontainers/pkg/cgroups"
|
||||
"github.com/kata-containers/runtime/virtcontainers/pkg/compatoci"
|
||||
"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
|
||||
vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types"
|
||||
@ -214,6 +215,8 @@ type Sandbox struct {
|
||||
seccompSupported bool
|
||||
disableVMShutdown bool
|
||||
|
||||
cgroupMgr *vccgroups.Manager
|
||||
|
||||
ctx context.Context
|
||||
}
|
||||
|
||||
@ -597,6 +600,10 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.createCgroupManager(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
agentConfig, err := newAgentConfig(sandboxConfig.AgentType, sandboxConfig.AgentConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -609,6 +616,46 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// createCgroupManager builds the sandbox cgroup manager from the sandbox
// configuration and stores it in s.cgroupMgr.
//
// When the container was explicitly marked as a sandbox through CRI
// annotations (HasCRIContainerType), an empty resource set is used so the
// cgroup configured by the container engine is left untouched; otherwise
// the resources come from the patched OCI spec.
func (s *Sandbox) createCgroupManager() error {
	var err error
	cgroupPath := ""

	// Do not change current cgroup configuration.
	// Create a spec without constraints
	resources := specs.LinuxResources{}

	if s.config == nil {
		return fmt.Errorf("Could not create cgroup manager: empty sandbox configuration")
	}

	spec := s.GetPatchedOCISpec()
	if spec != nil {
		// NOTE(review): assumes spec.Linux (and spec.Linux.Resources below)
		// are non-nil for any patched spec — confirm callers guarantee this.
		cgroupPath = spec.Linux.CgroupsPath

		// kata should rely on the cgroup created and configured by
		// container engine *only* if actual container was
		// marked *explicitly* as sandbox through annotations.
		if !s.config.HasCRIContainerType {
			resources = *spec.Linux.Resources
		}
	}

	// Create the cgroup manager, this way it can be used later
	// to create or destroy cgroups
	if s.cgroupMgr, err = vccgroups.New(
		&vccgroups.Config{
			Cgroups:     s.config.Cgroups,
			CgroupPaths: s.state.CgroupPaths,
			Resources:   resources,
			CgroupPath:  cgroupPath,
		},
	); err != nil {
		return err
	}

	return nil
}
|
||||
|
||||
// storeSandbox stores a sandbox config.
|
||||
func (s *Sandbox) storeSandbox() error {
|
||||
span, _ := s.trace("storeSandbox")
|
||||
@ -1855,15 +1902,13 @@ func (s *Sandbox) cgroupsDelete() error {
|
||||
var cgroupSubsystems cgroups.Hierarchy
|
||||
|
||||
if s.config.SandboxCgroupOnly {
|
||||
cgroupSubsystems = cgroups.V1
|
||||
path = s.state.CgroupPath
|
||||
s.Logger().WithField("path", path).Debug("Deleting sandbox cgroups (all subsystems)")
|
||||
} else {
|
||||
cgroupSubsystems = V1NoConstraints
|
||||
path = cgroupNoConstraintsPath(s.state.CgroupPath)
|
||||
s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
|
||||
return s.cgroupMgr.Destroy()
|
||||
}
|
||||
|
||||
cgroupSubsystems = V1NoConstraints
|
||||
path = cgroupNoConstraintsPath(s.state.CgroupPath)
|
||||
s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
|
||||
|
||||
sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
|
||||
if err == cgroups.ErrCgroupDeleted {
|
||||
// cgroup already deleted
|
||||
@ -2049,60 +2094,27 @@ func (s *Sandbox) setupSandboxCgroup() error {
|
||||
|
||||
s.Logger().WithField("hasCRIContainerType", s.config.HasCRIContainerType).Debug("Setting sandbox cgroup")
|
||||
|
||||
s.state.CgroupPath, err = validCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup)
|
||||
s.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Invalid cgroup path: %v", err)
|
||||
}
|
||||
|
||||
// Don't modify original resources, create a copy
|
||||
resources := *spec.Linux.Resources
|
||||
sandboxSpec := specs.Spec{
|
||||
Linux: &specs.Linux{
|
||||
Resources: &resources,
|
||||
},
|
||||
}
|
||||
|
||||
// kata should rely on the cgroup created and configured by
|
||||
// container engine *only* if actual container was
|
||||
// marked *explicitly* as sandbox through annotations.
|
||||
if s.config.HasCRIContainerType {
|
||||
// Do not change current cgroup configuration.
|
||||
// Create a spec without constraints
|
||||
sandboxSpec.Linux.Resources = &specs.LinuxResources{}
|
||||
}
|
||||
|
||||
sandboxSpec.Linux.CgroupsPath = s.state.CgroupPath
|
||||
|
||||
// Remove this to improve device resource management, but first we need to fix some issues:
|
||||
// - hypervisors will need access to following host's devices:
|
||||
// * /dev/kvm
|
||||
// * /dev/vhost-net
|
||||
// - If devicemapper is the storage driver, hypervisor will need access to devicemapper devices:
|
||||
// * The list of cgroup devices MUST BE updated when a new container is created in the POD
|
||||
sandboxSpec.Linux.Resources.Devices = []specs.LinuxDeviceCgroup{}
|
||||
|
||||
cmgr, err := newCgroupManager(s.config.Cgroups, s.state.CgroupPaths, &sandboxSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Could not create a new cgroup manager: %v", err)
|
||||
}
|
||||
|
||||
runtimePid := os.Getpid()
|
||||
|
||||
// Add the runtime to the Kata sandbox cgroup
|
||||
if err = cmgr.Apply(runtimePid); err != nil {
|
||||
if err = s.cgroupMgr.Add(runtimePid); err != nil {
|
||||
return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err)
|
||||
}
|
||||
|
||||
// `Apply` updates manager's Cgroups and CgroupPaths,
|
||||
// they both need to be saved since are used to create
|
||||
// or restore a cgroup managers.
|
||||
if s.config.Cgroups, err = cmgr.GetCgroups(); err != nil {
|
||||
if s.config.Cgroups, err = s.cgroupMgr.GetCgroups(); err != nil {
|
||||
return fmt.Errorf("Could not get cgroup configuration: %v", err)
|
||||
}
|
||||
|
||||
s.state.CgroupPaths = cmgr.GetPaths()
|
||||
s.state.CgroupPaths = s.cgroupMgr.GetPaths()
|
||||
|
||||
if err = cmgr.Set(&configs.Config{Cgroups: s.config.Cgroups}); err != nil {
|
||||
if err = s.cgroupMgr.Apply(); err != nil {
|
||||
return fmt.Errorf("Could not constrain cgroup: %v", err)
|
||||
}
|
||||
|
||||
|
@ -1514,6 +1514,7 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) {
|
||||
}
|
||||
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tt.s.createCgroupManager()
|
||||
if err := tt.s.setupSandboxCgroup(); (err != nil) != tt.wantErr {
|
||||
t.Errorf("Sandbox.SetupSandboxCgroupOnly() error = %v, wantErr %v", err, tt.wantErr)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user