virtcontainers: Convert to the new cgroups package API

The new API is based on containerd's cgroups package.
With that conversion we can simplify the virtcontainers sandbox code and
also unify our external cgroups API dependency. We now only depend
on containerd/cgroups for everything cgroups related.

Depends-on: github.com/kata-containers/tests#3805
Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
Signed-off-by: Eric Ernst <eric_ernst@apple.com>
Samuel Ortiz 2021-07-08 11:36:29 +02:00 committed by Samuel Ortiz
parent b42ed39349
commit 9bed2ade0f
14 changed files with 127 additions and 915 deletions
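For orientation, here is a minimal sketch of the sandbox cgroup lifecycle under the new API, pieced together from the calls that appear in the hunks below (NewSandboxCgroup, AddProcess, Path, Load, MoveToParent, Delete); the path is made up and exact signatures may differ:

package main

import (
    "fmt"
    "os"

    "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups"
    specs "github.com/opencontainers/runtime-spec/specs-go"
)

func sandboxCgroupLifecycle() error {
    // Create the sandbox cgroup with sandbox-level resource constraints.
    sandboxCg, err := cgroups.NewSandboxCgroup("/kata_mysandbox", &specs.LinuxResources{})
    if err != nil {
        return fmt.Errorf("Could not create the sandbox cgroup: %v", err)
    }

    // Move the runtime process into it; vCPU threads are added with AddTask.
    if err := sandboxCg.AddProcess(os.Getpid()); err != nil {
        return err
    }

    // At deletion time, reload the cgroup from its persisted path,
    // drain its processes to the parent, then remove it.
    cg, err := cgroups.Load(sandboxCg.Path())
    if err != nil {
        return err
    }
    if err := cg.MoveToParent(); err != nil {
        return err
    }
    return cg.Delete()
}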

View File

@ -25,7 +25,6 @@ import (
"strings"
"syscall"
"github.com/containerd/cgroups"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
@ -415,11 +414,6 @@ EXAMPLES:
return errors.New("check: cannot determine runtime config")
}
// check if cgroups can work, using the same logic as for creating containers
if _, err := vc.V1Constraints(); err != nil && err == cgroups.ErrMountPointNotExist && !runtimeConfig.SandboxCgroupOnly {
return fmt.Errorf("Cgroup v2 requires the following configuration: `sandbox_cgroup_only=true`.")
}
err := setCPUtype(runtimeConfig.HypervisorType)
if err != nil {
return err

View File

@ -130,8 +130,11 @@ func newTestSandboxConfigKataAgent() SandboxConfig {
}
func TestCreateSandboxNoopAgentSuccessful(t *testing.T) {
defer cleanUp()
assert := assert.New(t)
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
defer cleanUp()
config := newTestSandboxConfigNoop()
@ -181,6 +184,9 @@ func TestCreateSandboxKataAgentSuccessful(t *testing.T) {
}
func TestCreateSandboxFailing(t *testing.T) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
defer cleanUp()
assert := assert.New(t)
@ -240,6 +246,9 @@ func createAndStartSandbox(ctx context.Context, config SandboxConfig) (sandbox V
}
func TestReleaseSandbox(t *testing.T) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
defer cleanUp()
config := newTestSandboxConfigNoop()
@ -254,6 +263,10 @@ func TestReleaseSandbox(t *testing.T) {
}
func TestCleanupContainer(t *testing.T) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
config := newTestSandboxConfigNoop()
assert := assert.New(t)

View File

@ -1,167 +0,0 @@
// Copyright (c) 2018 Huawei Corporation
// Copyright (c) 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/containerd/cgroups"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
type cgroupPather interface {
cgroups.Subsystem
Path(path string) string
}
// Unconstrained cgroups are placed here,
// for example /sys/fs/cgroup/memory/kata/$CGPATH,
// where $CGPATH is defined by the container manager.
const cgroupKataPath = "/kata/"
var cgroupsLoadFunc = cgroups.Load
var cgroupsNewFunc = cgroups.New
// V1Constraints returns the cgroups that are compatible with the VC architecture
// and hypervisor, constraints can be applied to these cgroups.
func V1Constraints() ([]cgroups.Subsystem, error) {
root, err := cgroupV1MountPoint()
if err != nil {
return nil, err
}
subsystems := []cgroups.Subsystem{
cgroups.NewCpuset(root),
cgroups.NewCpu(root),
cgroups.NewCpuacct(root),
}
return cgroupsSubsystems(subsystems)
}
// V1NoConstraints returns the cgroups that are *not* compatible with the VC
// architecture and hypervisor, constraints MUST NOT be applied to these cgroups.
func V1NoConstraints() ([]cgroups.Subsystem, error) {
root, err := cgroupV1MountPoint()
if err != nil {
return nil, err
}
subsystems := []cgroups.Subsystem{
// Some container managers, like k8s, take control of cgroups.
// k8s: the memory cgroup for the dns containers is too small to place
// a hypervisor there.
cgroups.NewMemory(root),
}
return cgroupsSubsystems(subsystems)
}
func cgroupsSubsystems(subsystems []cgroups.Subsystem) ([]cgroups.Subsystem, error) {
var enabled []cgroups.Subsystem
for _, s := range cgroupPathers(subsystems) {
// check and remove the default groups that do not exist
if _, err := os.Lstat(s.Path("/")); err == nil {
enabled = append(enabled, s)
}
}
return enabled, nil
}
func cgroupPathers(subsystems []cgroups.Subsystem) []cgroupPather {
var out []cgroupPather
for _, s := range subsystems {
if p, ok := s.(cgroupPather); ok {
out = append(out, p)
}
}
return out
}
// cgroupV1MountPoint returns the mount point where the cgroup
// mountpoints are mounted in a single hierarchy
func cgroupV1MountPoint() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
if err := scanner.Err(); err != nil {
return "", err
}
var (
text = scanner.Text()
fields = strings.Split(text, " ")
// safe as mountinfo encodes mountpoints with spaces as \040.
index = strings.Index(text, " - ")
postSeparatorFields = strings.Fields(text[index+3:])
numPostFields = len(postSeparatorFields)
)
// this is an error as we can't detect if the mount is for "cgroup"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "cgroup" {
// check that the mount is properly formatted.
if numPostFields < 3 {
return "", fmt.Errorf("Found less than 3 fields post '-' in %q", text)
}
return filepath.Dir(fields[4]), nil
}
}
return "", cgroups.ErrMountPointNotExist
}
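For reference, an illustrative mountinfo entry the loop above would match (format per proc(5)); fields[4] is the mount point and the fields after the " - " separator begin with the filesystem type:

25 19 0:22 / /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu

Here postSeparatorFields[0] is "cgroup", so the function returns filepath.Dir("/sys/fs/cgroup/cpu"), i.e. /sys/fs/cgroup.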
func cgroupNoConstraintsPath(path string) string {
return filepath.Join(cgroupKataPath, path)
}
// return the parent cgroup for the given path
func parentCgroup(hierarchy cgroups.Hierarchy, path string) (cgroups.Cgroup, error) {
// append '/' just in case CgroupsPath doesn't start with it
parent := filepath.Dir("/" + path)
parentCgroup, err := cgroupsLoadFunc(hierarchy,
cgroups.StaticPath(parent))
if err != nil {
return nil, fmt.Errorf("Could not load parent cgroup %v: %v", parent, err)
}
return parentCgroup, nil
}
// validCPUResources checks CPU resources coherency
func validCPUResources(cpuSpec *specs.LinuxCPU) *specs.LinuxCPU {
if cpuSpec == nil {
return nil
}
cpu := *cpuSpec
if cpu.Period != nil && *cpu.Period < 1 {
cpu.Period = nil
}
if cpu.Quota != nil && *cpu.Quota < 1 {
cpu.Quota = nil
}
if cpu.Shares != nil && *cpu.Shares < 1 {
cpu.Shares = nil
}
if cpu.RealtimePeriod != nil && *cpu.RealtimePeriod < 1 {
cpu.RealtimePeriod = nil
}
if cpu.RealtimeRuntime != nil && *cpu.RealtimeRuntime < 1 {
cpu.RealtimeRuntime = nil
}
return &cpu
}
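A hedged usage sketch, assuming the package context above (literal values are made up): sub-1 values are normalized away so they are never applied as constraints.

    shares := uint64(0)   // below 1: treated as unset and dropped
    quota := int64(50000) // kept as-is
    cpu := validCPUResources(&specs.LinuxCPU{Shares: &shares, Quota: &quota})
    // cpu.Shares == nil, *cpu.Quota == 50000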

View File

@ -1,207 +0,0 @@
// Copyright (c) 2018 Huawei Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"github.com/containerd/cgroups"
cgroupsstatsv1 "github.com/containerd/cgroups/stats/v1"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
)
type mockCgroup struct {
}
func (m *mockCgroup) New(string, *specs.LinuxResources) (cgroups.Cgroup, error) {
return &mockCgroup{}, nil
}
func (m *mockCgroup) Add(cgroups.Process) error {
return nil
}
func (m *mockCgroup) AddTask(cgroups.Process) error {
return nil
}
func (m *mockCgroup) Delete() error {
return nil
}
func (m *mockCgroup) MoveTo(cgroups.Cgroup) error {
return nil
}
func (m *mockCgroup) Stat(...cgroups.ErrorHandler) (*cgroupsstatsv1.Metrics, error) {
return &cgroupsstatsv1.Metrics{}, nil
}
func (m *mockCgroup) Update(resources *specs.LinuxResources) error {
return nil
}
func (m *mockCgroup) Processes(cgroups.Name, bool) ([]cgroups.Process, error) {
return nil, nil
}
func (m *mockCgroup) Freeze() error {
return nil
}
func (m *mockCgroup) Thaw() error {
return nil
}
func (m *mockCgroup) OOMEventFD() (uintptr, error) {
return 0, nil
}
func (m *mockCgroup) RegisterMemoryEvent(event cgroups.MemoryEvent) (uintptr, error) {
return 0, nil
}
func (m *mockCgroup) State() cgroups.State {
return ""
}
func (m *mockCgroup) Subsystems() []cgroups.Subsystem {
return nil
}
func (m *mockCgroup) Tasks(cgroups.Name, bool) ([]cgroups.Task, error) {
return nil, nil
}
func mockCgroupNew(hierarchy cgroups.Hierarchy, path cgroups.Path, resources *specs.LinuxResources, opts ...cgroups.InitOpts) (cgroups.Cgroup, error) {
return &mockCgroup{}, nil
}
func mockCgroupLoad(hierarchy cgroups.Hierarchy, path cgroups.Path, opts ...cgroups.InitOpts) (cgroups.Cgroup, error) {
return &mockCgroup{}, nil
}
func init() {
cgroupsNewFunc = mockCgroupNew
cgroupsLoadFunc = mockCgroupLoad
}
func TestV1Constraints(t *testing.T) {
assert := assert.New(t)
systems, err := V1Constraints()
assert.NoError(err)
assert.NotEmpty(systems)
}
func TestV1NoConstraints(t *testing.T) {
assert := assert.New(t)
systems, err := V1NoConstraints()
assert.NoError(err)
assert.NotEmpty(systems)
}
func TestCgroupNoConstraintsPath(t *testing.T) {
assert := assert.New(t)
cgrouPath := "abc"
expectedPath := filepath.Join(cgroupKataPath, cgrouPath)
path := cgroupNoConstraintsPath(cgrouPath)
assert.Equal(expectedPath, path)
}
func TestUpdateCgroups(t *testing.T) {
assert := assert.New(t)
oldCgroupsNew := cgroupsNewFunc
oldCgroupsLoad := cgroupsLoadFunc
cgroupsNewFunc = cgroups.New
cgroupsLoadFunc = cgroups.Load
defer func() {
cgroupsNewFunc = oldCgroupsNew
cgroupsLoadFunc = oldCgroupsLoad
}()
s := &Sandbox{
state: types.SandboxState{
CgroupPath: "",
},
config: &SandboxConfig{SandboxCgroupOnly: false},
}
ctx := context.Background()
// empty path
err := s.cgroupsUpdate(ctx)
assert.NoError(err)
// path doesn't exist
s.state.CgroupPath = "/abc/123/rgb"
err = s.cgroupsUpdate(ctx)
assert.Error(err)
if os.Getuid() != 0 {
return
}
s.state.CgroupPath = fmt.Sprintf("/kata-tests-%d", os.Getpid())
testCgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(s.state.CgroupPath), &specs.LinuxResources{})
assert.NoError(err)
defer testCgroup.Delete()
s.hypervisor = &mockHypervisor{mockPid: 0}
// bad pid
err = s.cgroupsUpdate(ctx)
assert.Error(err)
// fake workload
cmd := exec.Command("tail", "-f", "/dev/null")
assert.NoError(cmd.Start())
s.hypervisor = &mockHypervisor{mockPid: cmd.Process.Pid}
// no containers
err = s.cgroupsUpdate(ctx)
assert.NoError(err)
s.config = &SandboxConfig{}
s.config.HypervisorConfig.NumVCPUs = 1
s.containers = map[string]*Container{
"abc": {
process: Process{
Pid: cmd.Process.Pid,
},
config: &ContainerConfig{
Annotations: containerAnnotations,
CustomSpec: newEmptySpec(),
},
},
"xyz": {
process: Process{
Pid: cmd.Process.Pid,
},
config: &ContainerConfig{
Annotations: containerAnnotations,
CustomSpec: newEmptySpec(),
},
},
}
err = s.cgroupsUpdate(context.Background())
assert.NoError(err)
// cleanup
assert.NoError(cmd.Process.Kill())
err = s.cgroupsDelete()
assert.NoError(err)
}

View File

@ -33,8 +33,8 @@ func (s *Sandbox) dumpState(ss *persistapi.SandboxState, cs map[string]persistap
ss.GuestMemoryBlockSizeMB = s.state.GuestMemoryBlockSizeMB
ss.GuestMemoryHotplugProbe = s.state.GuestMemoryHotplugProbe
ss.State = string(s.state.State)
ss.CgroupPath = s.state.CgroupPath
ss.CgroupPaths = s.state.CgroupPaths
ss.SandboxCgroupPath = s.state.SandboxCgroupPath
ss.OverheadCgroupPath = s.state.OverheadCgroupPath
for id, cont := range s.containers {
state := persistapi.ContainerState{}
@ -188,7 +188,6 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
SystemdCgroup: sconfig.SystemdCgroup,
SandboxCgroupOnly: sconfig.SandboxCgroupOnly,
DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
Cgroups: sconfig.Cgroups,
}
ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...)
@ -302,8 +301,8 @@ func (s *Sandbox) loadState(ss persistapi.SandboxState) {
s.state.GuestMemoryBlockSizeMB = ss.GuestMemoryBlockSizeMB
s.state.BlockIndexMap = ss.HypervisorState.BlockIndexMap
s.state.State = types.StateString(ss.State)
s.state.CgroupPath = ss.CgroupPath
s.state.CgroupPaths = ss.CgroupPaths
s.state.SandboxCgroupPath = ss.SandboxCgroupPath
s.state.OverheadCgroupPath = ss.OverheadCgroupPath
s.state.GuestMemoryHotplugProbe = ss.GuestMemoryHotplugProbe
}
@ -459,7 +458,6 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
SystemdCgroup: savedConf.SystemdCgroup,
SandboxCgroupOnly: savedConf.SandboxCgroupOnly,
DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
Cgroups: savedConf.Cgroups,
}
sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)

View File

@ -30,9 +30,12 @@ type SandboxState struct {
// SandboxContainer specifies which container is used to start the sandbox/vm
SandboxContainer string
// CgroupPath is the cgroup hierarchy where sandbox's processes
// including the hypervisor are placed.
CgroupPath string
// SandboxCgroupPath is the sandbox cgroup path
SandboxCgroupPath string
// OverheadCgroupPath is the sandbox overhead cgroup path.
// It can be an empty string if sandbox_cgroup_only is set.
OverheadCgroupPath string
// HypervisorState saves hypervisor specific data
HypervisorState HypervisorState

View File

@ -26,6 +26,17 @@ type Cgroup struct {
sync.Mutex
}
var (
cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups")
)
// SetLogger sets up a logger for this pkg
func SetLogger(logger *logrus.Entry) {
fields := cgroupsLogger.Data
cgroupsLogger = logger.WithFields(fields)
}
func deviceToDeviceCgroup(device string) (*specs.LinuxDeviceCgroup, error) {
var st unix.Stat_t

View File

@ -1,355 +0,0 @@
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package cgroups
import (
"bufio"
"context"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"github.com/opencontainers/runc/libcontainer"
libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups"
libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
type Config struct {
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
// If nil, New() will create one.
Cgroups *configs.Cgroup
// CgroupPaths contains paths to all the cgroups setup for a container. Key is cgroup subsystem name
// with the value as the path.
CgroupPaths map[string]string
// Resources represents the runtime resource constraints
Resources specs.LinuxResources
// CgroupPath is the OCI spec cgroup path
CgroupPath string
}
type Manager struct {
mgr libcontcgroups.Manager
sync.Mutex
}
const (
// file in the cgroup that contains the pids
cgroupProcs = "cgroup.procs"
)
var (
cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups")
)
// SetLogger sets up a logger for this pkg
func SetLogger(logger *logrus.Entry) {
fields := cgroupsLogger.Data
cgroupsLogger = logger.WithFields(fields)
}
// returns the list of devices that a hypervisor may need
func hypervisorDevices() []specs.LinuxDeviceCgroup {
devices := []specs.LinuxDeviceCgroup{}
// Processes running in a device-cgroup are constrained: they have access
// only to the devices listed in the devices.list file.
// In order to run Virtual Machines and create virtqueues, hypervisors
// need access to certain character devices in the host, like kvm and vhost-net.
hypervisorDevices := []string{
"/dev/kvm", // To run virtual machines
"/dev/vhost-net", // To create virtqueues
"/dev/vfio/vfio", // To access VFIO devices
}
for _, device := range hypervisorDevices {
ldevice, err := DeviceToLinuxDevice(device)
if err != nil {
cgroupsLogger.WithError(err).Warnf("Could not get device information")
continue
}
devices = append(devices, ldevice)
}
return devices
}
// New creates a new CgroupManager
func New(config *Config) (*Manager, error) {
var err error
devices := config.Resources.Devices
devices = append(devices, hypervisorDevices()...)
// Do not modify original devices
config.Resources.Devices = devices
newSpec := specs.Spec{
Linux: &specs.Linux{
Resources: &config.Resources,
},
}
rootless := rootless.IsRootless()
cgroups := config.Cgroups
cgroupPaths := config.CgroupPaths
// determine if we are utilizing systemd managed cgroups based on the path provided
useSystemdCgroup := IsSystemdCgroup(config.CgroupPath)
// Create a new cgroup if the current one is nil
// this cgroup must be saved later
if cgroups == nil {
if config.CgroupPath == "" && !rootless {
cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty")
}
newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup)
if err != nil {
return nil, fmt.Errorf("Invalid cgroup path: %v", err)
}
if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{
// cgroup name is taken from spec
CgroupName: "",
UseSystemdCgroup: useSystemdCgroup,
Spec: &newSpec,
RootlessCgroups: rootless,
}, nil); err != nil {
return nil, fmt.Errorf("Could not create cgroup config: %v", err)
}
}
// Set cgroupPaths to nil when the map is empty, it can and will be
// populated by `Manager.Apply()` when the runtime or any other process
// is moved to the cgroup.
if len(cgroupPaths) == 0 {
cgroupPaths = nil
}
if useSystemdCgroup {
factory, err := libcontainer.New("")
if err != nil {
return nil, fmt.Errorf("Could not create linux factory for systemd cgroup manager: %v", err)
}
lfactory, ok := factory.(*libcontainer.LinuxFactory)
if !ok {
return nil, errors.New("expected linux factory returned on linux based systems")
}
err = libcontainer.SystemdCgroups(lfactory)
if err != nil {
return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err)
}
return &Manager{
mgr: lfactory.NewCgroupsManager(cgroups, cgroupPaths),
}, nil
}
return &Manager{
mgr: libcontcgroupsfs.NewManager(cgroups, cgroupPaths, rootless),
}, nil
}
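The manager choice is driven entirely by the shape of CgroupPath; a hedged sketch in this package's context, with made-up names (the systemd form follows the slice:prefix:name convention that IsSystemdCgroup detects):

    // Plain filesystem path: libcontainer's cgroupfs manager.
    fsMgr, _ := New(&Config{CgroupPath: "/kata/mysandbox"})

    // "slice:prefix:name" path: the systemd cgroup manager,
    // as exercised by TestNew below with "system.slice:kubepod:container".
    sdMgr, _ := New(&Config{CgroupPath: "system.slice:kata:mysandbox"})

    _, _ = fsMgr, sdMgr // sketch only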
// read all the pids in cgroupPath
func readPids(cgroupPath string) ([]int, error) {
pids := []int{}
f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs))
if err != nil {
return nil, err
}
defer f.Close()
buf := bufio.NewScanner(f)
for buf.Scan() {
if t := buf.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
pids = append(pids, pid)
}
}
return pids, nil
}
// write the pids into cgroup.procs
func writePids(pids []int, cgroupPath string) error {
cgroupProcsPath := filepath.Join(cgroupPath, cgroupProcs)
for _, pid := range pids {
if err := ioutil.WriteFile(cgroupProcsPath,
[]byte(strconv.Itoa(pid)),
os.FileMode(0),
); err != nil {
return err
}
}
return nil
}
func (m *Manager) logger() *logrus.Entry {
return cgroupsLogger.WithField("source", "cgroup-manager")
}
// move all the processes in the current cgroup to the parent
func (m *Manager) moveToParent() error {
m.Lock()
defer m.Unlock()
for _, cgroupPath := range m.mgr.GetPaths() {
pids, err := readPids(cgroupPath)
// possible that the cgroupPath doesn't exist. If so, skip:
if os.IsNotExist(err) {
// The cgroup is not present on the filesystem: no pids to move. The systemd cgroup
// manager lists all of the subsystems, including those that are not actually being managed.
continue
}
if err != nil {
return err
}
if len(pids) == 0 {
// no pids in this cgroup
continue
}
cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath))
if err = writePids(pids, cgroupParentPath); err != nil {
if !strings.Contains(err.Error(), "no such process") {
return err
}
}
}
return nil
}
// Add pid to cgroups
func (m *Manager) Add(pid int) error {
if rootless.IsRootless() {
m.logger().Debug("Unable to setup add pids to cgroup: running rootless")
return nil
}
m.Lock()
defer m.Unlock()
return m.mgr.Apply(pid)
}
// Apply constraints
func (m *Manager) Apply() error {
if rootless.IsRootless() {
m.logger().Debug("Unable to apply constraints: running rootless")
return nil
}
cgroups, err := m.GetCgroups()
if err != nil {
return err
}
m.Lock()
defer m.Unlock()
return m.mgr.Set(cgroups.Resources)
}
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
m.Lock()
defer m.Unlock()
return m.mgr.GetCgroups()
}
func (m *Manager) GetPaths() map[string]string {
m.Lock()
defer m.Unlock()
return m.mgr.GetPaths()
}
func (m *Manager) Destroy() error {
// cgroup can't be destroyed if it contains running processes
if err := m.moveToParent(); err != nil {
// If the process migration to the parent cgroup fails, then
// we expect the Destroy to fail as well. Let's log an error here
// and still attempt the Destroy to help clean up the host's FS.
m.logger().WithError(err).Error("Could not move processes into parent cgroup")
}
m.Lock()
defer m.Unlock()
return m.mgr.Destroy()
}
// AddDevice adds a device to the device cgroup
func (m *Manager) AddDevice(ctx context.Context, device string) error {
cgroups, err := m.GetCgroups()
if err != nil {
return err
}
ld, err := DeviceToCgroupDeviceRule(device)
if err != nil {
return err
}
m.Lock()
cgroups.Devices = append(cgroups.Devices, ld)
m.Unlock()
return m.Apply()
}
// RemoveDevice removes a device from the device cgroup
func (m *Manager) RemoveDevice(device string) error {
cgroups, err := m.GetCgroups()
if err != nil {
return err
}
ld, err := DeviceToCgroupDeviceRule(device)
if err != nil {
return err
}
m.Lock()
for i, d := range cgroups.Devices {
if d.Major == ld.Major && d.Minor == ld.Minor {
cgroups.Devices = append(cgroups.Devices[:i], cgroups.Devices[i+1:]...)
m.Unlock()
return m.Apply()
}
}
m.Unlock()
return fmt.Errorf("device %v not found in the cgroup", device)
}
func (m *Manager) SetCPUSet(cpuset, memset string) error {
cgroups, err := m.GetCgroups()
if err != nil {
return err
}
m.Lock()
cgroups.CpusetCpus = cpuset
cgroups.CpusetMems = memset
m.Unlock()
return m.Apply()
}

View File

@ -1,38 +0,0 @@
// Copyright (c) 2020 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package cgroups
import (
"testing"
"github.com/stretchr/testify/assert"
)
// Very basic test; should be expanded
func TestNew(t *testing.T) {
assert := assert.New(t)
// create a cgroupfs cgroup manager
c := &Config{
Cgroups: nil,
CgroupPath: "",
}
mgr, err := New(c)
assert.NoError(err)
assert.NotNil(mgr.mgr)
// create a systemd cgroup manager
s := &Config{
Cgroups: nil,
CgroupPath: "system.slice:kubepod:container",
}
mgr, err = New(s)
assert.NoError(err)
assert.NotNil(mgr.mgr)
}

View File

@ -49,13 +49,13 @@ func ValidCgroupPath(path string, systemdCgroup bool) (string, error) {
// In the case of an absolute path (starting with /), the runtime MUST
// take the path to be relative to the cgroups mount point.
if filepath.IsAbs(path) {
return RenameCgroupPath(filepath.Clean(path))
return filepath.Clean(path), nil
}
// In the case of a relative path (not starting with /), the runtime MAY
// interpret the path relative to a runtime-determined location in the cgroups hierarchy.
// clean up path and return a new path relative to DefaultCgroupPath
return RenameCgroupPath(filepath.Join(DefaultCgroupPath, filepath.Clean("/"+path)))
return filepath.Join(DefaultCgroupPath, filepath.Clean("/"+path)), nil
}
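Worked examples of the two branches, with made-up paths and systemdCgroup=false; note that after this change the "kata_" rename is the caller's job (see the RenameCgroupPath call added to sandbox.go further down):

    // Absolute OCI path: taken relative to the cgroups mount point, only cleaned.
    p1, _ := ValidCgroupPath("/kata/../mysandbox", false) // "/mysandbox"

    // Relative OCI path: anchored under the runtime-determined DefaultCgroupPath.
    p2, _ := ValidCgroupPath("mysandbox", false) // DefaultCgroupPath + "/mysandbox"

    _, _ = p1, p2 // sketch only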
func IsSystemdCgroup(cgroupPath string) bool {

View File

@ -62,6 +62,8 @@ func TestValidCgroupPath(t *testing.T) {
{"/../hi", false, false},
{"/../hi/foo", false, false},
{"o / m /../ g", false, false},
{"/overhead/foobar", false, false},
{"/sys/fs/cgroup/cpu/sandbox/kata_foobar", false, false},
// invalid systemd paths
{"o / m /../ g", true, true},
@ -93,13 +95,13 @@ func TestValidCgroupPath(t *testing.T) {
if filepath.IsAbs(t.path) {
cleanPath := filepath.Dir(filepath.Clean(t.path))
assert.True(strings.HasPrefix(path, cleanPath),
"%v should have prefix %v", cleanPath)
"%v should have prefix %v", path, cleanPath)
} else if t.systemdCgroup {
assert.Equal(t.path, path)
} else {
assert.True(strings.HasPrefix(path, "/"+CgroupKataPrefix) ||
assert.True(
strings.HasPrefix(path, DefaultCgroupPath),
"%v should have prefix /%v or %v", path, CgroupKataPrefix, DefaultCgroupPath)
"%v should have prefix /%v", path, DefaultCgroupPath)
}
}

View File

@ -20,9 +20,7 @@ import (
"sync"
"syscall"
"github.com/containerd/cgroups"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/opencontainers/runc/libcontainer/configs"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@ -39,7 +37,7 @@ import (
pbTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cpuset"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
@ -118,10 +116,6 @@ type SandboxConfig struct {
// Experimental features enabled
Experimental []exp.Feature
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *configs.Cgroup
// Annotations keys must be unique strings and must be name-spaced
// with e.g. reverse domain notation (org.clearlinux.key).
Annotations map[string]string
@ -188,8 +182,8 @@ type Sandbox struct {
config *SandboxConfig
annotationsLock *sync.RWMutex
wg *sync.WaitGroup
sandboxCgroup *vccgroups.Manager
overheadCgroup *vccgroups.Manager
sandboxCgroup *cgroups.Cgroup
overheadCgroup *cgroups.Cgroup
cw *consoleWatcher
containers map[string]*Container
@ -592,6 +586,13 @@ func (s *Sandbox) createCgroups() error {
// Kata relies on the cgroup parent created and configured by the container
// engine by default. The exception is for devices whitelist as well as sandbox-level
// CPUSet.
// For the sandbox cgroups we create and manage, rename the base of the cgroup path to
// include "kata_"
cgroupPath, err = cgroups.RenameCgroupPath(cgroupPath)
if err != nil {
return err
}
if spec.Linux.Resources != nil {
resources.Devices = spec.Linux.Resources.Devices
@ -637,7 +638,7 @@ func (s *Sandbox) createCgroups() error {
if s.devManager != nil {
for _, d := range s.devManager.GetAllDevices() {
dev, err := vccgroups.DeviceToLinuxDevice(d.GetHostPath())
dev, err := cgroups.DeviceToLinuxDevice(d.GetHostPath())
if err != nil {
s.Logger().WithError(err).WithField("device", d.GetHostPath()).Warn("Could not add device to sandbox resources")
continue
@ -650,22 +651,14 @@ func (s *Sandbox) createCgroups() error {
// Depending on the SandboxCgroupOnly value, this cgroup
// will either hold all the pod threads (SandboxCgroupOnly is true)
// or only the virtual CPU ones (SandboxCgroupOnly is false).
if s.sandboxCgroup, err = vccgroups.New(
&vccgroups.Config{
Cgroups: s.config.Cgroups,
CgroupPaths: s.state.CgroupPaths,
Resources: resources,
CgroupPath: cgroupPath,
},
); err != nil {
return err
s.sandboxCgroup, err = cgroups.NewSandboxCgroup(cgroupPath, &resources)
if err != nil {
return fmt.Errorf("Could not create the sandbox cgroup %v", err)
}
// Now that the sandbox cgroup is created, we can set the state cgroup root path.
s.state.CgroupPath, err = vccgroups.ValidCgroupPath(cgroupPath, s.config.SystemdCgroup)
if err != nil {
return fmt.Errorf("Invalid cgroup path: %v", err)
}
// Now that the sandbox cgroup is created, we can set the state cgroup root paths.
s.state.SandboxCgroupPath = s.sandboxCgroup.Path()
s.state.OverheadCgroupPath = ""
if s.config.SandboxCgroupOnly {
s.overheadCgroup = nil
@ -674,16 +667,12 @@ func (s *Sandbox) createCgroups() error {
// into the sandbox cgroup.
// We're creating an overhead cgroup, with no constraints. Everything but
// the vCPU threads will eventually make it there.
if s.overheadCgroup, err = vccgroups.New(
&vccgroups.Config{
Cgroups: nil,
CgroupPaths: nil,
Resources: specs.LinuxResources{},
CgroupPath: cgroupKataOverheadPath,
},
); err != nil {
overheadCgroup, err := cgroups.NewCgroup(fmt.Sprintf("/%s/%s", cgroupKataOverheadPath, s.id), &specs.LinuxResources{})
if err != nil {
return err
}
s.overheadCgroup = overheadCgroup
s.state.OverheadCgroupPath = s.overheadCgroup.Path()
}
return nil
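The net layout on the host, when sandbox_cgroup_only is unset (paths illustrative, combining the "kata_" rename and the overhead base from the code above):

<cgroup root>/kata_<sandbox>            constrained; vCPU threads only
<cgroup root>/kata_overhead/<sandbox>   unconstrained; runtime and VMM housekeeping threads

With sandbox_cgroup_only=true the overhead cgroup is never created and the sandbox cgroup holds all pod threads.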
@ -1540,33 +1529,15 @@ func (s *Sandbox) StatsContainer(ctx context.Context, containerID string) (Conta
// Stats returns the stats of a running sandbox
func (s *Sandbox) Stats(ctx context.Context) (SandboxStats, error) {
if s.state.CgroupPath == "" {
return SandboxStats{}, fmt.Errorf("sandbox cgroup path is empty")
}
var path string
var cgroupSubsystems cgroups.Hierarchy
if s.config.SandboxCgroupOnly {
cgroupSubsystems = cgroups.V1
path = s.state.CgroupPath
} else {
cgroupSubsystems = V1NoConstraints
path = cgroupNoConstraintsPath(s.state.CgroupPath)
}
cgroup, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
if err != nil {
return SandboxStats{}, fmt.Errorf("Could not load sandbox cgroup in %v: %v", s.state.CgroupPath, err)
}
metrics, err := cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist))
metrics, err := s.sandboxCgroup.Stat()
if err != nil {
return SandboxStats{}, err
}
stats := SandboxStats{}
// TODO Do we want to aggregate the overhead cgroup stats to the sandbox ones?
stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = metrics.CPU.Usage.Total
stats.CgroupStats.MemoryStats.Usage.Usage = metrics.Memory.Usage.Usage
tids, err := s.hypervisor.getThreadIDs(ctx)
@ -1796,15 +1767,9 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy
span, ctx := katatrace.Trace(ctx, s.Logger(), "HotplugAddDevice", sandboxTracingTags, map[string]string{"sandbox_id": s.id})
defer span.End()
if s.config.SandboxCgroupOnly {
// We are about to add a device to the hypervisor,
// the device cgroup MUST be updated since the hypervisor
// will need access to such device
hdev := device.GetHostPath()
if err := s.sandboxCgroup.AddDevice(ctx, hdev); err != nil {
s.Logger().WithError(err).WithField("device", hdev).
Warn("Could not add device to cgroup")
}
if err := s.sandboxCgroup.AddDevice(device.GetHostPath()); err != nil {
s.Logger().WithError(err).WithField("device", device).
Warn("Could not add device to cgroup")
}
switch devType {
@ -1852,14 +1817,9 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy
// Sandbox implement DeviceReceiver interface from device/api/interface.go
func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, devType config.DeviceType) error {
defer func() {
if s.config.SandboxCgroupOnly {
// Remove device from cgroup, the hypervisor
// should not have access to such device anymore.
hdev := device.GetHostPath()
if err := s.sandboxCgroup.RemoveDevice(hdev); err != nil {
s.Logger().WithError(err).WithField("device", hdev).
Warn("Could not remove device from cgroup")
}
if err := s.sandboxCgroup.RemoveDevice(device.GetHostPath()); err != nil {
s.Logger().WithError(err).WithField("device", device).
Warn("Could not add device to cgroup")
}
}()
@ -2140,7 +2100,7 @@ func (s *Sandbox) cgroupsUpdate(ctx context.Context) error {
}
// We update the sandbox cgroup with potentially new virtual CPUs.
if err := s.sandboxCgroup.SetCPUSet(cpuset, memset); err != nil {
if err := s.sandboxCgroup.UpdateCpuSet(cpuset, memset); err != nil {
return err
}
@ -2160,21 +2120,39 @@ func (s *Sandbox) cgroupsUpdate(ctx context.Context) error {
// to the parent and then delete the sandbox cgroup
func (s *Sandbox) cgroupsDelete() error {
s.Logger().Debug("Deleting sandbox cgroup")
if s.state.CgroupPath == "" {
s.Logger().Warnf("sandbox cgroups path is empty")
if s.state.SandboxCgroupPath == "" {
s.Logger().Warnf("sandbox cgroup path is empty")
return nil
}
if s.overheadCgroup != nil {
if err := s.overheadCgroup.Destroy(); err != nil {
return err
}
sandboxCgroup, err := cgroups.Load(s.state.SandboxCgroupPath)
if err != nil {
return err
}
if err := s.sandboxCgroup.Destroy(); err != nil {
if err := sandboxCgroup.MoveToParent(); err != nil {
return err
}
if err := sandboxCgroup.Delete(); err != nil {
return err
}
if s.state.OverheadCgroupPath != "" {
overheadCgroup, err := cgroups.Load(s.state.OverheadCgroupPath)
if err != nil {
return err
}
if err := overheadCgroup.MoveToParent(); err != nil {
return err
}
if err := overheadCgroup.Delete(); err != nil {
return err
}
}
return nil
}
@ -2187,7 +2165,7 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context) error {
// All vCPU threads move to the sandbox cgroup.
for _, i := range tids.vcpus {
if err := s.sandboxCgroup.Add(i); err != nil {
if err := s.sandboxCgroup.AddTask(i); err != nil {
return err
}
}
@ -2198,8 +2176,6 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context) error {
// setupCgroups adds the runtime process to either the sandbox cgroup or the overhead one,
// depending on the sandbox_cgroup_only configuration setting.
func (s *Sandbox) setupCgroups() error {
var err error
vmmCgroup := s.sandboxCgroup
if s.overheadCgroup != nil {
vmmCgroup = s.overheadCgroup
@ -2211,27 +2187,8 @@ func (s *Sandbox) setupCgroups() error {
// then move the vCPU threads between cgroups.
runtimePid := os.Getpid()
// Add the runtime to the VMM sandbox cgroup
if err := vmmCgroup.Add(runtimePid); err != nil {
return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err)
}
// `Apply` updates the sandbox cgroup Cgroups and CgroupPaths,
// they both need to be saved since they are used to create
// or restore the sandbox cgroup.
if s.config.Cgroups, err = vmmCgroup.GetCgroups(); err != nil {
return fmt.Errorf("Could not get cgroup configuration: %v", err)
}
s.state.CgroupPaths = vmmCgroup.GetPaths()
if err := s.sandboxCgroup.Apply(); err != nil {
return fmt.Errorf("Could not constrain cgroup: %v", err)
}
if s.overheadCgroup != nil {
if err = s.overheadCgroup.Apply(); err != nil {
return fmt.Errorf("Could not constrain cgroup: %v", err)
}
if err := vmmCgroup.AddProcess(runtimePid); err != nil {
return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err)
}
return nil

View File

@ -52,6 +52,10 @@ func testCreateSandbox(t *testing.T, id string,
nconfig NetworkConfig, containers []ContainerConfig,
volumes []types.Volume) (*Sandbox, error) {
if tc.NotValid(ktu.NeedRoot()) {
t.Skip(testDisabledAsNonRoot)
}
sconfig := SandboxConfig{
ID: id,
HypervisorType: htype,
@ -963,22 +967,6 @@ func TestEnterContainer(t *testing.T) {
assert.Nil(t, err, "Enter container failed: %v", err)
}
func TestDeleteStoreWhenCreateContainerFail(t *testing.T) {
hypervisorConfig := newHypervisorConfig(nil, nil)
s, err := testCreateSandbox(t, testSandboxID, MockHypervisor, hypervisorConfig, NetworkConfig{}, nil, nil)
if err != nil {
t.Fatal(err)
}
defer cleanUp()
contID := "999"
contConfig := newTestContainerConfigNoop(contID)
contConfig.RootFs = RootFs{Target: "", Mounted: true}
s.state.CgroupPath = filepath.Join(testDir, "bad-cgroup")
_, err = s.CreateContainer(context.Background(), contConfig)
assert.NotNil(t, err, "Should fail to create container due to wrong cgroup")
}
func TestDeleteStoreWhenNewContainerFail(t *testing.T) {
hConfig := newHypervisorConfig(nil, nil)
p, err := testCreateSandbox(t, testSandboxID, MockHypervisor, hConfig, NetworkConfig{}, nil, nil)
@ -1451,7 +1439,7 @@ func TestSandboxExperimentalFeature(t *testing.T) {
assert.True(t, sconfig.valid())
}
func TestSandbox_SetupSandboxCgroup(t *testing.T) {
func TestSandbox_Cgroups(t *testing.T) {
sandboxContainer := ContainerConfig{}
sandboxContainer.Annotations = make(map[string]string)
sandboxContainer.Annotations[annotations.ContainerTypeKey] = string(PodSandbox)
@ -1486,8 +1474,8 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) {
{
"New sandbox, new config",
&Sandbox{config: &SandboxConfig{}},
true,
false,
true,
},
{
"sandbox, container no sandbox type",
@ -1495,8 +1483,8 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) {
config: &SandboxConfig{Containers: []ContainerConfig{
{},
}}},
true,
false,
true,
},
{
"sandbox, container sandbox type",
@ -1504,8 +1492,8 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) {
config: &SandboxConfig{Containers: []ContainerConfig{
sandboxContainer,
}}},
true,
false,
true,
},
{
"sandbox, empty linux json",
@ -1532,9 +1520,16 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) {
}
t.Run(tt.name, func(t *testing.T) {
tt.s.createCgroupManager()
if err := tt.s.setupSandboxCgroup(); (err != nil) != tt.wantErr {
t.Errorf("Sandbox.SetupSandboxCgroupOnly() error = %v, wantErr %v", err, tt.wantErr)
err := tt.s.createCgroups()
t.Logf("create groups error %v", err)
if (err != nil) != tt.wantErr {
t.Errorf("Sandbox.CreateCgroups() error = %v, wantErr %v", err, tt.wantErr)
}
if err == nil {
if err := tt.s.setupCgroups(); (err != nil) != tt.wantErr {
t.Errorf("Sandbox.SetupCgroups() error = %v, wantErr %v", err, tt.wantErr)
}
}
})
}

View File

@ -50,9 +50,15 @@ type SandboxState struct {
State StateString `json:"state"`
// CgroupPath is the cgroup hierarchy where sandbox's processes
// including the hypervisor are placed.
CgroupPath string `json:"cgroupPath,omitempty"`
// SandboxCgroupPath is the cgroup path for all the sandbox processes
// when sandbox_cgroup_only is set. When it's not set, part of those
// processes live under the overhead cgroup.
SandboxCgroupPath string `json:"sandboxCgroupPath,omitempty"`
// OverheadCgroupPath is the path to the optional overhead cgroup
// path holding processes that should not be part of the sandbox
// cgroup.
OverheadCgroupPath string `json:"overheadCgroupPath,omitempty"`
// PersistVersion indicates current storage api version.
// It's also known as ABI version of kata-runtime.