mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-04-29 04:04:45 +00:00
runtime: Support for host cgroup v2
Support cgroup v2 on the host. Update vendor containerd/cgroups to add cgroup v2. Fixes: #3073 Signed-off-by: yaoyinnan <yaoyinnan@foxmail.com>
This commit is contained in:
parent
4ab45e5c93
commit
5c3155f7e2
@ -7,7 +7,7 @@ require (
|
||||
github.com/BurntSushi/toml v0.3.1
|
||||
github.com/blang/semver v3.5.1+incompatible
|
||||
github.com/blang/semver/v4 v4.0.0
|
||||
github.com/containerd/cgroups v1.0.3
|
||||
github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682
|
||||
github.com/containerd/console v1.0.3
|
||||
github.com/containerd/containerd v1.6.6
|
||||
github.com/containerd/cri-containerd v1.11.1-0.20190125013620-4dd6735020f5
|
||||
|
@ -188,8 +188,9 @@ github.com/containerd/cgroups v0.0.0-20200710171044-318312a37340/go.mod h1:s5q4S
|
||||
github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo=
|
||||
github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE=
|
||||
github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU=
|
||||
github.com/containerd/cgroups v1.0.3 h1:ADZftAkglvCiD44c77s5YmMqaP2pzVCFZvBmAlBdAP4=
|
||||
github.com/containerd/cgroups v1.0.3/go.mod h1:/ofk34relqNjSGyqPrmEULrO4Sc8LJhvJmWbUCUKqj8=
|
||||
github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682 h1:d/YjAAP6A6fT0vpMhbYSDkE+K1ww/DZodOIamD8Pr/E=
|
||||
github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA=
|
||||
github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
|
||||
github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw=
|
||||
github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE=
|
||||
@ -1079,6 +1080,7 @@ go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
|
||||
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
|
||||
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
|
||||
go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A=
|
||||
go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA=
|
||||
go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
|
||||
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
|
||||
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
|
||||
@ -1126,6 +1128,7 @@ golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHl
|
||||
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
|
||||
golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
|
||||
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
|
||||
golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5 h1:2M3HP5CCK1Si9FQhwnzYhXdG6DXeebvUHFpre8QvbyI=
|
||||
golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
|
||||
golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
|
||||
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
|
||||
@ -1137,6 +1140,7 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.2 h1:Gz96sIWK3OalVv/I/qNygP42zyoKp3xptRVCWRFEBvo=
|
||||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
@ -1420,6 +1424,7 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f
|
||||
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
|
||||
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA=
|
||||
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
|
@ -15,16 +15,21 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/containerd/cgroups"
|
||||
v1 "github.com/containerd/cgroups/stats/v1"
|
||||
cgroupsv2 "github.com/containerd/cgroups/v2"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// prepend a kata specific string to oci cgroup path to
|
||||
// form a different cgroup path, thus cAdvisor couldn't
|
||||
// find kata containers cgroup path on host to prevent it
|
||||
// from grabbing the stats data.
|
||||
const CgroupKataPrefix = "kata"
|
||||
const (
|
||||
// prepend a kata specific string to oci cgroup path to
|
||||
// form a different cgroup path, thus cAdvisor couldn't
|
||||
// find kata containers cgroup path on host to prevent it
|
||||
// from grabbing the stats data.
|
||||
CgroupKataPrefix = "kata"
|
||||
|
||||
// cgroup v2 mount point
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
)
|
||||
|
||||
func RenameCgroupPath(path string) (string, error) {
|
||||
if path == "" {
|
||||
@ -34,11 +39,10 @@ func RenameCgroupPath(path string) (string, error) {
|
||||
cgroupPathDir := filepath.Dir(path)
|
||||
cgroupPathName := fmt.Sprintf("%s_%s", CgroupKataPrefix, filepath.Base(path))
|
||||
return filepath.Join(cgroupPathDir, cgroupPathName), nil
|
||||
|
||||
}
|
||||
|
||||
type LinuxCgroup struct {
|
||||
cgroup cgroups.Cgroup
|
||||
cgroup interface{}
|
||||
path string
|
||||
cpusets *specs.LinuxCPU
|
||||
devices []specs.LinuxDeviceCgroup
|
||||
@ -128,15 +132,29 @@ func sandboxDevices() []specs.LinuxDeviceCgroup {
|
||||
|
||||
func NewResourceController(path string, resources *specs.LinuxResources) (ResourceController, error) {
|
||||
var err error
|
||||
var cgroup interface{}
|
||||
var cgroupPath string
|
||||
|
||||
cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid {
|
||||
cgroupPath, err = ValidCgroupPathV1(path, IsSystemdCgroup(path))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cgroup, err = cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if cgroups.Mode() == cgroups.Unified {
|
||||
cgroupPath, err = ValidCgroupPathV2(path, IsSystemdCgroup(path))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cgroup, err = cgroupsv2.NewManager(unifiedMountpoint, cgroupPath, cgroupsv2.ToResources(resources))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
return nil, ErrCgroupMode
|
||||
}
|
||||
|
||||
return &LinuxCgroup{
|
||||
@ -148,40 +166,56 @@ func NewResourceController(path string, resources *specs.LinuxResources) (Resour
|
||||
}
|
||||
|
||||
func NewSandboxResourceController(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (ResourceController, error) {
|
||||
var cgroup cgroups.Cgroup
|
||||
sandboxResources := *resources
|
||||
sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...)
|
||||
|
||||
// Currently we know to handle systemd cgroup path only when it's the only cgroup (no overhead group), hence,
|
||||
// if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect
|
||||
// if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect.
|
||||
if !IsSystemdCgroup(path) || !sandboxCgroupOnly {
|
||||
return NewResourceController(path, &sandboxResources)
|
||||
}
|
||||
|
||||
var cgroup interface{}
|
||||
|
||||
slice, unit, err := getSliceAndUnit(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// github.com/containerd/cgroups doesn't support creating a scope unit with
|
||||
// v1 cgroups against systemd, the following interacts directly with systemd
|
||||
// to create the cgroup and then load it using containerd's api
|
||||
err = createCgroupsSystemd(slice, unit, uint32(os.Getpid())) // adding runtime process, it makes calling setupCgroups redundant
|
||||
if err != nil {
|
||||
|
||||
//github.com/containerd/cgroups doesn't support creating a scope unit with
|
||||
//v1 and v2 cgroups against systemd, the following interacts directly with systemd
|
||||
//to create the cgroup and then load it using containerd's api.
|
||||
//adding runtime process, it makes calling setupCgroups redundant
|
||||
if createCgroupsSystemd(slice, unit, os.Getpid()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cgHierarchy, cgPath, err := cgroupHierarchy(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Create systemd cgroup
|
||||
if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid {
|
||||
cgHierarchy, cgPath, err := cgroupHierarchy(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// load created cgroup and update with resources
|
||||
if cgroup, err = cgroups.Load(cgHierarchy, cgPath); err == nil {
|
||||
err = cgroup.Update(&sandboxResources)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// load created cgroup and update with resources
|
||||
cg, err := cgroups.Load(cgHierarchy, cgPath)
|
||||
if err != nil {
|
||||
if cg.Update(&sandboxResources); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
cgroup = cg
|
||||
} else if cgroups.Mode() == cgroups.Unified {
|
||||
// load created cgroup and update with resources
|
||||
cg, err := cgroupsv2.LoadSystemd(slice, unit)
|
||||
if err != nil {
|
||||
if cg.Update(cgroupsv2.ToResources(&sandboxResources)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
cgroup = cg
|
||||
} else {
|
||||
return nil, ErrCgroupMode
|
||||
}
|
||||
|
||||
return &LinuxCgroup{
|
||||
@ -193,14 +227,38 @@ func NewSandboxResourceController(path string, resources *specs.LinuxResources,
|
||||
}
|
||||
|
||||
func LoadResourceController(path string) (ResourceController, error) {
|
||||
cgHierarchy, cgPath, err := cgroupHierarchy(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var err error
|
||||
var cgroup interface{}
|
||||
|
||||
cgroup, err := cgroups.Load(cgHierarchy, cgPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// load created cgroup and update with resources
|
||||
if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid {
|
||||
cgHierarchy, cgPath, err := cgroupHierarchy(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cgroup, err = cgroups.Load(cgHierarchy, cgPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if cgroups.Mode() == cgroups.Unified {
|
||||
if IsSystemdCgroup(path) {
|
||||
slice, unit, err := getSliceAndUnit(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cgroup, err = cgroupsv2.LoadSystemd(slice, unit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
cgroup, err = cgroupsv2.LoadManager(unifiedMountpoint, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return nil, ErrCgroupMode
|
||||
}
|
||||
|
||||
return &LinuxCgroup{
|
||||
@ -214,37 +272,86 @@ func (c *LinuxCgroup) Logger() *logrus.Entry {
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) Delete() error {
|
||||
return c.cgroup.Delete()
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
return cg.Delete()
|
||||
case *cgroupsv2.Manager:
|
||||
if IsSystemdCgroup(c.ID()) {
|
||||
if err := cg.DeleteSystemd(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return cg.Delete()
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) Stat() (*v1.Metrics, error) {
|
||||
return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist))
|
||||
func (c *LinuxCgroup) Stat() (interface{}, error) {
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
return cg.Stat(cgroups.IgnoreNotExist)
|
||||
case *cgroupsv2.Manager:
|
||||
return cg.Stat()
|
||||
default:
|
||||
return nil, ErrCgroupMode
|
||||
}
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) AddProcess(pid int, subsystems ...string) error {
|
||||
return c.cgroup.Add(cgroups.Process{Pid: pid})
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
return cg.AddProc(uint64(pid))
|
||||
case *cgroupsv2.Manager:
|
||||
return cg.AddProc(uint64(pid))
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) AddThread(pid int, subsystems ...string) error {
|
||||
return c.cgroup.AddTask(cgroups.Process{Pid: pid})
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
return cg.AddTask(cgroups.Process{Pid: pid})
|
||||
case *cgroupsv2.Manager:
|
||||
return cg.AddProc(uint64(pid))
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) Update(resources *specs.LinuxResources) error {
|
||||
return c.cgroup.Update(resources)
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
return cg.Update(resources)
|
||||
case *cgroupsv2.Manager:
|
||||
return cg.Update(cgroupsv2.ToResources(resources))
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) MoveTo(path string) error {
|
||||
cgHierarchy, cgPath, err := cgroupHierarchy(path)
|
||||
if err != nil {
|
||||
return err
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
cgHierarchy, cgPath, err := cgroupHierarchy(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
newCgroup, err := cgroups.Load(cgHierarchy, cgPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return cg.MoveTo(newCgroup)
|
||||
case *cgroupsv2.Manager:
|
||||
newCgroup, err := cgroupsv2.LoadManager(unifiedMountpoint, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return cg.MoveTo(newCgroup)
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
|
||||
newCgroup, err := cgroups.Load(cgHierarchy, cgPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return c.cgroup.MoveTo(newCgroup)
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) AddDevice(deviceHostPath string) error {
|
||||
@ -258,10 +365,21 @@ func (c *LinuxCgroup) AddDevice(deviceHostPath string) error {
|
||||
|
||||
c.devices = append(c.devices, deviceResource)
|
||||
|
||||
if err := c.cgroup.Update(&specs.LinuxResources{
|
||||
Devices: c.devices,
|
||||
}); err != nil {
|
||||
return err
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
if err := cg.Update(&specs.LinuxResources{
|
||||
Devices: c.devices,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
case *cgroupsv2.Manager:
|
||||
if err := cg.Update(cgroupsv2.ToResources(&specs.LinuxResources{
|
||||
Devices: c.devices,
|
||||
})); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -284,10 +402,21 @@ func (c *LinuxCgroup) RemoveDevice(deviceHostPath string) error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := c.cgroup.Update(&specs.LinuxResources{
|
||||
Devices: c.devices,
|
||||
}); err != nil {
|
||||
return err
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
if err := cg.Update(&specs.LinuxResources{
|
||||
Devices: c.devices,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
case *cgroupsv2.Manager:
|
||||
if err := cg.Update(cgroupsv2.ToResources(&specs.LinuxResources{
|
||||
Devices: c.devices,
|
||||
})); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -315,9 +444,18 @@ func (c *LinuxCgroup) UpdateCpuSet(cpuset, memset string) error {
|
||||
c.cpusets.Mems = memset
|
||||
}
|
||||
|
||||
return c.cgroup.Update(&specs.LinuxResources{
|
||||
CPU: c.cpusets,
|
||||
})
|
||||
switch cg := c.cgroup.(type) {
|
||||
case cgroups.Cgroup:
|
||||
return cg.Update(&specs.LinuxResources{
|
||||
CPU: c.cpusets,
|
||||
})
|
||||
case *cgroupsv2.Manager:
|
||||
return cg.Update(cgroupsv2.ToResources(&specs.LinuxResources{
|
||||
CPU: c.cpusets,
|
||||
}))
|
||||
default:
|
||||
return ErrCgroupMode
|
||||
}
|
||||
}
|
||||
|
||||
func (c *LinuxCgroup) Type() ResourceControllerType {
|
||||
|
@ -6,7 +6,6 @@
|
||||
package resourcecontrol
|
||||
|
||||
import (
|
||||
v1 "github.com/containerd/cgroups/stats/v1"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@ -56,7 +55,7 @@ type ResourceController interface {
|
||||
Delete() error
|
||||
|
||||
// Stat returns the statistics for the controller.
|
||||
Stat() (*v1.Metrics, error)
|
||||
Stat() (interface{}, error)
|
||||
|
||||
// AddProcess adds a process to a set of controllers.
|
||||
AddProcess(int, ...string) error
|
||||
|
@ -6,6 +6,7 @@
|
||||
package resourcecontrol
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@ -14,6 +15,10 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrCgroupMode = errors.New("cgroup controller type error")
|
||||
)
|
||||
|
||||
func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) {
|
||||
var st unix.Stat_t
|
||||
deviceRule := devices.Rule{
|
||||
|
@ -20,9 +20,9 @@ import (
|
||||
// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy.
|
||||
const DefaultResourceControllerID = "/vc"
|
||||
|
||||
// ValidCgroupPath returns a valid cgroup path.
|
||||
// ValidCgroupPathV1 returns a valid cgroup path for cgroup v1.
|
||||
// see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
|
||||
func ValidCgroupPath(path string, systemdCgroup bool) (string, error) {
|
||||
func ValidCgroupPathV1(path string, systemdCgroup bool) (string, error) {
|
||||
if IsSystemdCgroup(path) {
|
||||
return path, nil
|
||||
}
|
||||
@ -43,6 +43,30 @@ func ValidCgroupPath(path string, systemdCgroup bool) (string, error) {
|
||||
return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil
|
||||
}
|
||||
|
||||
// ValidCgroupPathV2 returns a valid cgroup path for cgroup v2.
|
||||
// see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
|
||||
func ValidCgroupPathV2(path string, systemdCgroup bool) (string, error) {
|
||||
// In cgroup v2,path must be a "clean" absolute path starts with "/".
|
||||
if IsSystemdCgroup(path) {
|
||||
return filepath.Join("/", path), nil
|
||||
}
|
||||
|
||||
if systemdCgroup {
|
||||
return "", fmt.Errorf("malformed systemd path '%v': expected to be of form 'slice:prefix:name'", path)
|
||||
}
|
||||
|
||||
// In the case of an absolute path (starting with /), the runtime MUST
|
||||
// take the path to be relative to the cgroups mount point.
|
||||
if filepath.IsAbs(path) {
|
||||
return filepath.Clean(path), nil
|
||||
}
|
||||
|
||||
// In the case of a relative path (not starting with /), the runtime MAY
|
||||
// interpret the path relative to a runtime-determined location in the cgroups hierarchy.
|
||||
// clean up path and return a new path relative to DefaultResourceControllerID
|
||||
return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil
|
||||
}
|
||||
|
||||
func newProperty(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
@ -68,7 +92,7 @@ func cgroupHierarchy(path string) (cgroups.Hierarchy, cgroups.Path, error) {
|
||||
}
|
||||
}
|
||||
|
||||
func createCgroupsSystemd(slice string, unit string, pid uint32) error {
|
||||
func createCgroupsSystemd(slice string, unit string, pid int) error {
|
||||
ctx := context.TODO()
|
||||
conn, err := systemdDbus.NewWithContext(ctx)
|
||||
if err != nil {
|
||||
@ -84,14 +108,19 @@ func createCgroupsSystemd(slice string, unit string, pid uint32) error {
|
||||
newProperty("IOAccounting", true),
|
||||
}
|
||||
|
||||
// https://github.com/opencontainers/runc/blob/master/docs/systemd.md
|
||||
if strings.HasSuffix(unit, ".scope") {
|
||||
// It's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
properties = append(properties, newProperty("Delegate", true))
|
||||
properties = append(properties, systemdDbus.PropPids(pid))
|
||||
if strings.HasSuffix(unit, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
return fmt.Errorf("Failed to create cgroups with systemd: unit %s is not a scope", unit)
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProperty("Delegate", true))
|
||||
|
||||
if pid != -1 {
|
||||
properties = append(properties, systemdDbus.PropPids(uint32(pid)))
|
||||
}
|
||||
|
||||
ch := make(chan string)
|
||||
|
@ -41,7 +41,7 @@ func TestIsSystemdCgroup(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidCgroupPath(t *testing.T) {
|
||||
func TestValidCgroupPathV1(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
for _, t := range []struct {
|
||||
@ -62,12 +62,14 @@ func TestValidCgroupPath(t *testing.T) {
|
||||
{"/../hi/foo", false, false},
|
||||
{"o / m /../ g", false, false},
|
||||
{"/overhead/foobar", false, false},
|
||||
{"/kata/afhts2e5d4g5s", false, false},
|
||||
{"/kubepods/besteffort/podxxx-afhts2e5d4g5s/kata_afhts2e5d4g5s", false, false},
|
||||
{"/sys/fs/cgroup/cpu/sandbox/kata_foobar", false, false},
|
||||
{"kata_overhead/afhts2e5d4g5s", false, false},
|
||||
|
||||
// invalid systemd paths
|
||||
{"o / m /../ g", true, true},
|
||||
{"slice:kata", true, true},
|
||||
{"/kata/afhts2e5d4g5s", true, true},
|
||||
{"a:b:c:d", true, true},
|
||||
{":::", true, true},
|
||||
{"", true, true},
|
||||
@ -83,7 +85,7 @@ func TestValidCgroupPath(t *testing.T) {
|
||||
{"x.slice:kata:55555", true, false},
|
||||
{"system.slice:kata:afhts2e5d4g5s", true, false},
|
||||
} {
|
||||
path, err := ValidCgroupPath(t.path, t.systemdCgroup)
|
||||
path, err := ValidCgroupPathV1(t.path, t.systemdCgroup)
|
||||
if t.error {
|
||||
assert.Error(err)
|
||||
continue
|
||||
@ -106,6 +108,73 @@ func TestValidCgroupPath(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
func TestValidCgroupPathV2(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
for _, t := range []struct {
|
||||
path string
|
||||
systemdCgroup bool
|
||||
error bool
|
||||
}{
|
||||
// empty paths
|
||||
{"../../../", false, false},
|
||||
{"../", false, false},
|
||||
{".", false, false},
|
||||
{"../../../", false, false},
|
||||
{"./../", false, false},
|
||||
|
||||
// valid no-systemd paths
|
||||
{"../../../foo", false, false},
|
||||
{"/../hi", false, false},
|
||||
{"/../hi/foo", false, false},
|
||||
{"o / m /../ g", false, false},
|
||||
{"/overhead/foobar", false, false},
|
||||
{"/kata/afhts2e5d4g5s", false, false},
|
||||
{"/kubepods/besteffort/podxxx-afhts2e5d4g5s/kata_afhts2e5d4g5s", false, false},
|
||||
{"/sys/fs/cgroup/cpu/sandbox/kata_foobar", false, false},
|
||||
{"kata_overhead/afhts2e5d4g5s", false, false},
|
||||
|
||||
// invalid systemd paths
|
||||
{"o / m /../ g", true, true},
|
||||
{"slice:kata", true, true},
|
||||
{"a:b:c:d", true, true},
|
||||
{":::", true, true},
|
||||
{"", true, true},
|
||||
{":", true, true},
|
||||
{"::", true, true},
|
||||
{":::", true, true},
|
||||
{"a:b", true, true},
|
||||
{"a:b:", true, true},
|
||||
{":a:b", true, true},
|
||||
{"@:@:@", true, true},
|
||||
|
||||
// valid systemd paths
|
||||
{"x.slice:kata:55555", true, false},
|
||||
{"system.slice:kata:afhts2e5d4g5s", true, false},
|
||||
} {
|
||||
path, err := ValidCgroupPathV2(t.path, t.systemdCgroup)
|
||||
if t.error {
|
||||
assert.Error(err)
|
||||
continue
|
||||
} else {
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
if filepath.IsAbs(t.path) {
|
||||
cleanPath := filepath.Dir(filepath.Clean(t.path))
|
||||
assert.True(strings.HasPrefix(path, cleanPath),
|
||||
"%v should have prefix %v", path, cleanPath)
|
||||
} else if t.systemdCgroup {
|
||||
assert.Equal(filepath.Join("/", t.path), path)
|
||||
} else {
|
||||
assert.True(
|
||||
strings.HasPrefix(path, DefaultResourceControllerID),
|
||||
"%v should have prefix /%v", path, DefaultResourceControllerID)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDeviceToCgroupDeviceRule(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
|
59
src/runtime/vendor/github.com/containerd/cgroups/README.md
generated
vendored
59
src/runtime/vendor/github.com/containerd/cgroups/README.md
generated
vendored
@ -9,7 +9,7 @@ Go package for creating, managing, inspecting, and destroying cgroups.
|
||||
The resources format for settings on the cgroup uses the OCI runtime-spec found
|
||||
[here](https://github.com/opencontainers/runtime-spec).
|
||||
|
||||
## Examples
|
||||
## Examples (v1)
|
||||
|
||||
### Create a new cgroup
|
||||
|
||||
@ -26,7 +26,7 @@ uses the v1 implementation of cgroups.
|
||||
```go
|
||||
shares := uint64(100)
|
||||
control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{
|
||||
CPU: &specs.CPU{
|
||||
CPU: &specs.LinuxCPU{
|
||||
Shares: &shares,
|
||||
},
|
||||
})
|
||||
@ -133,6 +133,61 @@ event := cgroups.OOMEvent()
|
||||
efd, err := control.RegisterMemoryEvent(event)
|
||||
```
|
||||
|
||||
## Examples (v2/unified)
|
||||
|
||||
### Check that the current system is running cgroups v2
|
||||
|
||||
```go
|
||||
var cgroupV2 bool
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
cgroupV2 = true
|
||||
}
|
||||
```
|
||||
|
||||
### Create a new cgroup
|
||||
|
||||
This creates a new systemd v2 cgroup slice. Systemd slices consider ["-" a special character](https://www.freedesktop.org/software/systemd/man/systemd.slice.html),
|
||||
so the resulting slice would be located here on disk:
|
||||
|
||||
* /sys/fs/cgroup/my.slice/my-cgroup.slice/my-cgroup-abc.slice
|
||||
|
||||
```go
|
||||
import (
|
||||
cgroupsv2 "github.com/containerd/cgroups/v2"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
res := cgroupsv2.Resources{}
|
||||
// dummy PID of -1 is used for creating a "general slice" to be used as a parent cgroup.
|
||||
// see https://github.com/containerd/cgroups/blob/1df78138f1e1e6ee593db155c6b369466f577651/v2/manager.go#L732-L735
|
||||
m, err := cgroupsv2.NewSystemd("/", "my-cgroup-abc.slice", -1, &res)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
### Load an existing cgroup
|
||||
|
||||
```go
|
||||
m, err := cgroupsv2.LoadSystemd("/", "my-cgroup-abc.slice")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
### Delete a cgroup
|
||||
|
||||
```go
|
||||
m, err := cgroupsv2.LoadSystemd("/", "my-cgroup-abc.slice")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = m.DeleteSystemd()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
### Attention
|
||||
|
||||
All static path should not include `/sys/fs/cgroup/` prefix, it should start with your own cgroups name
|
||||
|
8
src/runtime/vendor/github.com/containerd/cgroups/Vagrantfile
generated
vendored
8
src/runtime/vendor/github.com/containerd/cgroups/Vagrantfile
generated
vendored
@ -3,19 +3,19 @@
|
||||
|
||||
Vagrant.configure("2") do |config|
|
||||
# Fedora box is used for testing cgroup v2 support
|
||||
config.vm.box = "fedora/32-cloud-base"
|
||||
config.vm.box = "fedora/35-cloud-base"
|
||||
config.vm.provider :virtualbox do |v|
|
||||
v.memory = 2048
|
||||
v.memory = 4096
|
||||
v.cpus = 2
|
||||
end
|
||||
config.vm.provider :libvirt do |v|
|
||||
v.memory = 2048
|
||||
v.memory = 4096
|
||||
v.cpus = 2
|
||||
end
|
||||
config.vm.provision "shell", inline: <<-SHELL
|
||||
set -eux -o pipefail
|
||||
# configuration
|
||||
GO_VERSION="1.15"
|
||||
GO_VERSION="1.17.7"
|
||||
|
||||
# install gcc and Golang
|
||||
dnf -y install gcc
|
||||
|
10
src/runtime/vendor/github.com/containerd/cgroups/go.mod
generated
vendored
10
src/runtime/vendor/github.com/containerd/cgroups/go.mod
generated
vendored
@ -1,18 +1,22 @@
|
||||
module github.com/containerd/cgroups
|
||||
|
||||
go 1.16
|
||||
go 1.17
|
||||
|
||||
require (
|
||||
github.com/cilium/ebpf v0.4.0
|
||||
github.com/coreos/go-systemd/v22 v22.3.2
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect
|
||||
github.com/docker/go-units v0.4.0
|
||||
github.com/godbus/dbus/v5 v5.0.4
|
||||
github.com/gogo/protobuf v1.3.2
|
||||
github.com/opencontainers/runtime-spec v1.0.2
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/urfave/cli v1.22.2
|
||||
go.uber.org/goleak v1.1.12
|
||||
golang.org/x/sys v0.0.0-20210510120138-977fb7262007
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
|
||||
)
|
||||
|
11
src/runtime/vendor/github.com/containerd/cgroups/go.sum
generated
vendored
11
src/runtime/vendor/github.com/containerd/cgroups/go.sum
generated
vendored
@ -1,11 +1,7 @@
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/cilium/ebpf v0.4.0 h1:QlHdikaxALkqWasW8hAC1mfR0jdmvbfaBdBPFmRSglA=
|
||||
github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
|
||||
github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=
|
||||
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0 h1:EoUDS0afbrsXAZ9YQ9jdu/mZ2sXgT1/2yyNng4PGlyM=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@ -31,18 +27,12 @@ github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNia
|
||||
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
|
||||
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
|
||||
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/urfave/cli v1.22.2 h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo=
|
||||
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
|
||||
@ -93,6 +83,5 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
37
src/runtime/vendor/github.com/containerd/cgroups/utils.go
generated
vendored
37
src/runtime/vendor/github.com/containerd/cgroups/utils.go
generated
vendored
@ -261,21 +261,28 @@ func parseKV(raw string) (string, uint64, error) {
|
||||
// "pids": "/user.slice/user-1000.slice"
|
||||
// etc.
|
||||
//
|
||||
// Note that for cgroup v2 unified hierarchy, there are no per-controller
|
||||
// cgroup paths, so the resulting map will have a single element where the key
|
||||
// is empty string ("") and the value is the cgroup path the <pid> is in.
|
||||
// The resulting map does not have an element for cgroup v2 unified hierarchy.
|
||||
// Use ParseCgroupFileUnified to get the unified path.
|
||||
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
x, _, err := ParseCgroupFileUnified(path)
|
||||
return x, err
|
||||
}
|
||||
|
||||
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
||||
// ParseCgroupFileUnified returns legacy subsystem paths as the first value,
|
||||
// and returns the unified path as the second value.
|
||||
func ParseCgroupFileUnified(path string) (map[string]string, string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReaderUnified(f)
|
||||
}
|
||||
|
||||
func parseCgroupFromReaderUnified(r io.Reader) (map[string]string, string, error) {
|
||||
var (
|
||||
cgroups = make(map[string]string)
|
||||
unified = ""
|
||||
s = bufio.NewScanner(r)
|
||||
)
|
||||
for s.Scan() {
|
||||
@ -284,18 +291,20 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return nil, fmt.Errorf("invalid cgroup entry: %q", text)
|
||||
return nil, unified, fmt.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
for _, subs := range strings.Split(parts[1], ",") {
|
||||
if subs != "" {
|
||||
if subs == "" {
|
||||
unified = parts[2]
|
||||
} else {
|
||||
cgroups[subs] = parts[2]
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
return nil, unified, err
|
||||
}
|
||||
return cgroups, nil
|
||||
return cgroups, unified, nil
|
||||
}
|
||||
|
||||
func getCgroupDestination(subsystem string) (string, error) {
|
||||
|
83
src/runtime/vendor/github.com/containerd/cgroups/v2/cpu.go
generated
vendored
Normal file
83
src/runtime/vendor/github.com/containerd/cgroups/v2/cpu.go
generated
vendored
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type CPUMax string
|
||||
|
||||
func NewCPUMax(quota *int64, period *uint64) CPUMax {
|
||||
max := "max"
|
||||
if quota != nil {
|
||||
max = strconv.FormatInt(*quota, 10)
|
||||
}
|
||||
return CPUMax(strings.Join([]string{max, strconv.FormatUint(*period, 10)}, " "))
|
||||
}
|
||||
|
||||
type CPU struct {
|
||||
Weight *uint64
|
||||
Max CPUMax
|
||||
Cpus string
|
||||
Mems string
|
||||
}
|
||||
|
||||
func (c CPUMax) extractQuotaAndPeriod() (int64, uint64) {
|
||||
var (
|
||||
quota int64
|
||||
period uint64
|
||||
)
|
||||
values := strings.Split(string(c), " ")
|
||||
if values[0] == "max" {
|
||||
quota = math.MaxInt64
|
||||
} else {
|
||||
quota, _ = strconv.ParseInt(values[0], 10, 64)
|
||||
}
|
||||
period, _ = strconv.ParseUint(values[1], 10, 64)
|
||||
return quota, period
|
||||
}
|
||||
|
||||
func (r *CPU) Values() (o []Value) {
|
||||
if r.Weight != nil {
|
||||
o = append(o, Value{
|
||||
filename: "cpu.weight",
|
||||
value: *r.Weight,
|
||||
})
|
||||
}
|
||||
if r.Max != "" {
|
||||
o = append(o, Value{
|
||||
filename: "cpu.max",
|
||||
value: r.Max,
|
||||
})
|
||||
}
|
||||
if r.Cpus != "" {
|
||||
o = append(o, Value{
|
||||
filename: "cpuset.cpus",
|
||||
value: r.Cpus,
|
||||
})
|
||||
}
|
||||
if r.Mems != "" {
|
||||
o = append(o, Value{
|
||||
filename: "cpuset.mems",
|
||||
value: r.Mems,
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
200
src/runtime/vendor/github.com/containerd/cgroups/v2/devicefilter.go
generated
vendored
Normal file
200
src/runtime/vendor/github.com/containerd/cgroups/v2/devicefilter.go
generated
vendored
Normal file
@ -0,0 +1,200 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Devicefilter containes eBPF device filter program
|
||||
//
|
||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||
//
|
||||
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
|
||||
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
|
||||
//
|
||||
// This particular Go implementation based on runc version
|
||||
// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// license string format is same as kernel MODULE_LICENSE macro
|
||||
license = "Apache"
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []specs.LinuxDeviceCgroup) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
if err := p.appendDevice(devices[i]); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
insts, err := p.finalize()
|
||||
return insts, license, err
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
hasWildCard bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
|
||||
/*
|
||||
u32 access_type
|
||||
u32 major
|
||||
u32 minor
|
||||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
|
||||
// RSh: bitwise shift right
|
||||
asm.RSh.Imm32(asm.R3, 16))
|
||||
|
||||
// R4 <- major (u32 major at R1[4])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
|
||||
|
||||
// R5 <- minor (u32 minor at R1[8])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
if p.hasWildCard {
|
||||
// All entries after wildcard entry are ignored
|
||||
return nil
|
||||
}
|
||||
|
||||
bpfType := int32(-1)
|
||||
hasType := true
|
||||
switch dev.Type {
|
||||
case string('c'):
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case string('b'):
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
case string('a'):
|
||||
hasType = false
|
||||
default:
|
||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
||||
return fmt.Errorf("invalid DeviceType %q", dev.Type)
|
||||
}
|
||||
if *dev.Major > math.MaxUint32 {
|
||||
return fmt.Errorf("invalid major %d", *dev.Major)
|
||||
}
|
||||
if *dev.Minor > math.MaxUint32 {
|
||||
return fmt.Errorf("invalid minor %d", *dev.Major)
|
||||
}
|
||||
hasMajor := *dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := *dev.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range dev.Access {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
case 'w':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
|
||||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return fmt.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
||||
prevBlockLastIdx := len(p.insts) - 1
|
||||
if hasType {
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(*dev.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(*dev.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
||||
p.hasWildCard = true
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||
p.blockID++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
if p.hasWildCard {
|
||||
// acceptBlock with asm.Return() is already inserted
|
||||
return p.insts, nil
|
||||
}
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- 0
|
||||
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts, nil
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
v := int32(0)
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
return []asm.Instruction{
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v),
|
||||
asm.Return(),
|
||||
}
|
||||
}
|
96
src/runtime/vendor/github.com/containerd/cgroups/v2/ebpf.go
generated
vendored
Normal file
96
src/runtime/vendor/github.com/containerd/cgroups/v2/ebpf.go
generated
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
||||
nilCloser := func() error {
|
||||
return nil
|
||||
}
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: unix.BPF_F_ALLOW_MULTI,
|
||||
})
|
||||
if err != nil {
|
||||
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
|
||||
}
|
||||
closer := func() error {
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return closer, nil
|
||||
}
|
||||
|
||||
func isRWM(cgroupPermissions string) bool {
|
||||
r := false
|
||||
w := false
|
||||
m := false
|
||||
for _, rn := range cgroupPermissions {
|
||||
switch rn {
|
||||
case 'r':
|
||||
r = true
|
||||
case 'w':
|
||||
w = true
|
||||
case 'm':
|
||||
m = true
|
||||
}
|
||||
}
|
||||
return r && w && m
|
||||
}
|
||||
|
||||
// the logic is from runc
|
||||
// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/fs/devices_v2.go#L44
|
||||
func canSkipEBPFError(devices []specs.LinuxDeviceCgroup) bool {
|
||||
for _, dev := range devices {
|
||||
if dev.Allow || !isRWM(dev.Access) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
26
src/runtime/vendor/github.com/containerd/cgroups/v2/errors.go
generated
vendored
Normal file
26
src/runtime/vendor/github.com/containerd/cgroups/v2/errors.go
generated
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed")
|
||||
ErrInvalidGroupPath = errors.New("cgroups: invalid group path")
|
||||
)
|
37
src/runtime/vendor/github.com/containerd/cgroups/v2/hugetlb.go
generated
vendored
Normal file
37
src/runtime/vendor/github.com/containerd/cgroups/v2/hugetlb.go
generated
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import "strings"
|
||||
|
||||
type HugeTlb []HugeTlbEntry
|
||||
|
||||
type HugeTlbEntry struct {
|
||||
HugePageSize string
|
||||
Limit uint64
|
||||
}
|
||||
|
||||
func (r *HugeTlb) Values() (o []Value) {
|
||||
for _, e := range *r {
|
||||
o = append(o, Value{
|
||||
filename: strings.Join([]string{"hugetlb", e.HugePageSize, "max"}, "."),
|
||||
value: e.Limit,
|
||||
})
|
||||
}
|
||||
|
||||
return o
|
||||
}
|
64
src/runtime/vendor/github.com/containerd/cgroups/v2/io.go
generated
vendored
Normal file
64
src/runtime/vendor/github.com/containerd/cgroups/v2/io.go
generated
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import "fmt"
|
||||
|
||||
type IOType string
|
||||
|
||||
const (
|
||||
ReadBPS IOType = "rbps"
|
||||
WriteBPS IOType = "wbps"
|
||||
ReadIOPS IOType = "riops"
|
||||
WriteIOPS IOType = "wiops"
|
||||
)
|
||||
|
||||
type BFQ struct {
|
||||
Weight uint16
|
||||
}
|
||||
|
||||
type Entry struct {
|
||||
Type IOType
|
||||
Major int64
|
||||
Minor int64
|
||||
Rate uint64
|
||||
}
|
||||
|
||||
func (e Entry) String() string {
|
||||
return fmt.Sprintf("%d:%d %s=%d", e.Major, e.Minor, e.Type, e.Rate)
|
||||
}
|
||||
|
||||
type IO struct {
|
||||
BFQ BFQ
|
||||
Max []Entry
|
||||
}
|
||||
|
||||
func (i *IO) Values() (o []Value) {
|
||||
if i.BFQ.Weight != 0 {
|
||||
o = append(o, Value{
|
||||
filename: "io.bfq.weight",
|
||||
value: i.BFQ.Weight,
|
||||
})
|
||||
}
|
||||
for _, e := range i.Max {
|
||||
o = append(o, Value{
|
||||
filename: "io.max",
|
||||
value: e.String(),
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
879
src/runtime/vendor/github.com/containerd/cgroups/v2/manager.go
generated
vendored
Normal file
879
src/runtime/vendor/github.com/containerd/cgroups/v2/manager.go
generated
vendored
Normal file
@ -0,0 +1,879 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/cgroups/v2/stats"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
subtreeControl = "cgroup.subtree_control"
|
||||
controllersFile = "cgroup.controllers"
|
||||
defaultCgroup2Path = "/sys/fs/cgroup"
|
||||
defaultSlice = "system.slice"
|
||||
)
|
||||
|
||||
var (
|
||||
canDelegate bool
|
||||
)
|
||||
|
||||
type Event struct {
|
||||
Low uint64
|
||||
High uint64
|
||||
Max uint64
|
||||
OOM uint64
|
||||
OOMKill uint64
|
||||
}
|
||||
|
||||
// Resources for a cgroups v2 unified hierarchy
|
||||
type Resources struct {
|
||||
CPU *CPU
|
||||
Memory *Memory
|
||||
Pids *Pids
|
||||
IO *IO
|
||||
RDMA *RDMA
|
||||
HugeTlb *HugeTlb
|
||||
// When len(Devices) is zero, devices are not controlled
|
||||
Devices []specs.LinuxDeviceCgroup
|
||||
}
|
||||
|
||||
// Values returns the raw filenames and values that
|
||||
// can be written to the unified hierarchy
|
||||
func (r *Resources) Values() (o []Value) {
|
||||
if r.CPU != nil {
|
||||
o = append(o, r.CPU.Values()...)
|
||||
}
|
||||
if r.Memory != nil {
|
||||
o = append(o, r.Memory.Values()...)
|
||||
}
|
||||
if r.Pids != nil {
|
||||
o = append(o, r.Pids.Values()...)
|
||||
}
|
||||
if r.IO != nil {
|
||||
o = append(o, r.IO.Values()...)
|
||||
}
|
||||
if r.RDMA != nil {
|
||||
o = append(o, r.RDMA.Values()...)
|
||||
}
|
||||
if r.HugeTlb != nil {
|
||||
o = append(o, r.HugeTlb.Values()...)
|
||||
}
|
||||
return o
|
||||
}
|
||||
|
||||
// EnabledControllers returns the list of all not nil resource controllers
|
||||
func (r *Resources) EnabledControllers() (c []string) {
|
||||
if r.CPU != nil {
|
||||
c = append(c, "cpu")
|
||||
c = append(c, "cpuset")
|
||||
}
|
||||
if r.Memory != nil {
|
||||
c = append(c, "memory")
|
||||
}
|
||||
if r.Pids != nil {
|
||||
c = append(c, "pids")
|
||||
}
|
||||
if r.IO != nil {
|
||||
c = append(c, "io")
|
||||
}
|
||||
if r.RDMA != nil {
|
||||
c = append(c, "rdma")
|
||||
}
|
||||
if r.HugeTlb != nil {
|
||||
c = append(c, "hugetlb")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Value of a cgroup setting
|
||||
type Value struct {
|
||||
filename string
|
||||
value interface{}
|
||||
}
|
||||
|
||||
// write the value to the full, absolute path, of a unified hierarchy
|
||||
func (c *Value) write(path string, perm os.FileMode) error {
|
||||
var data []byte
|
||||
switch t := c.value.(type) {
|
||||
case uint64:
|
||||
data = []byte(strconv.FormatUint(t, 10))
|
||||
case uint16:
|
||||
data = []byte(strconv.FormatUint(uint64(t), 10))
|
||||
case int64:
|
||||
data = []byte(strconv.FormatInt(t, 10))
|
||||
case []byte:
|
||||
data = t
|
||||
case string:
|
||||
data = []byte(t)
|
||||
case CPUMax:
|
||||
data = []byte(t)
|
||||
default:
|
||||
return ErrInvalidFormat
|
||||
}
|
||||
|
||||
// Retry writes on EINTR; see:
|
||||
// https://github.com/golang/go/issues/38033
|
||||
for {
|
||||
err := ioutil.WriteFile(
|
||||
filepath.Join(path, c.filename),
|
||||
data,
|
||||
perm,
|
||||
)
|
||||
if err == nil {
|
||||
return nil
|
||||
} else if !errors.Is(err, syscall.EINTR) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func writeValues(path string, values []Value) error {
|
||||
for _, o := range values {
|
||||
if err := o.write(path, defaultFilePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) {
|
||||
if resources == nil {
|
||||
return nil, errors.New("resources reference is nil")
|
||||
}
|
||||
if err := VerifyGroupPath(group); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := filepath.Join(mountpoint, group)
|
||||
if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Manager{
|
||||
unifiedMountpoint: mountpoint,
|
||||
path: path,
|
||||
}
|
||||
if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
if err := setResources(path, resources); err != nil {
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
func LoadManager(mountpoint string, group string) (*Manager, error) {
|
||||
if err := VerifyGroupPath(group); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
path := filepath.Join(mountpoint, group)
|
||||
return &Manager{
|
||||
unifiedMountpoint: mountpoint,
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
unifiedMountpoint string
|
||||
path string
|
||||
}
|
||||
|
||||
func setResources(path string, resources *Resources) error {
|
||||
if resources != nil {
|
||||
if err := writeValues(path, resources.Values()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setDevices(path, resources.Devices); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Manager) RootControllers() ([]string, error) {
|
||||
b, err := ioutil.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return strings.Fields(string(b)), nil
|
||||
}
|
||||
|
||||
func (c *Manager) Controllers() ([]string, error) {
|
||||
b, err := ioutil.ReadFile(filepath.Join(c.path, controllersFile))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return strings.Fields(string(b)), nil
|
||||
}
|
||||
|
||||
func (c *Manager) Update(resources *Resources) error {
|
||||
return setResources(c.path, resources)
|
||||
}
|
||||
|
||||
type ControllerToggle int
|
||||
|
||||
const (
|
||||
Enable ControllerToggle = iota + 1
|
||||
Disable
|
||||
)
|
||||
|
||||
func toggleFunc(controllers []string, prefix string) []string {
|
||||
out := make([]string, len(controllers))
|
||||
for i, c := range controllers {
|
||||
out[i] = prefix + c
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error {
|
||||
// when c.path is like /foo/bar/baz, the following files need to be written:
|
||||
// * /sys/fs/cgroup/cgroup.subtree_control
|
||||
// * /sys/fs/cgroup/foo/cgroup.subtree_control
|
||||
// * /sys/fs/cgroup/foo/bar/cgroup.subtree_control
|
||||
// Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written.
|
||||
split := strings.Split(c.path, "/")
|
||||
var lastErr error
|
||||
for i := range split {
|
||||
f := strings.Join(split[:i], "/")
|
||||
if !strings.HasPrefix(f, c.unifiedMountpoint) || f == c.path {
|
||||
continue
|
||||
}
|
||||
filePath := filepath.Join(f, subtreeControl)
|
||||
if err := c.writeSubtreeControl(filePath, controllers, t); err != nil {
|
||||
// When running as rootless, the user may face EPERM on parent groups, but it is neglible when the
|
||||
// controller is already written.
|
||||
// So we only return the last error.
|
||||
lastErr = fmt.Errorf("failed to write subtree controllers %+v to %q: %w", controllers, filePath, err)
|
||||
} else {
|
||||
lastErr = nil
|
||||
}
|
||||
}
|
||||
return lastErr
|
||||
}
|
||||
|
||||
func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error {
|
||||
f, err := os.OpenFile(filePath, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
switch t {
|
||||
case Enable:
|
||||
controllers = toggleFunc(controllers, "+")
|
||||
case Disable:
|
||||
controllers = toggleFunc(controllers, "-")
|
||||
}
|
||||
_, err = f.WriteString(strings.Join(controllers, " "))
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) {
|
||||
if strings.HasPrefix(name, "/") {
|
||||
return nil, errors.New("name must be relative")
|
||||
}
|
||||
path := filepath.Join(c.path, name)
|
||||
if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Manager{
|
||||
unifiedMountpoint: c.unifiedMountpoint,
|
||||
path: path,
|
||||
}
|
||||
if resources != nil {
|
||||
if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if err := setResources(path, resources); err != nil {
|
||||
// clean up cgroup dir on failure
|
||||
os.Remove(path)
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
func (c *Manager) AddProc(pid uint64) error {
|
||||
v := Value{
|
||||
filename: cgroupProcs,
|
||||
value: pid,
|
||||
}
|
||||
return writeValues(c.path, []Value{v})
|
||||
}
|
||||
|
||||
func (c *Manager) Delete() error {
|
||||
return remove(c.path)
|
||||
}
|
||||
|
||||
func (c *Manager) Procs(recursive bool) ([]uint64, error) {
|
||||
var processes []uint64
|
||||
err := filepath.Walk(c.path, func(p string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !recursive && info.IsDir() {
|
||||
if p == c.path {
|
||||
return nil
|
||||
}
|
||||
return filepath.SkipDir
|
||||
}
|
||||
_, name := filepath.Split(p)
|
||||
if name != cgroupProcs {
|
||||
return nil
|
||||
}
|
||||
procs, err := parseCgroupProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
processes = append(processes, procs...)
|
||||
return nil
|
||||
})
|
||||
return processes, err
|
||||
}
|
||||
|
||||
func (c *Manager) MoveTo(destination *Manager) error {
|
||||
processes, err := c.Procs(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, p := range processes {
|
||||
if err := destination.AddProc(p); err != nil {
|
||||
if strings.Contains(err.Error(), "no such process") {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var singleValueFiles = []string{
|
||||
"pids.current",
|
||||
"pids.max",
|
||||
}
|
||||
|
||||
func (c *Manager) Stat() (*stats.Metrics, error) {
|
||||
controllers, err := c.Controllers()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := make(map[string]interface{})
|
||||
for _, controller := range controllers {
|
||||
switch controller {
|
||||
case "cpu", "memory":
|
||||
if err := readKVStatsFile(c.path, controller+".stat", out); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, name := range singleValueFiles {
|
||||
if err := readSingleFile(c.path, name, out); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
memoryEvents := make(map[string]interface{})
|
||||
if err := readKVStatsFile(c.path, "memory.events", memoryEvents); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
var metrics stats.Metrics
|
||||
|
||||
metrics.Pids = &stats.PidsStat{
|
||||
Current: getPidValue("pids.current", out),
|
||||
Limit: getPidValue("pids.max", out),
|
||||
}
|
||||
metrics.CPU = &stats.CPUStat{
|
||||
UsageUsec: getUint64Value("usage_usec", out),
|
||||
UserUsec: getUint64Value("user_usec", out),
|
||||
SystemUsec: getUint64Value("system_usec", out),
|
||||
NrPeriods: getUint64Value("nr_periods", out),
|
||||
NrThrottled: getUint64Value("nr_throttled", out),
|
||||
ThrottledUsec: getUint64Value("throttled_usec", out),
|
||||
}
|
||||
metrics.Memory = &stats.MemoryStat{
|
||||
Anon: getUint64Value("anon", out),
|
||||
File: getUint64Value("file", out),
|
||||
KernelStack: getUint64Value("kernel_stack", out),
|
||||
Slab: getUint64Value("slab", out),
|
||||
Sock: getUint64Value("sock", out),
|
||||
Shmem: getUint64Value("shmem", out),
|
||||
FileMapped: getUint64Value("file_mapped", out),
|
||||
FileDirty: getUint64Value("file_dirty", out),
|
||||
FileWriteback: getUint64Value("file_writeback", out),
|
||||
AnonThp: getUint64Value("anon_thp", out),
|
||||
InactiveAnon: getUint64Value("inactive_anon", out),
|
||||
ActiveAnon: getUint64Value("active_anon", out),
|
||||
InactiveFile: getUint64Value("inactive_file", out),
|
||||
ActiveFile: getUint64Value("active_file", out),
|
||||
Unevictable: getUint64Value("unevictable", out),
|
||||
SlabReclaimable: getUint64Value("slab_reclaimable", out),
|
||||
SlabUnreclaimable: getUint64Value("slab_unreclaimable", out),
|
||||
Pgfault: getUint64Value("pgfault", out),
|
||||
Pgmajfault: getUint64Value("pgmajfault", out),
|
||||
WorkingsetRefault: getUint64Value("workingset_refault", out),
|
||||
WorkingsetActivate: getUint64Value("workingset_activate", out),
|
||||
WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out),
|
||||
Pgrefill: getUint64Value("pgrefill", out),
|
||||
Pgscan: getUint64Value("pgscan", out),
|
||||
Pgsteal: getUint64Value("pgsteal", out),
|
||||
Pgactivate: getUint64Value("pgactivate", out),
|
||||
Pgdeactivate: getUint64Value("pgdeactivate", out),
|
||||
Pglazyfree: getUint64Value("pglazyfree", out),
|
||||
Pglazyfreed: getUint64Value("pglazyfreed", out),
|
||||
ThpFaultAlloc: getUint64Value("thp_fault_alloc", out),
|
||||
ThpCollapseAlloc: getUint64Value("thp_collapse_alloc", out),
|
||||
Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")),
|
||||
UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")),
|
||||
SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")),
|
||||
SwapLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")),
|
||||
}
|
||||
if len(memoryEvents) > 0 {
|
||||
metrics.MemoryEvents = &stats.MemoryEvents{
|
||||
Low: getUint64Value("low", memoryEvents),
|
||||
High: getUint64Value("high", memoryEvents),
|
||||
Max: getUint64Value("max", memoryEvents),
|
||||
Oom: getUint64Value("oom", memoryEvents),
|
||||
OomKill: getUint64Value("oom_kill", memoryEvents),
|
||||
}
|
||||
}
|
||||
metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)}
|
||||
metrics.Rdma = &stats.RdmaStat{
|
||||
Current: rdmaStats(filepath.Join(c.path, "rdma.current")),
|
||||
Limit: rdmaStats(filepath.Join(c.path, "rdma.max")),
|
||||
}
|
||||
metrics.Hugetlb = readHugeTlbStats(c.path)
|
||||
|
||||
return &metrics, nil
|
||||
}
|
||||
|
||||
func getUint64Value(key string, out map[string]interface{}) uint64 {
|
||||
v, ok := out[key]
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
switch t := v.(type) {
|
||||
case uint64:
|
||||
return t
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func getPidValue(key string, out map[string]interface{}) uint64 {
|
||||
v, ok := out[key]
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
switch t := v.(type) {
|
||||
case uint64:
|
||||
return t
|
||||
case string:
|
||||
if t == "max" {
|
||||
return math.MaxUint64
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func readSingleFile(path string, file string, out map[string]interface{}) error {
|
||||
f, err := os.Open(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
data, err := ioutil.ReadAll(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s := strings.TrimSpace(string(data))
|
||||
v, err := parseUint(s, 10, 64)
|
||||
if err != nil {
|
||||
// if we cannot parse as a uint, parse as a string
|
||||
out[file] = s
|
||||
return nil
|
||||
}
|
||||
out[file] = v
|
||||
return nil
|
||||
}
|
||||
|
||||
func readKVStatsFile(path string, file string, out map[string]interface{}) error {
|
||||
f, err := os.Open(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
name, value, err := parseKV(s.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while parsing %s (line=%q): %w", filepath.Join(path, file), s.Text(), err)
|
||||
}
|
||||
out[name] = value
|
||||
}
|
||||
return s.Err()
|
||||
}
|
||||
|
||||
func (c *Manager) Freeze() error {
|
||||
return c.freeze(c.path, Frozen)
|
||||
}
|
||||
|
||||
func (c *Manager) Thaw() error {
|
||||
return c.freeze(c.path, Thawed)
|
||||
}
|
||||
|
||||
func (c *Manager) freeze(path string, state State) error {
|
||||
values := state.Values()
|
||||
for {
|
||||
if err := writeValues(path, values); err != nil {
|
||||
return err
|
||||
}
|
||||
current, err := fetchState(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if current == state {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Manager) isCgroupEmpty() bool {
|
||||
// In case of any error we return true so that we exit and don't leak resources
|
||||
out := make(map[string]interface{})
|
||||
if err := readKVStatsFile(c.path, "cgroup.events", out); err != nil {
|
||||
return true
|
||||
}
|
||||
if v, ok := out["populated"]; ok {
|
||||
populated, ok := v.(uint64)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
return populated == 0
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor
|
||||
func (c *Manager) MemoryEventFD() (int, uint32, error) {
|
||||
fpath := filepath.Join(c.path, "memory.events")
|
||||
fd, err := syscall.InotifyInit()
|
||||
if err != nil {
|
||||
return 0, 0, errors.New("failed to create inotify fd")
|
||||
}
|
||||
wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
syscall.Close(fd)
|
||||
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", fpath, err)
|
||||
}
|
||||
// monitor to detect process exit/cgroup deletion
|
||||
evpath := filepath.Join(c.path, "cgroup.events")
|
||||
if _, err = syscall.InotifyAddWatch(fd, evpath, unix.IN_MODIFY); err != nil {
|
||||
syscall.Close(fd)
|
||||
return 0, 0, fmt.Errorf("failed to add inotify watch for %q: %w", evpath, err)
|
||||
}
|
||||
|
||||
return fd, uint32(wd), nil
|
||||
}
|
||||
|
||||
func (c *Manager) EventChan() (<-chan Event, <-chan error) {
|
||||
ec := make(chan Event)
|
||||
errCh := make(chan error, 1)
|
||||
go c.waitForEvents(ec, errCh)
|
||||
|
||||
return ec, errCh
|
||||
}
|
||||
|
||||
func parseMemoryEvents(out map[string]interface{}) (Event, error) {
|
||||
e := Event{}
|
||||
if v, ok := out["high"]; ok {
|
||||
e.High, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert high to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["low"]; ok {
|
||||
e.Low, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert low to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["max"]; ok {
|
||||
e.Max, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert max to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["oom"]; ok {
|
||||
e.OOM, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert oom to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
if v, ok := out["oom_kill"]; ok {
|
||||
e.OOMKill, ok = v.(uint64)
|
||||
if !ok {
|
||||
return Event{}, fmt.Errorf("cannot convert oom_kill to uint64: %+v", v)
|
||||
}
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
|
||||
defer close(errCh)
|
||||
|
||||
fd, _, err := c.MemoryEventFD()
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
defer syscall.Close(fd)
|
||||
|
||||
for {
|
||||
buffer := make([]byte, syscall.SizeofInotifyEvent*10)
|
||||
bytesRead, err := syscall.Read(fd, buffer)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
if bytesRead >= syscall.SizeofInotifyEvent {
|
||||
out := make(map[string]interface{})
|
||||
if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
|
||||
// When cgroup is deleted read may return -ENODEV instead of -ENOENT from open.
|
||||
if _, statErr := os.Lstat(filepath.Join(c.path, "memory.events")); !os.IsNotExist(statErr) {
|
||||
errCh <- err
|
||||
}
|
||||
return
|
||||
}
|
||||
e, err := parseMemoryEvents(out)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
ec <- e
|
||||
if c.isCgroupEmpty() {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
insts, license, err := DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot get dir FD for %s", path)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(devices) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getSystemdFullPath returns the full systemd path when creating a systemd slice group.
|
||||
// the reason this is necessary is because the "-" character has a special meaning in
|
||||
// systemd slice. For example, when creating a slice called "my-group-112233.slice",
|
||||
// systemd will create a hierarchy like this:
|
||||
// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice
|
||||
func getSystemdFullPath(slice, group string) string {
|
||||
return filepath.Join(defaultCgroup2Path, dashesToPath(slice), dashesToPath(group))
|
||||
}
|
||||
|
||||
// dashesToPath converts a slice name with dashes to it's corresponding systemd filesystem path.
|
||||
func dashesToPath(in string) string {
|
||||
path := ""
|
||||
if strings.HasSuffix(in, ".slice") && strings.Contains(in, "-") {
|
||||
parts := strings.Split(in, "-")
|
||||
for i := range parts {
|
||||
s := strings.Join(parts[0:i+1], "-")
|
||||
if !strings.HasSuffix(s, ".slice") {
|
||||
s += ".slice"
|
||||
}
|
||||
path = filepath.Join(path, s)
|
||||
}
|
||||
} else {
|
||||
path = filepath.Join(path, in)
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) {
|
||||
if slice == "" {
|
||||
slice = defaultSlice
|
||||
}
|
||||
ctx := context.TODO()
|
||||
path := getSystemdFullPath(slice, group)
|
||||
conn, err := systemdDbus.NewWithContext(ctx)
|
||||
if err != nil {
|
||||
return &Manager{}, err
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
properties := []systemdDbus.Property{
|
||||
systemdDbus.PropDescription("cgroup " + group),
|
||||
newSystemdProperty("DefaultDependencies", false),
|
||||
newSystemdProperty("MemoryAccounting", true),
|
||||
newSystemdProperty("CPUAccounting", true),
|
||||
newSystemdProperty("IOAccounting", true),
|
||||
}
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(group, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(defaultSlice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(defaultSlice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
if resources.Memory != nil && resources.Memory.Min != nil && *resources.Memory.Min != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("MemoryMin", uint64(*resources.Memory.Min)))
|
||||
}
|
||||
|
||||
if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max)))
|
||||
}
|
||||
|
||||
if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("CPUWeight", *resources.CPU.Weight))
|
||||
}
|
||||
|
||||
if resources.CPU != nil && resources.CPU.Max != "" {
|
||||
quota, period := resources.CPU.Max.extractQuotaAndPeriod()
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
// If we can delegate, we add the property back in
|
||||
if canDelegate {
|
||||
properties = append(properties, newSystemdProperty("Delegate", true))
|
||||
}
|
||||
|
||||
if resources.Pids != nil && resources.Pids.Max > 0 {
|
||||
properties = append(properties,
|
||||
newSystemdProperty("TasksAccounting", true),
|
||||
newSystemdProperty("TasksMax", uint64(resources.Pids.Max)))
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := conn.StartTransientUnitContext(ctx, group, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return &Manager{}, err
|
||||
}
|
||||
|
||||
return &Manager{
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func LoadSystemd(slice, group string) (*Manager, error) {
|
||||
if slice == "" {
|
||||
slice = defaultSlice
|
||||
}
|
||||
path := getSystemdFullPath(slice, group)
|
||||
return &Manager{
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Manager) DeleteSystemd() error {
|
||||
ctx := context.TODO()
|
||||
conn, err := systemdDbus.NewWithContext(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Close()
|
||||
group := systemdUnitFromPath(c.path)
|
||||
ch := make(chan string)
|
||||
_, err = conn.StopUnitContext(ctx, group, "replace", ch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
<-ch
|
||||
return nil
|
||||
}
|
||||
|
||||
func newSystemdProperty(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
59
src/runtime/vendor/github.com/containerd/cgroups/v2/memory.go
generated
vendored
Normal file
59
src/runtime/vendor/github.com/containerd/cgroups/v2/memory.go
generated
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
type Memory struct {
|
||||
Swap *int64
|
||||
Min *int64
|
||||
Max *int64
|
||||
Low *int64
|
||||
High *int64
|
||||
}
|
||||
|
||||
func (r *Memory) Values() (o []Value) {
|
||||
if r.Swap != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.swap.max",
|
||||
value: *r.Swap,
|
||||
})
|
||||
}
|
||||
if r.Min != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.min",
|
||||
value: *r.Min,
|
||||
})
|
||||
}
|
||||
if r.Max != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.max",
|
||||
value: *r.Max,
|
||||
})
|
||||
}
|
||||
if r.Low != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.low",
|
||||
value: *r.Low,
|
||||
})
|
||||
}
|
||||
if r.High != nil {
|
||||
o = append(o, Value{
|
||||
filename: "memory.high",
|
||||
value: *r.High,
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
60
src/runtime/vendor/github.com/containerd/cgroups/v2/paths.go
generated
vendored
Normal file
60
src/runtime/vendor/github.com/containerd/cgroups/v2/paths.go
generated
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// NestedGroupPath will nest the cgroups based on the calling processes cgroup
|
||||
// placing its child processes inside its own path
|
||||
func NestedGroupPath(suffix string) (string, error) {
|
||||
path, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(path, suffix), nil
|
||||
}
|
||||
|
||||
// PidGroupPath will return the correct cgroup paths for an existing process running inside a cgroup
|
||||
// This is commonly used for the Load function to restore an existing container
|
||||
func PidGroupPath(pid int) (string, error) {
|
||||
p := fmt.Sprintf("/proc/%d/cgroup", pid)
|
||||
return parseCgroupFile(p)
|
||||
}
|
||||
|
||||
// VerifyGroupPath verifies the format of group path string g.
|
||||
// The format is same as the third field in /proc/PID/cgroup.
|
||||
// e.g. "/user.slice/user-1001.slice/session-1.scope"
|
||||
//
|
||||
// g must be a "clean" absolute path starts with "/", and must not contain "/sys/fs/cgroup" prefix.
|
||||
//
|
||||
// VerifyGroupPath doesn't verify whether g actually exists on the system.
|
||||
func VerifyGroupPath(g string) error {
|
||||
if !strings.HasPrefix(g, "/") {
|
||||
return ErrInvalidGroupPath
|
||||
}
|
||||
if filepath.Clean(g) != g {
|
||||
return ErrInvalidGroupPath
|
||||
}
|
||||
if strings.HasPrefix(g, "/sys/fs/cgroup") {
|
||||
return ErrInvalidGroupPath
|
||||
}
|
||||
return nil
|
||||
}
|
37
src/runtime/vendor/github.com/containerd/cgroups/v2/pids.go
generated
vendored
Normal file
37
src/runtime/vendor/github.com/containerd/cgroups/v2/pids.go
generated
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import "strconv"
|
||||
|
||||
type Pids struct {
|
||||
Max int64
|
||||
}
|
||||
|
||||
func (r *Pids) Values() (o []Value) {
|
||||
if r.Max != 0 {
|
||||
limit := "max"
|
||||
if r.Max > 0 {
|
||||
limit = strconv.FormatInt(r.Max, 10)
|
||||
}
|
||||
o = append(o, Value{
|
||||
filename: "pids.max",
|
||||
value: limit,
|
||||
})
|
||||
}
|
||||
return o
|
||||
}
|
46
src/runtime/vendor/github.com/containerd/cgroups/v2/rdma.go
generated
vendored
Normal file
46
src/runtime/vendor/github.com/containerd/cgroups/v2/rdma.go
generated
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type RDMA struct {
|
||||
Limit []RDMAEntry
|
||||
}
|
||||
|
||||
type RDMAEntry struct {
|
||||
Device string
|
||||
HcaHandles uint32
|
||||
HcaObjects uint32
|
||||
}
|
||||
|
||||
func (r RDMAEntry) String() string {
|
||||
return fmt.Sprintf("%s hca_handle=%d hca_object=%d", r.Device, r.HcaHandles, r.HcaObjects)
|
||||
}
|
||||
|
||||
func (r *RDMA) Values() (o []Value) {
|
||||
for _, e := range r.Limit {
|
||||
o = append(o, Value{
|
||||
filename: "rdma.max",
|
||||
value: e.String(),
|
||||
})
|
||||
}
|
||||
|
||||
return o
|
||||
}
|
65
src/runtime/vendor/github.com/containerd/cgroups/v2/state.go
generated
vendored
Normal file
65
src/runtime/vendor/github.com/containerd/cgroups/v2/state.go
generated
vendored
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// State is a type that represents the state of the current cgroup
|
||||
type State string
|
||||
|
||||
const (
|
||||
Unknown State = ""
|
||||
Thawed State = "thawed"
|
||||
Frozen State = "frozen"
|
||||
Deleted State = "deleted"
|
||||
|
||||
cgroupFreeze = "cgroup.freeze"
|
||||
)
|
||||
|
||||
func (s State) Values() []Value {
|
||||
v := Value{
|
||||
filename: cgroupFreeze,
|
||||
}
|
||||
switch s {
|
||||
case Frozen:
|
||||
v.value = "1"
|
||||
case Thawed:
|
||||
v.value = "0"
|
||||
}
|
||||
return []Value{
|
||||
v,
|
||||
}
|
||||
}
|
||||
|
||||
func fetchState(path string) (State, error) {
|
||||
current, err := ioutil.ReadFile(filepath.Join(path, cgroupFreeze))
|
||||
if err != nil {
|
||||
return Unknown, err
|
||||
}
|
||||
switch strings.TrimSpace(string(current)) {
|
||||
case "1":
|
||||
return Frozen, nil
|
||||
case "0":
|
||||
return Thawed, nil
|
||||
default:
|
||||
return Unknown, nil
|
||||
}
|
||||
}
|
17
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/doc.go
generated
vendored
Normal file
17
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/doc.go
generated
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package stats
|
3992
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.go
generated
vendored
Normal file
3992
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
539
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.txt
generated
vendored
Normal file
539
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.txt
generated
vendored
Normal file
@ -0,0 +1,539 @@
|
||||
file {
|
||||
name: "github.com/containerd/cgroups/v2/stats/metrics.proto"
|
||||
package: "io.containerd.cgroups.v2"
|
||||
dependency: "gogoproto/gogo.proto"
|
||||
message_type {
|
||||
name: "Metrics"
|
||||
field {
|
||||
name: "pids"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.PidsStat"
|
||||
json_name: "pids"
|
||||
}
|
||||
field {
|
||||
name: "cpu"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.CPUStat"
|
||||
options {
|
||||
65004: "CPU"
|
||||
}
|
||||
json_name: "cpu"
|
||||
}
|
||||
field {
|
||||
name: "memory"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.MemoryStat"
|
||||
json_name: "memory"
|
||||
}
|
||||
field {
|
||||
name: "rdma"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.RdmaStat"
|
||||
json_name: "rdma"
|
||||
}
|
||||
field {
|
||||
name: "io"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.IOStat"
|
||||
json_name: "io"
|
||||
}
|
||||
field {
|
||||
name: "hugetlb"
|
||||
number: 7
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.HugeTlbStat"
|
||||
json_name: "hugetlb"
|
||||
}
|
||||
field {
|
||||
name: "memory_events"
|
||||
number: 8
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.MemoryEvents"
|
||||
json_name: "memoryEvents"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "PidsStat"
|
||||
field {
|
||||
name: "current"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "current"
|
||||
}
|
||||
field {
|
||||
name: "limit"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "limit"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "CPUStat"
|
||||
field {
|
||||
name: "usage_usec"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "usageUsec"
|
||||
}
|
||||
field {
|
||||
name: "user_usec"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "userUsec"
|
||||
}
|
||||
field {
|
||||
name: "system_usec"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "systemUsec"
|
||||
}
|
||||
field {
|
||||
name: "nr_periods"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "nrPeriods"
|
||||
}
|
||||
field {
|
||||
name: "nr_throttled"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "nrThrottled"
|
||||
}
|
||||
field {
|
||||
name: "throttled_usec"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "throttledUsec"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "MemoryStat"
|
||||
field {
|
||||
name: "anon"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "anon"
|
||||
}
|
||||
field {
|
||||
name: "file"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "file"
|
||||
}
|
||||
field {
|
||||
name: "kernel_stack"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "kernelStack"
|
||||
}
|
||||
field {
|
||||
name: "slab"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "slab"
|
||||
}
|
||||
field {
|
||||
name: "sock"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "sock"
|
||||
}
|
||||
field {
|
||||
name: "shmem"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "shmem"
|
||||
}
|
||||
field {
|
||||
name: "file_mapped"
|
||||
number: 7
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "fileMapped"
|
||||
}
|
||||
field {
|
||||
name: "file_dirty"
|
||||
number: 8
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "fileDirty"
|
||||
}
|
||||
field {
|
||||
name: "file_writeback"
|
||||
number: 9
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "fileWriteback"
|
||||
}
|
||||
field {
|
||||
name: "anon_thp"
|
||||
number: 10
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "anonThp"
|
||||
}
|
||||
field {
|
||||
name: "inactive_anon"
|
||||
number: 11
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "inactiveAnon"
|
||||
}
|
||||
field {
|
||||
name: "active_anon"
|
||||
number: 12
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "activeAnon"
|
||||
}
|
||||
field {
|
||||
name: "inactive_file"
|
||||
number: 13
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "inactiveFile"
|
||||
}
|
||||
field {
|
||||
name: "active_file"
|
||||
number: 14
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "activeFile"
|
||||
}
|
||||
field {
|
||||
name: "unevictable"
|
||||
number: 15
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "unevictable"
|
||||
}
|
||||
field {
|
||||
name: "slab_reclaimable"
|
||||
number: 16
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "slabReclaimable"
|
||||
}
|
||||
field {
|
||||
name: "slab_unreclaimable"
|
||||
number: 17
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "slabUnreclaimable"
|
||||
}
|
||||
field {
|
||||
name: "pgfault"
|
||||
number: 18
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgfault"
|
||||
}
|
||||
field {
|
||||
name: "pgmajfault"
|
||||
number: 19
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgmajfault"
|
||||
}
|
||||
field {
|
||||
name: "workingset_refault"
|
||||
number: 20
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "workingsetRefault"
|
||||
}
|
||||
field {
|
||||
name: "workingset_activate"
|
||||
number: 21
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "workingsetActivate"
|
||||
}
|
||||
field {
|
||||
name: "workingset_nodereclaim"
|
||||
number: 22
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "workingsetNodereclaim"
|
||||
}
|
||||
field {
|
||||
name: "pgrefill"
|
||||
number: 23
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgrefill"
|
||||
}
|
||||
field {
|
||||
name: "pgscan"
|
||||
number: 24
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgscan"
|
||||
}
|
||||
field {
|
||||
name: "pgsteal"
|
||||
number: 25
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgsteal"
|
||||
}
|
||||
field {
|
||||
name: "pgactivate"
|
||||
number: 26
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgactivate"
|
||||
}
|
||||
field {
|
||||
name: "pgdeactivate"
|
||||
number: 27
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pgdeactivate"
|
||||
}
|
||||
field {
|
||||
name: "pglazyfree"
|
||||
number: 28
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pglazyfree"
|
||||
}
|
||||
field {
|
||||
name: "pglazyfreed"
|
||||
number: 29
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "pglazyfreed"
|
||||
}
|
||||
field {
|
||||
name: "thp_fault_alloc"
|
||||
number: 30
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "thpFaultAlloc"
|
||||
}
|
||||
field {
|
||||
name: "thp_collapse_alloc"
|
||||
number: 31
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "thpCollapseAlloc"
|
||||
}
|
||||
field {
|
||||
name: "usage"
|
||||
number: 32
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "usage"
|
||||
}
|
||||
field {
|
||||
name: "usage_limit"
|
||||
number: 33
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "usageLimit"
|
||||
}
|
||||
field {
|
||||
name: "swap_usage"
|
||||
number: 34
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "swapUsage"
|
||||
}
|
||||
field {
|
||||
name: "swap_limit"
|
||||
number: 35
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "swapLimit"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "MemoryEvents"
|
||||
field {
|
||||
name: "low"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "low"
|
||||
}
|
||||
field {
|
||||
name: "high"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "high"
|
||||
}
|
||||
field {
|
||||
name: "max"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "max"
|
||||
}
|
||||
field {
|
||||
name: "oom"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "oom"
|
||||
}
|
||||
field {
|
||||
name: "oom_kill"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "oomKill"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "RdmaStat"
|
||||
field {
|
||||
name: "current"
|
||||
number: 1
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.RdmaEntry"
|
||||
json_name: "current"
|
||||
}
|
||||
field {
|
||||
name: "limit"
|
||||
number: 2
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.RdmaEntry"
|
||||
json_name: "limit"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "RdmaEntry"
|
||||
field {
|
||||
name: "device"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_STRING
|
||||
json_name: "device"
|
||||
}
|
||||
field {
|
||||
name: "hca_handles"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT32
|
||||
json_name: "hcaHandles"
|
||||
}
|
||||
field {
|
||||
name: "hca_objects"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT32
|
||||
json_name: "hcaObjects"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "IOStat"
|
||||
field {
|
||||
name: "usage"
|
||||
number: 1
|
||||
label: LABEL_REPEATED
|
||||
type: TYPE_MESSAGE
|
||||
type_name: ".io.containerd.cgroups.v2.IOEntry"
|
||||
json_name: "usage"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "IOEntry"
|
||||
field {
|
||||
name: "major"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "major"
|
||||
}
|
||||
field {
|
||||
name: "minor"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "minor"
|
||||
}
|
||||
field {
|
||||
name: "rbytes"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "rbytes"
|
||||
}
|
||||
field {
|
||||
name: "wbytes"
|
||||
number: 4
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "wbytes"
|
||||
}
|
||||
field {
|
||||
name: "rios"
|
||||
number: 5
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "rios"
|
||||
}
|
||||
field {
|
||||
name: "wios"
|
||||
number: 6
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "wios"
|
||||
}
|
||||
}
|
||||
message_type {
|
||||
name: "HugeTlbStat"
|
||||
field {
|
||||
name: "current"
|
||||
number: 1
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "current"
|
||||
}
|
||||
field {
|
||||
name: "max"
|
||||
number: 2
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_UINT64
|
||||
json_name: "max"
|
||||
}
|
||||
field {
|
||||
name: "pagesize"
|
||||
number: 3
|
||||
label: LABEL_OPTIONAL
|
||||
type: TYPE_STRING
|
||||
json_name: "pagesize"
|
||||
}
|
||||
}
|
||||
syntax: "proto3"
|
||||
}
|
105
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.proto
generated
vendored
Normal file
105
src/runtime/vendor/github.com/containerd/cgroups/v2/stats/metrics.proto
generated
vendored
Normal file
@ -0,0 +1,105 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package io.containerd.cgroups.v2;
|
||||
|
||||
import "gogoproto/gogo.proto";
|
||||
|
||||
message Metrics {
|
||||
PidsStat pids = 1;
|
||||
CPUStat cpu = 2 [(gogoproto.customname) = "CPU"];
|
||||
MemoryStat memory = 4;
|
||||
RdmaStat rdma = 5;
|
||||
IOStat io = 6;
|
||||
repeated HugeTlbStat hugetlb = 7;
|
||||
MemoryEvents memory_events = 8;
|
||||
}
|
||||
|
||||
message PidsStat {
|
||||
uint64 current = 1;
|
||||
uint64 limit = 2;
|
||||
}
|
||||
|
||||
message CPUStat {
|
||||
uint64 usage_usec = 1;
|
||||
uint64 user_usec = 2;
|
||||
uint64 system_usec = 3;
|
||||
uint64 nr_periods = 4;
|
||||
uint64 nr_throttled = 5;
|
||||
uint64 throttled_usec = 6;
|
||||
}
|
||||
|
||||
message MemoryStat {
|
||||
uint64 anon = 1;
|
||||
uint64 file = 2;
|
||||
uint64 kernel_stack = 3;
|
||||
uint64 slab = 4;
|
||||
uint64 sock = 5;
|
||||
uint64 shmem = 6;
|
||||
uint64 file_mapped = 7;
|
||||
uint64 file_dirty = 8;
|
||||
uint64 file_writeback = 9;
|
||||
uint64 anon_thp = 10;
|
||||
uint64 inactive_anon = 11;
|
||||
uint64 active_anon = 12;
|
||||
uint64 inactive_file = 13;
|
||||
uint64 active_file = 14;
|
||||
uint64 unevictable = 15;
|
||||
uint64 slab_reclaimable = 16;
|
||||
uint64 slab_unreclaimable = 17;
|
||||
uint64 pgfault = 18;
|
||||
uint64 pgmajfault = 19;
|
||||
uint64 workingset_refault = 20;
|
||||
uint64 workingset_activate = 21;
|
||||
uint64 workingset_nodereclaim = 22;
|
||||
uint64 pgrefill = 23;
|
||||
uint64 pgscan = 24;
|
||||
uint64 pgsteal = 25;
|
||||
uint64 pgactivate = 26;
|
||||
uint64 pgdeactivate = 27;
|
||||
uint64 pglazyfree = 28;
|
||||
uint64 pglazyfreed = 29;
|
||||
uint64 thp_fault_alloc = 30;
|
||||
uint64 thp_collapse_alloc = 31;
|
||||
uint64 usage = 32;
|
||||
uint64 usage_limit = 33;
|
||||
uint64 swap_usage = 34;
|
||||
uint64 swap_limit = 35;
|
||||
}
|
||||
|
||||
message MemoryEvents {
|
||||
uint64 low = 1;
|
||||
uint64 high = 2;
|
||||
uint64 max = 3;
|
||||
uint64 oom = 4;
|
||||
uint64 oom_kill = 5;
|
||||
}
|
||||
|
||||
message RdmaStat {
|
||||
repeated RdmaEntry current = 1;
|
||||
repeated RdmaEntry limit = 2;
|
||||
}
|
||||
|
||||
message RdmaEntry {
|
||||
string device = 1;
|
||||
uint32 hca_handles = 2;
|
||||
uint32 hca_objects = 3;
|
||||
}
|
||||
|
||||
message IOStat {
|
||||
repeated IOEntry usage = 1;
|
||||
}
|
||||
|
||||
message IOEntry {
|
||||
uint64 major = 1;
|
||||
uint64 minor = 2;
|
||||
uint64 rbytes = 3;
|
||||
uint64 wbytes = 4;
|
||||
uint64 rios = 5;
|
||||
uint64 wios = 6;
|
||||
}
|
||||
|
||||
message HugeTlbStat {
|
||||
uint64 current = 1;
|
||||
uint64 max = 2;
|
||||
string pagesize = 3;
|
||||
}
|
436
src/runtime/vendor/github.com/containerd/cgroups/v2/utils.go
generated
vendored
Normal file
436
src/runtime/vendor/github.com/containerd/cgroups/v2/utils.go
generated
vendored
Normal file
@ -0,0 +1,436 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/cgroups/v2/stats"
|
||||
|
||||
"github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupProcs = "cgroup.procs"
|
||||
defaultDirPerm = 0755
|
||||
)
|
||||
|
||||
// defaultFilePerm is a var so that the test framework can change the filemode
|
||||
// of all files created when the tests are running. The difference between the
|
||||
// tests and real world use is that files like "cgroup.procs" will exist when writing
|
||||
// to a read cgroup filesystem and do not exist prior when running in the tests.
|
||||
// this is set to a non 0 value in the test code
|
||||
var defaultFilePerm = os.FileMode(0)
|
||||
|
||||
// remove will remove a cgroup path handling EAGAIN and EBUSY errors and
|
||||
// retrying the remove after a exp timeout
|
||||
func remove(path string) error {
|
||||
var err error
|
||||
delay := 10 * time.Millisecond
|
||||
for i := 0; i < 5; i++ {
|
||||
if i != 0 {
|
||||
time.Sleep(delay)
|
||||
delay *= 2
|
||||
}
|
||||
if err = os.RemoveAll(path); err == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("cgroups: unable to remove path %q: %w", path, err)
|
||||
}
|
||||
|
||||
// parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs
|
||||
func parseCgroupProcsFile(path string) ([]uint64, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
var (
|
||||
out []uint64
|
||||
s = bufio.NewScanner(f)
|
||||
)
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.ParseUint(t, 10, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func parseKV(raw string) (string, interface{}, error) {
|
||||
parts := strings.Fields(raw)
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
v, err := parseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
// if we cannot parse as a uint, parse as a string
|
||||
return parts[0], parts[1], nil
|
||||
}
|
||||
return parts[0], v, nil
|
||||
default:
|
||||
return "", 0, ErrInvalidFormat
|
||||
}
|
||||
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
v, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if intErr != nil &&
|
||||
intErr.(*strconv.NumError).Err == strconv.ErrRange &&
|
||||
intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
)
|
||||
for s.Scan() {
|
||||
var (
|
||||
text = s.Text()
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return "", fmt.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
|
||||
if parts[0] == "0" && parts[1] == "" {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return "", fmt.Errorf("cgroup path not found")
|
||||
}
|
||||
|
||||
// ToResources converts the oci LinuxResources struct into a
|
||||
// v2 Resources type for use with this package.
|
||||
//
|
||||
// converting cgroups configuration from v1 to v2
|
||||
// ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2
|
||||
func ToResources(spec *specs.LinuxResources) *Resources {
|
||||
var resources Resources
|
||||
if cpu := spec.CPU; cpu != nil {
|
||||
resources.CPU = &CPU{
|
||||
Cpus: cpu.Cpus,
|
||||
Mems: cpu.Mems,
|
||||
}
|
||||
if shares := cpu.Shares; shares != nil {
|
||||
convertedWeight := 1 + ((*shares-2)*9999)/262142
|
||||
resources.CPU.Weight = &convertedWeight
|
||||
}
|
||||
if period := cpu.Period; period != nil {
|
||||
resources.CPU.Max = NewCPUMax(cpu.Quota, period)
|
||||
}
|
||||
}
|
||||
if mem := spec.Memory; mem != nil {
|
||||
resources.Memory = &Memory{}
|
||||
if swap := mem.Swap; swap != nil {
|
||||
resources.Memory.Swap = swap
|
||||
}
|
||||
if l := mem.Limit; l != nil {
|
||||
resources.Memory.Max = l
|
||||
}
|
||||
if l := mem.Reservation; l != nil {
|
||||
resources.Memory.Low = l
|
||||
}
|
||||
}
|
||||
if hugetlbs := spec.HugepageLimits; hugetlbs != nil {
|
||||
hugeTlbUsage := HugeTlb{}
|
||||
for _, hugetlb := range hugetlbs {
|
||||
hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{
|
||||
HugePageSize: hugetlb.Pagesize,
|
||||
Limit: hugetlb.Limit,
|
||||
})
|
||||
}
|
||||
resources.HugeTlb = &hugeTlbUsage
|
||||
}
|
||||
if pids := spec.Pids; pids != nil {
|
||||
resources.Pids = &Pids{
|
||||
Max: pids.Limit,
|
||||
}
|
||||
}
|
||||
if i := spec.BlockIO; i != nil {
|
||||
resources.IO = &IO{}
|
||||
if i.Weight != nil {
|
||||
resources.IO.BFQ.Weight = 1 + (*i.Weight-10)*9999/990
|
||||
}
|
||||
for t, devices := range map[IOType][]specs.LinuxThrottleDevice{
|
||||
ReadBPS: i.ThrottleReadBpsDevice,
|
||||
WriteBPS: i.ThrottleWriteBpsDevice,
|
||||
ReadIOPS: i.ThrottleReadIOPSDevice,
|
||||
WriteIOPS: i.ThrottleWriteIOPSDevice,
|
||||
} {
|
||||
for _, d := range devices {
|
||||
resources.IO.Max = append(resources.IO.Max, Entry{
|
||||
Type: t,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
Rate: d.Rate,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
if i := spec.Rdma; i != nil {
|
||||
resources.RDMA = &RDMA{}
|
||||
for device, value := range spec.Rdma {
|
||||
if device != "" && (value.HcaHandles != nil && value.HcaObjects != nil) {
|
||||
resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{
|
||||
Device: device,
|
||||
HcaHandles: *value.HcaHandles,
|
||||
HcaObjects: *value.HcaObjects,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &resources
|
||||
}
|
||||
|
||||
// Gets uint64 parsed content of single value cgroup stat file
|
||||
func getStatFileContentUint64(filePath string) uint64 {
|
||||
contents, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
trimmed := strings.TrimSpace(string(contents))
|
||||
if trimmed == "max" {
|
||||
return math.MaxUint64
|
||||
}
|
||||
|
||||
res, err := parseUint(trimmed, 10, 64)
|
||||
if err != nil {
|
||||
logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), filePath)
|
||||
return res
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func readIoStats(path string) []*stats.IOEntry {
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var usage []*stats.IOEntry
|
||||
fpath := filepath.Join(path, "io.stat")
|
||||
currentData, err := ioutil.ReadFile(fpath)
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
entries := strings.Split(string(currentData), "\n")
|
||||
|
||||
for _, entry := range entries {
|
||||
parts := strings.Split(entry, " ")
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
majmin := strings.Split(parts[0], ":")
|
||||
if len(majmin) != 2 {
|
||||
continue
|
||||
}
|
||||
major, err := strconv.ParseUint(majmin[0], 10, 0)
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
minor, err := strconv.ParseUint(majmin[1], 10, 0)
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
parts = parts[1:]
|
||||
ioEntry := stats.IOEntry{
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
}
|
||||
for _, s := range parts {
|
||||
keyPairValue := strings.Split(s, "=")
|
||||
if len(keyPairValue) != 2 {
|
||||
continue
|
||||
}
|
||||
v, err := strconv.ParseUint(keyPairValue[1], 10, 0)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
switch keyPairValue[0] {
|
||||
case "rbytes":
|
||||
ioEntry.Rbytes = v
|
||||
case "wbytes":
|
||||
ioEntry.Wbytes = v
|
||||
case "rios":
|
||||
ioEntry.Rios = v
|
||||
case "wios":
|
||||
ioEntry.Wios = v
|
||||
}
|
||||
}
|
||||
usage = append(usage, &ioEntry)
|
||||
}
|
||||
return usage
|
||||
}
|
||||
|
||||
func rdmaStats(filepath string) []*stats.RdmaEntry {
|
||||
currentData, err := ioutil.ReadFile(filepath)
|
||||
if err != nil {
|
||||
return []*stats.RdmaEntry{}
|
||||
}
|
||||
return toRdmaEntry(strings.Split(string(currentData), "\n"))
|
||||
}
|
||||
|
||||
func parseRdmaKV(raw string, entry *stats.RdmaEntry) {
|
||||
var value uint64
|
||||
var err error
|
||||
|
||||
parts := strings.Split(raw, "=")
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
if parts[1] == "max" {
|
||||
value = math.MaxUint32
|
||||
} else {
|
||||
value, err = parseUint(parts[1], 10, 32)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
if parts[0] == "hca_handle" {
|
||||
entry.HcaHandles = uint32(value)
|
||||
} else if parts[0] == "hca_object" {
|
||||
entry.HcaObjects = uint32(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func toRdmaEntry(strEntries []string) []*stats.RdmaEntry {
|
||||
var rdmaEntries []*stats.RdmaEntry
|
||||
for i := range strEntries {
|
||||
parts := strings.Fields(strEntries[i])
|
||||
switch len(parts) {
|
||||
case 3:
|
||||
entry := new(stats.RdmaEntry)
|
||||
entry.Device = parts[0]
|
||||
parseRdmaKV(parts[1], entry)
|
||||
parseRdmaKV(parts[2], entry)
|
||||
|
||||
rdmaEntries = append(rdmaEntries, entry)
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
return rdmaEntries
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func systemdUnitFromPath(path string) string {
|
||||
_, unit := filepath.Split(path)
|
||||
return unit
|
||||
}
|
||||
|
||||
func readHugeTlbStats(path string) []*stats.HugeTlbStat {
|
||||
var usage = []*stats.HugeTlbStat{}
|
||||
var keyUsage = make(map[string]*stats.HugeTlbStat)
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
files, err := f.Readdir(-1)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return usage
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
if strings.Contains(file.Name(), "hugetlb") &&
|
||||
(strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) {
|
||||
var hugeTlb *stats.HugeTlbStat
|
||||
var ok bool
|
||||
fileName := strings.Split(file.Name(), ".")
|
||||
pageSize := fileName[1]
|
||||
if hugeTlb, ok = keyUsage[pageSize]; !ok {
|
||||
hugeTlb = &stats.HugeTlbStat{}
|
||||
}
|
||||
hugeTlb.Pagesize = pageSize
|
||||
out, err := ioutil.ReadFile(filepath.Join(path, file.Name()))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
var value uint64
|
||||
stringVal := strings.TrimSpace(string(out))
|
||||
if stringVal == "max" {
|
||||
value = math.MaxUint64
|
||||
} else {
|
||||
value, err = strconv.ParseUint(stringVal, 10, 64)
|
||||
}
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
switch fileName[2] {
|
||||
case "max":
|
||||
hugeTlb.Max = value
|
||||
case "current":
|
||||
hugeTlb.Current = value
|
||||
}
|
||||
keyUsage[pageSize] = hugeTlb
|
||||
}
|
||||
}
|
||||
for _, entry := range keyUsage {
|
||||
usage = append(usage, entry)
|
||||
}
|
||||
return usage
|
||||
}
|
4
src/runtime/vendor/modules.txt
vendored
4
src/runtime/vendor/modules.txt
vendored
@ -57,10 +57,12 @@ github.com/cilium/ebpf/internal
|
||||
github.com/cilium/ebpf/internal/btf
|
||||
github.com/cilium/ebpf/internal/unix
|
||||
github.com/cilium/ebpf/link
|
||||
# github.com/containerd/cgroups v1.0.3
|
||||
# github.com/containerd/cgroups v1.0.5-0.20220625035431-cf7417bca682
|
||||
## explicit
|
||||
github.com/containerd/cgroups
|
||||
github.com/containerd/cgroups/stats/v1
|
||||
github.com/containerd/cgroups/v2
|
||||
github.com/containerd/cgroups/v2/stats
|
||||
# github.com/containerd/console v1.0.3
|
||||
## explicit
|
||||
github.com/containerd/console
|
||||
|
@ -20,6 +20,8 @@ import (
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
v1 "github.com/containerd/cgroups/stats/v1"
|
||||
v2 "github.com/containerd/cgroups/v2/stats"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
@ -698,7 +700,7 @@ func (s *Sandbox) createResourceController() error {
|
||||
return fmt.Errorf("Could not create the sandbox resource controller %v", err)
|
||||
}
|
||||
|
||||
// Now that the sandbox resource controller is created, we can set the state controller paths..
|
||||
// Now that the sandbox resource controller is created, we can set the state controller paths.
|
||||
s.state.SandboxCgroupPath = s.sandboxController.ID()
|
||||
s.state.OverheadCgroupPath = ""
|
||||
|
||||
@ -709,7 +711,7 @@ func (s *Sandbox) createResourceController() error {
|
||||
// into the sandbox resource controller.
|
||||
// We're creating an overhead controller, with no constraints. Everything but
|
||||
// the vCPU threads will eventually make it there.
|
||||
overheadController, err := resCtrl.NewResourceController(fmt.Sprintf("/%s/%s", resCtrlKataOverheadID, s.id), &specs.LinuxResources{})
|
||||
overheadController, err := resCtrl.NewResourceController(fmt.Sprintf("%s%s", resCtrlKataOverheadID, s.id), &specs.LinuxResources{})
|
||||
// TODO: support systemd cgroups overhead cgroup
|
||||
// https://github.com/kata-containers/kata-containers/issues/2963
|
||||
if err != nil {
|
||||
@ -1544,8 +1546,15 @@ func (s *Sandbox) Stats(ctx context.Context) (SandboxStats, error) {
|
||||
stats := SandboxStats{}
|
||||
|
||||
// TODO Do we want to aggregate the overhead cgroup stats to the sandbox ones?
|
||||
stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = metrics.CPU.Usage.Total
|
||||
stats.CgroupStats.MemoryStats.Usage.Usage = metrics.Memory.Usage.Usage
|
||||
switch mt := metrics.(type) {
|
||||
case v1.Metrics:
|
||||
stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = mt.CPU.Usage.Total
|
||||
stats.CgroupStats.MemoryStats.Usage.Usage = mt.Memory.Usage.Usage
|
||||
case v2.Metrics:
|
||||
stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = mt.CPU.UsageUsec
|
||||
stats.CgroupStats.MemoryStats.Usage.Usage = mt.Memory.Usage
|
||||
}
|
||||
|
||||
tids, err := s.hypervisor.GetThreadIDs(ctx)
|
||||
if err != nil {
|
||||
return stats, err
|
||||
|
Loading…
Reference in New Issue
Block a user