mirror of https://github.com/rancher/os.git synced 2025-07-01 01:01:48 +00:00

Merge pull request #529 from ibuildthecloud/storage

Storage
Ivan Mikushin, 2015-08-27 17:24:25 +05:00, commit 94269475c2
74 changed files with 6422 additions and 33 deletions

Godeps/Godeps.json (generated, 24 changed lines)

@@ -330,6 +330,26 @@
"ImportPath": "github.com/kless/term",
"Rev": "d57d9d2d5be197e12d9dee142d855470d83ce62f"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups",
"Comment": "v0.0.2-32-gb40c790",
"Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/configs",
"Comment": "v0.0.2-32-gb40c790",
"Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/nsenter",
"Comment": "v0.0.2-32-gb40c790",
"Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/system",
"Comment": "v0.0.2-32-gb40c790",
"Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/user",
"Comment": "v0.0.2-32-gb40c790",
@@ -337,8 +357,8 @@
},
{
"ImportPath": "github.com/rancher/docker-from-scratch",
"Comment": "1.8.1",
"Rev": "f348f946923bd2938a56f3bee21cf44431e05181"
"Comment": "1.8.1-2-gbcc17ea",
"Rev": "bcc17eae6f79e46cb65a74e4406cb3bcc24c38fb"
},
{
"ImportPath": "github.com/rancher/netconf",

@@ -0,0 +1,61 @@
// +build linux
package cgroups
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Manager interface {
// Apply cgroup configuration to the process with the specified pid
Apply(pid int) error
// Returns the PIDs inside the cgroup set
GetPids() ([]int, error)
// Returns statistics for the cgroup set
GetStats() (*Stats, error)
// Toggles the freezer cgroup according to the specified state
Freeze(state configs.FreezerState) error
// Destroys the cgroup set
Destroy() error
// NewCgroupManager() and LoadCgroupManager() require the following attributes:
// Paths map[string]string
// Cgroups *cgroups.Cgroup
// Paths maps each cgroup subsystem to the path at which it is mounted.
// Cgroups holds the cgroup settings for the various subsystems.
// Returns cgroup paths to save in a state file and to be able to
// restore the object later.
GetPaths() map[string]string
// Set the cgroup as configured.
Set(container *configs.Config) error
}
type NotFoundError struct {
Subsystem string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}
func NewNotFoundError(sub string) error {
return &NotFoundError{
Subsystem: sub,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}
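
For orientation, here is a minimal, purely illustrative sketch of how a Manager implementation (such as the fs.Manager added later in this diff) is typically driven. The cgroup name and parent below are made up, and the sketch assumes root privileges with the cgroup v1 hierarchies mounted; it is not part of this commit.

package main

import (
    "fmt"
    "os"

    "github.com/opencontainers/runc/libcontainer/cgroups"
    "github.com/opencontainers/runc/libcontainer/cgroups/fs"
    "github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
    // Hypothetical cgroup configuration; "demo" and "rancher" are placeholders.
    m := &fs.Manager{
        Cgroups: &configs.Cgroup{
            Name:            "demo",
            Parent:          "rancher",
            AllowAllDevices: true,
        },
    }

    // Place the current process into the cgroup set.
    if err := m.Apply(os.Getpid()); err != nil {
        if cgroups.IsNotFound(err) {
            fmt.Println("a cgroup subsystem is not mounted:", err)
            return
        }
        panic(err)
    }
    defer m.Destroy()

    pids, err := m.GetPids()
    if err != nil {
        panic(err)
    }
    stats, err := m.GetStats()
    if err != nil {
        panic(err)
    }
    fmt.Println("pids in cgroup:", pids)
    fmt.Println("total CPU usage (ns):", stats.CpuStats.CpuUsage.TotalUsage)
}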

@@ -0,0 +1,29 @@
// +build linux
package cgroups
import (
"bytes"
"testing"
)
const (
cgroupsContents = `11:hugetlb:/
10:perf_event:/
9:blkio:/
8:net_cls:/
7:freezer:/
6:devices:/
5:memory:/
4:cpuacct,cpu:/
3:cpuset:/
2:name=systemd:/user.slice/user-1000.slice/session-16.scope`
)
func TestParseCgroups(t *testing.T) {
r := bytes.NewBuffer([]byte(cgroupsContents))
_, err := ParseCgroupFile("blkio", r)
if err != nil {
t.Fatal(err)
}
}
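
ParseCgroupFile, the helper exercised by this test, also works against /proc/self/cgroup, which has the same format as cgroupsContents. A hedged, standalone sketch (the choice of the blkio subsystem is arbitrary):

package main

import (
    "fmt"
    "os"

    "github.com/opencontainers/runc/libcontainer/cgroups"
)

func main() {
    // /proc/self/cgroup lists "id:subsystems:path" lines, as in cgroupsContents above.
    f, err := os.Open("/proc/self/cgroup")
    if err != nil {
        panic(err)
    }
    defer f.Close()

    // Returns the hierarchy path of the calling process for the blkio subsystem.
    path, err := cgroups.ParseCgroupFile("blkio", f)
    if err != nil {
        panic(err)
    }
    fmt.Println("blkio cgroup path:", path)
}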

@@ -0,0 +1,3 @@
// +build !linux
package cgroups

@@ -0,0 +1,333 @@
// +build linux
package fs
import (
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
subsystems = map[string]subsystem{
"devices": &DevicesGroup{},
"memory": &MemoryGroup{},
"cpu": &CpuGroup{},
"cpuset": &CpusetGroup{},
"cpuacct": &CpuacctGroup{},
"blkio": &BlkioGroup{},
"hugetlb": &HugetlbGroup{},
"net_cls": &NetClsGroup{},
"net_prio": &NetPrioGroup{},
"perf_event": &PerfEventGroup{},
"freezer": &FreezerGroup{},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
)
type subsystem interface {
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
// Removes the cgroup represented by 'data'.
Remove(*data) error
// Creates and joins the cgroup represented by data.
Apply(*data) error
// Set the cgroup represented by cgroup.
Set(path string, cgroup *configs.Cgroup) error
}
type Manager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
}
// The absolute path to the root of the cgroup hierarchies.
var cgroupRootLock sync.Mutex
var cgroupRoot string
// Gets the cgroupRoot.
func getCgroupRoot() (string, error) {
cgroupRootLock.Lock()
defer cgroupRootLock.Unlock()
if cgroupRoot != "" {
return cgroupRoot, nil
}
root, err := cgroups.FindCgroupMountpointDir()
if err != nil {
return "", err
}
if _, err := os.Stat(root); err != nil {
return "", err
}
cgroupRoot = root
return cgroupRoot, nil
}
type data struct {
root string
cgroup string
c *configs.Cgroup
pid int
}
func (m *Manager) Apply(pid int) error {
if m.Cgroups == nil {
return nil
}
var c = m.Cgroups
d, err := getCgroupData(m.Cgroups, pid)
if err != nil {
return err
}
paths := make(map[string]string)
defer func() {
if err != nil {
cgroups.RemovePaths(paths)
}
}()
for name, sys := range subsystems {
if err := sys.Apply(d); err != nil {
return err
}
// TODO: Apply should, ideally, be reentrant or be broken up into separate
// create and join phases, so that the cgroup hierarchy for a container can be
// created first and join then consists of writing the process pids to cgroup.procs.
p, err := d.path(name)
if err != nil {
if cgroups.IsNotFound(err) {
continue
}
return err
}
paths[name] = p
}
m.Paths = paths
if paths["cpu"] != "" {
if err := CheckCpushares(paths["cpu"], c.CpuShares); err != nil {
return err
}
}
return nil
}
func (m *Manager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
if err := cgroups.RemovePaths(m.Paths); err != nil {
return err
}
m.Paths = make(map[string]string)
return nil
}
func (m *Manager) GetPaths() map[string]string {
m.mu.Lock()
paths := m.Paths
m.mu.Unlock()
return paths
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
m.mu.Lock()
defer m.mu.Unlock()
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, ok := subsystems[name]
if !ok || !cgroups.PathExists(path) {
continue
}
if err := sys.GetStats(path, stats); err != nil {
return nil, err
}
}
return stats, nil
}
func (m *Manager) Set(container *configs.Config) error {
for name, path := range m.Paths {
sys, ok := subsystems[name]
if !ok || !cgroups.PathExists(path) {
continue
}
if err := sys.Set(path, container.Cgroups); err != nil {
return err
}
}
return nil
}
// Freeze toggles the container's freezer cgroup depending on the state
// provided
func (m *Manager) Freeze(state configs.FreezerState) error {
d, err := getCgroupData(m.Cgroups, 0)
if err != nil {
return err
}
dir, err := d.path("freezer")
if err != nil {
return err
}
prevState := m.Cgroups.Freezer
m.Cgroups.Freezer = state
freezer := subsystems["freezer"]
err = freezer.Set(dir, m.Cgroups)
if err != nil {
m.Cgroups.Freezer = prevState
return err
}
return nil
}
func (m *Manager) GetPids() ([]int, error) {
d, err := getCgroupData(m.Cgroups, 0)
if err != nil {
return nil, err
}
dir, err := d.path("devices")
if err != nil {
return nil, err
}
return cgroups.ReadProcsFile(dir)
}
func getCgroupData(c *configs.Cgroup, pid int) (*data, error) {
root, err := getCgroupRoot()
if err != nil {
return nil, err
}
cgroup := c.Name
if c.Parent != "" {
cgroup = filepath.Join(c.Parent, cgroup)
}
return &data{
root: root,
cgroup: cgroup,
c: c,
pid: pid,
}, nil
}
func (raw *data) parent(subsystem, mountpoint, src string) (string, error) {
initPath, err := cgroups.GetInitCgroupDir(subsystem)
if err != nil {
return "", err
}
relDir, err := filepath.Rel(src, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}
func (raw *data) path(subsystem string) (string, error) {
mnt, src, err := cgroups.FindCgroupMountpointAndSource(subsystem)
// If the subsystem is not mounted, there is no point in building the path.
if err != nil {
return "", err
}
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.cgroup) {
return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil
}
parent, err := raw.parent(subsystem, mnt, src)
if err != nil {
return "", err
}
return filepath.Join(parent, raw.cgroup), nil
}
func (raw *data) join(subsystem string) (string, error) {
path, err := raw.path(subsystem)
if err != nil {
return "", err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return "", err
}
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
return "", err
}
return path, nil
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty; one case where it is empty is when the
// cgroup subsystem is not mounted, and in that case we want the write to fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s.", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func readFile(dir, file string) (string, error) {
data, err := ioutil.ReadFile(filepath.Join(dir, file))
return string(data), err
}
func removePath(p string, err error) error {
if err != nil {
return err
}
if p != "" {
return os.RemoveAll(p)
}
return nil
}
func CheckCpushares(path string, c int64) error {
var cpuShares int64
if c == 0 {
return nil
}
fd, err := os.Open(filepath.Join(path, "cpu.shares"))
if err != nil {
return err
}
defer fd.Close()
_, err = fmt.Fscanf(fd, "%d", &cpuShares)
if err != nil && err != io.EOF {
return err
}
if c > cpuShares {
return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares)
} else if c < cpuShares {
return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares)
}
return nil
}
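
To make the join step concrete: data.join ultimately boils down to creating the per-subsystem directory and writing the pid into cgroup.procs. Below is a self-contained, hypothetical equivalent with the mountpoint and cgroup names hard-coded for illustration (the real code resolves them via FindCgroupMountpointAndSource and GetInitCgroupDir).

package main

import (
    "fmt"
    "io/ioutil"
    "os"
    "path/filepath"
    "strconv"
)

func main() {
    // Illustrative only: assumes the cpu subsystem is mounted at /sys/fs/cgroup/cpu.
    path := filepath.Join("/sys/fs/cgroup/cpu", "rancher", "demo")

    // Mirrors data.join: create the cgroup directory, then add our pid to cgroup.procs.
    if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
        panic(err)
    }
    pid := strconv.Itoa(os.Getpid())
    if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(pid), 0700); err != nil {
        panic(err)
    }
    fmt.Println("joined", path)
}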

@@ -0,0 +1,230 @@
// +build linux
package fs
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type BlkioGroup struct {
}
func (s *BlkioGroup) Apply(d *data) error {
dir, err := d.join("blkio")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.BlkioWeight != 0 {
if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil {
return err
}
}
if cgroup.BlkioWeightDevice != "" {
if err := writeFile(path, "blkio.weight_device", cgroup.BlkioWeightDevice); err != nil {
return err
}
}
if cgroup.BlkioThrottleReadBpsDevice != "" {
if err := writeFile(path, "blkio.throttle.read_bps_device", cgroup.BlkioThrottleReadBpsDevice); err != nil {
return err
}
}
if cgroup.BlkioThrottleWriteBpsDevice != "" {
if err := writeFile(path, "blkio.throttle.write_bps_device", cgroup.BlkioThrottleWriteBpsDevice); err != nil {
return err
}
}
if cgroup.BlkioThrottleReadIOpsDevice != "" {
if err := writeFile(path, "blkio.throttle.read_iops_device", cgroup.BlkioThrottleReadIOpsDevice); err != nil {
return err
}
}
if cgroup.BlkioThrottleWriteIOpsDevice != "" {
if err := writeFile(path, "blkio.throttle.write_iops_device", cgroup.BlkioThrottleWriteIOpsDevice); err != nil {
return err
}
}
return nil
}
func (s *BlkioGroup) Remove(d *data) error {
return removePath(d.path("blkio"))
}
/*
examples:
blkio.sectors
8:0 6792
blkio.io_service_bytes
8:0 Read 1282048
8:0 Write 2195456
8:0 Sync 2195456
8:0 Async 1282048
8:0 Total 3477504
Total 3477504
blkio.io_serviced
8:0 Read 124
8:0 Write 104
8:0 Sync 104
8:0 Async 124
8:0 Total 228
Total 228
blkio.io_queued
8:0 Read 0
8:0 Write 0
8:0 Sync 0
8:0 Async 0
8:0 Total 0
Total 0
*/
func splitBlkioStatLine(r rune) bool {
return r == ' ' || r == ':'
}
func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
var blkioStats []cgroups.BlkioStatEntry
f, err := os.Open(path)
if err != nil {
if os.IsNotExist(err) {
return blkioStats, nil
}
return nil, err
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
// format: dev type amount
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
if len(fields) < 3 {
if len(fields) == 2 && fields[0] == "Total" {
// skip total line
continue
} else {
return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
}
}
v, err := strconv.ParseUint(fields[0], 10, 64)
if err != nil {
return nil, err
}
major := v
v, err = strconv.ParseUint(fields[1], 10, 64)
if err != nil {
return nil, err
}
minor := v
op := ""
valueField := 2
if len(fields) == 4 {
op = fields[2]
valueField = 3
}
v, err = strconv.ParseUint(fields[valueField], 10, 64)
if err != nil {
return nil, err
}
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
}
return blkioStats, nil
}
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
// Try to read CFQ stats available on all CFQ enabled kernels first
if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil {
return getCFQStats(path, stats)
}
return getStats(path, stats) // Use generic stats as fallback
}
func getCFQStats(path string, stats *cgroups.Stats) error {
var blkioStats []cgroups.BlkioStatEntry
var err error
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
return err
}
stats.BlkioStats.SectorsRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
return err
}
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
return err
}
stats.BlkioStats.IoServicedRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
return err
}
stats.BlkioStats.IoQueuedRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil {
return err
}
stats.BlkioStats.IoServiceTimeRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil {
return err
}
stats.BlkioStats.IoWaitTimeRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil {
return err
}
stats.BlkioStats.IoMergedRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil {
return err
}
stats.BlkioStats.IoTimeRecursive = blkioStats
return nil
}
func getStats(path string, stats *cgroups.Stats) error {
var blkioStats []cgroups.BlkioStatEntry
var err error
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil {
return err
}
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil {
return err
}
stats.BlkioStats.IoServicedRecursive = blkioStats
return nil
}
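
A standalone sketch of the line format getBlkioStat handles, using the same FieldsFunc splitter; the sample line is taken from the blkio.io_service_bytes example in the comment above.

package main

import (
    "fmt"
    "strconv"
    "strings"
)

// Same rule as splitBlkioStatLine: fields are separated by spaces and ':'.
func split(r rune) bool { return r == ' ' || r == ':' }

func main() {
    line := "8:0 Read 1282048" // major:minor operation value
    fields := strings.FieldsFunc(line, split)

    major, _ := strconv.ParseUint(fields[0], 10, 64)
    minor, _ := strconv.ParseUint(fields[1], 10, 64)
    op := fields[2]
    value, _ := strconv.ParseUint(fields[3], 10, 64)

    // Corresponds to cgroups.BlkioStatEntry{Major: 8, Minor: 0, Op: "Read", Value: 1282048}.
    fmt.Printf("major=%d minor=%d op=%s value=%d\n", major, minor, op, value)
}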

@@ -0,0 +1,570 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
const (
sectorsRecursiveContents = `8:0 1024`
serviceBytesRecursiveContents = `8:0 Read 100
8:0 Write 200
8:0 Sync 300
8:0 Async 500
8:0 Total 500
Total 500`
servicedRecursiveContents = `8:0 Read 10
8:0 Write 40
8:0 Sync 20
8:0 Async 30
8:0 Total 50
Total 50`
queuedRecursiveContents = `8:0 Read 1
8:0 Write 4
8:0 Sync 2
8:0 Async 3
8:0 Total 5
Total 5`
serviceTimeRecursiveContents = `8:0 Read 173959
8:0 Write 0
8:0 Sync 0
8:0 Async 173959
8:0 Total 17395
Total 17395`
waitTimeRecursiveContents = `8:0 Read 15571
8:0 Write 0
8:0 Sync 0
8:0 Async 15571
8:0 Total 15571`
mergedRecursiveContents = `8:0 Read 5
8:0 Write 10
8:0 Sync 0
8:0 Async 0
8:0 Total 15
Total 15`
timeRecursiveContents = `8:0 8`
throttleServiceBytes = `8:0 Read 11030528
8:0 Write 23
8:0 Sync 42
8:0 Async 11030528
8:0 Total 11030528
252:0 Read 11030528
252:0 Write 23
252:0 Sync 42
252:0 Async 11030528
252:0 Total 11030528
Total 22061056`
throttleServiced = `8:0 Read 164
8:0 Write 23
8:0 Sync 42
8:0 Async 164
8:0 Total 164
252:0 Read 164
252:0 Write 23
252:0 Sync 42
252:0 Async 164
252:0 Total 164
Total 328`
throttleBefore = `8:0 1024`
throttleAfter = `8:0 2048`
)
func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) {
*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
}
func TestBlkioSetWeight(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightBefore = 100
weightAfter = 200
)
helper.writeFileContents(map[string]string{
"blkio.weight": strconv.Itoa(weightBefore),
})
helper.CgroupData.c.BlkioWeight = weightAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight")
if err != nil {
t.Fatalf("Failed to parse blkio.weight - %s", err)
}
if value != weightAfter {
t.Fatal("Got the wrong value, set blkio.weight failed.")
}
}
func TestBlkioSetWeightDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
const (
weightDeviceBefore = "8:0 400"
weightDeviceAfter = "8:0 500"
)
helper.writeFileContents(map[string]string{
"blkio.weight_device": weightDeviceBefore,
})
helper.CgroupData.c.BlkioWeightDevice = weightDeviceAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
if err != nil {
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
}
if value != weightDeviceAfter {
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
}
}
func TestBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
// Verify expected stats.
expectedStats := cgroups.BlkioStats{}
appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async")
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async")
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async")
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total")
appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "")
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
}
func TestBlkioStatsNoSectorsFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServiceBytesFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServicedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoQueuedFile(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoServiceTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoWaitTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoMergedFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsNoTimeFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatalf("Failed unexpectedly: %s", err)
}
}
func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read 100 100",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestBlkioStatsUnexpectedFieldType(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "8:0 Read Write",
"blkio.io_serviced_recursive": servicedRecursiveContents,
"blkio.io_queued_recursive": queuedRecursiveContents,
"blkio.sectors_recursive": sectorsRecursiveContents,
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
"blkio.io_merged_recursive": mergedRecursiveContents,
"blkio.time_recursive": timeRecursiveContents,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected to fail, but did not")
}
}
func TestNonCFQBlkioStats(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.io_service_bytes_recursive": "",
"blkio.io_serviced_recursive": "",
"blkio.io_queued_recursive": "",
"blkio.sectors_recursive": "",
"blkio.io_service_time_recursive": "",
"blkio.io_wait_time_recursive": "",
"blkio.io_merged_recursive": "",
"blkio.time_recursive": "",
"blkio.throttle.io_service_bytes": throttleServiceBytes,
"blkio.throttle.io_serviced": throttleServiced,
})
blkio := &BlkioGroup{}
actualStats := *cgroups.NewStats()
err := blkio.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
// Verify expected stats.
expectedStats := cgroups.BlkioStats{}
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async")
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async")
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total")
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
}
func TestBlkioSetThrottleReadBpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.throttle.read_bps_device": throttleBefore,
})
helper.CgroupData.c.BlkioThrottleReadBpsDevice = throttleAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.")
}
}
func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.throttle.write_bps_device": throttleBefore,
})
helper.CgroupData.c.BlkioThrottleWriteBpsDevice = throttleAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.")
}
}
func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.throttle.read_iops_device": throttleBefore,
})
helper.CgroupData.c.BlkioThrottleReadIOpsDevice = throttleAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.")
}
}
func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) {
helper := NewCgroupTestUtil("blkio", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"blkio.throttle.write_iops_device": throttleBefore,
})
helper.CgroupData.c.BlkioThrottleWriteIOpsDevice = throttleAfter
blkio := &BlkioGroup{}
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device")
if err != nil {
t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err)
}
if value != throttleAfter {
t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.")
}
}

@@ -0,0 +1,95 @@
// +build linux
package fs
import (
"bufio"
"os"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type CpuGroup struct {
}
func (s *CpuGroup) Apply(d *data) error {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
dir, err := d.join("cpu")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.CpuShares != 0 {
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.CpuShares, 10)); err != nil {
return err
}
}
if cgroup.CpuPeriod != 0 {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.CpuPeriod, 10)); err != nil {
return err
}
}
if cgroup.CpuQuota != 0 {
if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.CpuQuota, 10)); err != nil {
return err
}
}
if cgroup.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil
}
func (s *CpuGroup) Remove(d *data) error {
return removePath(d.path("cpu"))
}
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
f, err := os.Open(filepath.Join(path, "cpu.stat"))
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
t, v, err := getCgroupParamKeyValue(sc.Text())
if err != nil {
return err
}
switch t {
case "nr_periods":
stats.CpuStats.ThrottlingData.Periods = v
case "nr_throttled":
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
case "throttled_time":
stats.CpuStats.ThrottlingData.ThrottledTime = v
}
}
return nil
}
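
The cfs_quota_us/cfs_period_us pair written above is what actually caps CPU time: the group may run for at most quota microseconds in every period. A small illustrative example of expressing a 1.5-CPU cap with the configs.Cgroup fields used here (values are made up):

package main

import (
    "fmt"

    "github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
    // With a 100ms period, a 150ms quota allows 1.5 CPUs worth of run time.
    cg := &configs.Cgroup{
        Name:      "demo",
        CpuPeriod: 100000, // written to cpu.cfs_period_us (microseconds)
        CpuQuota:  150000, // written to cpu.cfs_quota_us (microseconds)
    }
    fmt.Printf("cpu cap: %.1f CPUs\n", float64(cg.CpuQuota)/float64(cg.CpuPeriod))
}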

@@ -0,0 +1,163 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
func TestCpuSetShares(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
sharesBefore = 1024
sharesAfter = 512
)
helper.writeFileContents(map[string]string{
"cpu.shares": strconv.Itoa(sharesBefore),
})
helper.CgroupData.c.CpuShares = sharesAfter
cpu := &CpuGroup{}
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares")
if err != nil {
t.Fatalf("Failed to parse cpu.shares - %s", err)
}
if value != sharesAfter {
t.Fatal("Got the wrong value, set cpu.shares failed.")
}
}
func TestCpuSetBandWidth(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
quotaBefore = 8000
quotaAfter = 5000
periodBefore = 10000
periodAfter = 7000
rtRuntimeBefore = 8000
rtRuntimeAfter = 5000
rtPeriodBefore = 10000
rtPeriodAfter = 7000
)
helper.writeFileContents(map[string]string{
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
})
helper.CgroupData.c.CpuQuota = quotaAfter
helper.CgroupData.c.CpuPeriod = periodAfter
helper.CgroupData.c.CpuRtRuntime = rtRuntimeAfter
helper.CgroupData.c.CpuRtPeriod = rtPeriodAfter
cpu := &CpuGroup{}
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
if err != nil {
t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
}
if quota != quotaAfter {
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
}
period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
}
if period != periodAfter {
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
}
rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
}
if rtRuntime != rtRuntimeAfter {
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
}
rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
if err != nil {
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
}
if rtPeriod != rtPeriodAfter {
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
}
}
func TestCpuStats(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
const (
kNrPeriods = 2000
kNrThrottled = 200
kThrottledTime = uint64(18446744073709551615)
)
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
kNrPeriods, kNrThrottled, kThrottledTime)
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.ThrottlingData{
Periods: kNrPeriods,
ThrottledPeriods: kNrThrottled,
ThrottledTime: kThrottledTime}
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
}
func TestNoCpuStatFile(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal("Expected not to fail, but did")
}
}
func TestInvalidCpuStat(t *testing.T) {
helper := NewCgroupTestUtil("cpu", t)
defer helper.cleanup()
cpuStatContent := `nr_periods 2000
nr_throttled 200
throttled_time fortytwo`
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
})
cpu := &CpuGroup{}
actualStats := *cgroups.NewStats()
err := cpu.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failed stat parsing.")
}
}

@@ -0,0 +1,117 @@
// +build linux
package fs
import (
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
)
const (
cgroupCpuacctStat = "cpuacct.stat"
nanosecondsInSecond = 1000000000
)
var clockTicks = uint64(system.GetClockTicks())
type CpuacctGroup struct {
}
func (s *CpuacctGroup) Apply(d *data) error {
// we just want to join this group even though we don't set anything
if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
return err
}
return nil
}
func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
return nil
}
func (s *CpuacctGroup) Remove(d *data) error {
return removePath(d.path("cpuacct"))
}
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
if err != nil {
return err
}
totalUsage, err := getCgroupParamUint(path, "cpuacct.usage")
if err != nil {
return err
}
percpuUsage, err := getPercpuUsage(path)
if err != nil {
return err
}
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
return nil
}
// Returns user and kernel usage breakdown in nanoseconds.
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
userModeUsage := uint64(0)
kernelModeUsage := uint64(0)
const (
userField = "user"
systemField = "system"
)
// Expected format:
// user <usage in ticks>
// system <usage in ticks>
data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat))
if err != nil {
return 0, 0, err
}
fields := strings.Fields(string(data))
if len(fields) != 4 {
return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat))
}
if fields[0] != userField {
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
}
if fields[2] != systemField {
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
}
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
return 0, 0, err
}
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
return 0, 0, err
}
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
}
func getPercpuUsage(path string) ([]uint64, error) {
percpuUsage := []uint64{}
data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
if err != nil {
return percpuUsage, err
}
for _, value := range strings.Fields(string(data)) {
value, err := strconv.ParseUint(value, 10, 64)
if err != nil {
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
}
percpuUsage = append(percpuUsage, value)
}
return percpuUsage, nil
}
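
The scaling at the end of getCpuUsageBreakdown deserves a note: cpuacct.stat reports user and system time in scheduler ticks, which are converted to nanoseconds by multiplying by 1e9 and dividing by the tick rate (USER_HZ, commonly 100). A small worked example with assumed values:

package main

import "fmt"

const nanosecondsInSecond = 1000000000

func main() {
    // Assumed values: 250 user-mode ticks at the common USER_HZ of 100 ticks per second.
    var userTicks, clockTicks uint64 = 250, 100

    // Same formula as getCpuUsageBreakdown.
    userNs := (userTicks * nanosecondsInSecond) / clockTicks
    fmt.Println("user CPU time:", userNs, "ns") // 2500000000 ns, i.e. 2.5 seconds
}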

@@ -0,0 +1,135 @@
// +build linux
package fs
import (
"bytes"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type CpusetGroup struct {
}
func (s *CpusetGroup) Apply(d *data) error {
dir, err := d.path("cpuset")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
return s.ApplyDir(dir, d.c, d.pid)
}
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.CpusetCpus != "" {
if err := writeFile(path, "cpuset.cpus", cgroup.CpusetCpus); err != nil {
return err
}
}
if cgroup.CpusetMems != "" {
if err := writeFile(path, "cpuset.mems", cgroup.CpusetMems); err != nil {
return err
}
}
return nil
}
func (s *CpusetGroup) Remove(d *data) error {
return removePath(d.path("cpuset"))
}
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
// This might happen if we have no cpuset cgroup mounted.
// Just do nothing and don't fail.
if dir == "" {
return nil
}
root, err := getCgroupRoot()
if err != nil {
return err
}
if err := s.ensureParent(dir, root); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
return err
}
// the default values inherited from the parent cgroup are already set by
// s.ensureParent; override them here if we have our own
if err := s.Set(dir, cgroup); err != nil {
return err
}
return nil
}
func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
return
}
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
return
}
return cpus, mems, nil
}
// ensureParent makes sure that the parent directory of current is created
// and populated with the proper cpus and mems files copied from
// its parent.
func (s *CpusetGroup) ensureParent(current, root string) error {
parent := filepath.Dir(current)
if filepath.Clean(parent) == root {
return nil
}
if err := s.ensureParent(parent, root); err != nil {
return err
}
if err := os.MkdirAll(current, 0755); err != nil && !os.IsExist(err) {
return err
}
return s.copyIfNeeded(current, parent)
}
// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
// directory to the current directory if the file's contents are 0
func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
var (
err error
currentCpus, currentMems []byte
parentCpus, parentMems []byte
)
if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
return err
}
if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
return err
}
if s.isEmpty(currentCpus) {
if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
return err
}
}
if s.isEmpty(currentMems) {
if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil {
return err
}
}
return nil
}
func (s *CpusetGroup) isEmpty(b []byte) bool {
return len(bytes.Trim(b, "\n")) == 0
}
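
The ensureParent/copyIfNeeded pair exists because a freshly created cpuset cgroup has empty cpuset.cpus and cpuset.mems, and the kernel refuses to attach tasks until both are populated. A hypothetical manual equivalent of what those helpers automate (mountpoint and names are illustrative):

package main

import (
    "io/ioutil"
    "os"
    "path/filepath"
    "strconv"
)

func main() {
    parent := "/sys/fs/cgroup/cpuset" // illustrative cpuset mountpoint
    child := filepath.Join(parent, "demo")

    if err := os.MkdirAll(child, 0755); err != nil && !os.IsExist(err) {
        panic(err)
    }

    // Inherit cpus and mems from the parent before adding any task,
    // which is what copyIfNeeded automates.
    for _, name := range []string{"cpuset.cpus", "cpuset.mems"} {
        v, err := ioutil.ReadFile(filepath.Join(parent, name))
        if err != nil {
            panic(err)
        }
        if err := ioutil.WriteFile(filepath.Join(child, name), v, 0700); err != nil {
            panic(err)
        }
    }

    pid := []byte(strconv.Itoa(os.Getpid()))
    if err := ioutil.WriteFile(filepath.Join(child, "cgroup.procs"), pid, 0700); err != nil {
        panic(err)
    }
}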

@@ -0,0 +1,65 @@
// +build linux
package fs
import (
"testing"
)
func TestCpusetSetCpus(t *testing.T) {
helper := NewCgroupTestUtil("cpuset", t)
defer helper.cleanup()
const (
cpusBefore = "0"
cpusAfter = "1-3"
)
helper.writeFileContents(map[string]string{
"cpuset.cpus": cpusBefore,
})
helper.CgroupData.c.CpusetCpus = cpusAfter
cpuset := &CpusetGroup{}
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus")
if err != nil {
t.Fatalf("Failed to parse cpuset.cpus - %s", err)
}
if value != cpusAfter {
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
}
}
func TestCpusetSetMems(t *testing.T) {
helper := NewCgroupTestUtil("cpuset", t)
defer helper.cleanup()
const (
memsBefore = "0"
memsAfter = "1"
)
helper.writeFileContents(map[string]string{
"cpuset.mems": memsBefore,
})
helper.CgroupData.c.CpusetMems = memsAfter
cpuset := &CpusetGroup{}
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems")
if err != nil {
t.Fatalf("Failed to parse cpuset.mems - %s", err)
}
if value != memsAfter {
t.Fatal("Got the wrong value, set cpuset.mems failed.")
}
}

@@ -0,0 +1,61 @@
// +build linux
package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type DevicesGroup struct {
}
func (s *DevicesGroup) Apply(d *data) error {
dir, err := d.join("devices")
if err != nil {
// We return the error even when it is a `not found` error, because the
// devices cgroup is a hard requirement for the container's security.
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
if !cgroup.AllowAllDevices {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range cgroup.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
}
for _, dev := range cgroup.DeniedDevices {
if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
func (s *DevicesGroup) Remove(d *data) error {
return removePath(d.path("devices"))
}
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
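
Each configs.Device serializes to a rule like "c 1:5 rwm" (type, major:minor, permissions) via CgroupString; the allow-list path above first writes "a" to devices.deny to drop everything, then re-allows specific nodes. A hedged sketch of building such a config (device values chosen to match the constants in the test that follows):

package main

import (
    "fmt"

    "github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
    // Illustrative allow-list: block all devices, then re-allow /dev/null.
    cg := &configs.Cgroup{
        Name:            "demo",
        AllowAllDevices: false,
        AllowedDevices: []*configs.Device{
            {
                Path:        "/dev/null",
                Type:        'c',
                Major:       1,
                Minor:       3,
                Permissions: "rwm",
                FileMode:    0666,
            },
        },
    }
    // Renders the rule the devices controller understands, here "c 1:3 rwm".
    fmt.Println(cg.AllowedDevices[0].CgroupString())
}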

@@ -0,0 +1,84 @@
// +build linux
package fs
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
allowedDevices = []*configs.Device{
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
}
allowedList = "c 1:5 rwm"
deniedDevices = []*configs.Device{
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
}
deniedList = "c 1:3 rwm"
)
func TestDevicesSetAllow(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.deny": "a",
})
helper.CgroupData.c.AllowAllDevices = false
helper.CgroupData.c.AllowedDevices = allowedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "devices.allow")
if err != nil {
t.Fatalf("Failed to parse devices.allow - %s", err)
}
if value != allowedList {
t.Fatal("Got the wrong value, set devices.allow failed.")
}
}
func TestDevicesSetDeny(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.allow": "a",
})
helper.CgroupData.c.AllowAllDevices = true
helper.CgroupData.c.DeniedDevices = deniedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "devices.deny")
if err != nil {
t.Fatalf("Failed to parse devices.deny - %s", err)
}
if value != deniedList {
t.Fatal("Got the wrong value, set devices.deny failed.")
}
}

@@ -0,0 +1,62 @@
// +build linux
package fs
import (
"fmt"
"strings"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type FreezerGroup struct {
}
func (s *FreezerGroup) Apply(d *data) error {
dir, err := d.join("freezer")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
switch cgroup.Freezer {
case configs.Frozen, configs.Thawed:
if err := writeFile(path, "freezer.state", string(cgroup.Freezer)); err != nil {
return err
}
for {
state, err := readFile(path, "freezer.state")
if err != nil {
return err
}
if strings.TrimSpace(state) == string(cgroup.Freezer) {
break
}
time.Sleep(1 * time.Millisecond)
}
case configs.Undefined:
return nil
default:
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Freezer))
}
return nil
}
func (s *FreezerGroup) Remove(d *data) error {
return removePath(d.path("freezer"))
}
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
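
Pausing and resuming every task in the cgroup set then reduces to flipping the freezer state through the Manager interface; a hedged sketch, assuming a manager on which Apply has already been called (as in the earlier example):

package main

import (
    "github.com/opencontainers/runc/libcontainer/cgroups"
    "github.com/opencontainers/runc/libcontainer/configs"
)

// pauseAndResume freezes all tasks in the cgroup set managed by m and then
// thaws them again. FreezerGroup.Set polls freezer.state until the change sticks.
func pauseAndResume(m cgroups.Manager) error {
    if err := m.Freeze(configs.Frozen); err != nil {
        return err
    }
    // ... inspect or checkpoint the paused processes here ...
    return m.Freeze(configs.Thawed)
}

func main() {} // empty entry point so the sketch builds on its own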

@@ -0,0 +1,45 @@
package fs
import (
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
func TestFreezerSetState(t *testing.T) {
helper := NewCgroupTestUtil("freezer", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"freezer.state": string(configs.Frozen),
})
helper.CgroupData.c.Freezer = configs.Thawed
freezer := &FreezerGroup{}
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "freezer.state")
if err != nil {
t.Fatalf("Failed to parse freezer.state - %s", err)
}
if value != string(configs.Thawed) {
t.Fatal("Got the wrong value, set freezer.state failed.")
}
}
func TestFreezerSetInvalidState(t *testing.T) {
helper := NewCgroupTestUtil("freezer", t)
defer helper.cleanup()
const (
invalidArg configs.FreezerState = "Invalid"
)
helper.CgroupData.c.Freezer = invalidArg
freezer := &FreezerGroup{}
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.c); err == nil {
t.Fatal("Failed to return invalid argument error")
}
}

@@ -0,0 +1,3 @@
// +build !linux
package fs

@@ -0,0 +1,72 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type HugetlbGroup struct {
}
func (s *HugetlbGroup) Apply(d *data) error {
dir, err := d.join("hugetlb")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
for _, hugetlb := range cgroup.HugetlbLimit {
if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil {
return err
}
}
return nil
}
func (s *HugetlbGroup) Remove(d *data) error {
return removePath(d.path("hugetlb"))
}
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
hugetlbStats := cgroups.HugetlbStats{}
for _, pageSize := range HugePageSizes {
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
value, err := getCgroupParamUint(path, usage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", usage, err)
}
hugetlbStats.Usage = value
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
value, err = getCgroupParamUint(path, maxUsage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
}
hugetlbStats.MaxUsage = value
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
value, err = getCgroupParamUint(path, failcnt)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
}
hugetlbStats.Failcnt = value
stats.HugetlbStats[pageSize] = hugetlbStats
}
return nil
}
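
The control file names above are assembled per supported page size, e.g. hugetlb.2MB.limit_in_bytes, with GetHugePageSize discovering which sizes the kernel offers. A short illustrative sketch:

package main

import (
    "fmt"
    "strings"

    "github.com/opencontainers/runc/libcontainer/cgroups"
)

func main() {
    // Page-size strings come from the kernel, e.g. "2MB" or "1GB".
    sizes, err := cgroups.GetHugePageSize()
    if err != nil {
        panic(err)
    }
    for _, pageSize := range sizes {
        // Same composition as HugetlbGroup.Set and GetStats.
        fmt.Println(strings.Join([]string{"hugetlb", pageSize, "limit_in_bytes"}, "."))
    }
}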

@@ -0,0 +1,138 @@
package fs
import (
"strconv"
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
const (
hugetlbUsageContents = "128\n"
hugetlbMaxUsageContents = "256\n"
hugetlbFailcnt = "100\n"
)
var (
hugePageSize, _ = cgroups.GetHugePageSize()
usage = strings.Join([]string{"hugetlb", hugePageSize[0], "usage_in_bytes"}, ".")
limit = strings.Join([]string{"hugetlb", hugePageSize[0], "limit_in_bytes"}, ".")
maxUsage = strings.Join([]string{"hugetlb", hugePageSize[0], "max_usage_in_bytes"}, ".")
failcnt = strings.Join([]string{"hugetlb", hugePageSize[0], "failcnt"}, ".")
)
func TestHugetlbSetHugetlb(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
const (
hugetlbBefore = 256
hugetlbAfter = 512
)
helper.writeFileContents(map[string]string{
limit: strconv.Itoa(hugetlbBefore),
})
helper.CgroupData.c.HugetlbLimit = []*configs.HugepageLimit{
{
Pagesize: hugePageSize[0],
Limit: hugetlbAfter,
},
}
hugetlb := &HugetlbGroup{}
if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, limit)
if err != nil {
t.Fatalf("Failed to parse %s - %s", limit, err)
}
if value != hugetlbAfter {
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
}
}
func TestHugetlbStats(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
usage: hugetlbUsageContents,
maxUsage: hugetlbMaxUsageContents,
failcnt: hugetlbFailcnt,
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[hugePageSize[0]])
}
func TestHugetlbStatsNoUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
maxUsage: hugetlbMaxUsageContents,
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
usage: hugetlbUsageContents,
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsBadUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
usage: "bad",
maxUsage: hugetlbMaxUsageContents,
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("hugetlb", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
usage: hugetlbUsageContents,
maxUsage: "bad",
})
hugetlb := &HugetlbGroup{}
actualStats := *cgroups.NewStats()
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}

View File

@ -0,0 +1,171 @@
// +build linux
package fs
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type MemoryGroup struct {
}
func (s *MemoryGroup) Apply(d *data) error {
path, err := d.path("memory")
if err != nil {
if cgroups.IsNotFound(err) {
return nil
}
return err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return err
}
if err := s.Set(path, d.c); err != nil {
return err
}
defer func() {
if err != nil {
os.RemoveAll(path)
}
}()
// We need to join memory cgroup after set memory limits, because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
_, err = d.join("memory")
if err != nil {
return err
}
return nil
}
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Memory, 10)); err != nil {
return err
}
}
if cgroup.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.MemoryReservation, 10)); err != nil {
return err
}
}
if cgroup.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil {
return err
}
}
if cgroup.KernelMemory > 0 {
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.KernelMemory, 10)); err != nil {
return err
}
}
if cgroup.OomKillDisable {
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
return err
}
}
if cgroup.MemorySwappiness >= 0 && cgroup.MemorySwappiness <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.MemorySwappiness, 10)); err != nil {
return err
}
} else if cgroup.MemorySwappiness == -1 {
return nil
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", cgroup.MemorySwappiness)
}
return nil
}
func (s *MemoryGroup) Remove(d *data) error {
return removePath(d.path("memory"))
}
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
// Set stats from memory.stat.
statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
defer statsFile.Close()
sc := bufio.NewScanner(statsFile)
for sc.Scan() {
t, v, err := getCgroupParamKeyValue(sc.Text())
if err != nil {
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
}
stats.MemoryStats.Stats[t] = v
}
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return err
}
stats.MemoryStats.Usage = memoryUsage
swapUsage, err := getMemoryData(path, "memsw")
if err != nil {
return err
}
stats.MemoryStats.SwapUsage = swapUsage
kernelUsage, err := getMemoryData(path, "kmem")
if err != nil {
return err
}
stats.MemoryStats.KernelUsage = kernelUsage
return nil
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
memoryData := cgroups.MemoryData{}
moduleName := "memory"
if name != "" {
moduleName = strings.Join([]string{"memory", name}, ".")
}
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
value, err := getCgroupParamUint(path, usage)
if err != nil {
if moduleName != "memory" && os.IsNotExist(err) {
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
}
memoryData.Usage = value
value, err = getCgroupParamUint(path, maxUsage)
if err != nil {
if moduleName != "memory" && os.IsNotExist(err) {
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
}
memoryData.MaxUsage = value
value, err = getCgroupParamUint(path, failcnt)
if err != nil {
if moduleName != "memory" && os.IsNotExist(err) {
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
}
memoryData.Failcnt = value
return memoryData, nil
}
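
The swappiness handling in Set above has three distinct cases that are easy to miss. The following standalone sketch (not part of the vendored package; names and values are illustrative) restates just that decision.

package main

import (
    "fmt"
    "strconv"
)

// validateSwappiness mirrors the swappiness handling in MemoryGroup.Set above:
// values 0-100 are written to memory.swappiness, -1 means "leave the kernel
// default untouched", and anything else is rejected.
func validateSwappiness(v int64) (write bool, value string, err error) {
    switch {
    case v >= 0 && v <= 100:
        return true, strconv.FormatInt(v, 10), nil
    case v == -1:
        return false, "", nil
    default:
        return false, "", fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", v)
    }
}

func main() {
    for _, v := range []int64{0, 60, -1, 200} {
        write, val, err := validateSwappiness(v)
        fmt.Println(v, write, val, err)
    }
}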

View File

@ -0,0 +1,294 @@
// +build linux
package fs
import (
"strconv"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
const (
memoryStatContents = `cache 512
rss 1024`
memoryUsageContents = "2048\n"
memoryMaxUsageContents = "4096\n"
memoryFailcnt = "100\n"
)
func TestMemorySetMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryBefore = 314572800 // 300M
memoryAfter = 524288000 // 500M
reservationBefore = 209715200 // 200M
reservationAfter = 314572800 // 300M
)
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
})
helper.CgroupData.c.Memory = memoryAfter
helper.CgroupData.c.MemoryReservation = reservationAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
}
value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
}
if value != reservationAfter {
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
}
}
func TestMemorySetMemoryswap(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
memoryswapBefore = 314572800 // 300M
memoryswapAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
})
helper.CgroupData.c.MemorySwap = memoryswapAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
}
}
func TestMemorySetKernelMemory(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
kernelMemoryBefore = 314572800 // 300M
kernelMemoryAfter = 524288000 // 500M
)
helper.writeFileContents(map[string]string{
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
})
helper.CgroupData.c.KernelMemory = kernelMemoryAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
}
if value != kernelMemoryAfter {
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
}
}
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
swappinessBefore = 60 // default is 60
swappinessAfter = 0
)
helper.writeFileContents(map[string]string{
"memory.swappiness": strconv.Itoa(swappinessBefore),
})
helper.CgroupData.c.Memory = swappinessAfter
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.swappiness")
if err != nil {
t.Fatalf("Failed to parse memory.swappiness - %s", err)
}
if value != swappinessAfter {
t.Fatal("Got the wrong value, set memory.swappiness failed.")
}
}
func TestMemoryStats(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
"memory.failcnt": memoryFailcnt,
"memory.memsw.usage_in_bytes": memoryUsageContents,
"memory.memsw.max_usage_in_bytes": memoryMaxUsageContents,
"memory.memsw.failcnt": memoryFailcnt,
"memory.kmem.usage_in_bytes": memoryUsageContents,
"memory.kmem.max_usage_in_bytes": memoryMaxUsageContents,
"memory.kmem.failcnt": memoryFailcnt,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100}, Stats: map[string]uint64{"cache": 512, "rss": 1024}}
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
}
func TestMemoryStatsNoStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err != nil {
t.Fatal(err)
}
}
func TestMemoryStatsNoUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadStatFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": "rss rss",
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": "bad",
"memory.max_usage_in_bytes": memoryMaxUsageContents,
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"memory.stat": memoryStatContents,
"memory.usage_in_bytes": memoryUsageContents,
"memory.max_usage_in_bytes": "bad",
})
memory := &MemoryGroup{}
actualStats := *cgroups.NewStats()
err := memory.GetStats(helper.CgroupPath, &actualStats)
if err == nil {
t.Fatal("Expected failure")
}
}
func TestMemorySetOomControl(t *testing.T) {
helper := NewCgroupTestUtil("memory", t)
defer helper.cleanup()
const (
oom_kill_disable = 1 // disable oom killer, default is 0
)
helper.writeFileContents(map[string]string{
"memory.oom_control": strconv.Itoa(oom_kill_disable),
})
memory := &MemoryGroup{}
if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control")
if err != nil {
t.Fatalf("Failed to parse memory.oom_control - %s", err)
}
if value != oom_kill_disable {
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
}
}

View File

@ -0,0 +1,40 @@
package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type NetClsGroup struct {
}
func (s *NetClsGroup) Apply(d *data) error {
dir, err := d.join("net_cls")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.NetClsClassid != "" {
if err := writeFile(path, "net_cls.classid", cgroup.NetClsClassid); err != nil {
return err
}
}
return nil
}
func (s *NetClsGroup) Remove(d *data) error {
return removePath(d.path("net_cls"))
}
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}

View File

@ -0,0 +1,36 @@
package fs
import (
"testing"
)
const (
classidBefore = "0x100002"
classidAfter = "0x100001"
)
func TestNetClsSetClassid(t *testing.T) {
helper := NewCgroupTestUtil("net_cls", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"net_cls.classid": classidBefore,
})
helper.CgroupData.c.NetClsClassid = classidAfter
netcls := &NetClsGroup{}
if err := netcls.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
// As we are in a mock environment, we can't read the real classid value back from
// net_cls.classid, so we just check that the classid was written to the file.
value, err := getCgroupParamString(helper.CgroupPath, "net_cls.classid")
if err != nil {
t.Fatalf("Failed to parse net_cls.classid - %s", err)
}
if value != classidAfter {
t.Fatal("Got the wrong value, set net_cls.classid failed.")
}
}

View File

@ -0,0 +1,40 @@
package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type NetPrioGroup struct {
}
func (s *NetPrioGroup) Apply(d *data) error {
dir, err := d.join("net_prio")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
for _, prioMap := range cgroup.NetPrioIfpriomap {
if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
return err
}
}
return nil
}
func (s *NetPrioGroup) Remove(d *data) error {
return removePath(d.path("net_prio"))
}
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}

View File

@ -0,0 +1,36 @@
package fs
import (
"strings"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
prioMap = []*configs.IfPrioMap{
{
Interface: "test",
Priority: 5,
},
}
)
func TestNetPrioSetIfPrio(t *testing.T) {
helper := NewCgroupTestUtil("net_prio", t)
defer helper.cleanup()
helper.CgroupData.c.NetPrioIfpriomap = prioMap
netPrio := &NetPrioGroup{}
if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil {
t.Fatal(err)
}
value, err := getCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
if err != nil {
t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
}
if !strings.Contains(value, "test 5") {
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
}
}

View File

@ -0,0 +1,31 @@
// +build linux
package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type PerfEventGroup struct {
}
func (s *PerfEventGroup) Apply(d *data) error {
// we just want to join this group even though we don't set anything
if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
return err
}
return nil
}
func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
return nil
}
func (s *PerfEventGroup) Remove(d *data) error {
return removePath(d.path("perf_event"))
}
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}

View File

@ -0,0 +1,113 @@
// +build linux
package fs
import (
"fmt"
"testing"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error {
if len(expected) != len(actual) {
return fmt.Errorf("blkioStatEntries length do not match")
}
for i, expValue := range expected {
actValue := actual[i]
if expValue != actValue {
return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue)
}
}
return nil
}
func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil {
logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil {
logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil {
logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil {
logrus.Printf("blkio SectorsRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil {
logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil {
logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil {
logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
t.Fail()
}
if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil {
logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err)
t.Fail()
}
}
func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) {
if expected != actual {
logrus.Printf("Expected throttling data %v but found %v\n", expected, actual)
t.Fail()
}
}
func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) {
if expected != actual {
logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual)
t.Fail()
}
}
func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) {
expectMemoryDataEquals(t, expected.Usage, actual.Usage)
expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage)
expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage)
for key, expValue := range expected.Stats {
actValue, ok := actual.Stats[key]
if !ok {
logrus.Printf("Expected memory stat key %s not found\n", key)
t.Fail()
}
if expValue != actValue {
logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue)
t.Fail()
}
}
}
func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) {
if expected.Usage != actual.Usage {
logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage)
t.Fail()
}
if expected.MaxUsage != actual.MaxUsage {
logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage)
t.Fail()
}
if expected.Failcnt != actual.Failcnt {
logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
t.Fail()
}
}

View File

@ -0,0 +1,66 @@
// +build linux
/*
Utility for testing cgroup operations.
Creates a mock of the cgroup filesystem for the duration of the test.
*/
package fs
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
type cgroupTestUtil struct {
// data to use in tests.
CgroupData *data
// Path to the mock cgroup directory.
CgroupPath string
// Temporary directory to store mock cgroup filesystem.
tempDir string
t *testing.T
}
// Creates a new test util for the specified subsystem
func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
d := &data{
c: &configs.Cgroup{},
}
tempDir, err := ioutil.TempDir("", "cgroup_test")
if err != nil {
t.Fatal(err)
}
d.root = tempDir
testCgroupPath := filepath.Join(d.root, subsystem)
if err != nil {
t.Fatal(err)
}
// Ensure the full mock cgroup path exists.
err = os.MkdirAll(testCgroupPath, 0755)
if err != nil {
t.Fatal(err)
}
return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t}
}
func (c *cgroupTestUtil) cleanup() {
os.RemoveAll(c.tempDir)
}
// Write the specified contents on the mock of the specified cgroup files.
func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) {
for file, contents := range fileContents {
err := writeFile(c.CgroupPath, file, contents)
if err != nil {
c.t.Fatal(err)
}
}
}

View File

@ -0,0 +1,74 @@
// +build linux
package fs
import (
"errors"
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)
// Saturates negative values at zero and returns a uint64.
// Due to kernel bugs, some of the memory cgroup stats can be negative.
func parseUint(s string, base, bitSize int) (uint64, error) {
value, err := strconv.ParseUint(s, base, bitSize)
if err != nil {
intValue, intErr := strconv.ParseInt(s, base, bitSize)
// 1. Handle negative values that still fit in an int64 (>= MinInt64), and
// 2. Handle negative values smaller than MinInt64 (ParseInt range error).
if intErr == nil && intValue < 0 {
return 0, nil
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
return 0, nil
}
return value, err
}
return value, nil
}
// Parses a cgroup param and returns as name, value
// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
func getCgroupParamKeyValue(t string) (string, uint64, error) {
parts := strings.Fields(t)
switch len(parts) {
case 2:
value, err := parseUint(parts[1], 10, 64)
if err != nil {
return "", 0, fmt.Errorf("Unable to convert param value (%q) to uint64: %v", parts[1], err)
}
return parts[0], value, nil
default:
return "", 0, ErrNotValidFormat
}
}
// Gets a single uint64 value from the specified cgroup file.
func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
if err != nil {
return 0, err
}
return parseUint(strings.TrimSpace(string(contents)), 10, 64)
}
// Gets a string value from the specified cgroup file
func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
if err != nil {
return "", err
}
return strings.TrimSpace(string(contents)), nil
}
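
To make the parsing behaviour concrete, here is a test-style sketch; it is hypothetical (the test name is invented) and assumes it sits in package fs next to the package's own tests, since parseUint and getCgroupParamKeyValue are unexported.

// +build linux

package fs

import "testing"

// Hypothetical test exercising the helpers above with concrete inputs.
func TestUtilsParsingSketch(t *testing.T) {
    // Negative values are saturated at zero instead of returning an error.
    if v, err := parseUint("-12345", 10, 64); err != nil || v != 0 {
        t.Fatalf("expected 0, got %d (err: %v)", v, err)
    }
    // A memory.stat style line splits into its key and uint64 value.
    key, value, err := getCgroupParamKeyValue("cache 512")
    if err != nil || key != "cache" || value != 512 {
        t.Fatalf("unexpected parse result: %q %d %v", key, value, err)
    }
}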

View File

@ -0,0 +1,97 @@
// +build linux
package fs
import (
"io/ioutil"
"math"
"os"
"path/filepath"
"strconv"
"testing"
)
const (
cgroupFile = "cgroup.file"
floatValue = 2048.0
floatString = "2048"
)
func TestGetCgroupParamsInt(t *testing.T) {
// Setup tempdir.
tempDir, err := ioutil.TempDir("", "cgroup_utils_test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tempDir)
tempFile := filepath.Join(tempDir, cgroupFile)
// Success.
err = ioutil.WriteFile(tempFile, []byte(floatString), 0755)
if err != nil {
t.Fatal(err)
}
value, err := getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %d to equal %f", value, floatValue)
}
// Success with new line.
err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != floatValue {
t.Fatalf("Expected %d to equal %f", value, floatValue)
}
// Success with negative values
err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Success with negative values lesser than min int64
s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64)
err = ioutil.WriteFile(tempFile, []byte(s), 0755)
if err != nil {
t.Fatal(err)
}
value, err = getCgroupParamUint(tempDir, cgroupFile)
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Not a float.
err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamUint(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
// Unknown file.
err = os.Remove(tempFile)
if err != nil {
t.Fatal(err)
}
_, err = getCgroupParamUint(tempDir, cgroupFile)
if err == nil {
t.Fatal("Expecting error, got none")
}
}

View File

@ -0,0 +1,92 @@
// +build linux
package cgroups
type ThrottlingData struct {
// Number of periods with throttling active
Periods uint64 `json:"periods,omitempty"`
// Number of periods when the container hit its throttling limit.
ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
// Aggregate time the container was throttled for in nanoseconds.
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
// All CPU stats are aggregate since container inception.
type CpuUsage struct {
// Total CPU time consumed.
// Units: nanoseconds.
TotalUsage uint64 `json:"total_usage,omitempty"`
// Total CPU time consumed per core.
// Units: nanoseconds.
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
// Time spent by tasks of the cgroup in kernel mode.
// Units: nanoseconds.
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
// Time spent by tasks of the cgroup in user mode.
// Units: nanoseconds.
UsageInUsermode uint64 `json:"usage_in_usermode"`
}
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
}
type MemoryData struct {
Usage uint64 `json:"usage,omitempty"`
MaxUsage uint64 `json:"max_usage,omitempty"`
Failcnt uint64 `json:"failcnt"`
}
type MemoryStats struct {
// memory used for cache
Cache uint64 `json:"cache,omitempty"`
// usage of memory
Usage MemoryData `json:"usage,omitempty"`
// usage of memory + swap
SwapUsage MemoryData `json:"swap_usage,omitempty"`
// usage of kernel memory
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
}
type BlkioStatEntry struct {
Major uint64 `json:"major,omitempty"`
Minor uint64 `json:"minor,omitempty"`
Op string `json:"op,omitempty"`
Value uint64 `json:"value,omitempty"`
}
type BlkioStats struct {
// number of bytes transferred to and from the block device
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}
type HugetlbStats struct {
// current res_counter usage for hugetlb
Usage uint64 `json:"usage,omitempty"`
// maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"`
// number of times hugetlb usage allocation failed.
Failcnt uint64 `json:"failcnt"`
}
type Stats struct {
CpuStats CpuStats `json:"cpu_stats,omitempty"`
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
// the map is in the format "size of hugepage: stats of the hugepage"
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
}
func NewStats() *Stats {
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
hugetlbStats := make(map[string]HugetlbStats)
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
}
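
As the comment on Stats notes, HugetlbStats is keyed by the human-readable page-size string. A short hypothetical sketch (the "2MB" key is an assumed example) of constructing a Stats and filling that map:

// +build linux

package cgroups

import "testing"

// Hypothetical test showing how the HugetlbStats map is keyed by page size.
func TestStatsMapSketch(t *testing.T) {
    stats := NewStats()
    stats.HugetlbStats["2MB"] = HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 1}
    if got := stats.HugetlbStats["2MB"].MaxUsage; got != 256 {
        t.Fatalf("expected max usage 256, got %d", got)
    }
}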

View File

@ -0,0 +1,51 @@
// +build !linux
package systemd
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}
func UseSystemd() bool {
return false
}
func (m *Manager) Apply(pid int) error {
return fmt.Errorf("Systemd not supported")
}
func (m *Manager) GetPids() ([]int, error) {
return nil, fmt.Errorf("Systemd not supported")
}
func (m *Manager) Destroy() error {
return fmt.Errorf("Systemd not supported")
}
func (m *Manager) GetPaths() map[string]string {
return nil
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
return nil, fmt.Errorf("Systemd not supported")
}
func (m *Manager) Set(container *configs.Config) error {
return fmt.Errorf("Systemd not supported")
}
func (m *Manager) Freeze(state configs.FreezerState) error {
return fmt.Errorf("Systemd not supported")
}
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
return fmt.Errorf("Systemd not supported")
}

View File

@ -0,0 +1,587 @@
// +build linux
package systemd
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
systemd "github.com/coreos/go-systemd/dbus"
"github.com/godbus/dbus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Manager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
}
type subsystem interface {
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
// Set the cgroup represented by cgroup.
Set(path string, cgroup *configs.Cgroup) error
}
var subsystems = map[string]subsystem{
"devices": &fs.DevicesGroup{},
"memory": &fs.MemoryGroup{},
"cpu": &fs.CpuGroup{},
"cpuset": &fs.CpusetGroup{},
"cpuacct": &fs.CpuacctGroup{},
"blkio": &fs.BlkioGroup{},
"hugetlb": &fs.HugetlbGroup{},
"perf_event": &fs.PerfEventGroup{},
"freezer": &fs.FreezerGroup{},
"net_prio": &fs.NetPrioGroup{},
"net_cls": &fs.NetClsGroup{},
}
const (
testScopeWait = 4
)
var (
connLock sync.Mutex
theConn *systemd.Conn
hasStartTransientUnit bool
hasTransientDefaultDependencies bool
)
func newProp(name string, units interface{}) systemd.Property {
return systemd.Property{
Name: name,
Value: dbus.MakeVariant(units),
}
}
func UseSystemd() bool {
s, err := os.Stat("/run/systemd/system")
if err != nil || !s.IsDir() {
return false
}
connLock.Lock()
defer connLock.Unlock()
if theConn == nil {
var err error
theConn, err = systemd.New()
if err != nil {
return false
}
// Assume we have StartTransientUnit
hasStartTransientUnit = true
// But if we get UnknownMethod error we don't
if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
hasStartTransientUnit = false
return hasStartTransientUnit
}
}
}
// Ensure the scope name we use doesn't exist. Use the Pid to
// avoid collisions between multiple libcontainer users on a
// single host.
scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid())
testScopeExists := true
for i := 0; i <= testScopeWait; i++ {
if _, err := theConn.StopUnit(scope, "replace"); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
testScopeExists = false
break
}
}
}
time.Sleep(time.Millisecond)
}
// Bail out if we can't kill this scope without testing for DefaultDependencies
if testScopeExists {
return hasStartTransientUnit
}
// Assume StartTransientUnit on a scope allows DefaultDependencies
hasTransientDefaultDependencies = true
ddf := newProp("DefaultDependencies", false)
if _, err := theConn.StartTransientUnit(scope, "replace", ddf); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasTransientDefaultDependencies = false
}
}
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace")
}
return hasStartTransientUnit
}
func getIfaceForUnit(unitName string) string {
if strings.HasSuffix(unitName, ".scope") {
return "Scope"
}
if strings.HasSuffix(unitName, ".service") {
return "Service"
}
return "Unit"
}
func (m *Manager) Apply(pid int) error {
var (
c = m.Cgroups
unitName = getUnitName(c)
slice = "system.slice"
properties []systemd.Property
)
if c.Slice != "" {
slice = c.Slice
}
properties = append(properties,
systemd.PropSlice(slice),
systemd.PropDescription("docker container "+c.Name),
newProp("PIDs", []uint32{uint32(pid)}),
)
// Always enable accounting; this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties,
newProp("MemoryAccounting", true),
newProp("CPUAccounting", true),
newProp("BlockIOAccounting", true))
if hasTransientDefaultDependencies {
properties = append(properties,
newProp("DefaultDependencies", false))
}
if c.Memory != 0 {
properties = append(properties,
newProp("MemoryLimit", uint64(c.Memory)))
}
// TODO: MemoryReservation and MemorySwap not available in systemd
if c.CpuShares != 0 {
properties = append(properties,
newProp("CPUShares", uint64(c.CpuShares)))
}
if c.BlkioWeight != 0 {
properties = append(properties,
newProp("BlockIOWeight", uint64(c.BlkioWeight)))
}
// We need to set kernel memory before processes join cgroup because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
// The swap limit also needs to be set after the memory limit, and only the
// memory limit is handled by systemd, so it is a bit ugly here.
if c.KernelMemory > 0 {
if err := setKernelMemory(c); err != nil {
return err
}
}
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
return err
}
if err := joinDevices(c, pid); err != nil {
return err
}
// TODO: CpuQuota and CpuPeriod not available in systemd
// we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us
if err := joinCpu(c, pid); err != nil {
return err
}
if err := joinMemory(c, pid); err != nil {
return err
}
// we need to manually join the freezer, net_cls, net_prio and cpuset cgroup in systemd
// because it does not currently support it via the dbus api.
if err := joinFreezer(c, pid); err != nil {
return err
}
if err := joinNetPrio(c, pid); err != nil {
return err
}
if err := joinNetCls(c, pid); err != nil {
return err
}
if err := joinCpuset(c, pid); err != nil {
return err
}
if err := joinHugetlb(c, pid); err != nil {
return err
}
// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
// using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354),
// so use fs work around for now.
if err := joinBlkio(c, pid); err != nil {
return err
}
paths := make(map[string]string)
for sysname := range subsystems {
subsystemPath, err := getSubsystemPath(m.Cgroups, sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return err
}
paths[sysname] = subsystemPath
}
m.Paths = paths
if paths["cpu"] != "" {
if err := fs.CheckCpushares(paths["cpu"], c.CpuShares); err != nil {
return err
}
}
return nil
}
func (m *Manager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
theConn.StopUnit(getUnitName(m.Cgroups), "replace")
if err := cgroups.RemovePaths(m.Paths); err != nil {
return err
}
m.Paths = make(map[string]string)
return nil
}
func (m *Manager) GetPaths() map[string]string {
m.mu.Lock()
paths := m.Paths
m.mu.Unlock()
return paths
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty; one case where it is empty is when the cgroup
// subsystem is not mounted, and in that case we want the write to fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s.", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
path, err := getSubsystemPath(c, subsystem)
if err != nil {
return "", err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return "", err
}
if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
return "", err
}
return path, nil
}
func joinCpu(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "cpu")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if c.CpuQuota != 0 {
if err = writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(c.CpuQuota, 10)); err != nil {
return err
}
}
if c.CpuPeriod != 0 {
if err = writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(c.CpuPeriod, 10)); err != nil {
return err
}
}
if c.CpuRtPeriod != 0 {
if err = writeFile(path, "cpu.rt_period_us", strconv.FormatInt(c.CpuRtPeriod, 10)); err != nil {
return err
}
}
if c.CpuRtRuntime != 0 {
if err = writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(c.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil
}
func joinFreezer(c *configs.Cgroup, pid int) error {
path, err := join(c, "freezer", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
freezer := subsystems["freezer"]
return freezer.Set(path, c)
}
func joinNetPrio(c *configs.Cgroup, pid int) error {
path, err := join(c, "net_prio", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
netPrio := subsystems["net_prio"]
return netPrio.Set(path, c)
}
func joinNetCls(c *configs.Cgroup, pid int) error {
path, err := join(c, "net_cls", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
netcls := subsystems["net_cls"]
return netcls.Set(path, c)
}
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
mountpoint, err := cgroups.FindCgroupMountpoint(subsystem)
if err != nil {
return "", err
}
initPath, err := cgroups.GetInitCgroupDir(subsystem)
if err != nil {
return "", err
}
slice := "system.slice"
if c.Slice != "" {
slice = c.Slice
}
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
}
func (m *Manager) Freeze(state configs.FreezerState) error {
path, err := getSubsystemPath(m.Cgroups, "freezer")
if err != nil {
return err
}
prevState := m.Cgroups.Freezer
m.Cgroups.Freezer = state
freezer := subsystems["freezer"]
err = freezer.Set(path, m.Cgroups)
if err != nil {
m.Cgroups.Freezer = prevState
return err
}
return nil
}
func (m *Manager) GetPids() ([]int, error) {
path, err := getSubsystemPath(m.Cgroups, "cpu")
if err != nil {
return nil, err
}
return cgroups.ReadProcsFile(path)
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
m.mu.Lock()
defer m.mu.Unlock()
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, ok := subsystems[name]
if !ok || !cgroups.PathExists(path) {
continue
}
if err := sys.GetStats(path, stats); err != nil {
return nil, err
}
}
return stats, nil
}
func (m *Manager) Set(container *configs.Config) error {
for name, path := range m.Paths {
sys, ok := subsystems[name]
if !ok || !cgroups.PathExists(path) {
continue
}
if err := sys.Set(path, container.Cgroups); err != nil {
return err
}
}
return nil
}
func getUnitName(c *configs.Cgroup) string {
return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name)
}
// Atm we can't use the systemd device support because of two missing things:
// * Support for wildcards to allow mknod on any device
// * Support for wildcards to allow /dev/pts support
//
// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is
// in wide use. When both these are available we will be able to switch, but need to keep the old
// implementation for backwards compat.
//
// Note: we can't use systemd to set up the initial limits, and then change the cgroup
// because systemd will re-write the device settings if it needs to re-apply the cgroup context.
// This happens at least for v208 when any sibling unit is started.
func joinDevices(c *configs.Cgroup, pid int) error {
path, err := join(c, "devices", pid)
// Even if it is a `not found` error, we return it, because the devices cgroup
// is a hard requirement for container security.
if err != nil {
return err
}
devices := subsystems["devices"]
return devices.Set(path, c)
}
func setKernelMemory(c *configs.Cgroup) error {
path, err := getSubsystemPath(c, "memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return err
}
if c.KernelMemory > 0 {
err = writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(c.KernelMemory, 10))
if err != nil {
return err
}
}
return nil
}
func joinMemory(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
// -1 disables memoryswap
if c.MemorySwap > 0 {
err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10))
if err != nil {
return err
}
}
if c.OomKillDisable {
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
return err
}
}
if c.MemorySwappiness >= 0 && c.MemorySwappiness <= 100 {
err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.MemorySwappiness, 10))
if err != nil {
return err
}
} else if c.MemorySwappiness == -1 {
return nil
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", c.MemorySwappiness)
}
return nil
}
// systemd does not currently set up the cpuset controller, so we must join it
// manually. It is also a very finicky controller: each level must be fully
// configured, because the default for a new directory is "no cpus".
func joinCpuset(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "cpuset")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
s := &fs.CpusetGroup{}
return s.ApplyDir(path, c, pid)
}
// The `BlockIODeviceWeight` property of systemd does not work properly, and systemd
// expects a device path instead of major:minor numbers, which is also confusing
// for users, so we use the fs workaround for now.
func joinBlkio(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "blkio")
if err != nil {
return err
}
if c.BlkioWeightDevice != "" {
if err := writeFile(path, "blkio.weight_device", c.BlkioWeightDevice); err != nil {
return err
}
}
if c.BlkioThrottleReadBpsDevice != "" {
if err := writeFile(path, "blkio.throttle.read_bps_device", c.BlkioThrottleReadBpsDevice); err != nil {
return err
}
}
if c.BlkioThrottleWriteBpsDevice != "" {
if err := writeFile(path, "blkio.throttle.write_bps_device", c.BlkioThrottleWriteBpsDevice); err != nil {
return err
}
}
if c.BlkioThrottleReadIOpsDevice != "" {
if err := writeFile(path, "blkio.throttle.read_iops_device", c.BlkioThrottleReadIOpsDevice); err != nil {
return err
}
}
if c.BlkioThrottleWriteIOpsDevice != "" {
if err := writeFile(path, "blkio.throttle.write_iops_device", c.BlkioThrottleWriteIOpsDevice); err != nil {
return err
}
}
return nil
}
func joinHugetlb(c *configs.Cgroup, pid int) error {
path, err := join(c, "hugetlb", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
hugetlb := subsystems["hugetlb"]
return hugetlb.Set(path, c)
}

View File

@ -0,0 +1,293 @@
// +build linux
package cgroups
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/units"
)
const cgroupNamePrefix = "name="
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError(subsystem)
}
func FindCgroupMountpointAndSource(subsystem string) (string, string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", "", err
}
return "", "", NewNotFoundError(subsystem)
}
func FindCgroupMountpointDir() (string, error) {
mounts, err := mount.GetMounts()
if err != nil {
return "", err
}
for _, mount := range mounts {
if mount.Fstype == "cgroup" {
return filepath.Dir(mount.Mountpoint), nil
}
}
return "", NewNotFoundError("cgroup")
}
type Mount struct {
Mountpoint string
Root string
Subsystems []string
}
func (m Mount) GetThisCgroupDir() (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
return GetThisCgroupDir(m.Subsystems[0])
}
func GetCgroupMounts() ([]Mount, error) {
mounts, err := mount.GetMounts()
if err != nil {
return nil, err
}
all, err := GetAllSubsystems()
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for _, s := range all {
allMap[s] = true
}
res := []Mount{}
for _, mount := range mounts {
if mount.Fstype == "cgroup" {
m := Mount{Mountpoint: mount.Mountpoint, Root: mount.Root}
for _, opt := range strings.Split(mount.VfsOpts, ",") {
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
}
if allMap[opt] {
m.Subsystems = append(m.Subsystems, opt)
}
}
res = append(res, m)
}
}
return res, nil
}
// Returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
f, err := os.Open("/proc/cgroups")
if err != nil {
return nil, err
}
defer f.Close()
subsystems := []string{}
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text[0] != '#' {
parts := strings.Fields(text)
if len(parts) >= 4 && parts[3] != "0" {
subsystems = append(subsystems, parts[0])
}
}
}
return subsystems, nil
}
// Returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", err
}
defer f.Close()
return ParseCgroupFile(subsystem, f)
}
func GetInitCgroupDir(subsystem string) (string, error) {
f, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", err
}
defer f.Close()
return ParseCgroupFile(subsystem, f)
}
func ReadProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
if err != nil {
return nil, err
}
defer f.Close()
var (
s = bufio.NewScanner(f)
out = []int{}
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, pid)
}
}
return out, nil
}
func ParseCgroupFile(subsystem string, r io.Reader) (string, error) {
s := bufio.NewScanner(r)
for s.Scan() {
if err := s.Err(); err != nil {
return "", err
}
text := s.Text()
parts := strings.Split(text, ":")
for _, subs := range strings.Split(parts[1], ",") {
if subs == subsystem || subs == cgroupNamePrefix+subsystem {
return parts[2], nil
}
}
}
return "", NewNotFoundError(subsystem)
}
func PathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
return false
}
return true
}
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
return err
}
}
}
return nil
}
// RemovePaths iterates over the provided paths, removing them.
// Each path is retried up to five times with an increasing delay between tries.
// If some cgroups still cannot be removed after that, an error is returned.
func RemovePaths(paths map[string]string) (err error) {
delay := 10 * time.Millisecond
for i := 0; i < 5; i++ {
if i != 0 {
time.Sleep(delay)
delay *= 2
}
for s, p := range paths {
os.RemoveAll(p)
// TODO: here probably should be logging
_, err := os.Stat(p)
// We need this roundabout existence check because RemoveAll almost always
// returns an error, even for cgroups that have already been removed.
if os.IsNotExist(err) {
delete(paths, s)
}
}
if len(paths) == 0 {
return nil
}
}
return fmt.Errorf("Failed to remove paths: %s", paths)
}
func GetHugePageSize() ([]string, error) {
var pageSizes []string
sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
if err != nil {
return pageSizes, err
}
for _, st := range files {
nameArray := strings.Split(st.Name(), "-")
pageSize, err := units.RAMInBytes(nameArray[1])
if err != nil {
return []string{}, err
}
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
pageSizes = append(pageSizes, sizeString)
}
return pageSizes, nil
}
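
GetHugePageSize above turns directory names under /sys/kernel/mm/hugepages into human-readable sizes. The following standalone sketch shows that conversion for an assumed example name, relying on the RAMInBytes and CustomSize helpers from the vendored docker/pkg/units package behaving as they are used above.

package main

import (
    "fmt"
    "strings"

    "github.com/docker/docker/pkg/units"
)

func main() {
    // Assumed example entry from /sys/kernel/mm/hugepages.
    name := "hugepages-2048kB"
    sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}

    // Same steps as GetHugePageSize: take the size part of the name,
    // parse it into bytes, then re-render it with 1024-based units.
    pageSize, err := units.RAMInBytes(strings.Split(name, "-")[1])
    if err != nil {
        panic(err)
    }
    fmt.Println(name, "->", units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)) // hugepages-2048kB -> 2MB
}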

View File

@ -0,0 +1,98 @@
package configs
type FreezerState string
const (
Undefined FreezerState = ""
Frozen FreezerState = "FROZEN"
Thawed FreezerState = "THAWED"
)
// TODO Windows: This can be factored out in the future as Cgroups are not
// supported on the Windows platform.
type Cgroup struct {
Name string `json:"name"`
// name of parent cgroup or slice
Parent string `json:"parent"`
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
AllowAllDevices bool `json:"allow_all_devices"`
AllowedDevices []*Device `json:"allowed_devices"`
DeniedDevices []*Device `json:"denied_devices"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
// Memory reservation or soft_limit (in bytes)
MemoryReservation int64 `json:"memory_reservation"`
// Total memory usage (memory + swap); set `-1' to disable swap
MemorySwap int64 `json:"memory_swap"`
// Kernel memory limit (in bytes)
KernelMemory int64 `json:"kernel_memory"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
// How much CPU time realtime scheduling may use (in usecs).
CpuRtRuntime int64 `json:"cpu_rt_runtime"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_rt_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
// MEM to use
CpusetMems string `json:"cpuset_mems"`
// IO read rate limit per cgroup per device, bytes per second.
BlkioThrottleReadBpsDevice string `json:"blkio_throttle_read_bps_device"`
// IO write rate limit per cgroup per device, bytes per second.
BlkioThrottleWriteBpsDevice string `json:"blkio_throttle_write_bps_device"`
// IO read rate limit per cgroup per device, IO per second.
BlkioThrottleReadIOpsDevice string `json:"blkio_throttle_read_iops_device"`
// IO write rate limit per cgroup per device, IO per second.
BlkioThrottleWriteIOpsDevice string `json:"blkio_throttle_write_iops_device"`
// Specifies per cgroup weight, range is from 10 to 1000.
BlkioWeight int64 `json:"blkio_weight"`
// Weight per cgroup per device, can override BlkioWeight.
BlkioWeightDevice string `json:"blkio_weight_device"`
// set the freeze value for the process
Freezer FreezerState `json:"freezer"`
// Hugetlb limit (in bytes)
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
// Parent slice to use for systemd. TODO: remove in favor of Parent.
Slice string `json:"slice"`
// Whether to disable OOM Killer
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness int64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid string `json:"net_cls_classid"`
}
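
To show how a few of these fields fit together, here is a hypothetical configs.Cgroup literal; only the field names come from the struct above, the values are made up for illustration.

package main

import (
    "encoding/json"
    "fmt"

    "github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
    // Illustrative values only.
    cg := &configs.Cgroup{
        Name:             "example",
        Parent:           "docker",
        AllowAllDevices:  false,
        Memory:           512 * 1024 * 1024, // 512M hard limit
        MemorySwap:       -1,                // -1 disables the memory+swap limit
        MemorySwappiness: -1,                // -1 leaves the kernel default untouched
        CpuShares:        1024,
        CpusetCpus:       "0-1",
    }
    out, _ := json.MarshalIndent(cg, "", "  ")
    fmt.Println(string(out))
}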

View File

@ -0,0 +1,146 @@
package configs
type Rlimit struct {
Type int `json:"type"`
Hard uint64 `json:"hard"`
Soft uint64 `json:"soft"`
}
// IDMap represents UID/GID Mappings for User Namespaces.
type IDMap struct {
ContainerID int `json:"container_id"`
HostID int `json:"host_id"`
Size int `json:"size"`
}
type Seccomp struct {
Syscalls []*Syscall `json:"syscalls"`
}
type Action int
const (
Kill Action = iota - 3
Trap
Allow
)
type Operator int
const (
EqualTo Operator = iota
NotEqualTo
GreatherThan
LessThan
MaskEqualTo
)
type Arg struct {
Index int `json:"index"`
Value uint32 `json:"value"`
Op Operator `json:"op"`
}
type Syscall struct {
Value int `json:"value"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific.
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in a ramdisk.
NoPivotRoot bool `json:"no_pivot_root"`
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writable.
// This is required when using read-only root filesystems. In these cases, a read/writable path can be (bind) mounted somewhere inside the root filesystem to act as the pivot.
PivotDir string `json:"pivot_dir"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
// Readonlyfs will remount the container's rootfs as read-only, where only externally mounted
// bind mounts are writable.
Readonlyfs bool `json:"readonlyfs"`
// Privatefs will mount the container's rootfs as private so that mount points from the parent will not propagate
Privatefs bool `json:"privatefs"`
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
Mounts []*Mount `json:"mounts"`
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
Devices []*Device `json:"devices"`
MountLabel string `json:"mount_label"`
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname"`
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container.
// All capabilities not specified will be dropped from the process's capability mask.
Capabilities []string `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
// Routes can be specified to create entries in the route table as the container is started
Routes []*Route `json:"routes"`
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *Cgroup `json:"cgroups"`
// AppArmorProfile specifies the profile to apply to the process running in the container and is
// changed at the time the process is exec'd.
AppArmorProfile string `json:"apparmor_profile"`
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
ProcessLabel string `json:"process_label"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
// GidMappings is an array of Group ID mappings for User Namespaces
GidMappings []IDMap `json:"gid_mappings"`
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
// mount pointing to /dev/null as to prevent reads of the file.
MaskPaths []string `json:"mask_paths"`
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
// so that these files prevent any writes.
ReadonlyPaths []string `json:"readonly_paths"`
// Sysctl is a map of properties and their values. It is the equivalent of using
// sysctl -w my.property.name value in Linux.
Sysctl map[string]string `json:"sysctl"`
// Seccomp allows actions to be taken whenever a syscall is made within the container.
// By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno
// can be specified on a per syscall basis.
Seccomp *Seccomp `json:"seccomp"`
}
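As a quick aside (not part of the diff), the seccomp types above compose like this; the syscall number below is an assumption chosen only for illustration:

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
	// Hypothetical rule: kill the process when it makes the assumed syscall
	// number 165 with its first argument equal to 0; all other syscalls stay
	// allowed, matching the default described in the Config docs above.
	policy := &configs.Seccomp{
		Syscalls: []*configs.Syscall{
			{
				Value:  165, // assumed syscall number, purely for illustration
				Action: configs.Kill,
				Args:   []*configs.Arg{{Index: 0, Value: 0, Op: configs.EqualTo}},
			},
		},
	}
	fmt.Printf("%d seccomp rule(s) configured\n", len(policy.Syscalls))
}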

View File

@ -0,0 +1,154 @@
package configs
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"testing"
)
// Checks whether the expected capability is specified in the capabilities.
func contains(expected string, values []string) bool {
for _, v := range values {
if v == expected {
return true
}
}
return false
}
func containsDevice(expected *Device, values []*Device) bool {
for _, d := range values {
if d.Path == expected.Path &&
d.Permissions == expected.Permissions &&
d.FileMode == expected.FileMode &&
d.Major == expected.Major &&
d.Minor == expected.Minor &&
d.Type == expected.Type {
return true
}
}
return false
}
func loadConfig(name string) (*Config, error) {
f, err := os.Open(filepath.Join("../sample_configs", name))
if err != nil {
return nil, err
}
defer f.Close()
var container *Config
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
// Check that a config doesn't contain extra fields
var configMap, abstractMap map[string]interface{}
if _, err := f.Seek(0, 0); err != nil {
return nil, err
}
if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
return nil, err
}
configData, err := json.Marshal(&container)
if err != nil {
return nil, err
}
if err := json.Unmarshal(configData, &configMap); err != nil {
return nil, err
}
for k := range configMap {
delete(abstractMap, k)
}
if len(abstractMap) != 0 {
return nil, fmt.Errorf("unknown fields: %s", abstractMap)
}
return container, nil
}
func TestRemoveNamespace(t *testing.T) {
ns := Namespaces{
{Type: NEWNET},
}
if !ns.Remove(NEWNET) {
t.Fatal("NEWNET was not removed")
}
if len(ns) != 0 {
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
}
}
func TestHostUIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostUID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected uid 0 with no USERNS but received %d", uid)
}
}
func TestHostUIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
UidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostUID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected uid 1000 with no USERNS but received %d", uid)
}
}
func TestHostGIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostGID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected gid 0 with no USERNS but received %d", uid)
}
}
func TestHostGIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
GidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostGID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected gid 1000 with no USERNS but received %d", uid)
}
}

View File

@ -0,0 +1,51 @@
// +build freebsd linux
package configs
import "fmt"
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// Gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root gid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View File

@ -0,0 +1,54 @@
package configs
import (
"fmt"
"os"
)
const (
Wildcard = -1
)
// TODO Windows: This can be factored out in the future
type Device struct {
// Device type, block, char, etc.
Type rune `json:"type"`
// Path to the device.
Path string `json:"path"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// Cgroup permissions format, rwm.
Permissions string `json:"permissions"`
// FileMode permission bits for the device.
FileMode os.FileMode `json:"file_mode"`
// Uid of the device.
Uid uint32 `json:"uid"`
// Gid of the device.
Gid uint32 `json:"gid"`
}
func (d *Device) CgroupString() string {
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
}
func (d *Device) Mkdev() int {
return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
}
// deviceNumberString converts the device number to its string representation.
func deviceNumberString(number int64) string {
if number == Wildcard {
return "*"
}
return fmt.Sprint(number)
}
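A small usage sketch of the Device helpers (the values mirror the /dev/null entry in the defaults that follow):

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
	// /dev/null: char device 1:3 with rwm cgroup permissions.
	null := &configs.Device{
		Type:        'c',
		Path:        "/dev/null",
		Major:       1,
		Minor:       3,
		Permissions: "rwm",
		FileMode:    0666,
	}
	fmt.Println(null.CgroupString()) // "c 1:3 rwm" - the devices cgroup allow/deny format
	fmt.Println(null.Mkdev())        // packed device number, as used when creating the node
}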

View File

@ -0,0 +1,139 @@
// +build linux freebsd
package configs
var (
// These are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
Major: 1,
Minor: 7,
Permissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
Major: 5,
Minor: 0,
Permissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
Major: 1,
Minor: 9,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
Major: 1,
Minor: 8,
Permissions: "rwm",
FileMode: 0666,
},
}
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Type: 'b',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
Major: 5,
Minor: 1,
Permissions: "rwm",
},
{
Path: "/dev/tty0",
Type: 'c',
Major: 4,
Minor: 0,
Permissions: "rwm",
},
{
Path: "/dev/tty1",
Type: 'c',
Major: 4,
Minor: 1,
Permissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
Major: 136,
Minor: Wildcard,
Permissions: "rwm",
},
{
Path: "",
Type: 'c',
Major: 5,
Minor: 2,
Permissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
Major: 10,
Minor: 200,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow Java to work, because Java
// insists on there being a /dev/fuse.
// https://github.com/docker/docker/issues/514
// https://github.com/docker/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
Major: 10,
Minor: 229,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
)

View File

@ -0,0 +1,9 @@
package configs
type HugepageLimit struct {
// which type of hugepage to limit.
Pagesize string `json:"page_size"`
// usage limit for hugepage.
Limit int `json:"limit"`
}

View File

@ -0,0 +1,14 @@
package configs
import (
"fmt"
)
type IfPrioMap struct {
Interface string `json:"interface"`
Priority int64 `json:"priority"`
}
func (i *IfPrioMap) CgroupString() string {
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
}

View File

@ -0,0 +1,34 @@
package configs
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
// Destination path for the mount inside the container.
Destination string `json:"destination"`
// Device the mount is for.
Device string `json:"device"`
// Mount flags.
Flags int `json:"flags"`
// Mount data applied to the mount.
Data string `json:"data"`
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`
// Optional Command to be run after Source is mounted.
PostmountCmds []Command `json:"postmount_cmds"`
}
type Command struct {
Path string `json:"path"`
Args []string `json:"args"`
Env []string `json:"env"`
Dir string `json:"dir"`
}

View File

@ -0,0 +1,5 @@
package configs
type NamespaceType string
type Namespaces []Namespace

View File

@ -0,0 +1,31 @@
// +build linux
package configs
import "syscall"
func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type]
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: syscall.CLONE_NEWNET,
NEWNS: syscall.CLONE_NEWNS,
NEWUSER: syscall.CLONE_NEWUSER,
NEWIPC: syscall.CLONE_NEWIPC,
NEWUTS: syscall.CLONE_NEWUTS,
NEWPID: syscall.CLONE_NEWPID,
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
var flag int
for _, v := range *n {
if v.Path != "" {
continue
}
flag |= namespaceInfo[v.Type]
}
return uintptr(flag)
}
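A sketch of how CloneFlags behaves (linux-only, per the build tag above): namespaces with an explicit Path are joined via setns later, so they contribute no clone flag.

package main

import (
	"fmt"
	"syscall"

	"github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
	ns := configs.Namespaces{
		{Type: configs.NEWNET},                        // new network namespace -> CLONE_NEWNET
		{Type: configs.NEWNS, Path: "/proc/1/ns/mnt"}, // joined via setns, so no clone flag
	}
	flags := ns.CloneFlags()
	fmt.Println(flags&syscall.CLONE_NEWNET != 0) // true
	fmt.Println(flags&syscall.CLONE_NEWNS != 0)  // false: a Path means join, not create
}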

View File

@ -0,0 +1,15 @@
// +build !linux,!windows
package configs
func (n *Namespace) Syscall() int {
panic("No namespace syscall support")
return 0
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
return uintptr(0)
}

View File

@ -0,0 +1,89 @@
// +build linux freebsd
package configs
import "fmt"
const (
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
)
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Type NamespaceType `json:"type"`
Path string `json:"path"`
}
func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
}
func (n *Namespace) file() string {
file := ""
switch n.Type {
case NEWNET:
file = "net"
case NEWNS:
file = "mnt"
case NEWPID:
file = "pid"
case NEWIPC:
file = "ipc"
case NEWUSER:
file = "user"
case NEWUTS:
file = "uts"
}
return file
}
func (n *Namespaces) Remove(t NamespaceType) bool {
i := n.index(t)
if i == -1 {
return false
}
*n = append((*n)[:i], (*n)[i+1:]...)
return true
}
func (n *Namespaces) Add(t NamespaceType, path string) {
i := n.index(t)
if i == -1 {
*n = append(*n, Namespace{Type: t, Path: path})
return
}
(*n)[i].Path = path
}
func (n *Namespaces) index(t NamespaceType) int {
for i, ns := range *n {
if ns.Type == t {
return i
}
}
return -1
}
func (n *Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1
}
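For reference, a short sketch of GetPath (the pid is made up): the explicit Path wins when set, otherwise the /proc entry for the given pid is used.

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
	n := &configs.Namespace{Type: configs.NEWNET}
	fmt.Println(n.GetPath(1234)) // /proc/1234/ns/net

	withPath := &configs.Namespace{Type: configs.NEWNET, Path: "/var/run/netns/demo"}
	fmt.Println(withPath.GetPath(1234)) // /var/run/netns/demo
}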

View File

@ -0,0 +1,6 @@
package configs
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
}

View File

@ -0,0 +1,72 @@
package configs
// Network defines configuration for a container's networking stack
//
// The network configuration can be omitted from a container causing the
// container to be setup with the host's networking stack
type Network struct {
// Type sets the network's type, commonly veth and loopback
Type string `json:"type"`
// Name of the network interface
Name string `json:"name"`
// The bridge to use.
Bridge string `json:"bridge"`
// MacAddress contains the MAC address to set on the network interface
MacAddress string `json:"mac_address"`
// Address contains the IPv4 and mask to set on the network interface
Address string `json:"address"`
// Gateway sets the gateway address that is used as the default for the interface
Gateway string `json:"gateway"`
// IPv6Address contains the IPv6 and mask to set on the network interface
IPv6Address string `json:"ipv6_address"`
// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
IPv6Gateway string `json:"ipv6_gateway"`
// Mtu sets the mtu value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
Mtu int `json:"mtu"`
// TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
TxQueueLen int `json:"txqueuelen"`
// HostInterfaceName is a unique name for the host-side interface of the veth pair created for
// the container.
HostInterfaceName string `json:"host_interface_name"`
// HairpinMode specifies if hairpin NAT should be enabled on the virtual interface
// bridge port in the case of type veth
// Note: This is unsupported on some systems.
// Note: This does not apply to loopback interfaces.
HairpinMode bool `json:"hairpin_mode"`
}
// Routes can be specified to create entries in the route table as the container is started
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their
// IP family default for the route table. For IPv4 for example, setting the
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0 (or *) when viewed in the route table.
type Route struct {
// Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination"`
// Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source"`
// Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway"`
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name"`
}
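A hedged example (all values made up) of a veth network and a default route expressed with these types:

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/configs"
)

func main() {
	net := &configs.Network{
		Type:              "veth",
		Name:              "eth0",
		Bridge:            "docker0",
		Address:           "172.17.0.2/16",
		Gateway:           "172.17.42.1",
		Mtu:               1500,
		HostInterfaceName: "veth1234",
	}
	route := &configs.Route{
		Destination:   "0.0.0.0/0", // default route, per the doc comment above
		Gateway:       "172.17.42.1",
		InterfaceName: "eth0",
	}
	fmt.Println(net.Name, route.Gateway)
}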

View File

@ -0,0 +1,93 @@
package validate
import (
"fmt"
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Validator interface {
Validate(*configs.Config) error
}
func New() Validator {
return &ConfigValidator{}
}
type ConfigValidator struct {
}
func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.rootfs(config); err != nil {
return err
}
if err := v.network(config); err != nil {
return err
}
if err := v.hostname(config); err != nil {
return err
}
if err := v.security(config); err != nil {
return err
}
if err := v.usernamespace(config); err != nil {
return err
}
return nil
}
// rootfs validates that the rootfs is an absolute path and is not a symlink
// to the container's root filesystem.
func (v *ConfigValidator) rootfs(config *configs.Config) error {
cleaned, err := filepath.Abs(config.Rootfs)
if err != nil {
return err
}
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
return err
}
if config.Rootfs != cleaned {
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
}
return nil
}
func (v *ConfigValidator) network(config *configs.Config) error {
if !config.Namespaces.Contains(configs.NEWNET) {
if len(config.Networks) > 0 || len(config.Routes) > 0 {
return fmt.Errorf("unable to apply network settings without a private NET namespace")
}
}
return nil
}
func (v *ConfigValidator) hostname(config *configs.Config) error {
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
return fmt.Errorf("unable to set hostname without a private UTS namespace")
}
return nil
}
func (v *ConfigValidator) security(config *configs.Config) error {
// restrict sys without mount namespace
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
return nil
}
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
if config.Namespaces.Contains(configs.NEWUSER) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
}
} else {
if config.UidMappings != nil || config.GidMappings != nil {
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
}
}
return nil
}
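Usage is a one-liner; a sketch follows (assuming this package is vendored at libcontainer/configs/validate, as upstream). The config is deliberately invalid: it sets a hostname without a NEWUTS namespace, so Validate should return the hostname error above.

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/configs/validate"
)

func main() {
	cfg := &configs.Config{
		Rootfs:   "/",
		Hostname: "demo",
		// no NEWUTS namespace, so the hostname check rejects this config
	}
	if err := validate.New().Validate(cfg); err != nil {
		fmt.Println("invalid config:", err)
	}
}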

View File

@ -0,0 +1,25 @@
## nsenter
The `nsenter` package registers a special init constructor that is called before
the Go runtime has a chance to boot. This provides us the ability to `setns` on
existing namespaces and avoid the issues that the Go runtime has with multiple
threads. This constructor is called whenever the package is imported into a Go
application.
The `nsenter` package imports `"C"` and therefore uses the [cgo](https://golang.org/cmd/cgo/)
package. In cgo, if the import of "C" is immediately preceded by a comment, that comment,
called the preamble, is used as a header when compiling the C parts of the package.
So every time package `nsenter` is imported, the C function `nsexec()` is called
before the Go runtime starts. Package `nsenter` is currently only imported by the Docker
execdriver, so that C code runs before every `execdriver.Exec()` call.
`nsexec()` first checks the environment variable `_LIBCONTAINER_INITPID`, which
gives the pid of the container process whose namespaces should be joined. The namespace
file descriptors are found under `/proc/[pid]/ns` and entered with the `setns` syscall.
It then reads the pipe file descriptor number from `_LIBCONTAINER_INITPIPE`, through which
error messages can be transferred. If a tty is requested, `_LIBCONTAINER_CONSOLE_PATH` is
set and a console is opened for output.
Finally, `nsexec()` clones a child process, exits the parent process, and lets the Go
runtime take over in the child.
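Putting this together, a caller does roughly the following (a Go sketch, not part of the package: `targetPid` is a placeholder, and a real caller exits early in the re-exec'd child, as the `init()` in `nsenter_test.go` below does):

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"os/exec"

	_ "github.com/opencontainers/runc/libcontainer/nsenter" // registers the C constructor
)

func main() {
	targetPid := 1234 // placeholder: pid of the container's init process
	r, w, err := os.Pipe()
	if err != nil {
		log.Fatal(err)
	}
	// Re-exec ourselves; nsexec() runs in the child before its Go runtime starts.
	cmd := exec.Command("/proc/self/exe", "child")
	cmd.ExtraFiles = []*os.File{w} // becomes fd 3 in the child
	cmd.Env = append(os.Environ(),
		fmt.Sprintf("_LIBCONTAINER_INITPID=%d", targetPid),
		"_LIBCONTAINER_INITPIPE=3",
	)
	if err := cmd.Start(); err != nil {
		log.Fatal(err)
	}
	w.Close()
	// nsexec() writes `{ "pid" : <n> }` to the pipe; that pid is the grandchild
	// that now lives inside the target namespaces.
	var result struct{ Pid int }
	if err := json.NewDecoder(r).Decode(&result); err != nil {
		log.Fatal(err)
	}
	log.Printf("entered namespaces of %d, child pid %d", targetPid, result.Pid)
	cmd.Wait()
}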

View File

@ -0,0 +1,12 @@
// +build linux,!gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
extern void nsexec();
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"

View File

@ -0,0 +1,25 @@
// +build linux,gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
extern void nsexec();
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"
// AlwaysFalse is here to stay false
// (and be exported so the compiler doesn't optimize out its reference)
var AlwaysFalse bool
func init() {
if AlwaysFalse {
// by referencing this C init() in a noop test, it will ensure the compiler
// links in the C function.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
C.init()
}
}

View File

@ -0,0 +1,91 @@
package nsenter
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"strings"
"testing"
)
type pid struct {
Pid int `json:"Pid"`
}
func TestNsenterAlivePid(t *testing.T) {
args := []string{"nsenter-exec"}
r, w, err := os.Pipe()
if err != nil {
t.Fatalf("failed to create pipe %v", err)
}
cmd := &exec.Cmd{
Path: os.Args[0],
Args: args,
ExtraFiles: []*os.File{w},
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid()), "_LIBCONTAINER_INITPIPE=3"},
}
if err := cmd.Start(); err != nil {
t.Fatalf("nsenter failed to start %v", err)
}
w.Close()
decoder := json.NewDecoder(r)
var pid *pid
if err := decoder.Decode(&pid); err != nil {
t.Fatalf("%v", err)
}
if err := cmd.Wait(); err != nil {
t.Fatalf("nsenter exits with a non-zero exit status")
}
p, err := os.FindProcess(pid.Pid)
if err != nil {
t.Fatalf("%v", err)
}
p.Wait()
}
func TestNsenterInvalidPid(t *testing.T) {
args := []string{"nsenter-exec"}
cmd := &exec.Cmd{
Path: os.Args[0],
Args: args,
Env: []string{"_LIBCONTAINER_INITPID=-1"},
}
err := cmd.Run()
if err == nil {
t.Fatal("nsenter exits with a zero exit status")
}
}
func TestNsenterDeadPid(t *testing.T) {
dead_cmd := exec.Command("true")
if err := dead_cmd.Run(); err != nil {
t.Fatal(err)
}
args := []string{"nsenter-exec"}
cmd := &exec.Cmd{
Path: os.Args[0],
Args: args,
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)},
}
err := cmd.Run()
if err == nil {
t.Fatal("nsenter exits with a zero exit status")
}
}
func init() {
if strings.HasPrefix(os.Args[0], "nsenter-") {
os.Exit(0)
}
return
}

View File

@ -0,0 +1,5 @@
// +build !linux !cgo
package nsenter
import "C"

View File

@ -0,0 +1,190 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <linux/limits.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <signal.h>
#include <setjmp.h>
#include <sched.h>
#include <signal.h>
/* All arguments should be above stack, because it grows down */
struct clone_arg {
/*
* Reserve some space for clone() to locate arguments
* and retcode in this place
*/
char stack[4096] __attribute__ ((aligned(8)));
char stack_ptr[0];
jmp_buf *env;
};
#define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)
static int child_func(void *_arg)
{
struct clone_arg *arg = (struct clone_arg *)_arg;
longjmp(*arg->env, 1);
}
// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12)
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE
#include "syscall.h"
#if defined(__NR_setns) && !defined(SYS_setns)
#define SYS_setns __NR_setns
#endif
#ifdef SYS_setns
int setns(int fd, int nstype)
{
return syscall(SYS_setns, fd, nstype);
}
#endif
#endif
static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
static int clone_parent(jmp_buf * env)
{
struct clone_arg ca;
int child;
ca.env = env;
child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
return child;
}
void nsexec()
{
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
const int num = sizeof(namespaces) / sizeof(char *);
jmp_buf env;
char buf[PATH_MAX], *val;
int i, tfd, child, len, pipenum, consolefd = -1;
pid_t pid;
char *console;
val = getenv("_LIBCONTAINER_INITPID");
if (val == NULL)
return;
pid = atoi(val);
snprintf(buf, sizeof(buf), "%d", pid);
if (strcmp(val, buf)) {
pr_perror("Unable to parse _LIBCONTAINER_INITPID");
exit(1);
}
val = getenv("_LIBCONTAINER_INITPIPE");
if (val == NULL) {
pr_perror("Child pipe not found");
exit(1);
}
pipenum = atoi(val);
snprintf(buf, sizeof(buf), "%d", pipenum);
if (strcmp(val, buf)) {
pr_perror("Unable to parse _LIBCONTAINER_INITPIPE");
exit(1);
}
console = getenv("_LIBCONTAINER_CONSOLE_PATH");
if (console != NULL) {
consolefd = open(console, O_RDWR);
if (consolefd < 0) {
pr_perror("Failed to open console %s", console);
exit(1);
}
}
/* Check that the specified process exists */
snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid);
tfd = open(buf, O_DIRECTORY | O_RDONLY);
if (tfd == -1) {
pr_perror("Failed to open \"%s\"", buf);
exit(1);
}
for (i = 0; i < num; i++) {
struct stat st;
int fd;
/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) {
// Ignore nonexistent namespaces.
if (errno == ENOENT)
continue;
}
fd = openat(tfd, namespaces[i], O_RDONLY);
if (fd == -1) {
pr_perror("Failed to open ns file %s for ns %s", buf,
namespaces[i]);
exit(1);
}
// Set the namespace.
if (setns(fd, 0) == -1) {
pr_perror("Failed to setns for %s", namespaces[i]);
exit(1);
}
close(fd);
}
if (setjmp(env) == 1) {
// Child
if (setsid() == -1) {
pr_perror("setsid failed");
exit(1);
}
if (consolefd != -1) {
if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
pr_perror("ioctl TIOCSCTTY failed");
exit(1);
}
if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) {
pr_perror("Failed to dup 0");
exit(1);
}
if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) {
pr_perror("Failed to dup 1");
exit(1);
}
if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) {
pr_perror("Failed to dup 2");
exit(1);
}
}
// Finish executing, let the Go runtime take over.
return;
}
// Parent
// We must fork to actually enter the PID namespace, use CLONE_PARENT
// so the child can have the right parent, and we don't need to forward
// the child's exit code or resend its death signal.
child = clone_parent(&env);
if (child < 0) {
pr_perror("Unable to fork");
exit(1);
}
len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child);
if (write(pipenum, buf, len) != len) {
pr_perror("Unable to send a child pid");
kill(child, SIGKILL);
exit(1);
}
exit(0);
}

View File

@ -0,0 +1,77 @@
// +build linux
package system
import (
"os/exec"
"syscall"
"unsafe"
)
type ParentDeathSignal int
func (p ParentDeathSignal) Restore() error {
if p == 0 {
return nil
}
current, err := GetParentDeathSignal()
if err != nil {
return err
}
if p == current {
return nil
}
return p.Set()
}
func (p ParentDeathSignal) Set() error {
return SetParentDeathSignal(uintptr(p))
}
func Execv(cmd string, args []string, env []string) error {
name, err := exec.LookPath(cmd)
if err != nil {
return err
}
return syscall.Exec(name, args, env)
}
func SetParentDeathSignal(sig uintptr) error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
return err
}
return nil
}
func GetParentDeathSignal() (ParentDeathSignal, error) {
var sig int
_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
if err != 0 {
return -1, err
}
return ParentDeathSignal(sig), nil
}
func SetKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
return err
}
return nil
}
func ClearKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
return err
}
return nil
}
func Setctty() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
return err
}
return nil
}

View File

@ -0,0 +1,27 @@
package system
import (
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
// look in /proc to find the process start time so that we can verify
// that this pid has started after we did
func GetProcessStartTime(pid int) (string, error) {
data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
if err != nil {
return "", err
}
parts := strings.Split(string(data), " ")
// the starttime is located at pos 22
// from the man page
//
// starttime %llu (was %lu before Linux 2.6)
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
// (divide by sysconf(_SC_CLK_TCK)).
return parts[22-1], nil // starts at 1
}
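A sketch of the intended use: record a start time once and compare it later so a recycled pid is not mistaken for the original process.

package main

import (
	"fmt"
	"os"

	"github.com/opencontainers/runc/libcontainer/system"
)

func main() {
	// Field 22 of /proc/<pid>/stat, in clock ticks since boot.
	started, err := system.GetProcessStartTime(os.Getpid())
	if err != nil {
		panic(err)
	}
	fmt.Println("started at", started, "clock ticks after boot")
	// A supervisor would store this string and later compare it with the start
	// time read again for the same pid to detect pid reuse.
}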

View File

@ -0,0 +1,40 @@
package system
import (
"fmt"
"runtime"
"syscall"
)
// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
//
// We need different setns syscall numbers for the different platforms and architectures.
// We declare them here because the setns syscall is not exposed in the stdlib.
var setNsMap = map[string]uintptr{
"linux/386": 346,
"linux/arm64": 268,
"linux/amd64": 308,
"linux/arm": 375,
"linux/ppc": 350,
"linux/ppc64": 350,
"linux/ppc64le": 350,
"linux/s390x": 339,
}
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
func SysSetns() uint32 {
return uint32(sysSetns)
}
func Setns(fd uintptr, flags uintptr) error {
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
if !exists {
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
}
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
if err != 0 {
return err
}
return nil
}

View File

@ -0,0 +1,25 @@
// +build linux,386
package system
import (
"syscall"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
return
}
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}
return
}

View File

@ -0,0 +1,25 @@
// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x
package system
import (
"syscall"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
return
}
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}
return
}

View File

@ -0,0 +1,25 @@
// +build linux,arm
package system
import (
"syscall"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
return
}
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}
return
}

View File

@ -0,0 +1,12 @@
// +build cgo,linux cgo,freebsd
package system
/*
#include <unistd.h>
*/
import "C"
func GetClockTicks() int {
return int(C.sysconf(C._SC_CLK_TCK))
}

View File

@ -0,0 +1,15 @@
// +build !cgo windows
package system
func GetClockTicks() int {
// TODO figure out a better alternative for platforms where we're missing cgo
//
// TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency().
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx
//
// An example of its usage can be found here.
// https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx
return 100
}

View File

@ -0,0 +1,99 @@
package system
import (
"syscall"
"unsafe"
)
var _zero uintptr
// Llistxattr returns the size of the xattr list for path, filling dest if a
// non-nil, allocated []byte is passed as the last argument.
func Llistxattr(path string, dest []byte) (size int, err error) {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return -1, err
}
var newpathBytes unsafe.Pointer
if len(dest) > 0 {
newpathBytes = unsafe.Pointer(&dest[0])
} else {
newpathBytes = unsafe.Pointer(&_zero)
}
_size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0)
size = int(_size)
if errno != 0 {
return -1, errno
}
return size, nil
}
// Returns a []byte slice if the xattr is set and nil otherwise
// Requires path and its attribute as arguments
func Lgetxattr(path string, attr string) ([]byte, error) {
var sz int
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return nil, err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return nil, err
}
// Start with a 128 length byte array
sz = 128
dest := make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
switch {
case errno == syscall.ENODATA:
return nil, errno
case errno == syscall.ENOTSUP:
return nil, errno
case errno == syscall.ERANGE:
// The 128-byte buffer might not be big enough;
// a nil buffer (size 0) is passed to get the real size
// of the xattr on disk.
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
sz = int(_sz)
if sz < 0 {
return nil, errno
}
dest = make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
if errno != 0 {
return nil, errno
}
case errno != 0:
return nil, errno
}
sz = int(_sz)
return dest[:sz], nil
}
func Lsetxattr(path string, attr string, data []byte, flags int) error {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return err
}
var dataBytes unsafe.Pointer
if len(data) > 0 {
dataBytes = unsafe.Pointer(&data[0])
} else {
dataBytes = unsafe.Pointer(&_zero)
}
_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
if errno != 0 {
return errno
}
return nil
}
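A short usage sketch (the file and attribute name are placeholders; the file must exist and xattr support depends on the filesystem):

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/system"
)

func main() {
	path := "/tmp/example" // placeholder: an existing file
	attr := "user.demo"    // placeholder xattr name
	if err := system.Lsetxattr(path, attr, []byte("hello"), 0); err != nil {
		fmt.Println("set failed:", err) // e.g. ENOTSUP on filesystems without xattrs
		return
	}
	val, err := system.Lgetxattr(path, attr)
	if err != nil {
		fmt.Println("get failed:", err)
		return
	}
	fmt.Printf("%s=%s\n", attr, val)
}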

View File

@ -2,6 +2,7 @@ package dockerlaunch
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
@ -17,10 +18,15 @@ import (
"github.com/rancher/netconf"
)
const defaultPrefix = "/usr"
const (
defaultPrefix = "/usr"
iptables = "/sbin/iptables"
modprobe = "/sbin/modprobe"
systemdRoot = "/sys/fs/cgroup/systemd/user.slice"
)
var (
mounts [][]string = [][]string{
mounts = [][]string{
{"devtmpfs", "/dev", "devtmpfs", ""},
{"none", "/dev/pts", "devpts", ""},
{"none", "/proc", "proc", ""},
@ -28,6 +34,9 @@ var (
{"none", "/sys", "sysfs", ""},
{"none", "/sys/fs/cgroup", "tmpfs", ""},
}
systemdMounts = [][]string{
{"systemd", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd"},
}
)
type Config struct {
@ -40,6 +49,9 @@ type Config struct {
CgroupHierarchy map[string]string
LogFile string
NoLog bool
EmulateSystemd bool
NoFiles uint64
Environment []string
}
func createMounts(mounts ...[]string) error {
@ -162,16 +174,22 @@ func execDocker(config *Config, docker, cmd string, args []string) (*exec.Cmd, e
}
log.Debugf("Launching Docker %s %s %v", docker, cmd, args)
env := os.Environ()
if len(config.Environment) != 0 {
env = append(env, config.Environment...)
}
if config.Fork {
cmd := exec.Command(docker, args...)
if !config.NoLog {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
}
cmd.Env = env
err := cmd.Start()
return cmd, err
} else {
err := syscall.Exec(docker, append([]string{cmd}, args...), os.Environ())
err := syscall.Exec(docker, append([]string{cmd}, args...), env)
return nil, err
}
}
@ -421,18 +439,51 @@ func prepare(config *Config, docker string, args ...string) error {
return err
}
if err := setupBin(config, docker); err != nil {
for _, i := range []string{docker, iptables, modprobe} {
if err := setupBin(config, i); err != nil {
return err
}
}
if err := setUlimit(config); err != nil {
return err
}
if err := setupSystemd(config); err != nil {
return err
}
return nil
}
func setupBin(config *Config, docker string) error {
if _, err := os.Stat(docker); os.IsNotExist(err) {
dist := docker + ".dist"
func setupSystemd(config *Config) error {
if !config.EmulateSystemd {
cgroups, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", os.Getpid()))
if err != nil {
return err
}
if !strings.Contains(string(cgroups), "name=systemd") {
return nil
}
}
if err := createMounts(systemdMounts...); err != nil {
return err
}
if err := os.Mkdir(systemdRoot, 0755); err != nil {
return err
}
return ioutil.WriteFile(path.Join(systemdRoot, "cgroup.procs"), []byte(strconv.Itoa(os.Getpid())+"\n"), 0644)
}
func setupBin(config *Config, bin string) error {
if _, err := os.Stat(bin); os.IsNotExist(err) {
dist := bin + ".dist"
if _, err := os.Stat(dist); err == nil {
return os.Symlink(dist, docker)
return os.Symlink(dist, bin)
}
}
@ -459,6 +510,20 @@ func setupLogging(config *Config) error {
return nil
}
func setUlimit(cfg *Config) error {
var rLimit syscall.Rlimit
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit); err != nil {
return err
}
if cfg.NoFiles == 0 {
rLimit.Max = 1000000
} else {
rLimit.Max = cfg.NoFiles
}
rLimit.Cur = rLimit.Max
return syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rLimit)
}
func runOrExec(config *Config, docker string, args ...string) (*exec.Cmd, error) {
if err := prepare(config, docker, args...); err != nil {
return nil, err
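The new fields added in this hunk are set on the launch config roughly like this (a sketch; only fields visible in the diff are used, and how the config reaches the launcher is outside this hunk):

package main

import (
	dockerlaunch "github.com/rancher/docker-from-scratch"
)

func main() {
	cfg := &dockerlaunch.Config{
		EmulateSystemd: true,    // mount the name=systemd cgroup hierarchy (setupSystemd above)
		NoFiles:        1000000, // RLIMIT_NOFILE max; 0 falls back to 1000000 in setUlimit
		Environment: []string{ // extra env appended before exec'ing docker
			"HTTP_PROXY=http://proxy.example.test:3128",
		},
	}
	_ = cfg // handed to the package's launch entry point elsewhere
}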

View File

@ -131,13 +131,19 @@ func tlsConfCreate(c *cli.Context) {
}
func generate(c *cli.Context) error {
generateServer := c.Bool("server")
outDir := c.String("dir")
hostnames := c.StringSlice("hostname")
return Generate(generateServer, outDir, hostnames)
}
func Generate(generateServer bool, outDir string, hostnames []string) error {
cfg, err := config.LoadConfig()
if err != nil {
return err
}
generateServer := c.Bool("server")
outDir := c.String("dir")
if outDir == "" {
return fmt.Errorf("out directory (-d, --dir) not specified")
}
@ -161,6 +167,5 @@ func generate(c *cli.Context) error {
return err
}
hostnames := c.StringSlice("hostname")
return writeCerts(generateServer, hostnames, cfg, certPath, keyPath, caCertPath, caKeyPath)
}

267
cmd/userdocker/main.go Normal file
View File

@ -0,0 +1,267 @@
package userdocker
import (
"bufio"
"encoding/json"
"fmt"
"os"
"os/exec"
"os/signal"
"strings"
"syscall"
"time"
log "github.com/Sirupsen/logrus"
"github.com/docker/libcompose/docker"
"github.com/docker/libcompose/project"
"github.com/rancherio/os/cmd/control"
"github.com/rancherio/os/compose"
"github.com/rancherio/os/config"
"github.com/opencontainers/runc/libcontainer/cgroups"
_ "github.com/opencontainers/runc/libcontainer/nsenter"
"github.com/opencontainers/runc/libcontainer/system"
)
const (
DEFAULT_STORAGE_CONTEXT = "console"
userDocker = "user-docker"
)
func Main() {
cfg, err := config.LoadConfig()
if err != nil {
log.Fatal(err)
}
if len(os.Args) == 1 {
if err := enter(cfg); err != nil {
log.Fatal(err)
}
} else {
if err := main(cfg); err != nil {
log.Fatal(err)
}
}
}
func enter(cfg *config.CloudConfig) error {
context := cfg.Rancher.UserDocker.StorageContext
if context == "" {
context = DEFAULT_STORAGE_CONTEXT
}
p, err := compose.GetProject(cfg)
if err != nil {
return err
}
pid, err := waitForPid(context, p)
if err != nil {
return err
}
log.Infof("%s PID %d", context, pid)
return runNsenter(pid)
}
type result struct {
Pid int `json:"Pid"`
}
func expand(first, second string, args []string) ([]string, error) {
var err error
prog := ""
prog, err = exec.LookPath(first)
if err != nil {
prog, err = exec.LookPath(second)
}
if err != nil {
return nil, err
}
return append([]string{prog}, args...), nil
}
func runNsenter(pid int) error {
args, err := expand(userDocker, "", []string{"main"})
if err != nil {
return err
}
r, w, err := os.Pipe()
if err != nil {
return err
}
cmd := &exec.Cmd{
Path: args[0],
Args: args,
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
ExtraFiles: []*os.File{w},
Env: append(os.Environ(),
"_LIBCONTAINER_INITPIPE=3",
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", pid),
),
}
if err := cmd.Start(); err != nil {
return err
}
w.Close()
var result result
if err := json.NewDecoder(r).Decode(&result); err != nil {
return err
}
if err := cmd.Wait(); err != nil {
return err
}
log.Infof("Docker PID %d", result.Pid)
p, err := os.FindProcess(result.Pid)
if err != nil {
return err
}
handleTerm(p)
if err := switchCgroup(result.Pid, pid); err != nil {
return err
}
_, err = p.Wait()
return err
}
func handleTerm(p *os.Process) {
term := make(chan os.Signal)
signal.Notify(term, syscall.SIGTERM)
go func() {
<-term
p.Signal(syscall.SIGTERM)
}()
}
func waitForPid(service string, project *project.Project) (int, error) {
for {
if pid, err := getPid(service, project); err != nil || pid == 0 {
log.Infof("Waiting for %s : %d : %v", service, pid, err)
time.Sleep(250 * time.Millisecond)
} else {
return pid, err
}
}
}
func getPid(service string, project *project.Project) (int, error) {
s, err := project.CreateService(service)
if err != nil {
return 0, err
}
containers, err := s.Containers()
if err != nil {
return 0, err
}
if len(containers) == 0 {
return 0, nil
}
client, err := docker.CreateClient(docker.ClientOpts{
Host: config.DOCKER_SYSTEM_HOST,
})
if err != nil {
return 0, err
}
id, err := containers[0].Id()
if err != nil {
return 0, err
}
info, err := client.InspectContainer(id)
if err != nil || info == nil {
return 0, err
}
if info.State.Running {
return info.State.Pid, nil
}
return 0, nil
}
func main(cfg *config.CloudConfig) error {
os.Unsetenv("_LIBCONTAINER_INITPIPE")
os.Unsetenv("_LIBCONTAINER_INITPID")
if err := system.ParentDeathSignal(syscall.SIGKILL).Set(); err != nil {
return err
}
if err := os.Remove("/var/run/docker.pid"); err != nil && !os.IsNotExist(err) {
return err
}
dockerCfg := cfg.Rancher.UserDocker
args := dockerCfg.FullArgs()
log.Debugf("User Docker args: %v", args)
if dockerCfg.TLS {
log.Debug("Generating TLS certs if needed")
if err := control.Generate(true, "/etc/docker/tls", []string{"localhost"}); err != nil {
return err
}
}
args, err := expand("docker-init", "docker", args)
if err != nil {
return err
}
log.Infof("Running %v", args)
err = syscall.Exec(args[0], args, dockerCfg.AppendEnv())
return err
}
func switchCgroup(src, target int) error {
cgroupFile := fmt.Sprintf("/proc/%d/cgroup", target)
f, err := os.Open(cgroupFile)
if err != nil {
return err
}
defer f.Close()
targetCgroups := map[string]string{}
s := bufio.NewScanner(f)
for s.Scan() {
text := s.Text()
parts := strings.Split(text, ":")
subparts := strings.Split(parts[1], "=")
subsystem := subparts[0]
if len(subparts) > 1 {
subsystem = subparts[1]
}
targetPath := fmt.Sprintf("/host/sys/fs/cgroup/%s%s", subsystem, parts[2])
log.Infof("Moving Docker to cgroup %s", targetPath)
targetCgroups[subsystem] = targetPath
}
if err := s.Err(); err != nil {
return err
}
return cgroups.EnterPid(targetCgroups, src)
}
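For reference, switchCgroup turns each line of the system Docker's /proc/<pid>/cgroup into a path under the host cgroup tree bind-mounted at /host/sys/fs/cgroup. A sketch mirroring that parsing for two representative lines:

package main

import (
	"fmt"
	"strings"
)

// mapLine mirrors the per-line parsing inside switchCgroup above.
func mapLine(line string) (subsystem, target string) {
	parts := strings.Split(line, ":")        // "4:cpuacct,cpu:/" -> ["4", "cpuacct,cpu", "/"]
	subparts := strings.Split(parts[1], "=") // "name=systemd" -> ["name", "systemd"]
	subsystem = subparts[0]
	if len(subparts) > 1 {
		subsystem = subparts[1]
	}
	return subsystem, fmt.Sprintf("/host/sys/fs/cgroup/%s%s", subsystem, parts[2])
}

func main() {
	for _, line := range []string{"4:cpuacct,cpu:/", "2:name=systemd:/user.slice"} {
		s, t := mapLine(line)
		fmt.Println(s, "->", t)
		// cpuacct,cpu -> /host/sys/fs/cgroup/cpuacct,cpu/
		// systemd -> /host/sys/fs/cgroup/systemd/user.slice
	}
}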

View File

@ -49,6 +49,10 @@ func RunServices(cfg *config.CloudConfig) error {
return p.Up()
}
func GetProject(cfg *config.CloudConfig) (*project.Project, error) {
return newCoreServiceProject(cfg)
}
func newProject(name string, cfg *config.CloudConfig) (*project.Project, error) {
clientFactory, err := rosDocker.NewClientFactory(docker.ClientOpts{})
if err != nil {

17
config/docker_config.go Normal file
View File

@ -0,0 +1,17 @@
package config
import "os"
func (d *DockerConfig) FullArgs() []string {
args := append(d.Args, d.ExtraArgs...)
if d.TLS {
args = append(args, d.TLSArgs...)
}
return args
}
func (d *DockerConfig) AppendEnv() []string {
return append(os.Environ(), d.Environment...)
}
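How the two helpers combine the YAML-driven fields (a sketch with made-up arguments):

package main

import (
	"fmt"

	"github.com/rancherio/os/config"
)

func main() {
	d := &config.DockerConfig{
		Args:        []string{"daemon", "-s", "overlay"},
		ExtraArgs:   []string{"--insecure-registry", "registry.example.test"},
		TLS:         true,
		TLSArgs:     []string{"--tlsverify"},
		Environment: []string{"HTTP_PROXY=http://proxy.example.test:3128"},
	}
	fmt.Println(d.FullArgs())       // Args + ExtraArgs + TLSArgs (because TLS is true)
	fmt.Println(len(d.AppendEnv())) // os.Environ() plus the Environment entries
}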

View File

@ -91,14 +91,16 @@ type UpgradeConfig struct {
}
type DockerConfig struct {
TLS bool `yaml:"tls,omitempty"`
TLSArgs []string `yaml:"tls_args,flow,omitempty"`
Args []string `yaml:"args,flow,omitempty"`
ExtraArgs []string `yaml:"extra_args,flow,omitempty"`
ServerCert string `yaml:"server_cert,omitempty"`
ServerKey string `yaml:"server_key,omitempty"`
CACert string `yaml:"ca_cert,omitempty"`
CAKey string `yaml:"ca_key,omitempty"`
TLS bool `yaml:"tls,omitempty"`
TLSArgs []string `yaml:"tls_args,flow,omitempty"`
Args []string `yaml:"args,flow,omitempty"`
ExtraArgs []string `yaml:"extra_args,flow,omitempty"`
ServerCert string `yaml:"server_cert,omitempty"`
ServerKey string `yaml:"server_key,omitempty"`
CACert string `yaml:"ca_cert,omitempty"`
CAKey string `yaml:"ca_key,omitempty"`
Environment []string `yaml:"environment,omitempty"`
StorageContext string `yaml:"storage_context,omitempty"`
}
type SshConfig struct {

View File

@ -139,6 +139,7 @@ func getLaunchConfig(cfg *config.CloudConfig, dockerCfg *config.DockerConfig) (*
launchConfig.DnsConfig.Nameservers = cfg.Rancher.Network.Dns.Nameservers
launchConfig.DnsConfig.Search = cfg.Rancher.Network.Dns.Search
launchConfig.Environment = dockerCfg.Environment
if !cfg.Rancher.Debug {
launchConfig.LogFile = config.SYSTEM_DOCKER_LOG

View File

@ -14,6 +14,7 @@ import (
"github.com/rancherio/os/cmd/respawn"
"github.com/rancherio/os/cmd/sysinit"
"github.com/rancherio/os/cmd/systemdocker"
"github.com/rancherio/os/cmd/userdocker"
"github.com/rancherio/os/cmd/wait"
"github.com/rancherio/os/config"
osInit "github.com/rancherio/os/init"
@ -41,6 +42,7 @@ func main() {
registerCmd("/init", osInit.MainInit)
registerCmd(config.SYSINIT_BIN, sysinit.Main)
registerCmd("/usr/bin/dockerlaunch", dockerlaunchMain.Main)
registerCmd("/usr/bin/user-docker", userdocker.Main)
registerCmd("/usr/bin/system-docker", systemdocker.Main)
registerCmd("/sbin/poweroff", power.PowerOff)
registerCmd("/sbin/reboot", power.Reboot)

View File

@ -122,19 +122,20 @@ rancher:
privileged: true
read_only: true
volumes:
- /usr/bin/docker:/usr/bin/docker.dist:ro
- /usr/bin/ros:/sbin/halt:ro
- /usr/bin/ros:/sbin/netconf:ro
- /usr/bin/ros:/sbin/poweroff:ro
- /usr/bin/ros:/sbin/reboot:ro
- /usr/bin/ros:/sbin/shutdown:ro
- /usr/bin/ros:/sbin/netconf:ro
- /usr/bin/ros:/usr/bin/cloud-init:ro
- /usr/bin/ros:/usr/bin/rancherctl:ro
- /usr/bin/ros:/usr/bin/ros:ro
- /usr/bin/ros:/usr/bin/respawn:ro
- /usr/bin/ros:/usr/bin/system-docker:ro
- /usr/bin/ros:/usr/sbin/wait-for-docker:ro
- /usr/bin/ros:/usr/bin/dockerlaunch:ro
- /usr/bin/docker:/usr/bin/docker.dist:ro
- /usr/bin/ros:/usr/bin/rancherctl:ro
- /usr/bin/ros:/usr/bin/respawn:ro
- /usr/bin/ros:/usr/bin/ros:ro
- /usr/bin/ros:/usr/bin/system-docker:ro
- /usr/bin/ros:/usr/bin/user-docker:ro
- /usr/bin/ros:/usr/sbin/wait-for-docker:ro
console:
image: rancher/os-console:v0.4.0-dev
labels:
@ -228,15 +229,16 @@ rancher:
read_only: true
volumes:
- /dev:/host/dev
- /usr/share/ros/os-config.yml:/usr/share/ros/os-config.yml
- /var/lib/rancher:/var/lib/rancher
- /var/lib/rancher/conf:/var/lib/rancher/conf
- /etc/docker:/etc/docker
- /etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt.rancher
- /lib/modules:/lib/modules
- /lib/firmware:/lib/firmware
- /lib/modules:/lib/modules
- /run:/run
- /var/run:/var/run
- /usr/share/ros/os-config.yml:/usr/share/ros/os-config.yml
- /var/lib/rancher/conf:/var/lib/rancher/conf
- /var/lib/rancher:/var/lib/rancher
- /var/log:/var/log
- /var/run:/var/run
udev-cold:
image: rancher/os-udev:v0.4.0-dev
labels:
@ -272,6 +274,20 @@ rancher:
volumes:
- /home:/home
- /opt:/opt
docker:
image: rancher/os-docker:v0.4.0-dev
labels:
io.rancher.os.scope: system
net: host
pid: host
ipc: host
uts: host
privileged: true
restart: always
volumes_from:
- all-volumes
volumes:
- /sys/fs/cgroup:/host/sys/fs/cgroup
system_docker:
args: [daemon, --log-opt, max-size=25m, --log-opt, max-file=2, -s, overlay, -b, docker-sys,
--fixed-cidr, 172.18.42.1/16, --restart=false, -g, /var/lib/system-docker, -G, root,