diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 17824ac6..c3a8c1b0 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -330,6 +330,26 @@ "ImportPath": "github.com/kless/term", "Rev": "d57d9d2d5be197e12d9dee142d855470d83ce62f" }, + { + "ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups", + "Comment": "v0.0.2-32-gb40c790", + "Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604" + }, + { + "ImportPath": "github.com/opencontainers/runc/libcontainer/configs", + "Comment": "v0.0.2-32-gb40c790", + "Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604" + }, + { + "ImportPath": "github.com/opencontainers/runc/libcontainer/nsenter", + "Comment": "v0.0.2-32-gb40c790", + "Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604" + }, + { + "ImportPath": "github.com/opencontainers/runc/libcontainer/system", + "Comment": "v0.0.2-32-gb40c790", + "Rev": "b40c7901845dcec5950ecb37cb9de178fc2c0604" + }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/user", "Comment": "v0.0.2-32-gb40c790", @@ -337,8 +357,8 @@ }, { "ImportPath": "github.com/rancher/docker-from-scratch", - "Comment": "1.8.1", - "Rev": "f348f946923bd2938a56f3bee21cf44431e05181" + "Comment": "1.8.1-2-gbcc17ea", + "Rev": "bcc17eae6f79e46cb65a74e4406cb3bcc24c38fb" }, { "ImportPath": "github.com/rancher/netconf", diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go new file mode 100644 index 00000000..a08e905c --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -0,0 +1,61 @@ +// +build linux + +package cgroups + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager interface { + // Apply cgroup configuration to the process with the specified pid + Apply(pid int) error + + // Returns the PIDs inside the cgroup set + GetPids() ([]int, error) + + // Returns statistics for the cgroup set + GetStats() (*Stats, error) + + // Toggles the freezer cgroup according with specified state + Freeze(state configs.FreezerState) error + + // Destroys the cgroup set + Destroy() error + + // NewCgroupManager() and LoadCgroupManager() require following attributes: + // Paths map[string]string + // Cgroups *cgroups.Cgroup + // Paths maps cgroup subsystem to path at which it is mounted. + // Cgroups specifies specific cgroup settings for the various subsystems + + // Returns cgroup paths to save in a state file and to be able to + // restore the object later. + GetPaths() map[string]string + + // Set the cgroup as configured. + Set(container *configs.Config) error +} + +type NotFoundError struct { + Subsystem string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) +} + +func NewNotFoundError(sub string) error { + return &NotFoundError{ + Subsystem: sub, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_test.go new file mode 100644 index 00000000..597834a0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_test.go @@ -0,0 +1,29 @@ +// +build linux + +package cgroups + +import ( + "bytes" + "testing" +) + +const ( + cgroupsContents = `11:hugetlb:/ +10:perf_event:/ +9:blkio:/ +8:net_cls:/ +7:freezer:/ +6:devices:/ +5:memory:/ +4:cpuacct,cpu:/ +3:cpuset:/ +2:name=systemd:/user.slice/user-1000.slice/session-16.scope` +) + +func TestParseCgroups(t *testing.T) { + r := bytes.NewBuffer([]byte(cgroupsContents)) + _, err := ParseCgroupFile("blkio", r) + if err != nil { + t.Fatal(err) + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go new file mode 100644 index 00000000..278d507e --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package cgroups diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go new file mode 100644 index 00000000..c8ea761d --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go @@ -0,0 +1,333 @@ +// +build linux + +package fs + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "sync" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + subsystems = map[string]subsystem{ + "devices": &DevicesGroup{}, + "memory": &MemoryGroup{}, + "cpu": &CpuGroup{}, + "cpuset": &CpusetGroup{}, + "cpuacct": &CpuacctGroup{}, + "blkio": &BlkioGroup{}, + "hugetlb": &HugetlbGroup{}, + "net_cls": &NetClsGroup{}, + "net_prio": &NetPrioGroup{}, + "perf_event": &PerfEventGroup{}, + "freezer": &FreezerGroup{}, + } + CgroupProcesses = "cgroup.procs" + HugePageSizes, _ = cgroups.GetHugePageSize() +) + +type subsystem interface { + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'data'. + Remove(*data) error + // Creates and joins the cgroup represented by data. + Apply(*data) error + // Set the cgroup represented by cgroup. + Set(path string, cgroup *configs.Cgroup) error +} + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +// The absolute path to the root of the cgroup hierarchies. +var cgroupRootLock sync.Mutex +var cgroupRoot string + +// Gets the cgroupRoot. +func getCgroupRoot() (string, error) { + cgroupRootLock.Lock() + defer cgroupRootLock.Unlock() + + if cgroupRoot != "" { + return cgroupRoot, nil + } + + root, err := cgroups.FindCgroupMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + cgroupRoot = root + return cgroupRoot, nil +} + +type data struct { + root string + cgroup string + c *configs.Cgroup + pid int +} + +func (m *Manager) Apply(pid int) error { + if m.Cgroups == nil { + return nil + } + + var c = m.Cgroups + + d, err := getCgroupData(m.Cgroups, pid) + if err != nil { + return err + } + + paths := make(map[string]string) + defer func() { + if err != nil { + cgroups.RemovePaths(paths) + } + }() + for name, sys := range subsystems { + if err := sys.Apply(d); err != nil { + return err + } + // TODO: Apply should, ideally, be reentrant or be broken up into a separate + // create and join phase so that the cgroup hierarchy for a container can be + // created then join consists of writing the process pids to cgroup.procs + p, err := d.path(name) + if err != nil { + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[name] = p + } + m.Paths = paths + + if paths["cpu"] != "" { + if err := CheckCpushares(paths["cpu"], c.CpuShares); err != nil { + return err + } + } + + return nil +} + +func (m *Manager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, ok := subsystems[name] + if !ok || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + for name, path := range m.Paths { + sys, ok := subsystems[name] + if !ok || !cgroups.PathExists(path) { + continue + } + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + return nil +} + +// Freeze toggles the container's freezer cgroup depending on the state +// provided +func (m *Manager) Freeze(state configs.FreezerState) error { + d, err := getCgroupData(m.Cgroups, 0) + if err != nil { + return err + } + + dir, err := d.path("freezer") + if err != nil { + return err + } + + prevState := m.Cgroups.Freezer + m.Cgroups.Freezer = state + + freezer := subsystems["freezer"] + err = freezer.Set(dir, m.Cgroups) + if err != nil { + m.Cgroups.Freezer = prevState + return err + } + + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + d, err := getCgroupData(m.Cgroups, 0) + if err != nil { + return nil, err + } + + dir, err := d.path("devices") + if err != nil { + return nil, err + } + + return cgroups.ReadProcsFile(dir) +} + +func getCgroupData(c *configs.Cgroup, pid int) (*data, error) { + root, err := getCgroupRoot() + if err != nil { + return nil, err + } + + cgroup := c.Name + if c.Parent != "" { + cgroup = filepath.Join(c.Parent, cgroup) + } + + return &data{ + root: root, + cgroup: cgroup, + c: c, + pid: pid, + }, nil +} + +func (raw *data) parent(subsystem, mountpoint, src string) (string, error) { + initPath, err := cgroups.GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + relDir, err := filepath.Rel(src, initPath) + if err != nil { + return "", err + } + return filepath.Join(mountpoint, relDir), nil +} + +func (raw *data) path(subsystem string) (string, error) { + mnt, src, err := cgroups.FindCgroupMountpointAndSource(subsystem) + // If we didn't mount the subsystem, there is no point we make the path. + if err != nil { + return "", err + } + + // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. + if filepath.IsAbs(raw.cgroup) { + return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil + } + + parent, err := raw.parent(subsystem, mnt, src) + if err != nil { + return "", err + } + + return filepath.Join(parent, raw.cgroup), nil +} + +func (raw *data) join(subsystem string) (string, error) { + path, err := raw.path(subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil { + return "", err + } + return path, nil +} + +func writeFile(dir, file, data string) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s.", file) + } + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) +} + +func readFile(dir, file string) (string, error) { + data, err := ioutil.ReadFile(filepath.Join(dir, file)) + return string(data), err +} + +func removePath(p string, err error) error { + if err != nil { + return err + } + if p != "" { + return os.RemoveAll(p) + } + return nil +} + +func CheckCpushares(path string, c int64) error { + var cpuShares int64 + + if c == 0 { + return nil + } + + fd, err := os.Open(filepath.Join(path, "cpu.shares")) + if err != nil { + return err + } + defer fd.Close() + + _, err = fmt.Fscanf(fd, "%d", &cpuShares) + if err != nil && err != io.EOF { + return err + } + + if c > cpuShares { + return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares) + } else if c < cpuShares { + return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares) + } + + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go new file mode 100644 index 00000000..3b940554 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go @@ -0,0 +1,230 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type BlkioGroup struct { +} + +func (s *BlkioGroup) Apply(d *data) error { + dir, err := d.join("blkio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := s.Set(dir, d.c); err != nil { + return err + } + + return nil +} + +func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.BlkioWeight != 0 { + if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil { + return err + } + } + + if cgroup.BlkioWeightDevice != "" { + if err := writeFile(path, "blkio.weight_device", cgroup.BlkioWeightDevice); err != nil { + return err + } + } + if cgroup.BlkioThrottleReadBpsDevice != "" { + if err := writeFile(path, "blkio.throttle.read_bps_device", cgroup.BlkioThrottleReadBpsDevice); err != nil { + return err + } + } + if cgroup.BlkioThrottleWriteBpsDevice != "" { + if err := writeFile(path, "blkio.throttle.write_bps_device", cgroup.BlkioThrottleWriteBpsDevice); err != nil { + return err + } + } + if cgroup.BlkioThrottleReadIOpsDevice != "" { + if err := writeFile(path, "blkio.throttle.read_iops_device", cgroup.BlkioThrottleReadIOpsDevice); err != nil { + return err + } + } + if cgroup.BlkioThrottleWriteIOpsDevice != "" { + if err := writeFile(path, "blkio.throttle.write_iops_device", cgroup.BlkioThrottleWriteIOpsDevice); err != nil { + return err + } + } + + return nil +} + +func (s *BlkioGroup) Remove(d *data) error { + return removePath(d.path("blkio")) +} + +/* +examples: + + blkio.sectors + 8:0 6792 + + blkio.io_service_bytes + 8:0 Read 1282048 + 8:0 Write 2195456 + 8:0 Sync 2195456 + 8:0 Async 1282048 + 8:0 Total 3477504 + Total 3477504 + + blkio.io_serviced + 8:0 Read 124 + 8:0 Write 104 + 8:0 Sync 104 + 8:0 Async 124 + 8:0 Total 228 + Total 228 + + blkio.io_queued + 8:0 Read 0 + 8:0 Write 0 + 8:0 Sync 0 + 8:0 Async 0 + 8:0 Total 0 + Total 0 +*/ + +func splitBlkioStatLine(r rune) bool { + return r == ' ' || r == ':' +} + +func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) { + var blkioStats []cgroups.BlkioStatEntry + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return blkioStats, nil + } + return nil, err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + // format: dev type amount + fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) + if len(fields) < 3 { + if len(fields) == 2 && fields[0] == "Total" { + // skip total line + continue + } else { + return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text()) + } + } + + v, err := strconv.ParseUint(fields[0], 10, 64) + if err != nil { + return nil, err + } + major := v + + v, err = strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return nil, err + } + minor := v + + op := "" + valueField := 2 + if len(fields) == 4 { + op = fields[2] + valueField = 3 + } + v, err = strconv.ParseUint(fields[valueField], 10, 64) + if err != nil { + return nil, err + } + blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) + } + + return blkioStats, nil +} + +func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { + // Try to read CFQ stats available on all CFQ enabled kernels first + if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil { + return getCFQStats(path, stats) + } + return getStats(path, stats) // Use generic stats as fallback +} + +func getCFQStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil { + return err + } + stats.BlkioStats.SectorsRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil { + return err + } + stats.BlkioStats.IoQueuedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoServiceTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoWaitTimeRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil { + return err + } + stats.BlkioStats.IoMergedRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil { + return err + } + stats.BlkioStats.IoTimeRecursive = blkioStats + + return nil +} + +func getStats(path string, stats *cgroups.Stats) error { + var blkioStats []cgroups.BlkioStatEntry + var err error + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil { + return err + } + stats.BlkioStats.IoServiceBytesRecursive = blkioStats + + if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil { + return err + } + stats.BlkioStats.IoServicedRecursive = blkioStats + + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio_test.go new file mode 100644 index 00000000..485ab1ce --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio_test.go @@ -0,0 +1,570 @@ +// +build linux + +package fs + +import ( + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" +) + +const ( + sectorsRecursiveContents = `8:0 1024` + serviceBytesRecursiveContents = `8:0 Read 100 +8:0 Write 200 +8:0 Sync 300 +8:0 Async 500 +8:0 Total 500 +Total 500` + servicedRecursiveContents = `8:0 Read 10 +8:0 Write 40 +8:0 Sync 20 +8:0 Async 30 +8:0 Total 50 +Total 50` + queuedRecursiveContents = `8:0 Read 1 +8:0 Write 4 +8:0 Sync 2 +8:0 Async 3 +8:0 Total 5 +Total 5` + serviceTimeRecursiveContents = `8:0 Read 173959 +8:0 Write 0 +8:0 Sync 0 +8:0 Async 173959 +8:0 Total 17395 +Total 17395` + waitTimeRecursiveContents = `8:0 Read 15571 +8:0 Write 0 +8:0 Sync 0 +8:0 Async 15571 +8:0 Total 15571` + mergedRecursiveContents = `8:0 Read 5 +8:0 Write 10 +8:0 Sync 0 +8:0 Async 0 +8:0 Total 15 +Total 15` + timeRecursiveContents = `8:0 8` + throttleServiceBytes = `8:0 Read 11030528 +8:0 Write 23 +8:0 Sync 42 +8:0 Async 11030528 +8:0 Total 11030528 +252:0 Read 11030528 +252:0 Write 23 +252:0 Sync 42 +252:0 Async 11030528 +252:0 Total 11030528 +Total 22061056` + throttleServiced = `8:0 Read 164 +8:0 Write 23 +8:0 Sync 42 +8:0 Async 164 +8:0 Total 164 +252:0 Read 164 +252:0 Write 23 +252:0 Sync 42 +252:0 Async 164 +252:0 Total 164 +Total 328` + throttleBefore = `8:0 1024` + throttleAfter = `8:0 2048` +) + +func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) { + *blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op}) +} + +func TestBlkioSetWeight(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + weightBefore = 100 + weightAfter = 200 + ) + + helper.writeFileContents(map[string]string{ + "blkio.weight": strconv.Itoa(weightBefore), + }) + + helper.CgroupData.c.BlkioWeight = weightAfter + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight") + if err != nil { + t.Fatalf("Failed to parse blkio.weight - %s", err) + } + + if value != weightAfter { + t.Fatal("Got the wrong value, set blkio.weight failed.") + } +} + +func TestBlkioSetWeightDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + const ( + weightDeviceBefore = "8:0 400" + weightDeviceAfter = "8:0 500" + ) + + helper.writeFileContents(map[string]string{ + "blkio.weight_device": weightDeviceBefore, + }) + + helper.CgroupData.c.BlkioWeightDevice = weightDeviceAfter + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device") + if err != nil { + t.Fatalf("Failed to parse blkio.weight_device - %s", err) + } + + if value != weightDeviceAfter { + t.Fatal("Got the wrong value, set blkio.weight_device failed.") + } +} + +func TestBlkioStats(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + + // Verify expected stats. + expectedStats := cgroups.BlkioStats{} + appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "") + + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total") + + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total") + + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async") + appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total") + + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total") + + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async") + appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total") + + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async") + appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total") + + appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "") + + expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) +} + +func TestBlkioStatsNoSectorsFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoServiceBytesFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoServicedFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoQueuedFile(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoServiceTimeFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoWaitTimeFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoMergedFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsNoTimeFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": serviceBytesRecursiveContents, + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatalf("Failed unexpectedly: %s", err) + } +} + +func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "8:0 Read 100 100", + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestBlkioStatsUnexpectedFieldType(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "8:0 Read Write", + "blkio.io_serviced_recursive": servicedRecursiveContents, + "blkio.io_queued_recursive": queuedRecursiveContents, + "blkio.sectors_recursive": sectorsRecursiveContents, + "blkio.io_service_time_recursive": serviceTimeRecursiveContents, + "blkio.io_wait_time_recursive": waitTimeRecursiveContents, + "blkio.io_merged_recursive": mergedRecursiveContents, + "blkio.time_recursive": timeRecursiveContents, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected to fail, but did not") + } +} + +func TestNonCFQBlkioStats(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "blkio.io_service_bytes_recursive": "", + "blkio.io_serviced_recursive": "", + "blkio.io_queued_recursive": "", + "blkio.sectors_recursive": "", + "blkio.io_service_time_recursive": "", + "blkio.io_wait_time_recursive": "", + "blkio.io_merged_recursive": "", + "blkio.time_recursive": "", + "blkio.throttle.io_service_bytes": throttleServiceBytes, + "blkio.throttle.io_serviced": throttleServiced, + }) + + blkio := &BlkioGroup{} + actualStats := *cgroups.NewStats() + err := blkio.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + + // Verify expected stats. + expectedStats := cgroups.BlkioStats{} + + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async") + appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total") + + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async") + appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total") + + expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats) +} + +func TestBlkioSetThrottleReadBpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.read_bps_device": throttleBefore, + }) + + helper.CgroupData.c.BlkioThrottleReadBpsDevice = throttleAfter + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.") + } +} +func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.write_bps_device": throttleBefore, + }) + + helper.CgroupData.c.BlkioThrottleWriteBpsDevice = throttleAfter + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.") + } +} +func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.read_iops_device": throttleBefore, + }) + + helper.CgroupData.c.BlkioThrottleReadIOpsDevice = throttleAfter + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.") + } +} +func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) { + helper := NewCgroupTestUtil("blkio", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "blkio.throttle.write_iops_device": throttleBefore, + }) + + helper.CgroupData.c.BlkioThrottleWriteIOpsDevice = throttleAfter + blkio := &BlkioGroup{} + if err := blkio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device") + if err != nil { + t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err) + } + + if value != throttleAfter { + t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go new file mode 100644 index 00000000..5afd5b8d --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go @@ -0,0 +1,95 @@ +// +build linux + +package fs + +import ( + "bufio" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type CpuGroup struct { +} + +func (s *CpuGroup) Apply(d *data) error { + // We always want to join the cpu group, to allow fair cpu scheduling + // on a container basis + dir, err := d.join("cpu") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := s.Set(dir, d.c); err != nil { + return err + } + + return nil +} + +func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.CpuShares != 0 { + if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.CpuShares, 10)); err != nil { + return err + } + } + if cgroup.CpuPeriod != 0 { + if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.CpuPeriod, 10)); err != nil { + return err + } + } + if cgroup.CpuQuota != 0 { + if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.CpuQuota, 10)); err != nil { + return err + } + } + if cgroup.CpuRtPeriod != 0 { + if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.CpuRtPeriod, 10)); err != nil { + return err + } + } + if cgroup.CpuRtRuntime != 0 { + if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.CpuRtRuntime, 10)); err != nil { + return err + } + } + + return nil +} + +func (s *CpuGroup) Remove(d *data) error { + return removePath(d.path("cpu")) +} + +func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { + f, err := os.Open(filepath.Join(path, "cpu.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer f.Close() + + sc := bufio.NewScanner(f) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return err + } + switch t { + case "nr_periods": + stats.CpuStats.ThrottlingData.Periods = v + + case "nr_throttled": + stats.CpuStats.ThrottlingData.ThrottledPeriods = v + + case "throttled_time": + stats.CpuStats.ThrottlingData.ThrottledTime = v + } + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu_test.go new file mode 100644 index 00000000..f3c1782e --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu_test.go @@ -0,0 +1,163 @@ +// +build linux + +package fs + +import ( + "fmt" + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" +) + +func TestCpuSetShares(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + const ( + sharesBefore = 1024 + sharesAfter = 512 + ) + + helper.writeFileContents(map[string]string{ + "cpu.shares": strconv.Itoa(sharesBefore), + }) + + helper.CgroupData.c.CpuShares = sharesAfter + cpu := &CpuGroup{} + if err := cpu.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares") + if err != nil { + t.Fatalf("Failed to parse cpu.shares - %s", err) + } + + if value != sharesAfter { + t.Fatal("Got the wrong value, set cpu.shares failed.") + } +} + +func TestCpuSetBandWidth(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + const ( + quotaBefore = 8000 + quotaAfter = 5000 + periodBefore = 10000 + periodAfter = 7000 + rtRuntimeBefore = 8000 + rtRuntimeAfter = 5000 + rtPeriodBefore = 10000 + rtPeriodAfter = 7000 + ) + + helper.writeFileContents(map[string]string{ + "cpu.cfs_quota_us": strconv.Itoa(quotaBefore), + "cpu.cfs_period_us": strconv.Itoa(periodBefore), + "cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore), + "cpu.rt_period_us": strconv.Itoa(rtPeriodBefore), + }) + + helper.CgroupData.c.CpuQuota = quotaAfter + helper.CgroupData.c.CpuPeriod = periodAfter + helper.CgroupData.c.CpuRtRuntime = rtRuntimeAfter + helper.CgroupData.c.CpuRtPeriod = rtPeriodAfter + cpu := &CpuGroup{} + if err := cpu.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us") + if err != nil { + t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err) + } + if quota != quotaAfter { + t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.") + } + + period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us") + if err != nil { + t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err) + } + if period != periodAfter { + t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.") + } + rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us") + if err != nil { + t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err) + } + if rtRuntime != rtRuntimeAfter { + t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.") + } + rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us") + if err != nil { + t.Fatalf("Failed to parse cpu.rt_period_us - %s", err) + } + if rtPeriod != rtPeriodAfter { + t.Fatal("Got the wrong value, set cpu.rt_period_us failed.") + } +} + +func TestCpuStats(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + const ( + kNrPeriods = 2000 + kNrThrottled = 200 + kThrottledTime = uint64(18446744073709551615) + ) + + cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n", + kNrPeriods, kNrThrottled, kThrottledTime) + helper.writeFileContents(map[string]string{ + "cpu.stat": cpuStatContent, + }) + + cpu := &CpuGroup{} + actualStats := *cgroups.NewStats() + err := cpu.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + + expectedStats := cgroups.ThrottlingData{ + Periods: kNrPeriods, + ThrottledPeriods: kNrThrottled, + ThrottledTime: kThrottledTime} + + expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData) +} + +func TestNoCpuStatFile(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + + cpu := &CpuGroup{} + actualStats := *cgroups.NewStats() + err := cpu.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal("Expected not to fail, but did") + } +} + +func TestInvalidCpuStat(t *testing.T) { + helper := NewCgroupTestUtil("cpu", t) + defer helper.cleanup() + cpuStatContent := `nr_periods 2000 + nr_throttled 200 + throttled_time fortytwo` + helper.writeFileContents(map[string]string{ + "cpu.stat": cpuStatContent, + }) + + cpu := &CpuGroup{} + actualStats := *cgroups.NewStats() + err := cpu.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failed stat parsing.") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go new file mode 100644 index 00000000..4ff4fefe --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go @@ -0,0 +1,117 @@ +// +build linux + +package fs + +import ( + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/system" +) + +const ( + cgroupCpuacctStat = "cpuacct.stat" + nanosecondsInSecond = 1000000000 +) + +var clockTicks = uint64(system.GetClockTicks()) + +type CpuacctGroup struct { +} + +func (s *CpuacctGroup) Apply(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) { + return err + } + + return nil +} + +func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *CpuacctGroup) Remove(d *data) error { + return removePath(d.path("cpuacct")) +} + +func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { + userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) + if err != nil { + return err + } + + totalUsage, err := getCgroupParamUint(path, "cpuacct.usage") + if err != nil { + return err + } + + percpuUsage, err := getPercpuUsage(path) + if err != nil { + return err + } + + stats.CpuStats.CpuUsage.TotalUsage = totalUsage + stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage + stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage + stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage + return nil +} + +// Returns user and kernel usage breakdown in nanoseconds. +func getCpuUsageBreakdown(path string) (uint64, uint64, error) { + userModeUsage := uint64(0) + kernelModeUsage := uint64(0) + const ( + userField = "user" + systemField = "system" + ) + + // Expected format: + // user + // system + data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat)) + if err != nil { + return 0, 0, err + } + fields := strings.Fields(string(data)) + if len(fields) != 4 { + return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat)) + } + if fields[0] != userField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) + } + if fields[2] != systemField { + return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) + } + if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { + return 0, 0, err + } + if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { + return 0, 0, err + } + + return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil +} + +func getPercpuUsage(path string) ([]uint64, error) { + percpuUsage := []uint64{} + data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu")) + if err != nil { + return percpuUsage, err + } + for _, value := range strings.Fields(string(data)) { + value, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) + } + percpuUsage = append(percpuUsage, value) + } + return percpuUsage, nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go new file mode 100644 index 00000000..b6c04b0c --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go @@ -0,0 +1,135 @@ +// +build linux + +package fs + +import ( + "bytes" + "io/ioutil" + "os" + "path/filepath" + "strconv" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type CpusetGroup struct { +} + +func (s *CpusetGroup) Apply(d *data) error { + dir, err := d.path("cpuset") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + return s.ApplyDir(dir, d.c, d.pid) +} + +func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.CpusetCpus != "" { + if err := writeFile(path, "cpuset.cpus", cgroup.CpusetCpus); err != nil { + return err + } + } + if cgroup.CpusetMems != "" { + if err := writeFile(path, "cpuset.mems", cgroup.CpusetMems); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) Remove(d *data) error { + return removePath(d.path("cpuset")) +} + +func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} + +func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error { + // This might happen if we have no cpuset cgroup mounted. + // Just do nothing and don't fail. + if dir == "" { + return nil + } + root, err := getCgroupRoot() + if err != nil { + return err + } + if err := s.ensureParent(dir, root); err != nil { + return err + } + // because we are not using d.join we need to place the pid into the procs file + // unlike the other subsystems + if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil { + return err + } + + // the default values inherit from parent cgroup are already set in + // s.ensureParent, cover these if we have our own + if err := s.Set(dir, cgroup); err != nil { + return err + } + + return nil +} + +func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) { + if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil { + return + } + if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil { + return + } + return cpus, mems, nil +} + +// ensureParent makes sure that the parent directory of current is created +// and populated with the proper cpus and mems files copied from +// it's parent. +func (s *CpusetGroup) ensureParent(current, root string) error { + parent := filepath.Dir(current) + if filepath.Clean(parent) == root { + return nil + } + if err := s.ensureParent(parent, root); err != nil { + return err + } + if err := os.MkdirAll(current, 0755); err != nil && !os.IsExist(err) { + return err + } + return s.copyIfNeeded(current, parent) +} + +// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent +// directory to the current directory if the file's contents are 0 +func (s *CpusetGroup) copyIfNeeded(current, parent string) error { + var ( + err error + currentCpus, currentMems []byte + parentCpus, parentMems []byte + ) + + if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil { + return err + } + if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil { + return err + } + + if s.isEmpty(currentCpus) { + if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil { + return err + } + } + if s.isEmpty(currentMems) { + if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil { + return err + } + } + return nil +} + +func (s *CpusetGroup) isEmpty(b []byte) bool { + return len(bytes.Trim(b, "\n")) == 0 +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset_test.go new file mode 100644 index 00000000..44b2f943 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset_test.go @@ -0,0 +1,65 @@ +// +build linux + +package fs + +import ( + "testing" +) + +func TestCpusetSetCpus(t *testing.T) { + helper := NewCgroupTestUtil("cpuset", t) + defer helper.cleanup() + + const ( + cpusBefore = "0" + cpusAfter = "1-3" + ) + + helper.writeFileContents(map[string]string{ + "cpuset.cpus": cpusBefore, + }) + + helper.CgroupData.c.CpusetCpus = cpusAfter + cpuset := &CpusetGroup{} + if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus") + if err != nil { + t.Fatalf("Failed to parse cpuset.cpus - %s", err) + } + + if value != cpusAfter { + t.Fatal("Got the wrong value, set cpuset.cpus failed.") + } +} + +func TestCpusetSetMems(t *testing.T) { + helper := NewCgroupTestUtil("cpuset", t) + defer helper.cleanup() + + const ( + memsBefore = "0" + memsAfter = "1" + ) + + helper.writeFileContents(map[string]string{ + "cpuset.mems": memsBefore, + }) + + helper.CgroupData.c.CpusetMems = memsAfter + cpuset := &CpusetGroup{} + if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems") + if err != nil { + t.Fatalf("Failed to parse cpuset.mems - %s", err) + } + + if value != memsAfter { + t.Fatal("Got the wrong value, set cpuset.mems failed.") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go new file mode 100644 index 00000000..d621d275 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go @@ -0,0 +1,61 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type DevicesGroup struct { +} + +func (s *DevicesGroup) Apply(d *data) error { + dir, err := d.join("devices") + if err != nil { + // We will return error even it's `not found` error, devices + // cgroup is hard requirement for container's security. + return err + } + + if err := s.Set(dir, d.c); err != nil { + return err + } + + return nil +} + +func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { + if !cgroup.AllowAllDevices { + if err := writeFile(path, "devices.deny", "a"); err != nil { + return err + } + + for _, dev := range cgroup.AllowedDevices { + if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { + return err + } + } + return nil + } + + if err := writeFile(path, "devices.allow", "a"); err != nil { + return err + } + + for _, dev := range cgroup.DeniedDevices { + if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *DevicesGroup) Remove(d *data) error { + return removePath(d.path("devices")) +} + +func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices_test.go new file mode 100644 index 00000000..f8422644 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices_test.go @@ -0,0 +1,84 @@ +// +build linux + +package fs + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + allowedDevices = []*configs.Device{ + { + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, + }, + } + allowedList = "c 1:5 rwm" + deniedDevices = []*configs.Device{ + { + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, + }, + } + deniedList = "c 1:3 rwm" +) + +func TestDevicesSetAllow(t *testing.T) { + helper := NewCgroupTestUtil("devices", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "devices.deny": "a", + }) + + helper.CgroupData.c.AllowAllDevices = false + helper.CgroupData.c.AllowedDevices = allowedDevices + devices := &DevicesGroup{} + if err := devices.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "devices.allow") + if err != nil { + t.Fatalf("Failed to parse devices.allow - %s", err) + } + + if value != allowedList { + t.Fatal("Got the wrong value, set devices.allow failed.") + } +} + +func TestDevicesSetDeny(t *testing.T) { + helper := NewCgroupTestUtil("devices", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "devices.allow": "a", + }) + + helper.CgroupData.c.AllowAllDevices = true + helper.CgroupData.c.DeniedDevices = deniedDevices + devices := &DevicesGroup{} + if err := devices.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "devices.deny") + if err != nil { + t.Fatalf("Failed to parse devices.deny - %s", err) + } + + if value != deniedList { + t.Fatal("Got the wrong value, set devices.deny failed.") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go new file mode 100644 index 00000000..47f74395 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go @@ -0,0 +1,62 @@ +// +build linux + +package fs + +import ( + "fmt" + "strings" + "time" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type FreezerGroup struct { +} + +func (s *FreezerGroup) Apply(d *data) error { + dir, err := d.join("freezer") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := s.Set(dir, d.c); err != nil { + return err + } + + return nil +} + +func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { + switch cgroup.Freezer { + case configs.Frozen, configs.Thawed: + if err := writeFile(path, "freezer.state", string(cgroup.Freezer)); err != nil { + return err + } + + for { + state, err := readFile(path, "freezer.state") + if err != nil { + return err + } + if strings.TrimSpace(state) == string(cgroup.Freezer) { + break + } + time.Sleep(1 * time.Millisecond) + } + case configs.Undefined: + return nil + default: + return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Freezer)) + } + + return nil +} + +func (s *FreezerGroup) Remove(d *data) error { + return removePath(d.path("freezer")) +} + +func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer_test.go new file mode 100644 index 00000000..88abb675 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer_test.go @@ -0,0 +1,45 @@ +package fs + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +func TestFreezerSetState(t *testing.T) { + helper := NewCgroupTestUtil("freezer", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "freezer.state": string(configs.Frozen), + }) + + helper.CgroupData.c.Freezer = configs.Thawed + freezer := &FreezerGroup{} + if err := freezer.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "freezer.state") + if err != nil { + t.Fatalf("Failed to parse freezer.state - %s", err) + } + if value != string(configs.Thawed) { + t.Fatal("Got the wrong value, set freezer.state failed.") + } +} + +func TestFreezerSetInvalidState(t *testing.T) { + helper := NewCgroupTestUtil("freezer", t) + defer helper.cleanup() + + const ( + invalidArg configs.FreezerState = "Invalid" + ) + + helper.CgroupData.c.Freezer = invalidArg + freezer := &FreezerGroup{} + if err := freezer.Set(helper.CgroupPath, helper.CgroupData.c); err == nil { + t.Fatal("Failed to return invalid argument error") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go new file mode 100644 index 00000000..3ef9e031 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs_unsupported.go @@ -0,0 +1,3 @@ +// +build !linux + +package fs diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go new file mode 100644 index 00000000..7f192a9a --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go @@ -0,0 +1,72 @@ +// +build linux + +package fs + +import ( + "fmt" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type HugetlbGroup struct { +} + +func (s *HugetlbGroup) Apply(d *data) error { + dir, err := d.join("hugetlb") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := s.Set(dir, d.c); err != nil { + return err + } + + return nil +} + +func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, hugetlb := range cgroup.HugetlbLimit { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil { + return err + } + } + + return nil +} + +func (s *HugetlbGroup) Remove(d *data) error { + return removePath(d.path("hugetlb")) +} + +func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range HugePageSizes { + usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") + value, err := getCgroupParamUint(path, usage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", usage, err) + } + hugetlbStats.Usage = value + + maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + hugetlbStats.MaxUsage = value + + failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = hugetlbStats + } + + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb_test.go new file mode 100644 index 00000000..39c3ee65 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb_test.go @@ -0,0 +1,138 @@ +package fs + +import ( + "strconv" + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +const ( + hugetlbUsageContents = "128\n" + hugetlbMaxUsageContents = "256\n" + hugetlbFailcnt = "100\n" +) + +var ( + hugePageSize, _ = cgroups.GetHugePageSize() + usage = strings.Join([]string{"hugetlb", hugePageSize[0], "usage_in_bytes"}, ".") + limit = strings.Join([]string{"hugetlb", hugePageSize[0], "limit_in_bytes"}, ".") + maxUsage = strings.Join([]string{"hugetlb", hugePageSize[0], "max_usage_in_bytes"}, ".") + failcnt = strings.Join([]string{"hugetlb", hugePageSize[0], "failcnt"}, ".") +) + +func TestHugetlbSetHugetlb(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + + const ( + hugetlbBefore = 256 + hugetlbAfter = 512 + ) + + helper.writeFileContents(map[string]string{ + limit: strconv.Itoa(hugetlbBefore), + }) + + helper.CgroupData.c.HugetlbLimit = []*configs.HugepageLimit{ + { + Pagesize: hugePageSize[0], + Limit: hugetlbAfter, + }, + } + hugetlb := &HugetlbGroup{} + if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, limit) + if err != nil { + t.Fatalf("Failed to parse %s - %s", limit, err) + } + if value != hugetlbAfter { + t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value) + } +} + +func TestHugetlbStats(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + usage: hugetlbUsageContents, + maxUsage: hugetlbMaxUsageContents, + failcnt: hugetlbFailcnt, + }) + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100} + expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[hugePageSize[0]]) +} + +func TestHugetlbStatsNoUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + maxUsage: hugetlbMaxUsageContents, + }) + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestHugetlbStatsNoMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + usage: hugetlbUsageContents, + }) + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestHugetlbStatsBadUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + usage: "bad", + maxUsage: hugetlbMaxUsageContents, + }) + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestHugetlbStatsBadMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("hugetlb", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + usage: hugetlbUsageContents, + maxUsage: "bad", + }) + + hugetlb := &HugetlbGroup{} + actualStats := *cgroups.NewStats() + err := hugetlb.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go new file mode 100644 index 00000000..8206b147 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go @@ -0,0 +1,171 @@ +// +build linux + +package fs + +import ( + "bufio" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type MemoryGroup struct { +} + +func (s *MemoryGroup) Apply(d *data) error { + path, err := d.path("memory") + if err != nil { + if cgroups.IsNotFound(err) { + return nil + } + return err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return err + } + if err := s.Set(path, d.c); err != nil { + return err + } + + // We need to join memory cgroup after set memory limits, because + // kmem.limit_in_bytes can only be set when the cgroup is empty. + _, err = d.join("memory") + if err != nil { + return err + } + defer func() { + if err != nil { + os.RemoveAll(path) + } + }() + + return nil +} + +func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.Memory != 0 { + if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Memory, 10)); err != nil { + return err + } + } + if cgroup.MemoryReservation != 0 { + if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.MemoryReservation, 10)); err != nil { + return err + } + } + if cgroup.MemorySwap > 0 { + if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil { + return err + } + } + if cgroup.KernelMemory > 0 { + if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.KernelMemory, 10)); err != nil { + return err + } + } + + if cgroup.OomKillDisable { + if err := writeFile(path, "memory.oom_control", "1"); err != nil { + return err + } + } + if cgroup.MemorySwappiness >= 0 && cgroup.MemorySwappiness <= 100 { + if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.MemorySwappiness, 10)); err != nil { + return err + } + } else if cgroup.MemorySwappiness == -1 { + return nil + } else { + return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", cgroup.MemorySwappiness) + } + + return nil +} + +func (s *MemoryGroup) Remove(d *data) error { + return removePath(d.path("memory")) +} + +func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { + // Set stats from memory.stat. + statsFile, err := os.Open(filepath.Join(path, "memory.stat")) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + defer statsFile.Close() + + sc := bufio.NewScanner(statsFile) + for sc.Scan() { + t, v, err := getCgroupParamKeyValue(sc.Text()) + if err != nil { + return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) + } + stats.MemoryStats.Stats[t] = v + } + stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] + + memoryUsage, err := getMemoryData(path, "") + if err != nil { + return err + } + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") + if err != nil { + return err + } + stats.MemoryStats.SwapUsage = swapUsage + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage + + return nil +} + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") + maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") + failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") + + value, err := getCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + memoryData.MaxUsage = value + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + memoryData.Failcnt = value + + return memoryData, nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory_test.go new file mode 100644 index 00000000..d7b5e5f4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory_test.go @@ -0,0 +1,294 @@ +// +build linux + +package fs + +import ( + "strconv" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" +) + +const ( + memoryStatContents = `cache 512 +rss 1024` + memoryUsageContents = "2048\n" + memoryMaxUsageContents = "4096\n" + memoryFailcnt = "100\n" +) + +func TestMemorySetMemory(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + memoryBefore = 314572800 // 300M + memoryAfter = 524288000 // 500M + reservationBefore = 209715200 // 200M + reservationAfter = 314572800 // 300M + ) + + helper.writeFileContents(map[string]string{ + "memory.limit_in_bytes": strconv.Itoa(memoryBefore), + "memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore), + }) + + helper.CgroupData.c.Memory = memoryAfter + helper.CgroupData.c.MemoryReservation = reservationAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) + } + if value != memoryAfter { + t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") + } + + value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err) + } + if value != reservationAfter { + t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.") + } +} + +func TestMemorySetMemoryswap(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + memoryswapBefore = 314572800 // 300M + memoryswapAfter = 524288000 // 500M + ) + + helper.writeFileContents(map[string]string{ + "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), + }) + + helper.CgroupData.c.MemorySwap = memoryswapAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) + } + if value != memoryswapAfter { + t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") + } +} + +func TestMemorySetKernelMemory(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + kernelMemoryBefore = 314572800 // 300M + kernelMemoryAfter = 524288000 // 500M + ) + + helper.writeFileContents(map[string]string{ + "memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore), + }) + + helper.CgroupData.c.KernelMemory = kernelMemoryAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes") + if err != nil { + t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err) + } + if value != kernelMemoryAfter { + t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.") + } +} + +func TestMemorySetMemorySwappinessDefault(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + swappinessBefore = 60 //deafult is 60 + swappinessAfter = 0 + ) + + helper.writeFileContents(map[string]string{ + "memory.swappiness": strconv.Itoa(swappinessBefore), + }) + + helper.CgroupData.c.Memory = swappinessAfter + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, "memory.swappiness") + if err != nil { + t.Fatalf("Failed to parse memory.swappiness - %s", err) + } + if value != swappinessAfter { + t.Fatal("Got the wrong value, set memory.swappiness failed.") + } +} + +func TestMemoryStats(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + "memory.failcnt": memoryFailcnt, + "memory.memsw.usage_in_bytes": memoryUsageContents, + "memory.memsw.max_usage_in_bytes": memoryMaxUsageContents, + "memory.memsw.failcnt": memoryFailcnt, + "memory.kmem.usage_in_bytes": memoryUsageContents, + "memory.kmem.max_usage_in_bytes": memoryMaxUsageContents, + "memory.kmem.failcnt": memoryFailcnt, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } + expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100}, Stats: map[string]uint64{"cache": 512, "rss": 1024}} + expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats) +} + +func TestMemoryStatsNoStatFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err != nil { + t.Fatal(err) + } +} + +func TestMemoryStatsNoUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsNoMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadStatFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": "rss rss", + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": "bad", + "memory.max_usage_in_bytes": memoryMaxUsageContents, + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemoryStatsBadMaxUsageFile(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + helper.writeFileContents(map[string]string{ + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": memoryUsageContents, + "memory.max_usage_in_bytes": "bad", + }) + + memory := &MemoryGroup{} + actualStats := *cgroups.NewStats() + err := memory.GetStats(helper.CgroupPath, &actualStats) + if err == nil { + t.Fatal("Expected failure") + } +} + +func TestMemorySetOomControl(t *testing.T) { + helper := NewCgroupTestUtil("memory", t) + defer helper.cleanup() + + const ( + oom_kill_disable = 1 // disable oom killer, default is 0 + ) + + helper.writeFileContents(map[string]string{ + "memory.oom_control": strconv.Itoa(oom_kill_disable), + }) + + memory := &MemoryGroup{} + if err := memory.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control") + if err != nil { + t.Fatalf("Failed to parse memory.oom_control - %s", err) + } + + if value != oom_kill_disable { + t.Fatalf("Got the wrong value, set memory.oom_control failed.") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go new file mode 100644 index 00000000..23335dd7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go @@ -0,0 +1,40 @@ +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetClsGroup struct { +} + +func (s *NetClsGroup) Apply(d *data) error { + dir, err := d.join("net_cls") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := s.Set(dir, d.c); err != nil { + return err + } + + return nil +} + +func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error { + if cgroup.NetClsClassid != "" { + if err := writeFile(path, "net_cls.classid", cgroup.NetClsClassid); err != nil { + return err + } + } + + return nil +} + +func (s *NetClsGroup) Remove(d *data) error { + return removePath(d.path("net_cls")) +} + +func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls_test.go new file mode 100644 index 00000000..c24ee037 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls_test.go @@ -0,0 +1,36 @@ +package fs + +import ( + "testing" +) + +const ( + classidBefore = "0x100002" + classidAfter = "0x100001" +) + +func TestNetClsSetClassid(t *testing.T) { + helper := NewCgroupTestUtil("net_cls", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "net_cls.classid": classidBefore, + }) + + helper.CgroupData.c.NetClsClassid = classidAfter + netcls := &NetClsGroup{} + if err := netcls.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + // As we are in mock environment, we can't get correct value of classid from + // net_cls.classid. + // So. we just judge if we successfully write classid into file + value, err := getCgroupParamString(helper.CgroupPath, "net_cls.classid") + if err != nil { + t.Fatalf("Failed to parse net_cls.classid - %s", err) + } + if value != classidAfter { + t.Fatal("Got the wrong value, set net_cls.classid failed.") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go new file mode 100644 index 00000000..88f2ea8b --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go @@ -0,0 +1,40 @@ +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type NetPrioGroup struct { +} + +func (s *NetPrioGroup) Apply(d *data) error { + dir, err := d.join("net_prio") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := s.Set(dir, d.c); err != nil { + return err + } + + return nil +} + +func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, prioMap := range cgroup.NetPrioIfpriomap { + if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { + return err + } + } + + return nil +} + +func (s *NetPrioGroup) Remove(d *data) error { + return removePath(d.path("net_prio")) +} + +func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio_test.go new file mode 100644 index 00000000..0ef4955b --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio_test.go @@ -0,0 +1,36 @@ +package fs + +import ( + "strings" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + prioMap = []*configs.IfPrioMap{ + { + Interface: "test", + Priority: 5, + }, + } +) + +func TestNetPrioSetIfPrio(t *testing.T) { + helper := NewCgroupTestUtil("net_prio", t) + defer helper.cleanup() + + helper.CgroupData.c.NetPrioIfpriomap = prioMap + netPrio := &NetPrioGroup{} + if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.c); err != nil { + t.Fatal(err) + } + + value, err := getCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap") + if err != nil { + t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err) + } + if !strings.Contains(value, "test 5") { + t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go new file mode 100644 index 00000000..30be9e56 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go @@ -0,0 +1,31 @@ +// +build linux + +package fs + +import ( + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type PerfEventGroup struct { +} + +func (s *PerfEventGroup) Apply(d *data) error { + // we just want to join this group even though we don't set anything + if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) { + return err + } + return nil +} + +func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error { + return nil +} + +func (s *PerfEventGroup) Remove(d *data) error { + return removePath(d.path("perf_event")) +} + +func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/stats_util_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/stats_util_test.go new file mode 100644 index 00000000..b1777cc0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/stats_util_test.go @@ -0,0 +1,113 @@ +// +build linux + +package fs + +import ( + "fmt" + "testing" + + "github.com/Sirupsen/logrus" + "github.com/opencontainers/runc/libcontainer/cgroups" +) + +func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error { + if len(expected) != len(actual) { + return fmt.Errorf("blkioStatEntries length do not match") + } + for i, expValue := range expected { + actValue := actual[i] + if expValue != actValue { + return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue) + } + } + return nil +} + +func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) { + if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil { + logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil { + logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil { + logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil { + logrus.Printf("blkio SectorsRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil { + logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil { + logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil { + logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive) + t.Fail() + } + + if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil { + logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err) + t.Fail() + } +} + +func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) { + if expected != actual { + logrus.Printf("Expected throttling data %v but found %v\n", expected, actual) + t.Fail() + } +} + +func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) { + if expected != actual { + logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual) + t.Fail() + } +} + +func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) { + expectMemoryDataEquals(t, expected.Usage, actual.Usage) + expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage) + expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage) + + for key, expValue := range expected.Stats { + actValue, ok := actual.Stats[key] + if !ok { + logrus.Printf("Expected memory stat key %s not found\n", key) + t.Fail() + } + if expValue != actValue { + logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue) + t.Fail() + } + } +} + +func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) { + if expected.Usage != actual.Usage { + logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage) + t.Fail() + } + if expected.MaxUsage != actual.MaxUsage { + logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage) + t.Fail() + } + if expected.Failcnt != actual.Failcnt { + logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt) + t.Fail() + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/util_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/util_test.go new file mode 100644 index 00000000..0f362560 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/util_test.go @@ -0,0 +1,66 @@ +// +build linux + +/* +Utility for testing cgroup operations. + +Creates a mock of the cgroup filesystem for the duration of the test. +*/ +package fs + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type cgroupTestUtil struct { + // data to use in tests. + CgroupData *data + + // Path to the mock cgroup directory. + CgroupPath string + + // Temporary directory to store mock cgroup filesystem. + tempDir string + t *testing.T +} + +// Creates a new test util for the specified subsystem +func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil { + d := &data{ + c: &configs.Cgroup{}, + } + tempDir, err := ioutil.TempDir("", "cgroup_test") + if err != nil { + t.Fatal(err) + } + d.root = tempDir + testCgroupPath := filepath.Join(d.root, subsystem) + if err != nil { + t.Fatal(err) + } + + // Ensure the full mock cgroup path exists. + err = os.MkdirAll(testCgroupPath, 0755) + if err != nil { + t.Fatal(err) + } + return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t} +} + +func (c *cgroupTestUtil) cleanup() { + os.RemoveAll(c.tempDir) +} + +// Write the specified contents on the mock of the specified cgroup files. +func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) { + for file, contents := range fileContents { + err := writeFile(c.CgroupPath, file, contents) + if err != nil { + c.t.Fatal(err) + } + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go new file mode 100644 index 00000000..3a6eec79 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go @@ -0,0 +1,74 @@ +// +build linux + +package fs + +import ( + "errors" + "fmt" + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +var ( + ErrNotSupportStat = errors.New("stats are not supported for subsystem") + ErrNotValidFormat = errors.New("line is not a valid key value format") +) + +// Saturates negative values at zero and returns a uint64. +// Due to kernel bugs, some of the memory cgroup stats can be negative. +func parseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// Parses a cgroup param and returns as name, value +// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234 +func getCgroupParamKeyValue(t string) (string, uint64, error) { + parts := strings.Fields(t) + switch len(parts) { + case 2: + value, err := parseUint(parts[1], 10, 64) + if err != nil { + return "", 0, fmt.Errorf("Unable to convert param value (%q) to uint64: %v", parts[1], err) + } + + return parts[0], value, nil + default: + return "", 0, ErrNotValidFormat + } +} + +// Gets a single uint64 value from the specified cgroup file. +func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { + contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) + if err != nil { + return 0, err + } + + return parseUint(strings.TrimSpace(string(contents)), 10, 64) +} + +// Gets a string value from the specified cgroup file +func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) { + contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile)) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(contents)), nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils_test.go new file mode 100644 index 00000000..99cdc18e --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils_test.go @@ -0,0 +1,97 @@ +// +build linux + +package fs + +import ( + "io/ioutil" + "math" + "os" + "path/filepath" + "strconv" + "testing" +) + +const ( + cgroupFile = "cgroup.file" + floatValue = 2048.0 + floatString = "2048" +) + +func TestGetCgroupParamsInt(t *testing.T) { + // Setup tempdir. + tempDir, err := ioutil.TempDir("", "cgroup_utils_test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tempDir) + tempFile := filepath.Join(tempDir, cgroupFile) + + // Success. + err = ioutil.WriteFile(tempFile, []byte(floatString), 0755) + if err != nil { + t.Fatal(err) + } + value, err := getCgroupParamUint(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != floatValue { + t.Fatalf("Expected %d to equal %f", value, floatValue) + } + + // Success with new line. + err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755) + if err != nil { + t.Fatal(err) + } + value, err = getCgroupParamUint(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != floatValue { + t.Fatalf("Expected %d to equal %f", value, floatValue) + } + + // Success with negative values + err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755) + if err != nil { + t.Fatal(err) + } + value, err = getCgroupParamUint(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != 0 { + t.Fatalf("Expected %d to equal %d", value, 0) + } + + // Success with negative values lesser than min int64 + s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64) + err = ioutil.WriteFile(tempFile, []byte(s), 0755) + if err != nil { + t.Fatal(err) + } + value, err = getCgroupParamUint(tempDir, cgroupFile) + if err != nil { + t.Fatal(err) + } else if value != 0 { + t.Fatalf("Expected %d to equal %d", value, 0) + } + + // Not a float. + err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755) + if err != nil { + t.Fatal(err) + } + _, err = getCgroupParamUint(tempDir, cgroupFile) + if err == nil { + t.Fatal("Expecting error, got none") + } + + // Unknown file. + err = os.Remove(tempFile) + if err != nil { + t.Fatal(err) + } + _, err = getCgroupParamUint(tempDir, cgroupFile) + if err == nil { + t.Fatal("Expecting error, got none") + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/stats.go new file mode 100644 index 00000000..bda32b20 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -0,0 +1,92 @@ +// +build linux + +package cgroups + +type ThrottlingData struct { + // Number of periods with throttling active + Periods uint64 `json:"periods,omitempty"` + // Number of periods when the container hit its throttling limit. + ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` + // Aggregate time the container was throttled for in nanoseconds. + ThrottledTime uint64 `json:"throttled_time,omitempty"` +} + +// All CPU stats are aggregate since container inception. +type CpuUsage struct { + // Total CPU time consumed. + // Units: nanoseconds. + TotalUsage uint64 `json:"total_usage,omitempty"` + // Total CPU time consumed per core. + // Units: nanoseconds. + PercpuUsage []uint64 `json:"percpu_usage,omitempty"` + // Time spent by tasks of the cgroup in kernel mode. + // Units: nanoseconds. + UsageInKernelmode uint64 `json:"usage_in_kernelmode"` + // Time spent by tasks of the cgroup in user mode. + // Units: nanoseconds. + UsageInUsermode uint64 `json:"usage_in_usermode"` +} + +type CpuStats struct { + CpuUsage CpuUsage `json:"cpu_usage,omitempty"` + ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` +} + +type MemoryData struct { + Usage uint64 `json:"usage,omitempty"` + MaxUsage uint64 `json:"max_usage,omitempty"` + Failcnt uint64 `json:"failcnt"` +} +type MemoryStats struct { + // memory used for cache + Cache uint64 `json:"cache,omitempty"` + // usage of memory + Usage MemoryData `json:"usage,omitempty"` + // usage of memory + swap + SwapUsage MemoryData `json:"swap_usage,omitempty"` + // usafe of kernel memory + KernelUsage MemoryData `json:"kernel_usage,omitempty"` + Stats map[string]uint64 `json:"stats,omitempty"` +} + +type BlkioStatEntry struct { + Major uint64 `json:"major,omitempty"` + Minor uint64 `json:"minor,omitempty"` + Op string `json:"op,omitempty"` + Value uint64 `json:"value,omitempty"` +} + +type BlkioStats struct { + // number of bytes tranferred to and from the block device + IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` + IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` + IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` + IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` + IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` + IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` + IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` + SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` +} + +type HugetlbStats struct { + // current res_counter usage for hugetlb + Usage uint64 `json:"usage,omitempty"` + // maximum usage ever recorded. + MaxUsage uint64 `json:"max_usage,omitempty"` + // number of times htgetlb usage allocation failure. + Failcnt uint64 `json:"failcnt"` +} + +type Stats struct { + CpuStats CpuStats `json:"cpu_stats,omitempty"` + MemoryStats MemoryStats `json:"memory_stats,omitempty"` + BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` +} + +func NewStats() *Stats { + memoryStats := MemoryStats{Stats: make(map[string]uint64)} + hugetlbStats := make(map[string]HugetlbStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go new file mode 100644 index 00000000..fa3485f1 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go @@ -0,0 +1,51 @@ +// +build !linux + +package systemd + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager struct { + Cgroups *configs.Cgroup + Paths map[string]string +} + +func UseSystemd() bool { + return false +} + +func (m *Manager) Apply(pid int) error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPids() ([]int, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Destroy() error { + return fmt.Errorf("Systemd not supported") +} + +func (m *Manager) GetPaths() map[string]string { + return nil +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Set(container *configs.Config) error { + return nil, fmt.Errorf("Systemd not supported") +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + return fmt.Errorf("Systemd not supported") +} + +func Freeze(c *configs.Cgroup, state configs.FreezerState) error { + return fmt.Errorf("Systemd not supported") +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go new file mode 100644 index 00000000..8c27f11a --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go @@ -0,0 +1,587 @@ +// +build linux + +package systemd + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "time" + + systemd "github.com/coreos/go-systemd/dbus" + "github.com/godbus/dbus" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Manager struct { + mu sync.Mutex + Cgroups *configs.Cgroup + Paths map[string]string +} + +type subsystem interface { + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Set the cgroup represented by cgroup. + Set(path string, cgroup *configs.Cgroup) error +} + +var subsystems = map[string]subsystem{ + "devices": &fs.DevicesGroup{}, + "memory": &fs.MemoryGroup{}, + "cpu": &fs.CpuGroup{}, + "cpuset": &fs.CpusetGroup{}, + "cpuacct": &fs.CpuacctGroup{}, + "blkio": &fs.BlkioGroup{}, + "hugetlb": &fs.HugetlbGroup{}, + "perf_event": &fs.PerfEventGroup{}, + "freezer": &fs.FreezerGroup{}, + "net_prio": &fs.NetPrioGroup{}, + "net_cls": &fs.NetClsGroup{}, +} + +const ( + testScopeWait = 4 +) + +var ( + connLock sync.Mutex + theConn *systemd.Conn + hasStartTransientUnit bool + hasTransientDefaultDependencies bool +) + +func newProp(name string, units interface{}) systemd.Property { + return systemd.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + +func UseSystemd() bool { + s, err := os.Stat("/run/systemd/system") + if err != nil || !s.IsDir() { + return false + } + + connLock.Lock() + defer connLock.Unlock() + + if theConn == nil { + var err error + theConn, err = systemd.New() + if err != nil { + return false + } + + // Assume we have StartTransientUnit + hasStartTransientUnit = true + + // But if we get UnknownMethod error we don't + if _, err := theConn.StartTransientUnit("test.scope", "invalid"); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" { + hasStartTransientUnit = false + return hasStartTransientUnit + } + } + } + + // Ensure the scope name we use doesn't exist. Use the Pid to + // avoid collisions between multiple libcontainer users on a + // single host. + scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid()) + testScopeExists := true + for i := 0; i <= testScopeWait; i++ { + if _, err := theConn.StopUnit(scope, "replace"); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") { + testScopeExists = false + break + } + } + } + time.Sleep(time.Millisecond) + } + + // Bail out if we can't kill this scope without testing for DefaultDependencies + if testScopeExists { + return hasStartTransientUnit + } + + // Assume StartTransientUnit on a scope allows DefaultDependencies + hasTransientDefaultDependencies = true + ddf := newProp("DefaultDependencies", false) + if _, err := theConn.StartTransientUnit(scope, "replace", ddf); err != nil { + if dbusError, ok := err.(dbus.Error); ok { + if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") { + hasTransientDefaultDependencies = false + } + } + } + + // Not critical because of the stop unit logic above. + theConn.StopUnit(scope, "replace") + } + return hasStartTransientUnit +} + +func getIfaceForUnit(unitName string) string { + if strings.HasSuffix(unitName, ".scope") { + return "Scope" + } + if strings.HasSuffix(unitName, ".service") { + return "Service" + } + return "Unit" +} + +func (m *Manager) Apply(pid int) error { + var ( + c = m.Cgroups + unitName = getUnitName(c) + slice = "system.slice" + properties []systemd.Property + ) + + if c.Slice != "" { + slice = c.Slice + } + + properties = append(properties, + systemd.PropSlice(slice), + systemd.PropDescription("docker container "+c.Name), + newProp("PIDs", []uint32{uint32(pid)}), + ) + + // Always enable accounting, this gets us the same behaviour as the fs implementation, + // plus the kernel has some problems with joining the memory cgroup at a later time. + properties = append(properties, + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true)) + + if hasTransientDefaultDependencies { + properties = append(properties, + newProp("DefaultDependencies", false)) + } + + if c.Memory != 0 { + properties = append(properties, + newProp("MemoryLimit", uint64(c.Memory))) + } + // TODO: MemoryReservation and MemorySwap not available in systemd + + if c.CpuShares != 0 { + properties = append(properties, + newProp("CPUShares", uint64(c.CpuShares))) + } + + if c.BlkioWeight != 0 { + properties = append(properties, + newProp("BlockIOWeight", uint64(c.BlkioWeight))) + } + + // We need to set kernel memory before processes join cgroup because + // kmem.limit_in_bytes can only be set when the cgroup is empty. + // And swap memory limit needs to be set after memory limit, only + // memory limit is handled by systemd, so it's kind of ugly here. + if c.KernelMemory > 0 { + if err := setKernelMemory(c); err != nil { + return err + } + } + + if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { + return err + } + + if err := joinDevices(c, pid); err != nil { + return err + } + + // TODO: CpuQuota and CpuPeriod not available in systemd + // we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us + if err := joinCpu(c, pid); err != nil { + return err + } + + if err := joinMemory(c, pid); err != nil { + return err + } + + // we need to manually join the freezer, net_cls, net_prio and cpuset cgroup in systemd + // because it does not currently support it via the dbus api. + if err := joinFreezer(c, pid); err != nil { + return err + } + + if err := joinNetPrio(c, pid); err != nil { + return err + } + if err := joinNetCls(c, pid); err != nil { + return err + } + + if err := joinCpuset(c, pid); err != nil { + return err + } + + if err := joinHugetlb(c, pid); err != nil { + return err + } + // FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem + // using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354), + // so use fs work around for now. + if err := joinBlkio(c, pid); err != nil { + return err + } + + paths := make(map[string]string) + for sysname := range subsystems { + subsystemPath, err := getSubsystemPath(m.Cgroups, sysname) + if err != nil { + // Don't fail if a cgroup hierarchy was not found, just skip this subsystem + if cgroups.IsNotFound(err) { + continue + } + return err + } + paths[sysname] = subsystemPath + } + m.Paths = paths + + if paths["cpu"] != "" { + if err := fs.CheckCpushares(paths["cpu"], c.CpuShares); err != nil { + return err + } + } + + return nil +} + +func (m *Manager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + theConn.StopUnit(getUnitName(m.Cgroups), "replace") + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil +} + +func (m *Manager) GetPaths() map[string]string { + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths +} + +func writeFile(dir, file, data string) error { + // Normally dir should not be empty, one case is that cgroup subsystem + // is not mounted, we will get empty dir, and we want it fail here. + if dir == "" { + return fmt.Errorf("no such directory for %s.", file) + } + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) +} + +func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { + path, err := getSubsystemPath(c, subsystem) + if err != nil { + return "", err + } + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return "", err + } + if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil { + return "", err + } + + return path, nil +} + +func joinCpu(c *configs.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "cpu") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + if c.CpuQuota != 0 { + if err = writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(c.CpuQuota, 10)); err != nil { + return err + } + } + if c.CpuPeriod != 0 { + if err = writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(c.CpuPeriod, 10)); err != nil { + return err + } + } + if c.CpuRtPeriod != 0 { + if err = writeFile(path, "cpu.rt_period_us", strconv.FormatInt(c.CpuRtPeriod, 10)); err != nil { + return err + } + } + if c.CpuRtRuntime != 0 { + if err = writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(c.CpuRtRuntime, 10)); err != nil { + return err + } + } + + return nil +} + +func joinFreezer(c *configs.Cgroup, pid int) error { + path, err := join(c, "freezer", pid) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + freezer := subsystems["freezer"] + return freezer.Set(path, c) +} + +func joinNetPrio(c *configs.Cgroup, pid int) error { + path, err := join(c, "net_prio", pid) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + netPrio := subsystems["net_prio"] + + return netPrio.Set(path, c) +} + +func joinNetCls(c *configs.Cgroup, pid int) error { + path, err := join(c, "net_cls", pid) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + netcls := subsystems["net_cls"] + + return netcls.Set(path, c) +} + +func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { + mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) + if err != nil { + return "", err + } + + initPath, err := cgroups.GetInitCgroupDir(subsystem) + if err != nil { + return "", err + } + + slice := "system.slice" + if c.Slice != "" { + slice = c.Slice + } + + return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + path, err := getSubsystemPath(m.Cgroups, "freezer") + if err != nil { + return err + } + + prevState := m.Cgroups.Freezer + m.Cgroups.Freezer = state + + freezer := subsystems["freezer"] + err = freezer.Set(path, m.Cgroups) + if err != nil { + m.Cgroups.Freezer = prevState + return err + } + + return nil +} + +func (m *Manager) GetPids() ([]int, error) { + path, err := getSubsystemPath(m.Cgroups, "cpu") + if err != nil { + return nil, err + } + + return cgroups.ReadProcsFile(path) +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() + stats := cgroups.NewStats() + for name, path := range m.Paths { + sys, ok := subsystems[name] + if !ok || !cgroups.PathExists(path) { + continue + } + if err := sys.GetStats(path, stats); err != nil { + return nil, err + } + } + + return stats, nil +} + +func (m *Manager) Set(container *configs.Config) error { + for name, path := range m.Paths { + sys, ok := subsystems[name] + if !ok || !cgroups.PathExists(path) { + continue + } + if err := sys.Set(path, container.Cgroups); err != nil { + return err + } + } + + return nil +} + +func getUnitName(c *configs.Cgroup) string { + return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) +} + +// Atm we can't use the systemd device support because of two missing things: +// * Support for wildcards to allow mknod on any device +// * Support for wildcards to allow /dev/pts support +// +// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is +// in wide use. When both these are available we will be able to switch, but need to keep the old +// implementation for backwards compat. +// +// Note: we can't use systemd to set up the initial limits, and then change the cgroup +// because systemd will re-write the device settings if it needs to re-apply the cgroup context. +// This happens at least for v208 when any sibling unit is started. +func joinDevices(c *configs.Cgroup, pid int) error { + path, err := join(c, "devices", pid) + // Even if it's `not found` error, we'll return err because devices cgroup + // is hard requirement for container security. + if err != nil { + return err + } + + devices := subsystems["devices"] + return devices.Set(path, c) +} + +func setKernelMemory(c *configs.Cgroup) error { + path, err := getSubsystemPath(c, "memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) { + return err + } + + if c.KernelMemory > 0 { + err = writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(c.KernelMemory, 10)) + if err != nil { + return err + } + } + + return nil +} + +func joinMemory(c *configs.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "memory") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + // -1 disables memoryswap + if c.MemorySwap > 0 { + err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10)) + if err != nil { + return err + } + } + if c.OomKillDisable { + if err := writeFile(path, "memory.oom_control", "1"); err != nil { + return err + } + } + + if c.MemorySwappiness >= 0 && c.MemorySwappiness <= 100 { + err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.MemorySwappiness, 10)) + if err != nil { + return err + } + } else if c.MemorySwappiness == -1 { + return nil + } else { + return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", c.MemorySwappiness) + } + + return nil +} + +// systemd does not atm set up the cpuset controller, so we must manually +// join it. Additionally that is a very finicky controller where each +// level must have a full setup as the default for a new directory is "no cpus" +func joinCpuset(c *configs.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "cpuset") + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + s := &fs.CpusetGroup{} + + return s.ApplyDir(path, c, pid) +} + +// `BlockIODeviceWeight` property of systemd does not work properly, and systemd +// expects device path instead of major minor numbers, which is also confusing +// for users. So we use fs work around for now. +func joinBlkio(c *configs.Cgroup, pid int) error { + path, err := getSubsystemPath(c, "blkio") + if err != nil { + return err + } + if c.BlkioWeightDevice != "" { + if err := writeFile(path, "blkio.weight_device", c.BlkioWeightDevice); err != nil { + return err + } + } + if c.BlkioThrottleReadBpsDevice != "" { + if err := writeFile(path, "blkio.throttle.read_bps_device", c.BlkioThrottleReadBpsDevice); err != nil { + return err + } + } + if c.BlkioThrottleWriteBpsDevice != "" { + if err := writeFile(path, "blkio.throttle.write_bps_device", c.BlkioThrottleWriteBpsDevice); err != nil { + return err + } + } + if c.BlkioThrottleReadIOpsDevice != "" { + if err := writeFile(path, "blkio.throttle.read_iops_device", c.BlkioThrottleReadIOpsDevice); err != nil { + return err + } + } + if c.BlkioThrottleWriteIOpsDevice != "" { + if err := writeFile(path, "blkio.throttle.write_iops_device", c.BlkioThrottleWriteIOpsDevice); err != nil { + return err + } + } + + return nil +} + +func joinHugetlb(c *configs.Cgroup, pid int) error { + path, err := join(c, "hugetlb", pid) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + hugetlb := subsystems["hugetlb"] + return hugetlb.Set(path, c) +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go new file mode 100644 index 00000000..1d0cb502 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -0,0 +1,293 @@ +// +build linux + +package cgroups + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/units" +) + +const cgroupNamePrefix = "name=" + +// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt +func FindCgroupMountpoint(subsystem string) (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], nil + } + } + } + if err := scanner.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError(subsystem) +} + +func FindCgroupMountpointAndSource(subsystem string) (string, string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", "", err + } + defer f.Close() + + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return fields[4], fields[3], nil + } + } + } + if err := scanner.Err(); err != nil { + return "", "", err + } + + return "", "", NewNotFoundError(subsystem) +} + +func FindCgroupMountpointDir() (string, error) { + mounts, err := mount.GetMounts() + if err != nil { + return "", err + } + + for _, mount := range mounts { + if mount.Fstype == "cgroup" { + return filepath.Dir(mount.Mountpoint), nil + } + } + + return "", NewNotFoundError("cgroup") +} + +type Mount struct { + Mountpoint string + Root string + Subsystems []string +} + +func (m Mount) GetThisCgroupDir() (string, error) { + if len(m.Subsystems) == 0 { + return "", fmt.Errorf("no subsystem for mount") + } + + return GetThisCgroupDir(m.Subsystems[0]) +} + +func GetCgroupMounts() ([]Mount, error) { + mounts, err := mount.GetMounts() + if err != nil { + return nil, err + } + + all, err := GetAllSubsystems() + if err != nil { + return nil, err + } + + allMap := make(map[string]bool) + for _, s := range all { + allMap[s] = true + } + + res := []Mount{} + for _, mount := range mounts { + if mount.Fstype == "cgroup" { + m := Mount{Mountpoint: mount.Mountpoint, Root: mount.Root} + + for _, opt := range strings.Split(mount.VfsOpts, ",") { + if strings.HasPrefix(opt, cgroupNamePrefix) { + m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):]) + } + if allMap[opt] { + m.Subsystems = append(m.Subsystems, opt) + } + } + res = append(res, m) + } + } + return res, nil +} + +// Returns all the cgroup subsystems supported by the kernel +func GetAllSubsystems() ([]string, error) { + f, err := os.Open("/proc/cgroups") + if err != nil { + return nil, err + } + defer f.Close() + + subsystems := []string{} + + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + text := s.Text() + if text[0] != '#' { + parts := strings.Fields(text) + if len(parts) >= 4 && parts[3] != "0" { + subsystems = append(subsystems, parts[0]) + } + } + } + return subsystems, nil +} + +// Returns the relative path to the cgroup docker is running in. +func GetThisCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/self/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return ParseCgroupFile(subsystem, f) +} + +func GetInitCgroupDir(subsystem string) (string, error) { + f, err := os.Open("/proc/1/cgroup") + if err != nil { + return "", err + } + defer f.Close() + + return ParseCgroupFile(subsystem, f) +} + +func ReadProcsFile(dir string) ([]int, error) { + f, err := os.Open(filepath.Join(dir, "cgroup.procs")) + if err != nil { + return nil, err + } + defer f.Close() + + var ( + s = bufio.NewScanner(f) + out = []int{} + ) + + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + return out, nil +} + +func ParseCgroupFile(subsystem string, r io.Reader) (string, error) { + s := bufio.NewScanner(r) + + for s.Scan() { + if err := s.Err(); err != nil { + return "", err + } + + text := s.Text() + parts := strings.Split(text, ":") + + for _, subs := range strings.Split(parts[1], ",") { + if subs == subsystem || subs == cgroupNamePrefix+subsystem { + return parts[2], nil + } + } + } + + return "", NewNotFoundError(subsystem) +} + +func PathExists(path string) bool { + if _, err := os.Stat(path); err != nil { + return false + } + return true +} + +func EnterPid(cgroupPaths map[string]string, pid int) error { + for _, path := range cgroupPaths { + if PathExists(path) { + if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), + []byte(strconv.Itoa(pid)), 0700); err != nil { + return err + } + } + } + return nil +} + +// RemovePaths iterates over the provided paths removing them. +// We trying to remove all paths five times with increasing delay between tries. +// If after all there are not removed cgroups - appropriate error will be +// returned. +func RemovePaths(paths map[string]string) (err error) { + delay := 10 * time.Millisecond + for i := 0; i < 5; i++ { + if i != 0 { + time.Sleep(delay) + delay *= 2 + } + for s, p := range paths { + os.RemoveAll(p) + // TODO: here probably should be logging + _, err := os.Stat(p) + // We need this strange way of checking cgroups existence because + // RemoveAll almost always returns error, even on already removed + // cgroups + if os.IsNotExist(err) { + delete(paths, s) + } + } + if len(paths) == 0 { + return nil + } + } + return fmt.Errorf("Failed to remove paths: %s", paths) +} + +func GetHugePageSize() ([]string, error) { + var pageSizes []string + sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"} + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return pageSizes, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return []string{}, err + } + sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList) + pageSizes = append(pageSizes, sizeString) + } + + return pageSizes, nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/cgroup.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/cgroup.go new file mode 100644 index 00000000..140b530d --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/cgroup.go @@ -0,0 +1,98 @@ +package configs + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +// TODO Windows: This can be factored out in the future as Cgroups are not +// supported on the Windows platform. + +type Cgroup struct { + Name string `json:"name"` + + // name of parent cgroup or slice + Parent string `json:"parent"` + + // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. + AllowAllDevices bool `json:"allow_all_devices"` + + AllowedDevices []*Device `json:"allowed_devices"` + + DeniedDevices []*Device `json:"denied_devices"` + + // Memory limit (in bytes) + Memory int64 `json:"memory"` + + // Memory reservation or soft_limit (in bytes) + MemoryReservation int64 `json:"memory_reservation"` + + // Total memory usage (memory + swap); set `-1' to disable swap + MemorySwap int64 `json:"memory_swap"` + + // Kernel memory limit (in bytes) + KernelMemory int64 `json:"kernel_memory"` + + // CPU shares (relative weight vs. other containers) + CpuShares int64 `json:"cpu_shares"` + + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuQuota int64 `json:"cpu_quota"` + + // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpuPeriod int64 `json:"cpu_period"` + + // How many time CPU will use in realtime scheduling (in usecs). + CpuRtRuntime int64 `json:"cpu_quota"` + + // CPU period to be used for realtime scheduling (in usecs). + CpuRtPeriod int64 `json:"cpu_period"` + + // CPU to use + CpusetCpus string `json:"cpuset_cpus"` + + // MEM to use + CpusetMems string `json:"cpuset_mems"` + + // IO read rate limit per cgroup per device, bytes per second. + BlkioThrottleReadBpsDevice string `json:"blkio_throttle_read_bps_device"` + + // IO write rate limit per cgroup per divice, bytes per second. + BlkioThrottleWriteBpsDevice string `json:"blkio_throttle_write_bps_device"` + + // IO read rate limit per cgroup per device, IO per second. + BlkioThrottleReadIOpsDevice string `json:"blkio_throttle_read_iops_device"` + + // IO write rate limit per cgroup per device, IO per second. + BlkioThrottleWriteIOpsDevice string `json:"blkio_throttle_write_iops_device"` + + // Specifies per cgroup weight, range is from 10 to 1000. + BlkioWeight int64 `json:"blkio_weight"` + + // Weight per cgroup per device, can override BlkioWeight. + BlkioWeightDevice string `json:"blkio_weight_device"` + + // set the freeze value for the process + Freezer FreezerState `json:"freezer"` + + // Hugetlb limit (in bytes) + HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` + + // Parent slice to use for systemd TODO: remove in favor or parent + Slice string `json:"slice"` + + // Whether to disable OOM Killer + OomKillDisable bool `json:"oom_kill_disable"` + + // Tuning swappiness behaviour per cgroup + MemorySwappiness int64 `json:"memory_swappiness"` + + // Set priority of network traffic for container + NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` + + // Set class identifier for container's network packets + NetClsClassid string `json:"net_cls_classid"` +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config.go new file mode 100644 index 00000000..83381c84 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -0,0 +1,146 @@ +package configs + +type Rlimit struct { + Type int `json:"type"` + Hard uint64 `json:"hard"` + Soft uint64 `json:"soft"` +} + +// IDMap represents UID/GID Mappings for User Namespaces. +type IDMap struct { + ContainerID int `json:"container_id"` + HostID int `json:"host_id"` + Size int `json:"size"` +} + +type Seccomp struct { + Syscalls []*Syscall `json:"syscalls"` +} + +type Action int + +const ( + Kill Action = iota - 3 + Trap + Allow +) + +type Operator int + +const ( + EqualTo Operator = iota + NotEqualTo + GreatherThan + LessThan + MaskEqualTo +) + +type Arg struct { + Index int `json:"index"` + Value uint32 `json:"value"` + Op Operator `json:"op"` +} + +type Syscall struct { + Value int `json:"value"` + Action Action `json:"action"` + Args []*Arg `json:"args"` +} + +// TODO Windows. Many of these fields should be factored out into those parts +// which are common across platforms, and those which are platform specific. + +// Config defines configuration options for executing a process inside a contained environment. +type Config struct { + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root"` + + // ParentDeathSignal specifies the signal that is sent to the container's process in the case + // that the parent process dies. + ParentDeathSignal int `json:"parent_death_signal"` + + // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. + // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. + // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. + PivotDir string `json:"pivot_dir"` + + // Path to a directory containing the container's root filesystem. + Rootfs string `json:"rootfs"` + + // Readonlyfs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writtable. + Readonlyfs bool `json:"readonlyfs"` + + // Privatefs will mount the container's rootfs as private where mount points from the parent will not propogate + Privatefs bool `json:"privatefs"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! + Devices []*Device `json:"devices"` + + MountLabel string `json:"mount_label"` + + // Hostname optionally sets the container's hostname if provided + Hostname string `json:"hostname"` + + // Namespaces specifies the container's namespaces that it should setup when cloning the init process + // If a namespace is not provided that namespace is shared from the container's parent process + Namespaces Namespaces `json:"namespaces"` + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capbilities not specified will be dropped from the processes capability mask + Capabilities []string `json:"capabilities"` + + // Networks specifies the container's network setup to be created + Networks []*Network `json:"networks"` + + // Routes can be specified to create entries in the route table as the container is started + Routes []*Route `json:"routes"` + + // Cgroups specifies specific cgroup settings for the various subsystems that the container is + // placed into to limit the resources the container has available + Cgroups *Cgroup `json:"cgroups"` + + // AppArmorProfile specifies the profile to apply to the process running in the container and is + // change at the time the process is execed + AppArmorProfile string `json:"apparmor_profile"` + + // ProcessLabel specifies the label to apply to the process running in the container. It is + // commonly used by selinux + ProcessLabel string `json:"process_label"` + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []Rlimit `json:"rlimits"` + + // AdditionalGroups specifies the gids that should be added to supplementary groups + // in addition to those that the user belongs to. + AdditionalGroups []string `json:"additional_groups"` + + // UidMappings is an array of User ID mappings for User Namespaces + UidMappings []IDMap `json:"uid_mappings"` + + // GidMappings is an array of Group ID mappings for User Namespaces + GidMappings []IDMap `json:"gid_mappings"` + + // MaskPaths specifies paths within the container's rootfs to mask over with a bind + // mount pointing to /dev/null as to prevent reads of the file. + MaskPaths []string `json:"mask_paths"` + + // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only + // so that these files prevent any writes. + ReadonlyPaths []string `json:"readonly_paths"` + + // Sysctl is a map of properties and their values. It is the equivalent of using + // sysctl -w my.property.name value in Linux. + Sysctl map[string]string `json:"sysctl"` + + // Seccomp allows actions to be taken whenever a syscall is made within the container. + // By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno + // can be specified on a per syscall basis. + Seccomp *Seccomp `json:"seccomp"` +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config_test.go new file mode 100644 index 00000000..385e3775 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config_test.go @@ -0,0 +1,154 @@ +package configs + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "testing" +) + +// Checks whether the expected capability is specified in the capabilities. +func contains(expected string, values []string) bool { + for _, v := range values { + if v == expected { + return true + } + } + return false +} + +func containsDevice(expected *Device, values []*Device) bool { + for _, d := range values { + if d.Path == expected.Path && + d.Permissions == expected.Permissions && + d.FileMode == expected.FileMode && + d.Major == expected.Major && + d.Minor == expected.Minor && + d.Type == expected.Type { + return true + } + } + return false +} + +func loadConfig(name string) (*Config, error) { + f, err := os.Open(filepath.Join("../sample_configs", name)) + if err != nil { + return nil, err + } + defer f.Close() + + var container *Config + if err := json.NewDecoder(f).Decode(&container); err != nil { + return nil, err + } + + // Check that a config doesn't contain extra fields + var configMap, abstractMap map[string]interface{} + + if _, err := f.Seek(0, 0); err != nil { + return nil, err + } + + if err := json.NewDecoder(f).Decode(&abstractMap); err != nil { + return nil, err + } + + configData, err := json.Marshal(&container) + if err != nil { + return nil, err + } + + if err := json.Unmarshal(configData, &configMap); err != nil { + return nil, err + } + + for k := range configMap { + delete(abstractMap, k) + } + + if len(abstractMap) != 0 { + return nil, fmt.Errorf("unknown fields: %s", abstractMap) + } + + return container, nil +} + +func TestRemoveNamespace(t *testing.T) { + ns := Namespaces{ + {Type: NEWNET}, + } + if !ns.Remove(NEWNET) { + t.Fatal("NEWNET was not removed") + } + if len(ns) != 0 { + t.Fatalf("namespaces should have 0 items but reports %d", len(ns)) + } +} + +func TestHostUIDNoUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{}, + } + uid, err := config.HostUID() + if err != nil { + t.Fatal(err) + } + if uid != 0 { + t.Fatalf("expected uid 0 with no USERNS but received %d", uid) + } +} + +func TestHostUIDWithUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{{Type: NEWUSER}}, + UidMappings: []IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 1, + }, + }, + } + uid, err := config.HostUID() + if err != nil { + t.Fatal(err) + } + if uid != 1000 { + t.Fatalf("expected uid 1000 with no USERNS but received %d", uid) + } +} + +func TestHostGIDNoUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{}, + } + uid, err := config.HostGID() + if err != nil { + t.Fatal(err) + } + if uid != 0 { + t.Fatalf("expected gid 0 with no USERNS but received %d", uid) + } +} + +func TestHostGIDWithUSERNS(t *testing.T) { + config := &Config{ + Namespaces: Namespaces{{Type: NEWUSER}}, + GidMappings: []IDMap{ + { + ContainerID: 0, + HostID: 1000, + Size: 1, + }, + }, + } + uid, err := config.HostGID() + if err != nil { + t.Fatal(err) + } + if uid != 1000 { + t.Fatalf("expected gid 1000 with no USERNS but received %d", uid) + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config_unix.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config_unix.go new file mode 100644 index 00000000..89f580bf --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/config_unix.go @@ -0,0 +1,51 @@ +// +build freebsd linux + +package configs + +import "fmt" + +// Gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostUID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") + } + id, found := c.hostIDFromMapping(0, c.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root uid 0 + return 0, nil +} + +// Gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostGID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + id, found := c.hostIDFromMapping(0, c.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root uid 0 + return 0, nil +} + +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/device.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/device.go new file mode 100644 index 00000000..a52a024a --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/device.go @@ -0,0 +1,54 @@ +package configs + +import ( + "fmt" + "os" +) + +const ( + Wildcard = -1 +) + +// TODO Windows: This can be factored out in the future + +type Device struct { + // Device type, block, char, etc. + Type rune `json:"type"` + + // Path to the device. + Path string `json:"path"` + + // Major is the device's major number. + Major int64 `json:"major"` + + // Minor is the device's minor number. + Minor int64 `json:"minor"` + + // Cgroup permissions format, rwm. + Permissions string `json:"permissions"` + + // FileMode permission bits for the device. + FileMode os.FileMode `json:"file_mode"` + + // Uid of the device. + Uid uint32 `json:"uid"` + + // Gid of the device. + Gid uint32 `json:"gid"` +} + +func (d *Device) CgroupString() string { + return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +} + +func (d *Device) Mkdev() int { + return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +} + +// deviceNumberString converts the device number to a string return result. +func deviceNumberString(number int64) string { + if number == Wildcard { + return "*" + } + return fmt.Sprint(number) +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go new file mode 100644 index 00000000..0ce040fd --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go @@ -0,0 +1,139 @@ +// +build linux freebsd + +package configs + +var ( + // These are devices that are to be both allowed and created. + DefaultSimpleDevices = []*Device{ + // /dev/null and zero + { + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, + }, + + { + Path: "/dev/full", + Type: 'c', + Major: 1, + Minor: 7, + Permissions: "rwm", + FileMode: 0666, + }, + + // consoles and ttys + { + Path: "/dev/tty", + Type: 'c', + Major: 5, + Minor: 0, + Permissions: "rwm", + FileMode: 0666, + }, + + // /dev/urandom,/dev/random + { + Path: "/dev/urandom", + Type: 'c', + Major: 1, + Minor: 9, + Permissions: "rwm", + FileMode: 0666, + }, + { + Path: "/dev/random", + Type: 'c', + Major: 1, + Minor: 8, + Permissions: "rwm", + FileMode: 0666, + }, + } + DefaultAllowedDevices = append([]*Device{ + // allow mknod for any device + { + Type: 'c', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + { + Type: 'b', + Major: Wildcard, + Minor: Wildcard, + Permissions: "m", + }, + + { + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + Permissions: "rwm", + }, + { + Path: "/dev/tty0", + Type: 'c', + Major: 4, + Minor: 0, + Permissions: "rwm", + }, + { + Path: "/dev/tty1", + Type: 'c', + Major: 4, + Minor: 1, + Permissions: "rwm", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Path: "", + Type: 'c', + Major: 136, + Minor: Wildcard, + Permissions: "rwm", + }, + { + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", + }, + + // tuntap + { + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) + DefaultAutoCreatedDevices = append([]*Device{ + { + // /dev/fuse is created but not allowed. + // This is to allow java to work. Because java + // Insists on there being a /dev/fuse + // https://github.com/docker/docker/issues/514 + // https://github.com/docker/docker/issues/2393 + // + Path: "/dev/fuse", + Type: 'c', + Major: 10, + Minor: 229, + Permissions: "rwm", + }, + }, DefaultSimpleDevices...) +) diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go new file mode 100644 index 00000000..1cce8d09 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go @@ -0,0 +1,9 @@ +package configs + +type HugepageLimit struct { + // which type of hugepage to limit. + Pagesize string `json:"page_size"` + + // usage limit for hugepage. + Limit int `json:"limit"` +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go new file mode 100644 index 00000000..9a0395ea --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go @@ -0,0 +1,14 @@ +package configs + +import ( + "fmt" +) + +type IfPrioMap struct { + Interface string `json:"interface"` + Priority int64 `json:"priority"` +} + +func (i *IfPrioMap) CgroupString() string { + return fmt.Sprintf("%s %d", i.Interface, i.Priority) +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/mount.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/mount.go new file mode 100644 index 00000000..5a69f815 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/mount.go @@ -0,0 +1,34 @@ +package configs + +type Mount struct { + // Source path for the mount. + Source string `json:"source"` + + // Destination path for the mount inside the container. + Destination string `json:"destination"` + + // Device the mount is for. + Device string `json:"device"` + + // Mount flags. + Flags int `json:"flags"` + + // Mount data applied to the mount. + Data string `json:"data"` + + // Relabel source if set, "z" indicates shared, "Z" indicates unshared. + Relabel string `json:"relabel"` + + // Optional Command to be run before Source is mounted. + PremountCmds []Command `json:"premount_cmds"` + + // Optional Command to be run after Source is mounted. + PostmountCmds []Command `json:"postmount_cmds"` +} + +type Command struct { + Path string `json:"path"` + Args []string `json:"args"` + Env []string `json:"env"` + Dir string `json:"dir"` +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces.go new file mode 100644 index 00000000..a3329a31 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces.go @@ -0,0 +1,5 @@ +package configs + +type NamespaceType string + +type Namespaces []Namespace diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go new file mode 100644 index 00000000..c962999e --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -0,0 +1,31 @@ +// +build linux + +package configs + +import "syscall" + +func (n *Namespace) Syscall() int { + return namespaceInfo[n.Type] +} + +var namespaceInfo = map[NamespaceType]int{ + NEWNET: syscall.CLONE_NEWNET, + NEWNS: syscall.CLONE_NEWNS, + NEWUSER: syscall.CLONE_NEWUSER, + NEWIPC: syscall.CLONE_NEWIPC, + NEWUTS: syscall.CLONE_NEWUTS, + NEWPID: syscall.CLONE_NEWPID, +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This functions returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + var flag int + for _, v := range *n { + if v.Path != "" { + continue + } + flag |= namespaceInfo[v.Type] + } + return uintptr(flag) +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go new file mode 100644 index 00000000..1644588d --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go @@ -0,0 +1,15 @@ +// +build !linux,!windows + +package configs + +func (n *Namespace) Syscall() int { + panic("No namespace syscall support") + return 0 +} + +// CloneFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare. This functions returns flags only for new namespaces. +func (n *Namespaces) CloneFlags() uintptr { + panic("No namespace syscall support") + return uintptr(0) +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go new file mode 100644 index 00000000..7bc90854 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_unix.go @@ -0,0 +1,89 @@ +// +build linux freebsd + +package configs + +import "fmt" + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" +) + +func NamespaceTypes() []NamespaceType { + return []NamespaceType{ + NEWNET, + NEWPID, + NEWNS, + NEWUTS, + NEWIPC, + NEWUSER, + } +} + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path"` +} + +func (n *Namespace) GetPath(pid int) string { + if n.Path != "" { + return n.Path + } + return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file()) +} + +func (n *Namespace) file() string { + file := "" + switch n.Type { + case NEWNET: + file = "net" + case NEWNS: + file = "mnt" + case NEWPID: + file = "pid" + case NEWIPC: + file = "ipc" + case NEWUSER: + file = "user" + case NEWUTS: + file = "uts" + } + return file +} + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_windows.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_windows.go new file mode 100644 index 00000000..f8fe47a8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/namespaces_windows.go @@ -0,0 +1,6 @@ +package configs + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/network.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/network.go new file mode 100644 index 00000000..ccdb228e --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/network.go @@ -0,0 +1,72 @@ +package configs + +// Network defines configuration for a container's networking stack +// +// The network configuration can be omitted from a container causing the +// container to be setup with the host's networking stack +type Network struct { + // Type sets the networks type, commonly veth and loopback + Type string `json:"type"` + + // Name of the network interface + Name string `json:"name"` + + // The bridge to use. + Bridge string `json:"bridge"` + + // MacAddress contains the MAC address to set on the network interface + MacAddress string `json:"mac_address"` + + // Address contains the IPv4 and mask to set on the network interface + Address string `json:"address"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway"` + + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address"` + + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface + IPv6Gateway string `json:"ipv6_gateway"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + Mtu int `json:"mtu"` + + // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + TxQueueLen int `json:"txqueuelen"` + + // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the + // container. + HostInterfaceName string `json:"host_interface_name"` + + // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface + // bridge port in the case of type veth + // Note: This is unsupported on some systems. + // Note: This does not apply to loopback interfaces. + HairpinMode bool `json:"hairpin_mode"` +} + +// Routes can be specified to create entries in the route table as the container is started +// +// All of destination, source, and gateway should be either IPv4 or IPv6. +// One of the three options must be present, and omitted entries will use their +// IP family default for the route table. For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 + Destination string `json:"destination"` + + // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 + Source string `json:"source"` + + // Sets the gateway. Accepts IPv4 and IPv6 + Gateway string `json:"gateway"` + + // The device to set this route up for, for example: eth0 + InterfaceName string `json:"interface_name"` +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/validate/config.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/validate/config.go new file mode 100644 index 00000000..848a67c3 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/configs/validate/config.go @@ -0,0 +1,93 @@ +package validate + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +type Validator interface { + Validate(*configs.Config) error +} + +func New() Validator { + return &ConfigValidator{} +} + +type ConfigValidator struct { +} + +func (v *ConfigValidator) Validate(config *configs.Config) error { + if err := v.rootfs(config); err != nil { + return err + } + if err := v.network(config); err != nil { + return err + } + if err := v.hostname(config); err != nil { + return err + } + if err := v.security(config); err != nil { + return err + } + if err := v.usernamespace(config); err != nil { + return err + } + return nil +} + +// rootfs validates the the rootfs is an absolute path and is not a symlink +// to the container's root filesystem. +func (v *ConfigValidator) rootfs(config *configs.Config) error { + cleaned, err := filepath.Abs(config.Rootfs) + if err != nil { + return err + } + if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { + return err + } + if config.Rootfs != cleaned { + return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) + } + return nil +} + +func (v *ConfigValidator) network(config *configs.Config) error { + if !config.Namespaces.Contains(configs.NEWNET) { + if len(config.Networks) > 0 || len(config.Routes) > 0 { + return fmt.Errorf("unable to apply network settings without a private NET namespace") + } + } + return nil +} + +func (v *ConfigValidator) hostname(config *configs.Config) error { + if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { + return fmt.Errorf("unable to set hostname without a private UTS namespace") + } + return nil +} + +func (v *ConfigValidator) security(config *configs.Config) error { + // restrict sys without mount namespace + if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && + !config.Namespaces.Contains(configs.NEWNS) { + return fmt.Errorf("unable to restrict sys entries without a private MNT namespace") + } + return nil +} + +func (v *ConfigValidator) usernamespace(config *configs.Config) error { + if config.Namespaces.Contains(configs.NEWUSER) { + if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { + return fmt.Errorf("USER namespaces aren't enabled in the kernel") + } + } else { + if config.UidMappings != nil || config.GidMappings != nil { + return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config") + } + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/README.md b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/README.md new file mode 100644 index 00000000..d1a60ef9 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/README.md @@ -0,0 +1,25 @@ +## nsenter + +The `nsenter` package registers a special init constructor that is called before +the Go runtime has a chance to boot. This provides us the ability to `setns` on +existing namespaces and avoid the issues that the Go runtime has with multiple +threads. This constructor will be called if this package is registered, +imported, in your go application. + +The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd/cgo/) +package. In cgo, if the import of "C" is immediately preceded by a comment, that comment, +called the preamble, is used as a header when compiling the C parts of the package. +So every time we import package `nsenter`, the C code function `nsexec()` would be +called. And package `nsenter` is now only imported in Docker execdriver, so every time +before we call `execdriver.Exec()`, that C code would run. + +`nsexec()` will first check the environment variable `_LIBCONTAINER_INITPID` +which will give the process of the container that should be joined. Namespaces fd will +be found from `/proc/[pid]/ns` and set by `setns` syscall. + +And then get the pipe number from `_LIBCONTAINER_INITPIPE`, error message could +be transfered through it. If tty is added, `_LIBCONTAINER_CONSOLE_PATH` will +have value and start a console for output. + +Finally, `nsexec()` will clone a child process , exit the parent process and let +the Go runtime take over. diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go new file mode 100644 index 00000000..07f4d63e --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go @@ -0,0 +1,12 @@ +// +build linux,!gccgo + +package nsenter + +/* +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init(void) { + nsexec(); +} +*/ +import "C" diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go new file mode 100644 index 00000000..63c7a3ec --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go @@ -0,0 +1,25 @@ +// +build linux,gccgo + +package nsenter + +/* +#cgo CFLAGS: -Wall +extern void nsexec(); +void __attribute__((constructor)) init(void) { + nsexec(); +} +*/ +import "C" + +// AlwaysFalse is here to stay false +// (and be exported so the compiler doesn't optimize out its reference) +var AlwaysFalse bool + +func init() { + if AlwaysFalse { + // by referencing this C init() in a noop test, it will ensure the compiler + // links in the C function. + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134 + C.init() + } +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_test.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_test.go new file mode 100644 index 00000000..db27b8a4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_test.go @@ -0,0 +1,91 @@ +package nsenter + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "strings" + "testing" +) + +type pid struct { + Pid int `json:"Pid"` +} + +func TestNsenterAlivePid(t *testing.T) { + args := []string{"nsenter-exec"} + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("failed to create pipe %v", err) + } + + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + ExtraFiles: []*os.File{w}, + Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid()), "_LIBCONTAINER_INITPIPE=3"}, + } + + if err := cmd.Start(); err != nil { + t.Fatalf("nsenter failed to start %v", err) + } + w.Close() + + decoder := json.NewDecoder(r) + var pid *pid + + if err := decoder.Decode(&pid); err != nil { + t.Fatalf("%v", err) + } + + if err := cmd.Wait(); err != nil { + t.Fatalf("nsenter exits with a non-zero exit status") + } + p, err := os.FindProcess(pid.Pid) + if err != nil { + t.Fatalf("%v", err) + } + p.Wait() +} + +func TestNsenterInvalidPid(t *testing.T) { + args := []string{"nsenter-exec"} + + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + Env: []string{"_LIBCONTAINER_INITPID=-1"}, + } + + err := cmd.Run() + if err == nil { + t.Fatal("nsenter exits with a zero exit status") + } +} + +func TestNsenterDeadPid(t *testing.T) { + dead_cmd := exec.Command("true") + if err := dead_cmd.Run(); err != nil { + t.Fatal(err) + } + args := []string{"nsenter-exec"} + + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)}, + } + + err := cmd.Run() + if err == nil { + t.Fatal("nsenter exits with a zero exit status") + } +} + +func init() { + if strings.HasPrefix(os.Args[0], "nsenter-") { + os.Exit(0) + } + return +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go new file mode 100644 index 00000000..ac701ca3 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go @@ -0,0 +1,5 @@ +// +build !linux !cgo + +package nsenter + +import "C" diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c new file mode 100644 index 00000000..cd02d00a --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -0,0 +1,190 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* All arguments should be above stack, because it grows down */ +struct clone_arg { + /* + * Reserve some space for clone() to locate arguments + * and retcode in this place + */ + char stack[4096] __attribute__ ((aligned(8))); + char stack_ptr[0]; + jmp_buf *env; +}; + +#define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__) + +static int child_func(void *_arg) +{ + struct clone_arg *arg = (struct clone_arg *)_arg; + longjmp(*arg->env, 1); +} + +// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12) +#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 +#define _GNU_SOURCE +#include "syscall.h" +#if defined(__NR_setns) && !defined(SYS_setns) +#define SYS_setns __NR_setns +#endif +#ifdef SYS_setns +int setns(int fd, int nstype) +{ + return syscall(SYS_setns, fd, nstype); +} +#endif +#endif + +static int clone_parent(jmp_buf * env) __attribute__ ((noinline)); +static int clone_parent(jmp_buf * env) +{ + struct clone_arg ca; + int child; + + ca.env = env; + child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca); + + return child; +} + +void nsexec() +{ + char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" }; + const int num = sizeof(namespaces) / sizeof(char *); + jmp_buf env; + char buf[PATH_MAX], *val; + int i, tfd, child, len, pipenum, consolefd = -1; + pid_t pid; + char *console; + + val = getenv("_LIBCONTAINER_INITPID"); + if (val == NULL) + return; + + pid = atoi(val); + snprintf(buf, sizeof(buf), "%d", pid); + if (strcmp(val, buf)) { + pr_perror("Unable to parse _LIBCONTAINER_INITPID"); + exit(1); + } + + val = getenv("_LIBCONTAINER_INITPIPE"); + if (val == NULL) { + pr_perror("Child pipe not found"); + exit(1); + } + + pipenum = atoi(val); + snprintf(buf, sizeof(buf), "%d", pipenum); + if (strcmp(val, buf)) { + pr_perror("Unable to parse _LIBCONTAINER_INITPIPE"); + exit(1); + } + + console = getenv("_LIBCONTAINER_CONSOLE_PATH"); + if (console != NULL) { + consolefd = open(console, O_RDWR); + if (consolefd < 0) { + pr_perror("Failed to open console %s", console); + exit(1); + } + } + + /* Check that the specified process exists */ + snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid); + tfd = open(buf, O_DIRECTORY | O_RDONLY); + if (tfd == -1) { + pr_perror("Failed to open \"%s\"", buf); + exit(1); + } + + for (i = 0; i < num; i++) { + struct stat st; + int fd; + + /* Symlinks on all namespaces exist for dead processes, but they can't be opened */ + if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) { + // Ignore nonexistent namespaces. + if (errno == ENOENT) + continue; + } + + fd = openat(tfd, namespaces[i], O_RDONLY); + if (fd == -1) { + pr_perror("Failed to open ns file %s for ns %s", buf, + namespaces[i]); + exit(1); + } + // Set the namespace. + if (setns(fd, 0) == -1) { + pr_perror("Failed to setns for %s", namespaces[i]); + exit(1); + } + close(fd); + } + + if (setjmp(env) == 1) { + // Child + + if (setsid() == -1) { + pr_perror("setsid failed"); + exit(1); + } + if (consolefd != -1) { + if (ioctl(consolefd, TIOCSCTTY, 0) == -1) { + pr_perror("ioctl TIOCSCTTY failed"); + exit(1); + } + if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) { + pr_perror("Failed to dup 0"); + exit(1); + } + if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) { + pr_perror("Failed to dup 1"); + exit(1); + } + if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) { + pr_perror("Failed to dup 2"); + exit(1); + } + } + // Finish executing, let the Go runtime take over. + return; + } + // Parent + + // We must fork to actually enter the PID namespace, use CLONE_PARENT + // so the child can have the right parent, and we don't need to forward + // the child's exit code or resend its death signal. + child = clone_parent(&env); + if (child < 0) { + pr_perror("Unable to fork"); + exit(1); + } + + len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child); + + if (write(pipenum, buf, len) != len) { + pr_perror("Unable to send a child pid"); + kill(child, SIGKILL); + exit(1); + } + + exit(0); +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/linux.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/linux.go new file mode 100644 index 00000000..2cc3ef80 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/linux.go @@ -0,0 +1,77 @@ +// +build linux + +package system + +import ( + "os/exec" + "syscall" + "unsafe" +) + +type ParentDeathSignal int + +func (p ParentDeathSignal) Restore() error { + if p == 0 { + return nil + } + current, err := GetParentDeathSignal() + if err != nil { + return err + } + if p == current { + return nil + } + return p.Set() +} + +func (p ParentDeathSignal) Set() error { + return SetParentDeathSignal(uintptr(p)) +} + +func Execv(cmd string, args []string, env []string) error { + name, err := exec.LookPath(cmd) + if err != nil { + return err + } + + return syscall.Exec(name, args, env) +} + +func SetParentDeathSignal(sig uintptr) error { + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 { + return err + } + return nil +} + +func GetParentDeathSignal() (ParentDeathSignal, error) { + var sig int + _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0) + if err != 0 { + return -1, err + } + return ParentDeathSignal(sig), nil +} + +func SetKeepCaps() error { + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 { + return err + } + + return nil +} + +func ClearKeepCaps() error { + if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 { + return err + } + + return nil +} + +func Setctty() error { + if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 { + return err + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/proc.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/proc.go new file mode 100644 index 00000000..37808a29 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/proc.go @@ -0,0 +1,27 @@ +package system + +import ( + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +// look in /proc to find the process start time so that we can verify +// that this pid has started after ourself +func GetProcessStartTime(pid int) (string, error) { + data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + if err != nil { + return "", err + } + + parts := strings.Split(string(data), " ") + // the starttime is located at pos 22 + // from the man page + // + // starttime %llu (was %lu before Linux 2.6) + // (22) The time the process started after system boot. In kernels before Linux 2.6, this + // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks + // (divide by sysconf(_SC_CLK_TCK)). + return parts[22-1], nil // starts at 1 +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/setns_linux.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/setns_linux.go new file mode 100644 index 00000000..615ff4c8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/setns_linux.go @@ -0,0 +1,40 @@ +package system + +import ( + "fmt" + "runtime" + "syscall" +) + +// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092 +// +// We need different setns values for the different platforms and arch +// We are declaring the macro here because the SETNS syscall does not exist in th stdlib +var setNsMap = map[string]uintptr{ + "linux/386": 346, + "linux/arm64": 268, + "linux/amd64": 308, + "linux/arm": 375, + "linux/ppc": 350, + "linux/ppc64": 350, + "linux/ppc64le": 350, + "linux/s390x": 339, +} + +var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + +func SysSetns() uint32 { + return uint32(sysSetns) +} + +func Setns(fd uintptr, flags uintptr) error { + ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + if !exists { + return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) + } + _, _, err := syscall.RawSyscall(ns, fd, flags, 0) + if err != 0 { + return err + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go new file mode 100644 index 00000000..c9900651 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go @@ -0,0 +1,25 @@ +// +build linux,386 + +package system + +import ( + "syscall" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go new file mode 100644 index 00000000..0816bf82 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go @@ -0,0 +1,25 @@ +// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x + +package system + +import ( + "syscall" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go new file mode 100644 index 00000000..3f780f31 --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go @@ -0,0 +1,25 @@ +// +build linux,arm + +package system + +import ( + "syscall" +) + +// Setuid sets the uid of the calling thread to the specified uid. +func Setuid(uid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} + +// Setgid sets the gid of the calling thread to the specified gid. +func Setgid(gid int) (err error) { + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0) + if e1 != 0 { + err = e1 + } + return +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/sysconfig.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/sysconfig.go new file mode 100644 index 00000000..b3a07cba --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/sysconfig.go @@ -0,0 +1,12 @@ +// +build cgo,linux cgo,freebsd + +package system + +/* +#include +*/ +import "C" + +func GetClockTicks() int { + return int(C.sysconf(C._SC_CLK_TCK)) +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go new file mode 100644 index 00000000..d93b5d5f --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go @@ -0,0 +1,15 @@ +// +build !cgo windows + +package system + +func GetClockTicks() int { + // TODO figure out a better alternative for platforms where we're missing cgo + // + // TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency(). + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx + // + // An example of its usage can be found here. + // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx + + return 100 +} diff --git a/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go new file mode 100644 index 00000000..30f74dfb --- /dev/null +++ b/Godeps/_workspace/src/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go @@ -0,0 +1,99 @@ +package system + +import ( + "syscall" + "unsafe" +) + +var _zero uintptr + +// Returns the size of xattrs and nil error +// Requires path, takes allocated []byte or nil as last argument +func Llistxattr(path string, dest []byte) (size int, err error) { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return -1, err + } + var newpathBytes unsafe.Pointer + if len(dest) > 0 { + newpathBytes = unsafe.Pointer(&dest[0]) + } else { + newpathBytes = unsafe.Pointer(&_zero) + } + + _size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0) + size = int(_size) + if errno != 0 { + return -1, errno + } + + return size, nil +} + +// Returns a []byte slice if the xattr is set and nil otherwise +// Requires path and its attribute as arguments +func Lgetxattr(path string, attr string) ([]byte, error) { + var sz int + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return nil, err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return nil, err + } + + // Start with a 128 length byte array + sz = 128 + dest := make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + + switch { + case errno == syscall.ENODATA: + return nil, errno + case errno == syscall.ENOTSUP: + return nil, errno + case errno == syscall.ERANGE: + // 128 byte array might just not be good enough, + // A dummy buffer is used ``uintptr(0)`` to get real size + // of the xattrs on disk + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0) + sz = int(_sz) + if sz < 0 { + return nil, errno + } + dest = make([]byte, sz) + destBytes := unsafe.Pointer(&dest[0]) + _sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0) + if errno != 0 { + return nil, errno + } + case errno != 0: + return nil, errno + } + sz = int(_sz) + return dest[:sz], nil +} + +func Lsetxattr(path string, attr string, data []byte, flags int) error { + pathBytes, err := syscall.BytePtrFromString(path) + if err != nil { + return err + } + attrBytes, err := syscall.BytePtrFromString(attr) + if err != nil { + return err + } + var dataBytes unsafe.Pointer + if len(data) > 0 { + dataBytes = unsafe.Pointer(&data[0]) + } else { + dataBytes = unsafe.Pointer(&_zero) + } + _, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0) + if errno != 0 { + return errno + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/rancher/docker-from-scratch/scratch.go b/Godeps/_workspace/src/github.com/rancher/docker-from-scratch/scratch.go index ea686426..358d1543 100644 --- a/Godeps/_workspace/src/github.com/rancher/docker-from-scratch/scratch.go +++ b/Godeps/_workspace/src/github.com/rancher/docker-from-scratch/scratch.go @@ -2,6 +2,7 @@ package dockerlaunch import ( "bufio" + "fmt" "io" "io/ioutil" "os" @@ -17,10 +18,15 @@ import ( "github.com/rancher/netconf" ) -const defaultPrefix = "/usr" +const ( + defaultPrefix = "/usr" + iptables = "/sbin/iptables" + modprobe = "/sbin/modprobe" + systemdRoot = "/sys/fs/cgroup/systemd/user.slice" +) var ( - mounts [][]string = [][]string{ + mounts = [][]string{ {"devtmpfs", "/dev", "devtmpfs", ""}, {"none", "/dev/pts", "devpts", ""}, {"none", "/proc", "proc", ""}, @@ -28,6 +34,9 @@ var ( {"none", "/sys", "sysfs", ""}, {"none", "/sys/fs/cgroup", "tmpfs", ""}, } + systemdMounts = [][]string{ + {"systemd", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd"}, + } ) type Config struct { @@ -40,6 +49,9 @@ type Config struct { CgroupHierarchy map[string]string LogFile string NoLog bool + EmulateSystemd bool + NoFiles uint64 + Environment []string } func createMounts(mounts ...[]string) error { @@ -162,16 +174,22 @@ func execDocker(config *Config, docker, cmd string, args []string) (*exec.Cmd, e } log.Debugf("Launching Docker %s %s %v", docker, cmd, args) + env := os.Environ() + if len(config.Environment) != 0 { + env = append(env, config.Environment...) + } + if config.Fork { cmd := exec.Command(docker, args...) if !config.NoLog { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr } + cmd.Env = env err := cmd.Start() return cmd, err } else { - err := syscall.Exec(docker, append([]string{cmd}, args...), os.Environ()) + err := syscall.Exec(docker, append([]string{cmd}, args...), env) return nil, err } } @@ -421,18 +439,51 @@ func prepare(config *Config, docker string, args ...string) error { return err } - if err := setupBin(config, docker); err != nil { + for _, i := range []string{docker, iptables, modprobe} { + if err := setupBin(config, i); err != nil { + return err + } + } + + if err := setUlimit(config); err != nil { + return err + } + + if err := setupSystemd(config); err != nil { return err } return nil } -func setupBin(config *Config, docker string) error { - if _, err := os.Stat(docker); os.IsNotExist(err) { - dist := docker + ".dist" +func setupSystemd(config *Config) error { + if !config.EmulateSystemd { + cgroups, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", os.Getpid())) + if err != nil { + return err + } + + if !strings.Contains(string(cgroups), "name=systemd") { + return nil + } + } + + if err := createMounts(systemdMounts...); err != nil { + return err + } + + if err := os.Mkdir(systemdRoot, 0755); err != nil { + return err + } + + return ioutil.WriteFile(path.Join(systemdRoot, "cgroup.procs"), []byte(strconv.Itoa(os.Getpid())+"\n"), 0644) +} + +func setupBin(config *Config, bin string) error { + if _, err := os.Stat(bin); os.IsNotExist(err) { + dist := bin + ".dist" if _, err := os.Stat(dist); err == nil { - return os.Symlink(dist, docker) + return os.Symlink(dist, bin) } } @@ -459,6 +510,20 @@ func setupLogging(config *Config) error { return nil } +func setUlimit(cfg *Config) error { + var rLimit syscall.Rlimit + if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit); err != nil { + return err + } + if cfg.NoFiles == 0 { + rLimit.Max = 1000000 + } else { + rLimit.Max = cfg.NoFiles + } + rLimit.Cur = rLimit.Max + return syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rLimit) +} + func runOrExec(config *Config, docker string, args ...string) (*exec.Cmd, error) { if err := prepare(config, docker, args...); err != nil { return nil, err diff --git a/cmd/control/tlsconf.go b/cmd/control/tlsconf.go index b23e8b38..7842c431 100644 --- a/cmd/control/tlsconf.go +++ b/cmd/control/tlsconf.go @@ -131,13 +131,19 @@ func tlsConfCreate(c *cli.Context) { } func generate(c *cli.Context) error { + generateServer := c.Bool("server") + outDir := c.String("dir") + hostnames := c.StringSlice("hostname") + + return Generate(generateServer, outDir, hostnames) +} + +func Generate(generateServer bool, outDir string, hostnames []string) error { cfg, err := config.LoadConfig() if err != nil { return err } - generateServer := c.Bool("server") - outDir := c.String("dir") if outDir == "" { return fmt.Errorf("out directory (-d, --dir) not specified") } @@ -161,6 +167,5 @@ func generate(c *cli.Context) error { return err } - hostnames := c.StringSlice("hostname") return writeCerts(generateServer, hostnames, cfg, certPath, keyPath, caCertPath, caKeyPath) } diff --git a/cmd/userdocker/main.go b/cmd/userdocker/main.go new file mode 100644 index 00000000..bcc75e55 --- /dev/null +++ b/cmd/userdocker/main.go @@ -0,0 +1,267 @@ +package userdocker + +import ( + "bufio" + "encoding/json" + "fmt" + "os" + "os/exec" + "os/signal" + "strings" + "syscall" + "time" + + log "github.com/Sirupsen/logrus" + "github.com/docker/libcompose/docker" + "github.com/docker/libcompose/project" + "github.com/rancherio/os/cmd/control" + "github.com/rancherio/os/compose" + "github.com/rancherio/os/config" + + "github.com/opencontainers/runc/libcontainer/cgroups" + _ "github.com/opencontainers/runc/libcontainer/nsenter" + "github.com/opencontainers/runc/libcontainer/system" +) + +const ( + DEFAULT_STORAGE_CONTEXT = "console" + userDocker = "user-docker" +) + +func Main() { + cfg, err := config.LoadConfig() + if err != nil { + log.Fatal(err) + } + + if len(os.Args) == 1 { + if err := enter(cfg); err != nil { + log.Fatal(err) + } + } else { + if err := main(cfg); err != nil { + log.Fatal(err) + } + } +} + +func enter(cfg *config.CloudConfig) error { + context := cfg.Rancher.UserDocker.StorageContext + if context == "" { + context = DEFAULT_STORAGE_CONTEXT + } + + p, err := compose.GetProject(cfg) + if err != nil { + return err + } + + pid, err := waitForPid(context, p) + if err != nil { + return err + } + + log.Infof("%s PID %d", context, pid) + + return runNsenter(pid) +} + +type result struct { + Pid int `json:"Pid"` +} + +func expand(first, second string, args []string) ([]string, error) { + var err error + prog := "" + + prog, err = exec.LookPath(first) + if err != nil { + prog, err = exec.LookPath(second) + } + if err != nil { + return nil, err + } + + return append([]string{prog}, args...), nil +} + +func runNsenter(pid int) error { + args, err := expand(userDocker, "", []string{"main"}) + if err != nil { + return err + } + + r, w, err := os.Pipe() + if err != nil { + return err + } + + cmd := &exec.Cmd{ + Path: args[0], + Args: args, + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, + ExtraFiles: []*os.File{w}, + Env: append(os.Environ(), + "_LIBCONTAINER_INITPIPE=3", + fmt.Sprintf("_LIBCONTAINER_INITPID=%d", pid), + ), + } + + if err := cmd.Start(); err != nil { + return err + } + w.Close() + + var result result + if err := json.NewDecoder(r).Decode(&result); err != nil { + return err + } + + if err := cmd.Wait(); err != nil { + return err + } + + log.Infof("Docker PID %d", result.Pid) + + p, err := os.FindProcess(result.Pid) + if err != nil { + return err + } + + handleTerm(p) + + if err := switchCgroup(result.Pid, pid); err != nil { + return err + } + + _, err = p.Wait() + return err +} + +func handleTerm(p *os.Process) { + term := make(chan os.Signal) + signal.Notify(term, syscall.SIGTERM) + go func() { + <-term + p.Signal(syscall.SIGTERM) + }() +} + +func waitForPid(service string, project *project.Project) (int, error) { + for { + if pid, err := getPid(service, project); err != nil || pid == 0 { + log.Infof("Waiting for %s : %d : %v", service, pid, err) + time.Sleep(250) + } else { + return pid, err + } + } +} + +func getPid(service string, project *project.Project) (int, error) { + s, err := project.CreateService(service) + if err != nil { + return 0, err + } + + containers, err := s.Containers() + if err != nil { + return 0, err + } + + if len(containers) == 0 { + return 0, nil + } + + client, err := docker.CreateClient(docker.ClientOpts{ + Host: config.DOCKER_SYSTEM_HOST, + }) + if err != nil { + return 0, err + } + + id, err := containers[0].Id() + if err != nil { + return 0, err + } + + info, err := client.InspectContainer(id) + if err != nil || info == nil { + return 0, err + } + + if info.State.Running { + return info.State.Pid, nil + } + + return 0, nil +} + +func main(cfg *config.CloudConfig) error { + os.Unsetenv("_LIBCONTAINER_INITPIPE") + os.Unsetenv("_LIBCONTAINER_INITPID") + + if err := system.ParentDeathSignal(syscall.SIGKILL).Set(); err != nil { + return err + } + + if err := os.Remove("/var/run/docker.pid"); err != nil && !os.IsNotExist(err) { + return err + } + + dockerCfg := cfg.Rancher.UserDocker + + args := dockerCfg.FullArgs() + + log.Debugf("User Docker args: %v", args) + + if dockerCfg.TLS { + log.Debug("Generating TLS certs if needed") + if err := control.Generate(true, "/etc/docker/tls", []string{"localhost"}); err != nil { + return err + } + } + + args, err := expand("docker-init", "docker", args) + if err != nil { + return err + } + + log.Infof("Running %v", args) + err = syscall.Exec(args[0], args, dockerCfg.AppendEnv()) + return err +} + +func switchCgroup(src, target int) error { + cgroupFile := fmt.Sprintf("/proc/%d/cgroup", target) + f, err := os.Open(cgroupFile) + if err != nil { + return err + } + defer f.Close() + + targetCgroups := map[string]string{} + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + parts := strings.Split(text, ":") + subparts := strings.Split(parts[1], "=") + subsystem := subparts[0] + if len(subparts) > 1 { + subsystem = subparts[1] + } + + targetPath := fmt.Sprintf("/host/sys/fs/cgroup/%s%s", subsystem, parts[2]) + log.Infof("Moving Docker to cgroup %s", targetPath) + targetCgroups[subsystem] = targetPath + } + + if err := s.Err(); err != nil { + return err + } + + return cgroups.EnterPid(targetCgroups, src) +} diff --git a/compose/project.go b/compose/project.go index 168bd599..c05faf3b 100644 --- a/compose/project.go +++ b/compose/project.go @@ -49,6 +49,10 @@ func RunServices(cfg *config.CloudConfig) error { return p.Up() } +func GetProject(cfg *config.CloudConfig) (*project.Project, error) { + return newCoreServiceProject(cfg) +} + func newProject(name string, cfg *config.CloudConfig) (*project.Project, error) { clientFactory, err := rosDocker.NewClientFactory(docker.ClientOpts{}) if err != nil { diff --git a/config/docker_config.go b/config/docker_config.go new file mode 100644 index 00000000..c2ccd739 --- /dev/null +++ b/config/docker_config.go @@ -0,0 +1,17 @@ +package config + +import "os" + +func (d *DockerConfig) FullArgs() []string { + args := append(d.Args, d.ExtraArgs...) + + if d.TLS { + args = append(args, d.TLSArgs...) + } + + return args +} + +func (d *DockerConfig) AppendEnv() []string { + return append(os.Environ(), d.Environment...) +} diff --git a/config/types.go b/config/types.go index 81a45a4f..4435ba35 100644 --- a/config/types.go +++ b/config/types.go @@ -91,14 +91,16 @@ type UpgradeConfig struct { } type DockerConfig struct { - TLS bool `yaml:"tls,omitempty"` - TLSArgs []string `yaml:"tls_args,flow,omitempty"` - Args []string `yaml:"args,flow,omitempty"` - ExtraArgs []string `yaml:"extra_args,flow,omitempty"` - ServerCert string `yaml:"server_cert,omitempty"` - ServerKey string `yaml:"server_key,omitempty"` - CACert string `yaml:"ca_cert,omitempty"` - CAKey string `yaml:"ca_key,omitempty"` + TLS bool `yaml:"tls,omitempty"` + TLSArgs []string `yaml:"tls_args,flow,omitempty"` + Args []string `yaml:"args,flow,omitempty"` + ExtraArgs []string `yaml:"extra_args,flow,omitempty"` + ServerCert string `yaml:"server_cert,omitempty"` + ServerKey string `yaml:"server_key,omitempty"` + CACert string `yaml:"ca_cert,omitempty"` + CAKey string `yaml:"ca_key,omitempty"` + Environment []string `yaml:"environment,omitempty"` + StorageContext string `yaml:"storage_context,omitempty"` } type SshConfig struct { diff --git a/init/init.go b/init/init.go index 6d4b2855..528670c6 100644 --- a/init/init.go +++ b/init/init.go @@ -139,6 +139,7 @@ func getLaunchConfig(cfg *config.CloudConfig, dockerCfg *config.DockerConfig) (* launchConfig.DnsConfig.Nameservers = cfg.Rancher.Network.Dns.Nameservers launchConfig.DnsConfig.Search = cfg.Rancher.Network.Dns.Search + launchConfig.Environment = dockerCfg.Environment if !cfg.Rancher.Debug { launchConfig.LogFile = config.SYSTEM_DOCKER_LOG diff --git a/main.go b/main.go index 15a8bd81..0d1bcfd7 100644 --- a/main.go +++ b/main.go @@ -14,6 +14,7 @@ import ( "github.com/rancherio/os/cmd/respawn" "github.com/rancherio/os/cmd/sysinit" "github.com/rancherio/os/cmd/systemdocker" + "github.com/rancherio/os/cmd/userdocker" "github.com/rancherio/os/cmd/wait" "github.com/rancherio/os/config" osInit "github.com/rancherio/os/init" @@ -41,6 +42,7 @@ func main() { registerCmd("/init", osInit.MainInit) registerCmd(config.SYSINIT_BIN, sysinit.Main) registerCmd("/usr/bin/dockerlaunch", dockerlaunchMain.Main) + registerCmd("/usr/bin/user-docker", userdocker.Main) registerCmd("/usr/bin/system-docker", systemdocker.Main) registerCmd("/sbin/poweroff", power.PowerOff) registerCmd("/sbin/reboot", power.Reboot) diff --git a/os-config.yml b/os-config.yml index c24191d9..f07676c9 100644 --- a/os-config.yml +++ b/os-config.yml @@ -122,19 +122,20 @@ rancher: privileged: true read_only: true volumes: + - /usr/bin/docker:/usr/bin/docker.dist:ro - /usr/bin/ros:/sbin/halt:ro + - /usr/bin/ros:/sbin/netconf:ro - /usr/bin/ros:/sbin/poweroff:ro - /usr/bin/ros:/sbin/reboot:ro - /usr/bin/ros:/sbin/shutdown:ro - - /usr/bin/ros:/sbin/netconf:ro - /usr/bin/ros:/usr/bin/cloud-init:ro - - /usr/bin/ros:/usr/bin/rancherctl:ro - - /usr/bin/ros:/usr/bin/ros:ro - - /usr/bin/ros:/usr/bin/respawn:ro - - /usr/bin/ros:/usr/bin/system-docker:ro - - /usr/bin/ros:/usr/sbin/wait-for-docker:ro - /usr/bin/ros:/usr/bin/dockerlaunch:ro - - /usr/bin/docker:/usr/bin/docker.dist:ro + - /usr/bin/ros:/usr/bin/rancherctl:ro + - /usr/bin/ros:/usr/bin/respawn:ro + - /usr/bin/ros:/usr/bin/ros:ro + - /usr/bin/ros:/usr/bin/system-docker:ro + - /usr/bin/ros:/usr/bin/user-docker:ro + - /usr/bin/ros:/usr/sbin/wait-for-docker:ro console: image: rancher/os-console:v0.4.0-dev labels: @@ -228,15 +229,16 @@ rancher: read_only: true volumes: - /dev:/host/dev - - /usr/share/ros/os-config.yml:/usr/share/ros/os-config.yml - - /var/lib/rancher:/var/lib/rancher - - /var/lib/rancher/conf:/var/lib/rancher/conf + - /etc/docker:/etc/docker - /etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt.rancher - - /lib/modules:/lib/modules - /lib/firmware:/lib/firmware + - /lib/modules:/lib/modules - /run:/run - - /var/run:/var/run + - /usr/share/ros/os-config.yml:/usr/share/ros/os-config.yml + - /var/lib/rancher/conf:/var/lib/rancher/conf + - /var/lib/rancher:/var/lib/rancher - /var/log:/var/log + - /var/run:/var/run udev-cold: image: rancher/os-udev:v0.4.0-dev labels: @@ -272,6 +274,20 @@ rancher: volumes: - /home:/home - /opt:/opt + docker: + image: rancher/os-docker:v0.4.0-dev + labels: + io.rancher.os.scope: system + net: host + pid: host + ipc: host + uts: host + privileged: true + restart: always + volumes_from: + - all-volumes + volumes: + - /sys/fs/cgroup:/host/sys/fs/cgroup system_docker: args: [daemon, --log-opt, max-size=25m, --log-opt, max-file=2, -s, overlay, -b, docker-sys, --fixed-cidr, 172.18.42.1/16, --restart=false, -g, /var/lib/system-docker, -G, root,