Merge pull request #29928 from dubstack/bump-libcontainer

Automatic merge from submit-queue

Bump Libcontainer to latest head

@Random-Liu or @yujuhong, could either of you please do a quick review?

I updated libcontainer in a previous PR, but #29492 reverted those changes. This is needed for #27204.

Signed-off-by: Buddha Prakash <buddhap@google.com>
This commit is contained in:
Kubernetes Submit Queue 2016-08-04 15:12:13 -07:00 committed by GitHub
commit d10e47b891
45 changed files with 874 additions and 415 deletions

64
Godeps/Godeps.json generated
View File

@ -1643,83 +1643,83 @@
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/apparmor",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups/fs",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups/systemd",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/configs",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/configs/validate",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/criurpc",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/keys",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/label",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/seccomp",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/selinux",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/stacktrace",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/system",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/user",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/opencontainers/runc/libcontainer/utils",
"Comment": "v0.1.1",
"Rev": "baf6536d6259209c3edfa2b22237af82942d3dfa"
"Comment": "v1.0.0-rc1-100-g142df38",
"Rev": "142df3836b740af53dc6da59eed8dbc92f62917c"
},
{
"ImportPath": "github.com/pborman/uuid",

View File

@ -188,12 +188,13 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
// Create a cgroup container manager.
func createManager(containerName string) *fs.Manager {
allowAllDevices := true
return &fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: containerName,
Resources: &configs.Resources{
AllowAllDevices: true,
AllowAllDevices: &allowAllDevices,
},
},
}
@ -319,7 +320,7 @@ func (cm *containerManagerImpl) setupNode() error {
}
glog.V(2).Infof("Configure resource-only container %s with memory limit: %d", cm.RuntimeCgroupsName, memoryLimit)
allowAllDevices := true
dockerContainer := &fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
@ -327,7 +328,7 @@ func (cm *containerManagerImpl) setupNode() error {
Resources: &configs.Resources{
Memory: memoryLimit,
MemorySwap: -1,
AllowAllDevices: true,
AllowAllDevices: &allowAllDevices,
},
},
}
@ -370,12 +371,13 @@ func (cm *containerManagerImpl) setupNode() error {
if cm.KubeletCgroupsName != "" {
cont := newSystemCgroups(cm.KubeletCgroupsName)
allowAllDevices := true
manager := fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: cm.KubeletCgroupsName,
Resources: &configs.Resources{
AllowAllDevices: true,
AllowAllDevices: &allowAllDevices,
},
},
}

View File

@ -30,12 +30,13 @@ import (
//
// containerName must be an absolute container name.
func RunInResourceContainer(containerName string) error {
allowAllDevices := true
manager := fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: containerName,
Resources: &configs.Resources{
AllowAllDevices: true,
AllowAllDevices: &allowAllDevices,
},
},
}

View File

@ -77,7 +77,7 @@ config := &configs.Config{
Parent: "system",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: false,
AllowAllDevices: nil,
AllowedDevices: configs.DefaultAllowedDevices,
},
},
@ -186,8 +186,8 @@ process := &libcontainer.Process{
err := container.Start(process)
if err != nil {
logrus.Fatal(err)
container.Destroy()
logrus.Fatal(err)
return
}
@ -219,6 +219,9 @@ container.Resume()
// send signal to container's init process.
container.Signal(signal)
// update container resource constraints.
container.Set(config)
```

View File

@ -90,7 +90,7 @@ in tmpfs.
After `/dev/null` has been setup we check for any external links between
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
to `/dev/null` outside the container we close and `dup2` the the `/dev/null`
to `/dev/null` outside the container we close and `dup2` the `/dev/null`
that is local to the container's rootfs.
@ -297,7 +297,7 @@ a container.
| -------------- | ------------------------------------------------------------------ |
| Get processes | Return all the pids for processes running inside a container |
| Get Stats | Return resource statistics for the container as a whole |
| Wait | Wait waits on the container's init process ( pid 1 ) |
| Wait | Waits on the container's init process ( pid 1 ) |
| Wait Process | Wait on any of the container's processes returning the exit status |
| Destroy | Kill the container's init process and remove any filesystem state |
| Signal | Send a signal to the container's init process |

View File

@ -7,6 +7,7 @@ package apparmor
// #include <stdlib.h>
import "C"
import (
"fmt"
"io/ioutil"
"os"
"unsafe"
@ -32,7 +33,7 @@ func ApplyProfile(name string) error {
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
if _, err := C.aa_change_onexec(cName); err != nil {
return err
return fmt.Errorf("apparmor failed to apply profile: %s", err)
}
return nil
}

View File

@ -9,7 +9,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"github.com/opencontainers/runc/libcontainer/cgroups"
@ -33,7 +32,6 @@ var (
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
)
@ -142,7 +140,9 @@ func (m *Manager) Apply(pid int) (err error) {
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(sys.Name())
if err != nil {
if cgroups.IsNotFound(err) {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
continue
}
return err
@ -190,6 +190,11 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
for _, sys := range subsystems {
// Generate fake cgroup data.
d, err := getCgroupData(container.Cgroups, -1)
@ -339,7 +344,7 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
return "", err
}
return path, nil
@ -349,7 +354,7 @@ func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s.", file)
return fmt.Errorf("no such directory for %s", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)

View File

@ -8,7 +8,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
@ -67,7 +66,7 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
return err
}

View File

@ -43,21 +43,23 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
}
return nil
}
if !cgroup.Resources.AllowAllDevices {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
if cgroup.Resources.AllowAllDevices != nil {
if *cgroup.Resources.AllowAllDevices == false {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
for _, dev := range cgroup.Resources.AllowedDevices {
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if err := writeFile(path, "devices.allow", "a"); err != nil {
return err
}
}
for _, dev := range cgroup.Resources.DeniedDevices {

View File

@ -5,15 +5,21 @@ package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
)
type MemoryGroup struct {
}
@ -32,13 +38,10 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
return err
}
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached.
if err := s.SetKernelMemory(path, d.config); err != nil {
if err := EnableKernelMemoryAccounting(path); err != nil {
return err
}
}
defer func() {
if err != nil {
os.RemoveAll(path)
@ -54,13 +57,43 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
return nil
}
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
// This has to be done separately because it has special constraints (it
// can't be done after there are processes attached to the cgroup).
if cgroup.Resources.KernelMemory > 0 {
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
return err
// EnableKernelMemoryAccounting switches on kernel memory accounting for the
// cgroup directory at path. It does so by writing a kernel memory limit of 1
// and then a limit of -1 through setKernelMemory, returning the first error
// encountered, or nil if both writes succeed.
func EnableKernelMemoryAccounting(path string) error {
	// Kernel memory is not accounted at all until a limit has been set on the
	// cgroup, and a limit cannot be set once the cgroup has children or
	// already has tasks in it. So set a limit of 1 first and then replace it
	// with -1.
	for _, limit := range []int64{1, -1} {
		if err := setKernelMemory(path, limit); err != nil {
			return err
		}
	}
	return nil
}
// setKernelMemory writes kernelMemoryLimit, formatted as a base-10 integer,
// to the cgroupKernelMemoryLimit file inside the cgroup directory path.
//
// An empty path is reported as an error. If the limit file does not exist,
// nothing is written and nil is returned. A write that fails with EBUSY
// yields a dedicated error message; any other write failure is wrapped in a
// generic "failed to write" error.
func setKernelMemory(path string, kernelMemoryLimit int64) error {
	if path == "" {
		return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
	}

	limitFile := filepath.Join(path, cgroupKernelMemoryLimit)
	if !cgroups.PathExists(limitFile) {
		// kernel memory is not enabled on the system so we should do nothing
		return nil
	}

	value := strconv.FormatInt(kernelMemoryLimit, 10)
	err := ioutil.WriteFile(limitFile, []byte(value), 0700)
	if err == nil {
		return nil
	}

	// EBUSY is returned on attempts to write to memory.kmem.limit_in_bytes
	// if the cgroup has children or once tasks have been attached to it, so
	// unwrap the path error and report that case with a clearer message.
	if pathErr, ok := err.(*os.PathError); ok {
		if errNo, ok := pathErr.Err.(syscall.Errno); ok && errNo == syscall.EBUSY {
			return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
		}
	}
	return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
@ -113,11 +146,18 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
if cgroup.Resources.KernelMemory != 0 {
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
return err
}
}
if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return err
}
}
if cgroup.Resources.KernelMemoryTCP != 0 {
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
return err

View File

@ -3,6 +3,8 @@
package fs
import (
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
@ -23,8 +25,8 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
}
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.NetClsClassid != "" {
if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil {
if cgroup.Resources.NetClsClassid != 0 {
if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
return err
}
}

View File

@ -12,7 +12,6 @@ import (
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)

View File

@ -11,6 +11,7 @@ type ThrottlingData struct {
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
// CpuUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception.
type CpuUsage struct {
// Total CPU time consumed.

View File

@ -74,6 +74,7 @@ var (
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasTransientDefaultDependencies bool
hasDelegate bool
)
func newProp(name string, units interface{}) systemdDbus.Property {
@ -146,6 +147,20 @@ func UseSystemd() bool {
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
// Assume StartTransientUnit on a scope allows Delegate
hasDelegate = true
dl := newProp("Delegate", true)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasDelegate = false
}
}
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
}
return hasStartTransientUnit
}
@ -183,10 +198,13 @@ func (m *Manager) Apply(pid int) error {
systemdDbus.PropSlice(slice),
systemdDbus.PropDescription("docker container "+c.Name),
newProp("PIDs", []uint32{uint32(pid)}),
// This is only supported on systemd versions 218 and above.
newProp("Delegate", true),
)
if hasDelegate {
// This is only supported on systemd versions 218 and above.
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties,
@ -214,11 +232,9 @@ func (m *Manager) Apply(pid int) error {
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
}
// We need to set kernel memory before processes join cgroup because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
// And swap memory limit needs to be set after memory limit, only
// memory limit is handled by systemd, so it's kind of ugly here.
if c.Resources.KernelMemory > 0 {
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if c.Resources.KernelMemory != 0 {
if err := setKernelMemory(c); err != nil {
return err
}
@ -273,7 +289,7 @@ func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s.", file)
return fmt.Errorf("no such directory for %s", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
@ -372,6 +388,8 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
if err != nil {
return "", err
}
// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
slice := "system.slice"
if c.Parent != "" {
@ -439,6 +457,11 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
}
func (m *Manager) Set(container *configs.Config) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.Cgroups.Paths != nil {
return nil
}
for _, sys := range subsystems {
// Get the subsystem path, but don't error out for not found cgroups.
path, err := getSubsystemPath(container.Cgroups, sys.Name())
@ -472,8 +495,5 @@ func setKernelMemory(c *configs.Cgroup) error {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// This doesn't get called by manager.Set, so we need to do it here.
s := &fs.MemoryGroup{}
return s.SetKernelMemory(path, c)
return fs.EnableKernelMemoryAccounting(path)
}

View File

@ -16,13 +16,19 @@ import (
"github.com/docker/go-units"
)
const cgroupNamePrefix = "name="
const (
cgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
@ -47,6 +53,9 @@ func FindCgroupMountpoint(subsystem string) (string, error) {
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
@ -70,6 +79,15 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
return "", "", NewNotFoundError(subsystem)
}
// isSubsystemAvailable reports whether subsystem is present as a key in the
// map that ParseCgroupFile builds from /proc/self/cgroup. If that file cannot
// be parsed, the subsystem is reported as unavailable.
func isSubsystemAvailable(subsystem string) bool {
	entries, err := ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		return false
	}
	if _, found := entries[subsystem]; found {
		return true
	}
	return false
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@ -124,7 +142,8 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
for scanner.Scan() {
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
@ -139,12 +158,15 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if !ss[opt] {
continue
}
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
}
if ss[opt] {
} else {
m.Subsystems = append(m.Subsystems, opt)
}
numFound++
}
res = append(res, m)
}
@ -161,19 +183,19 @@ func GetCgroupMounts() ([]Mount, error) {
}
defer f.Close()
all, err := GetAllSubsystems()
all, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for _, s := range all {
for s := range all {
allMap[s] = true
}
return getCgroupMountsHelper(allMap, f)
}
// Returns all the cgroup subsystems supported by the kernel
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
f, err := os.Open("/proc/cgroups")
if err != nil {
@ -199,7 +221,7 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}
// Returns the relative path to the cgroup docker is running in.
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
@ -220,7 +242,7 @@ func GetInitCgroupDir(subsystem string) (string, error) {
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil {
return nil, err
}
@ -243,6 +265,8 @@ func readProcsFile(dir string) ([]int, error) {
return out, nil
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@ -250,7 +274,12 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
defer f.Close()
s := bufio.NewScanner(f)
return parseCgroupFromReader(f)
}
// helper function for ParseCgroupFile to make testing easier
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
s := bufio.NewScanner(r)
cgroups := make(map[string]string)
for s.Scan() {
@ -259,7 +288,16 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
text := s.Text()
parts := strings.Split(text, ":")
// from cgroups(7):
// /proc/[pid]/cgroup
// ...
// For each cgroup hierarchy ... there is one entry
// containing three colon-separated fields of the form:
// hierarchy-ID:subsystem-list:cgroup-path
parts := strings.SplitN(text, ":", 3)
if len(parts) < 3 {
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
}
for _, subs := range strings.Split(parts[1], ",") {
cgroups[subs] = parts[2]
@ -291,8 +329,7 @@ func PathExists(path string) bool {
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
if err := WriteCgroupProc(path, pid); err != nil {
return err
}
}
@ -361,7 +398,7 @@ func GetAllPids(path string) ([]int, error) {
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
dir, file := filepath.Split(p)
if file != "cgroup.procs" {
if file != CgroupProcesses {
return nil
}
if iErr != nil {
@ -376,3 +413,20 @@ func GetAllPids(path string) ([]int, error) {
})
return pids, err
}
// WriteCgroupProc writes the specified pid into the CgroupProcesses file of
// the cgroup directory dir.
//
// An empty dir is always an error. A pid of -1 means "attach nothing": no
// file is written and nil is returned.
func WriteCgroupProc(dir string, pid int) error {
	// Normally dir should not be empty; one case where it is empty is when
	// the cgroup subsystem is not mounted, and we want that to fail here.
	if dir == "" {
		return fmt.Errorf("no such directory for %s", CgroupProcesses)
	}

	// Don't attach any pid to the cgroup if -1 is specified as a pid.
	if pid == -1 {
		return nil
	}

	procsFile := filepath.Join(dir, CgroupProcesses)
	if err := ioutil.WriteFile(procsFile, []byte(strconv.Itoa(pid)), 0700); err != nil {
		return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
	}
	return nil
}

View File

@ -36,7 +36,7 @@ type Cgroup struct {
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
// Deprecated
AllowAllDevices bool `json:"allow_all_devices,omitempty"`
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
@ -69,10 +69,10 @@ type Resources struct {
CpuPeriod int64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_quota"`
CpuRtRuntime int64 `json:"cpu_rt_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_period"`
CpuRtPeriod int64 `json:"cpu_rt_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
@ -120,5 +120,5 @@ type Resources struct {
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid string `json:"net_cls_classid"`
NetClsClassid uint32 `json:"net_cls_classid"`
}

View File

@ -33,7 +33,7 @@ type Seccomp struct {
Syscalls []*Syscall `json:"syscalls"`
}
// An action to be taken upon rule match in Seccomp
// Action is taken upon rule match in Seccomp
type Action int
const (
@ -44,7 +44,7 @@ const (
Trace
)
// A comparison operator to be used when matching syscall arguments in Seccomp
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
type Operator int
const (
@ -57,7 +57,7 @@ const (
MaskEqualTo
)
// A rule to match a specific syscall argument in Seccomp
// Arg is a rule to match a specific syscall argument in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
@ -65,7 +65,7 @@ type Arg struct {
Op Operator `json:"op"`
}
// An rule to match a syscall in Seccomp
// Syscall is a rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
@ -148,10 +148,6 @@ type Config struct {
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj int `json:"oom_score_adj"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
@ -187,6 +183,10 @@ type Config struct {
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
// NoNewKeyring will not allocate a new session keyring for the container. It will use the
// caller's keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
}
type Hooks struct {
@ -261,7 +261,7 @@ type Hook interface {
Run(HookState) error
}
// NewFunctionHooks will call the provided function when the hook is run.
// NewFunctionHook will call the provided function when the hook is run.
func NewFunctionHook(f func(HookState) error) FuncHook {
return FuncHook{
run: f,
@ -284,7 +284,7 @@ type Command struct {
Timeout *time.Duration `json:"timeout"`
}
// NewCommandHooks will execute the provided command when the hook is run.
// NewCommandHook will execute the provided command when the hook is run.
func NewCommandHook(cmd Command) CommandHook {
return CommandHook{
Command: cmd,

View File

@ -4,7 +4,7 @@ package configs
import "fmt"
// Gets the root uid for the process on host which could be non-zero
// HostUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
@ -21,7 +21,7 @@ func (c Config) HostUID() (int, error) {
return 0, nil
}
// Gets the root gid for the process on host which could be non-zero
// HostGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {

View File

@ -3,7 +3,7 @@
package configs
var (
// These are devices that are to be both allowed and created.
// DefaultSimpleDevices are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{

View File

@ -7,6 +7,7 @@ import (
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/selinux"
)
type Validator interface {
@ -80,6 +81,10 @@ func (v *ConfigValidator) security(config *configs.Config) error {
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
if config.ProcessLabel != "" && !selinux.SelinuxEnabled() {
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
}
return nil
}

View File

@ -0,0 +1,11 @@
package libcontainer
import (
"errors"
)
// NewConsole always fails in this implementation: it returns a nil Console
// together with an error stating that the libcontainer console is not
// supported on Solaris. The uid and gid arguments are ignored.
func NewConsole(uid, gid int) (Console, error) {
	var none Console
	return none, errors.New("libcontainer console is not supported on Solaris")
}

View File

@ -1,4 +1,4 @@
// Libcontainer provides a native Go implementation for creating containers
// Package libcontainer provides a native Go implementation for creating containers
// with namespaces, cgroups, capabilities, and filesystem access controls.
// It allows you to manage the lifecycle of the container performing additional operations
// after the container is created.
@ -11,24 +11,20 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
// The status of a container.
// Status is the status of a container.
type Status int
const (
// The container exists but has not been run yet
// Created is the status that denotes the container exists but has not been run yet.
Created Status = iota
// The container exists and is running.
// Running is the status that denotes the container exists and is running.
Running
// The container exists, it is in the process of being paused.
// Pausing is the status that denotes the container exists, it is in the process of being paused.
Pausing
// The container exists, but all its processes are paused.
// Paused is the status that denotes the container exists, but all its processes are paused.
Paused
// The container does not exist.
Destroyed
// Stopped is the status that denotes the container does not have a created or running process.
Stopped
)
func (s Status) String() string {
@ -41,8 +37,8 @@ func (s Status) String() string {
return "pausing"
case Paused:
return "paused"
case Destroyed:
return "destroyed"
case Stopped:
return "stopped"
default:
return "unknown"
}
@ -67,7 +63,7 @@ type BaseState struct {
Config configs.Config `json:"config"`
}
// A libcontainer container object.
// BaseContainer is a libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
@ -80,13 +76,13 @@ type BaseContainer interface {
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
// SystemError - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
State() (*State, error)
// Returns the current config of the container.
@ -96,7 +92,7 @@ type BaseContainer interface {
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
// SystemError - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
@ -106,7 +102,7 @@ type BaseContainer interface {
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
// SystemError - System error.
Stats() (*Stats, error)
// Set resources of container as configured
@ -114,7 +110,7 @@ type BaseContainer interface {
// We can use this to change resources when containers are running.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
@ -124,21 +120,38 @@ type BaseContainer interface {
// ContainerDestroyed - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// Systemerror - System error.
// SystemError - System error.
Start(process *Process) (err error)
// Run immediately starts the process inside the container. Returns error if process
// fails to start. It does not block waiting for the exec fifo after start returns but
// opens the fifo after start returns.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Run(process *Process) (err error)
// Destroys the container after killing all running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
Destroy() error
// Signal sends the provided signal code to the container's initial process.
//
// errors:
// Systemerror - System error.
// SystemError - System error.
Signal(s os.Signal) error
// Exec signals the container to exec the users process at the end of the init.
//
// errors:
// SystemError - System error.
Exec() error
}

View File

@ -22,6 +22,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/syndtr/gocapability/capability"
"github.com/vishvananda/netlink/nl"
@ -30,18 +31,19 @@ import (
const stdioFdCount = 3
type linuxContainer struct {
id string
root string
config *configs.Config
cgroupManager cgroups.Manager
initPath string
initArgs []string
initProcess parentProcess
criuPath string
m sync.Mutex
criuVersion int
state containerState
created time.Time
id string
root string
config *configs.Config
cgroupManager cgroups.Manager
initPath string
initArgs []string
initProcess parentProcess
initProcessStartTime string
criuPath string
m sync.Mutex
criuVersion int
state containerState
created time.Time
}
// State represents a running container's state
@ -62,7 +64,7 @@ type State struct {
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
}
// A libcontainer container object.
// Container is a libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
@ -84,7 +86,7 @@ type Container interface {
// Systemerror - System error.
Restore(process *Process, criuOpts *CriuOpts) error
// If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses
// If the Container state is RUNNING, sets the Container state to PAUSING and pauses
// the execution of any user processes. Asynchronously, when the container finished being paused the
// state is changed to PAUSED.
// If the Container state is PAUSED, do nothing.
@ -141,7 +143,7 @@ func (c *linuxContainer) State() (*State, error) {
func (c *linuxContainer) Processes() ([]int, error) {
pids, err := c.cgroupManager.GetAllPids()
if err != nil {
return nil, newSystemError(err)
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
}
return pids, nil
}
@ -152,14 +154,14 @@ func (c *linuxContainer) Stats() (*Stats, error) {
stats = &Stats{}
)
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemError(err)
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
if err != nil {
return stats, newSystemError(err)
return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName)
}
stats.Interfaces = append(stats.Interfaces, istats)
}
@ -170,6 +172,13 @@ func (c *linuxContainer) Stats() (*Stats, error) {
func (c *linuxContainer) Set(config configs.Config) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
if status == Stopped {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}
c.config = &config
return c.cgroupManager.Set(c.config)
}
@ -181,28 +190,76 @@ func (c *linuxContainer) Start(process *Process) error {
if err != nil {
return err
}
doInit := status == Destroyed
parent, err := c.newParentProcess(process, doInit)
return c.start(process, status == Stopped)
}
func (c *linuxContainer) Run(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return newSystemError(err)
return err
}
if err := c.start(process, status == Stopped); err != nil {
return err
}
if status == Stopped {
return c.exec()
}
return nil
}
// Exec acquires the container mutex, holds it until the call returns, and
// delegates to c.exec, returning whatever error that call reports.
func (c *linuxContainer) Exec() error {
c.m.Lock()
defer c.m.Unlock()
return c.exec()
}
// exec opens the exec fifo located in the container's root directory for
// reading and reads it to the end. Receiving at least one byte counts as
// success, after which the fifo is removed. A read that yields no data is
// reported as an attempt to start a container that is already running.
//
// A failure to open the fifo is returned as a system error; a read failure is
// returned unchanged.
func (c *linuxContainer) exec() error {
	fifoPath := filepath.Join(c.root, execFifoFilename)
	fifo, openErr := os.OpenFile(fifoPath, os.O_RDONLY, 0)
	if openErr != nil {
		return newSystemErrorWithCause(openErr, "open exec fifo for reading")
	}
	defer fifo.Close()

	payload, readErr := ioutil.ReadAll(fifo)
	switch {
	case readErr != nil:
		return readErr
	case len(payload) == 0:
		return fmt.Errorf("cannot start an already running container")
	}
	// the result of the removal is not checked
	os.Remove(fifoPath)
	return nil
}
func (c *linuxContainer) start(process *Process, isInit bool) error {
parent, err := c.newParentProcess(process, isInit)
if err != nil {
return newSystemErrorWithCause(err, "creating new parent process")
}
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := parent.terminate(); err != nil {
logrus.Warn(err)
}
return newSystemError(err)
return newSystemErrorWithCause(err, "starting container process")
}
// generate a timestamp indicating when the container was started
c.created = time.Now().UTC()
c.state = &runningState{
c: c,
}
if doInit {
if err := c.updateState(parent); err != nil {
if isInit {
c.state = &createdState{
c: c,
}
state, err := c.updateState(parent)
if err != nil {
return err
}
c.initProcessStartTime = state.InitProcessStartTime
if c.config.Hooks != nil {
s := configs.HookState{
Version: c.config.Version,
@ -211,12 +268,12 @@ func (c *linuxContainer) Start(process *Process) error {
Root: c.config.Rootfs,
BundlePath: utils.SearchLabels(c.config.Labels, "bundle"),
}
for _, hook := range c.config.Hooks.Poststart {
for i, hook := range c.config.Hooks.Poststart {
if err := hook.Run(s); err != nil {
if err := parent.terminate(); err != nil {
logrus.Warn(err)
}
return newSystemError(err)
return newSystemErrorWithCausef(err, "running poststart hook %d", i)
}
}
}
@ -226,7 +283,7 @@ func (c *linuxContainer) Start(process *Process) error {
func (c *linuxContainer) Signal(s os.Signal) error {
if err := c.initProcess.signal(s); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "signaling init process")
}
return nil
}
@ -234,19 +291,23 @@ func (c *linuxContainer) Signal(s os.Signal) error {
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
parentPipe, childPipe, err := newPipe()
if err != nil {
return nil, newSystemError(err)
return nil, newSystemErrorWithCause(err, "creating new init pipe")
}
cmd, err := c.commandTemplate(p, childPipe)
rootDir, err := os.Open(c.root)
if err != nil {
return nil, newSystemError(err)
return nil, err
}
cmd, err := c.commandTemplate(p, childPipe, rootDir)
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new command template")
}
if !doInit {
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
return c.newSetnsProcess(p, cmd, parentPipe, childPipe, rootDir)
}
return c.newInitProcess(p, cmd, parentPipe, childPipe)
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
}
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File) (*exec.Cmd, error) {
cmd := &exec.Cmd{
Path: c.initPath,
Args: c.initArgs,
@ -258,8 +319,10 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
cmd.ExtraFiles = append(p.ExtraFiles, childPipe, rootDir)
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-2),
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
@ -269,7 +332,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
return cmd, nil
}
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
@ -292,14 +355,15 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
process: p,
bootstrapData: data,
sharePidns: sharePidns,
rootDir: rootDir,
}, nil
}
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
state, err := c.currentState()
if err != nil {
return nil, newSystemError(err)
return nil, newSystemErrorWithCause(err, "getting container's current state")
}
// for setns process, we don't have to set cloneflags as the process namespaces
// will only be set via setns syscall
@ -316,6 +380,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
rootDir: rootDir,
}, nil
}
@ -325,6 +390,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
Args: process.Args,
Env: process.Env,
User: process.User,
AdditionalGroups: process.AdditionalGroups,
Cwd: process.Cwd,
Console: process.consolePath,
Capabilities: process.Capabilities,
@ -334,6 +400,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
ExecFifoPath: filepath.Join(c.root, execFifoFilename),
}
if process.NoNewPrivileges != nil {
cfg.NoNewPrivileges = *process.NoNewPrivileges
@ -371,15 +438,16 @@ func (c *linuxContainer) Pause() error {
if err != nil {
return err
}
if status != Running {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
switch status {
case Running, Created:
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
return err
}
return c.state.transition(&pausedState{
c: c,
})
}
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
return err
}
return c.state.transition(&pausedState{
c: c,
})
return newGenericError(fmt.Errorf("container not running: %s", status), ContainerNotRunning)
}
func (c *linuxContainer) Resume() error {
@ -408,13 +476,13 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
}
// check Criu version greater than or equal to min_version
func (c *linuxContainer) checkCriuVersion(min_version string) error {
// checkCriuVersion checks Criu version greater than or equal to minVersion
func (c *linuxContainer) checkCriuVersion(minVersion string) error {
var x, y, z, versionReq int
_, err := fmt.Sscanf(min_version, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
_, err := fmt.Sscanf(minVersion, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
if err != nil {
_, err = fmt.Sscanf(min_version, "Version: %d.%d\n", &x, &y) // 1.6
_, err = fmt.Sscanf(minVersion, "Version: %d.%d\n", &x, &y) // 1.6
}
versionReq = x*10000 + y*100 + z
@ -459,7 +527,7 @@ func (c *linuxContainer) checkCriuVersion(min_version string) error {
c.criuVersion = x*10000 + y*100 + z
if c.criuVersion < versionReq {
return fmt.Errorf("CRIU version must be %s or higher", min_version)
return fmt.Errorf("CRIU version must be %s or higher", minVersion)
}
return nil
@ -607,6 +675,27 @@ func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mo
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
// restoreNetwork appends veth pair descriptions to req.Opts.Veths. Pairs come
// first from the container's configured networks, where only entries of type
// "veth" contribute ("loopback" and any other type add nothing), and then
// from criuOpts.VethPairs, in that order.
func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) {
	// appendVeth records one host-side/container-side interface pair on the request.
	appendVeth := func(hostSide, containerSide string) {
		pair := new(criurpc.CriuVethPair)
		pair.IfOut = proto.String(hostSide)
		pair.IfIn = proto.String(containerSide)
		req.Opts.Veths = append(req.Opts.Veths, pair)
	}

	for _, network := range c.config.Networks {
		if network.Type != "veth" {
			continue
		}
		appendVeth(network.HostInterfaceName, network.Name)
	}
	for _, extra := range criuOpts.VethPairs {
		appendVeth(extra.HostInterfaceName, extra.ContainerInterfaceName)
	}
}
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@ -690,23 +779,9 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
break
}
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(iface.HostInterfaceName)
veth.IfIn = proto.String(iface.Name)
req.Opts.Veths = append(req.Opts.Veths, veth)
break
case "loopback":
break
}
}
for _, i := range criuOpts.VethPairs {
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(i.HostInterfaceName)
veth.IfIn = proto.String(i.ContainerInterfaceName)
req.Opts.Veths = append(req.Opts.Veths, veth)
if criuOpts.EmptyNs&syscall.CLONE_NEWNET == 0 {
c.restoreNetwork(req, criuOpts)
}
// append optional manage cgroups mode
@ -955,9 +1030,9 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
Pid: int(notify.GetPid()),
Root: c.config.Rootfs,
}
for _, hook := range c.config.Hooks.Prestart {
for i, hook := range c.config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
@ -974,7 +1049,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
}); err != nil {
return err
}
if err := c.updateState(r); err != nil {
if _, err := c.updateState(r); err != nil {
return err
}
if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil {
@ -986,13 +1061,17 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
return nil
}
func (c *linuxContainer) updateState(process parentProcess) error {
func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
c.initProcess = process
state, err := c.currentState()
if err != nil {
return err
return nil, err
}
return c.saveState(state)
err = c.saveState(state)
if err != nil {
return nil, err
}
return state, nil
}
func (c *linuxContainer) saveState(s *State) error {
@ -1027,37 +1106,75 @@ func (c *linuxContainer) refreshState() error {
if paused {
return c.state.transition(&pausedState{c: c})
}
running, err := c.isRunning()
t, err := c.runType()
if err != nil {
return err
}
if running {
switch t {
case Created:
return c.state.transition(&createdState{c: c})
case Running:
return c.state.transition(&runningState{c: c})
}
return c.state.transition(&stoppedState{c: c})
}
func (c *linuxContainer) isRunning() (bool, error) {
if c.initProcess == nil {
// doesInitProcessExist checks if the init process is still the same process
// as the initial one, it could happen that the original process has exited
// and a new process has been created with the same pid, in this case, the
// container would already be stopped.
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
startTime, err := system.GetProcessStartTime(initPid)
if err != nil {
return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid)
}
if c.initProcessStartTime != startTime {
return false, nil
}
// return Running if the init process is alive
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
if err == syscall.ESRCH {
return false, nil
}
return false, newSystemError(err)
}
return true, nil
}
// runType classifies the container as Stopped, Created or Running by
// inspecting its init process.
//
// Stopped is returned when there is no init process, when signal 0 cannot be
// delivered because the pid no longer exists (ESRCH), or when
// doesInitProcessExist reports that the pid is no longer the original init
// process. Otherwise /proc/<pid>/environ is read: if it still contains the
// "_LIBCONTAINER" marker the container is Created, else it is Running.
//
// Any other failure is returned alongside Stopped.
func (c *linuxContainer) runType() (Status, error) {
	if c.initProcess == nil {
		return Stopped, nil
	}
	initPid := c.initProcess.pid()

	// signal 0 only probes whether the pid can be signalled
	switch killErr := syscall.Kill(initPid, 0); killErr {
	case nil:
		// the pid is alive, keep checking
	case syscall.ESRCH:
		// The process does not exist anymore; it may have exited just as we
		// were called, so this is not reported as an error.
		return Stopped, nil
	default:
		return Stopped, newSystemErrorWithCausef(killErr, "sending signal 0 to pid %d", initPid)
	}

	// make sure the pid still belongs to the original init process
	stillOriginal, err := c.doesInitProcessExist(initPid)
	if err != nil || !stillOriginal {
		return Stopped, err
	}

	// The environment tells the init process apart from the user's process,
	// which is the difference between Created and Running.
	envPath := fmt.Sprintf("/proc/%d/environ", initPid)
	environ, err := ioutil.ReadFile(envPath)
	if err != nil {
		return Stopped, newSystemErrorWithCausef(err, "reading /proc/%d/environ", initPid)
	}
	if bytes.Contains(environ, []byte("_LIBCONTAINER")) {
		return Created, nil
	}
	return Running, nil
}
func (c *linuxContainer) isPaused() (bool, error) {
data, err := ioutil.ReadFile(filepath.Join(c.cgroupManager.GetPaths()["freezer"], "freezer.state"))
if err != nil {
// If freezer cgroup is not mounted, the container would just be not paused.
if os.IsNotExist(err) {
return false, nil
}
return false, newSystemError(err)
return false, newSystemErrorWithCause(err, "checking if container is paused")
}
return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil
}
@ -1125,7 +1242,7 @@ func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceTyp
}
// only set to join this namespace if it exists
if _, err := os.Lstat(p); err != nil {
return nil, newSystemError(err)
return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p)
}
// do not allow namespace path with comma as we use it to separate
// the namespace paths

View File

@ -0,0 +1,20 @@
package libcontainer
// State represents a running container's state. It embeds BaseState; no
// platform specific fields are declared here yet.
type State struct {
BaseState
// Platform specific fields below here
}
// Container is a libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
//
// It embeds BaseContainer; no platform specific methods are declared here yet.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View File

@ -3,13 +3,13 @@
package libcontainer
// cgroup restoring strategy provided by criu
type cg_mode uint32
type cgMode uint32
const (
CRIU_CG_MODE_SOFT cg_mode = 3 + iota // restore cgroup properties if only dir created by criu
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
)
type CriuPageServerInfo struct {
@ -32,6 +32,6 @@ type CriuOpts struct {
FileLocks bool // handle file locks, for safety
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cg_mode // dump or restore cgroup mode
ManageCgroupsMode cgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
}

View File

@ -2,7 +2,7 @@ package libcontainer
import "io"
// API error code type.
// ErrorCode is the API error code type.
type ErrorCode int
// API error codes.
@ -56,7 +56,7 @@ func (c ErrorCode) String() string {
}
}
// API Error type.
// Error is the API error type.
type Error interface {
error

View File

@ -23,11 +23,12 @@ import (
)
const (
stateFilename = "state.json"
stateFilename = "state.json"
execFifoFilename = "exec.fifo"
)
var (
idRegex = regexp.MustCompile(`^[\w-\.]+$`)
idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
maxIdLen = 1024
)
@ -102,6 +103,15 @@ func TmpfsRoot(l *LinuxFactory) error {
return nil
}
// CriuPath builds a LinuxFactory option func. When the option is applied it
// stores criupath in the factory's CriuPath field; it never returns an error.
func CriuPath(criupath string) func(*LinuxFactory) error {
	setCriuPath := func(factory *LinuxFactory) error {
		factory.CriuPath = criupath
		return nil
	}
	return setCriuPath
}
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
@ -158,13 +168,34 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
uid, err := config.HostUID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
gid, err := config.HostGID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0700); err != nil {
if err := os.MkdirAll(containerRoot, 0711); err != nil {
return nil, newGenericError(err, SystemError)
}
if err := os.Chown(containerRoot, uid, gid); err != nil {
return nil, newGenericError(err, SystemError)
}
fifoName := filepath.Join(containerRoot, execFifoFilename)
oldMask := syscall.Umask(0000)
if err := syscall.Mkfifo(fifoName, 0622); err != nil {
syscall.Umask(oldMask)
return nil, newGenericError(err, SystemError)
}
syscall.Umask(oldMask)
if err := os.Chown(fifoName, uid, gid); err != nil {
return nil, newGenericError(err, SystemError)
}
c := &linuxContainer{
@ -195,17 +226,18 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
fds: state.ExternalDescriptors,
}
c := &linuxContainer{
initProcess: r,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
}
c.state = &createdState{c: c, s: Created}
c.state = &loadedState{c: c}
if err := c.refreshState(); err != nil {
return nil, err
}
@ -219,10 +251,18 @@ func (l *LinuxFactory) Type() string {
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
fdStr := os.Getenv("_LIBCONTAINER_INITPIPE")
pipefd, err := strconv.Atoi(fdStr)
if err != nil {
return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err)
var pipefd, rootfd int
for k, v := range map[string]*int{
"_LIBCONTAINER_INITPIPE": &pipefd,
"_LIBCONTAINER_STATEDIR": &rootfd,
} {
s := os.Getenv(k)
i, err := strconv.Atoi(s)
if err != nil {
return fmt.Errorf("unable to convert %s=%s to int", k, s)
}
*v = i
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
@ -231,6 +271,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
var i initer
defer func() {
// We have an error during the initialization of the container's init,
@ -239,24 +280,22 @@ func (l *LinuxFactory) StartInitialization() (err error) {
// this defer function will never be called.
if _, ok := i.(*linuxStandardInit); ok {
// Synchronisation only necessary for standard init.
if err := utils.WriteJSON(pipe, syncT{procError}); err != nil {
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
panic(err)
}
}
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
panic(err)
}
// ensure that this pipe is always closed
pipe.Close()
}()
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
}
}()
i, err = newContainerInit(it, pipe)
i, err = newContainerInit(it, pipe, rootfd)
if err != nil {
return err
}

View File

@ -1,6 +1,7 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
@ -51,6 +52,21 @@ func newGenericError(err error, c ErrorCode) Error {
}
// newSystemError builds a system error for err with an empty cause. It calls
// createSystemError directly, which skips a fixed number of stack frames when
// capturing the stack trace.
func newSystemError(err error) Error {
return createSystemError(err, "")
}
// newSystemErrorWithCausef builds a system error for err whose cause is the
// result of formatting cause with the arguments v, as fmt.Sprintf does.
// createSystemError is called directly from here because it skips a fixed
// number of stack frames when capturing the stack trace.
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
	formatted := fmt.Sprintf(cause, v...)
	return createSystemError(err, formatted)
}
// newSystemErrorWithCause builds a system error for err, recording cause
// verbatim as the cause. It calls createSystemError directly, which skips a
// fixed number of stack frames when capturing the stack trace.
func newSystemErrorWithCause(err error, cause string) Error {
return createSystemError(err, cause)
}
// createSystemError creates the specified error with the correct number of
// stack frames skipped. This is only to be called by the other functions for
// formatting the error.
func createSystemError(err error, cause string) Error {
if le, ok := err.(Error); ok {
return le
}
@ -58,7 +74,8 @@ func newSystemError(err error) Error {
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Stack: stacktrace.Capture(1),
Cause: cause,
Stack: stacktrace.Capture(2),
}
if err != nil {
gerr.Message = err.Error()
@ -70,12 +87,17 @@ type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Cause string
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
return e.Message
if e.Cause == "" {
return e.Message
}
frame := e.Stack.Frames[0]
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
}
func (e *genericError) Code() ErrorCode {

View File

@ -52,19 +52,21 @@ type initConfig struct {
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
ExecFifoPath string `json:"start_pipe_path"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File) (initer, error) {
func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
@ -79,9 +81,10 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
}, nil
case initStandard:
return &linuxStandardInit{
pipe: pipe,
parentPid: syscall.Getppid(),
config: config,
pipe: pipe,
parentPid: syscall.Getppid(),
config: config,
stateDirFD: stateDirFD,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
@ -211,8 +214,8 @@ func setupUser(config *initConfig) error {
}
var addGroups []int
if len(config.Config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
if len(config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
if err != nil {
return err
}

View File

@ -1,12 +1,12 @@
// +build linux
package keyctl
package keys
import (
"fmt"
"syscall"
"strings"
"strconv"
"strings"
"syscall"
"unsafe"
)
@ -17,7 +17,7 @@ const KEYCTL_DESCRIBE = 6
type KeySerial uint32
func JoinSessionKeyring(name string) (KeySerial, error) {
var _name *byte = nil
var _name *byte
var err error
if len(name) > 0 {
@ -34,7 +34,7 @@ func JoinSessionKeyring(name string) (KeySerial, error) {
return KeySerial(sessKeyId), nil
}
// modify permissions on a keyring by reading the current permissions,
// ModKeyringPerm modifies permissions on a keyring by reading the current permissions,
// anding the bits with the given mask (clearing permissions) and setting
// additional permission bits
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
@ -64,4 +64,3 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
return nil
}

View File

@ -107,7 +107,7 @@ func SetFileLabel(path string, fileLabel string) error {
return nil
}
// Tell the kernel the label for all files to be created
// SetFileCreateLabel tells the kernel the label for all files to be created
func SetFileCreateLabel(fileLabel string) error {
if selinux.SelinuxEnabled() {
return selinux.Setfscreatecon(fileLabel)
@ -115,7 +115,7 @@ func SetFileCreateLabel(fileLabel string) error {
return nil
}
// Change the label of path to the filelabel string.
// Relabel changes the label of path to the filelabel string.
// It changes the MCS label to s0 if shared is true.
// This will allow all containers to share the content.
func Relabel(path string, fileLabel string, shared bool) error {

View File

@ -27,7 +27,8 @@ type Int32msg struct {
Value uint32
}
// int32msg has the following representation
// Serialize serializes the message.
// Int32msg has the following representation
// | nlattr len | nlattr type |
// | uint32 value |
func (msg *Int32msg) Serialize() []byte {
@ -43,7 +44,7 @@ func (msg *Int32msg) Len() int {
return syscall_NLA_HDRLEN + 4
}
// bytemsg has the following representation
// Bytemsg has the following representation
// | nlattr len | nlattr type |
// | value | pad |
type Bytemsg struct {

View File

@ -28,6 +28,10 @@ type Process struct {
// local to the container's user and group configuration.
User string
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string
// Cwd will change the processes current working directory inside the container's rootfs.
Cwd string
@ -102,8 +106,8 @@ type IO struct {
}
// NewConsole creates new console for process and returns it
func (p *Process) NewConsole(rootuid int) (Console, error) {
console, err := NewConsole(rootuid, rootuid)
func (p *Process) NewConsole(rootuid, rootgid int) (Console, error) {
console, err := NewConsole(rootuid, rootgid)
if err != nil {
return nil, err
}

View File

@ -51,6 +51,7 @@ type setnsProcess struct {
fds []string
process *Process
bootstrapData io.Reader
rootDir *os.File
}
func (p *setnsProcess) startTime() (string, error) {
@ -69,48 +70,49 @@ func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
err = p.cmd.Start()
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "starting setns process")
}
if p.bootstrapData != nil {
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
}
if err = p.execSetns(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "executing setns process")
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting oom score")
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting rlimits for process")
}
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "writing config to pipe")
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
}
// wait for the child process to fully complete and receive an error message
// if one was encountered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err)
return newSystemErrorWithCause(err, "decoding init error from pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return newSystemError(ierr)
return ierr
}
return nil
}
@ -123,7 +125,7 @@ func (p *setnsProcess) execSetns() error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return newSystemError(err)
return newSystemErrorWithCause(err, "waiting on setns process to finish")
}
if !status.Success() {
p.cmd.Wait()
@ -132,7 +134,7 @@ func (p *setnsProcess) execSetns() error {
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemError(err)
return newSystemErrorWithCause(err, "reading pid from init pipe")
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
@ -186,6 +188,7 @@ type initProcess struct {
process *Process
bootstrapData io.Reader
sharePidns bool
rootDir *os.File
}
func (p *initProcess) pid() int {
@ -221,6 +224,7 @@ func (p *initProcess) execSetns() error {
return err
}
p.cmd.Process = process
p.process.ops = p
return nil
}
@ -229,28 +233,29 @@ func (p *initProcess) start() error {
err := p.cmd.Start()
p.process.ops = p
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
p.process.ops = nil
return newSystemError(err)
return newSystemErrorWithCause(err, "starting init process command")
}
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return err
}
if err := p.execSetns(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "running exec setns process for init")
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
if err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
defer func() {
if err != nil {
@ -259,10 +264,10 @@ func (p *initProcess) start() error {
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "creating nework interfaces")
}
if err := p.sendConfig(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "sending config to init process")
}
var (
procSync syncT
@ -278,21 +283,21 @@ loop:
if err == io.EOF {
break loop
}
return newSystemError(err)
return newSystemErrorWithCause(err, "decoding sync type from init pipe")
}
switch procSync.Type {
case procReady:
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting oom score for ready process")
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting rlimits for ready process")
}
// call prestart hooks
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
@ -303,16 +308,16 @@ loop:
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
for _, hook := range p.config.Config.Hooks.Prestart {
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "reading syncT run type")
}
sentRun = true
case procHooks:
@ -324,22 +329,22 @@ loop:
Root: p.config.Config.Rootfs,
BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for _, hook := range p.config.Config.Hooks.Prestart {
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "reading syncT resume type")
}
sentResume = true
case procError:
// wait for the child process to fully complete and receive an error message
// if one was encountered
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err)
return newSystemErrorWithCause(err, "decoding proc error from init")
}
if ierr != nil {
break loop
@ -347,22 +352,22 @@ loop:
// Programmer error.
panic("No error following JSON procError payload.")
default:
return newSystemError(fmt.Errorf("invalid JSON synchronisation payload from child"))
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
}
if !sentRun {
return newSystemError(fmt.Errorf("could not synchronise with container process: %v", ierr))
return newSystemErrorWithCause(ierr, "container init failed")
}
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "shutting down init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return newSystemError(ierr)
return ierr
}
return nil
}
@ -447,7 +452,7 @@ func getPipeFds(pid int) ([]string, error) {
// InitializeIO creates pipes for use with the process's STDIO
// and returns the opposite side for each
func (p *Process) InitializeIO(rootuid int) (i *IO, err error) {
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
var fds []uintptr
i = &IO{}
// cleanup in case of an error
@ -479,7 +484,7 @@ func (p *Process) InitializeIO(rootuid int) (i *IO, err error) {
p.Stderr, i.Stderr = w, r
// change ownership of the pipes in case we are in a user namespace
for _, fd := range fds {
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil {
return nil, err
}
}

View File

@ -25,10 +25,10 @@ import (
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// setupDev returns true if /dev needs to be set up.
// needsSetupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
for _, m := range config.Mounts {
if m.Device == "bind" && (m.Destination == "/dev" || m.Destination == "/dev/") {
if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
return false
}
}
@ -39,35 +39,35 @@ func needsSetupDev(config *configs.Config) bool {
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "preparing rootfs")
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
if err := mountCmd(precmd); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "running premount command")
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q", m.Destination, config.Rootfs)
}
for _, postcmd := range m.PostmountCmds {
if err := mountCmd(postcmd); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "running postmount command")
}
}
}
if setupDev {
if err := createDevices(config); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "creating device nodes")
}
if err := setupPtmx(config, console); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting up ptmx")
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting up /dev symlinks")
}
}
// Signal the parent to run the pre-start hooks.
@ -78,7 +78,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
return err
}
if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
@ -86,19 +86,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
err = pivotRoot(config.Rootfs, config.PivotDir)
}
if err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "jailing process inside rootfs")
}
if setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
}
}
// remount dev as ro if specified
for _, m := range config.Mounts {
if m.Destination == "/dev" {
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
if m.Flags&syscall.MS_RDONLY != 0 {
if err := remountReadonly(m.Destination); err != nil {
return newSystemError(err)
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
}
}
break
@ -107,7 +107,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
// set rootfs ( / ) as readonly
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemError(err)
return newSystemErrorWithCause(err, "setting rootfs as readonly")
}
}
syscall.Umask(0022)
@ -115,14 +115,12 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
}
// mountCmd runs a single pre- or post-mount command described by cmd, using
// the command's own environment and working directory. When the command
// fails, the returned error carries the command description, everything it
// wrote to stdout and stderr, and the underlying error.
func mountCmd(cmd configs.Command) error {
	c := exec.Command(cmd.Path, cmd.Args...)
	c.Env, c.Dir = cmd.Env, cmd.Dir

	out, err := c.CombinedOutput()
	if err == nil {
		return nil
	}
	return fmt.Errorf("%#v failed: %s: %v", cmd, out, err)
}
@ -240,34 +238,23 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
return err
}
}
// create symlinks for merged cgroups
cwd, err := os.Getwd()
if err != nil {
return err
}
if err := os.Chdir(filepath.Join(rootfs, m.Destination)); err != nil {
return err
}
for _, mc := range merged {
for _, ss := range strings.Split(mc, ",") {
if err := os.Symlink(mc, ss); err != nil {
// if cgroup already exists, then okay(it could have been created before)
if os.IsExist(err) {
continue
}
os.Chdir(cwd)
// symlink(2) is very dumb, it will just shove the path into
// the link and doesn't do any checks or relative path
// conversion. Also, don't error out if the cgroup already exists.
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
return err
}
}
}
if err := os.Chdir(cwd); err != nil {
return err
}
if m.Flags&syscall.MS_RDONLY != 0 {
// remount cgroup root as readonly
mcgrouproot := &configs.Mount{
Source: m.Destination,
Device: "bind",
Destination: m.Destination,
Flags: defaultMountFlags | syscall.MS_RDONLY,
Flags: defaultMountFlags | syscall.MS_RDONLY | syscall.MS_BIND,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
@ -515,10 +502,10 @@ func getParentMount(rootfs string) (string, string, error) {
}
// Make parent mount private if it was shared
func rootfsParentMountPrivate(config *configs.Config) error {
func rootfsParentMountPrivate(rootfs string) error {
sharedMount := false
parentMount, optionalOpts, err := getParentMount(config.Rootfs)
parentMount, optionalOpts, err := getParentMount(rootfs)
if err != nil {
return err
}
@ -550,9 +537,10 @@ func prepareRoot(config *configs.Config) error {
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
return err
}
if err := rootfsParentMountPrivate(config); err != nil {
return err
if config.NoPivotRoot {
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
return err
}
}
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
@ -595,7 +583,14 @@ func pivotRoot(rootfs, pivotBaseDir string) (err error) {
}
}()
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
// Make the parent mount private
if err := rootfsParentMountPrivate(rootfs); err != nil {
return err
}
// Try again
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
}
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
@ -705,7 +700,7 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
data = label.FormatMountLabel(m.Data, mountLabel)
flags = m.Flags
)
if dest == "/dev" {
if libcontainerUtils.CleanPath(dest) == "/dev" {
flags &= ^syscall.MS_RDONLY
}
if !strings.HasPrefix(dest, rootfs) {

View File

@ -36,6 +36,11 @@ var archs = map[string]string{
"SCMP_ARCH_MIPSEL": "mipsel",
"SCMP_ARCH_MIPSEL64": "mipsel64",
"SCMP_ARCH_MIPSEL64N32": "mipsel64n32",
"SCMP_ARCH_PPC": "ppc",
"SCMP_ARCH_PPC64": "ppc64",
"SCMP_ARCH_PPC64LE": "ppc64le",
"SCMP_ARCH_S390": "s390",
"SCMP_ARCH_S390X": "s390x",
}
// ConvertStringToOperator converts a string into a Seccomp comparison operator.

View File

@ -10,7 +10,7 @@ import (
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
// Seccomp not supported, do nothing
// InitSeccomp does nothing because seccomp is not supported.
func InitSeccomp(config *configs.Seccomp) error {
if config != nil {
return ErrSeccompNotEnabled

View File

@ -16,7 +16,6 @@ import (
"sync"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/system"
)
@ -60,16 +59,31 @@ func getSelinuxMountPoint() string {
}
selinuxfs = ""
mounts, err := mount.GetMounts()
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return selinuxfs
}
for _, mount := range mounts {
if mount.Fstype == "selinuxfs" {
selinuxfs = mount.Mountpoint
break
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
// Safe as mountinfo encodes mountpoints with spaces as \040.
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
continue
}
if !strings.Contains(txt[sepIdx:], "selinuxfs") {
continue
}
fields := strings.Split(txt, " ")
if len(fields) < 5 {
continue
}
selinuxfs = fields[4]
break
}
if selinuxfs != "" {
var buf syscall.Statfs_t
syscall.Statfs(selinuxfs, &buf)
@ -297,7 +311,7 @@ func IntToMcs(id int, catRange uint32) string {
for ORD > TIER {
ORD = ORD - TIER
TIER -= 1
TIER--
}
TIER = SETSIZE - TIER
ORD = ORD + TIER
@ -438,7 +452,7 @@ func badPrefix(fpath string) error {
return nil
}
// Change the fpath file object to the SELinux label scon.
// Chcon changes the fpath file object to the SELinux label scon.
// If the fpath is a directory and recurse is true Chcon will walk the
// directory tree setting the label
func Chcon(fpath string, scon string, recurse bool) error {
@ -472,14 +486,14 @@ func DupSecOpt(src string) []string {
con["level"] == "" {
return nil
}
return []string{"label:user:" + con["user"],
"label:role:" + con["role"],
"label:type:" + con["type"],
"label:level:" + con["level"]}
return []string{"label=user:" + con["user"],
"label=role:" + con["role"],
"label=type:" + con["type"],
"label=level:" + con["level"]}
}
// DisableSecOpt returns a security opt that can be used to disable SELinux
// labeling support for future container processes
func DisableSecOpt() []string {
return []string{"label:disable"}
return []string{"label=disable"}
}

View File

@ -24,9 +24,11 @@ func (l *linuxSetnsInit) getSessionRingName() string {
}
func (l *linuxSetnsInit) Init() error {
// do not inherit the parent's session keyring
if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err
if !l.config.Config.NoNewKeyring {
// do not inherit the parent's session keyring
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err
}
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
@ -44,10 +46,8 @@ func (l *linuxSetnsInit) Init() error {
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}
if l.config.ProcessLabel != "" {
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View File

@ -2,14 +2,14 @@ package stacktrace
import "runtime"
// Caputure captures a stacktrace for the current calling go program
// Capture captures a stacktrace for the current calling go program
//
// skip is the number of frames to skip
func Capture(userSkip int) Stacktrace {
var (
skip = userSkip + 1 // add one for our own function
frames []Frame
prevPc uintptr = 0
prevPc uintptr
)
for i := skip; ; i++ {
pc, file, line, ok := runtime.Caller(i)

View File

@ -6,6 +6,7 @@ import (
"fmt"
"io"
"os"
"os/exec"
"syscall"
"github.com/opencontainers/runc/libcontainer/apparmor"
@ -17,9 +18,10 @@ import (
)
type linuxStandardInit struct {
pipe io.ReadWriter
parentPid int
config *initConfig
pipe io.ReadWriteCloser
parentPid int
stateDirFD int
config *initConfig
}
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
@ -43,16 +45,18 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
const PR_SET_NO_NEW_PRIVS = 0x26
func (l *linuxStandardInit) Init() error {
ringname, keepperms, newperms := l.getSessionRingParams()
if !l.config.Config.NoNewKeyring {
ringname, keepperms, newperms := l.getSessionRingParams()
// do not inherit the parent's session keyring
sessKeyId, err := keyctl.JoinSessionKeyring(ringname)
if err != nil {
return err
}
// make session keyring searcheable
if err := keyctl.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err
// do not inherit the parent's session keyring
sessKeyId, err := keys.JoinSessionKeyring(ringname)
if err != nil {
return err
}
// make session keyring searchable
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err
}
}
var console *linuxConsole
@ -123,7 +127,10 @@ func (l *linuxStandardInit) Init() error {
if err := syncParentReady(l.pipe); err != nil {
return err
}
if l.config.Config.Seccomp != nil {
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
// do this before dropping capabilities; otherwise do it as late as possible
// just before execve so as few syscalls take place after it as possible.
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
@ -137,11 +144,35 @@ func (l *linuxStandardInit) Init() error {
return err
}
// compare the parent from the initial start of the init process and make sure that it did not change.
// if the parent changes that means it died and we were reparened to something else so we should
// if the parent changes that means it died and we were reparented to something else so we should
// just kill ourself and not cause problems for someone else.
if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
// check for the arg before waiting to make sure it exists and it is returned
// as a create time error.
name, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
// close the pipe to signal that we have completed our init.
l.pipe.Close()
// wait for the fifo to be opened on the other side before
// exec'ing the users process.
fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "openat exec fifo")
}
if _, err := syscall.Write(fd, []byte("0")); err != nil {
return newSystemErrorWithCause(err, "write 0 exec fifo")
}
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}
return nil
}

View File

@ -6,6 +6,7 @@ import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/configs"
@ -77,7 +78,7 @@ type stoppedState struct {
}
func (b *stoppedState) status() Status {
return Destroyed
return Stopped
}
func (b *stoppedState) transition(s containerState) error {
@ -110,11 +111,11 @@ func (r *runningState) status() Status {
func (r *runningState) transition(s containerState) error {
switch s.(type) {
case *stoppedState:
running, err := r.c.isRunning()
t, err := r.c.runType()
if err != nil {
return err
}
if running {
if t == Running {
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
}
r.c.state = s
@ -129,16 +130,40 @@ func (r *runningState) transition(s containerState) error {
}
func (r *runningState) destroy() error {
running, err := r.c.isRunning()
t, err := r.c.runType()
if err != nil {
return err
}
if running {
if t == Running {
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
}
return destroy(r.c)
}
// createdState is the container state that reports the Created status.
type createdState struct {
	c *linuxContainer
}

// status always reports Created.
func (i *createdState) status() Status {
	return Created
}

// transition moves the container to state s. A transition to another
// createdState is a no-op; running, paused and stopped states are stored on
// the container; any other target produces a state transition error.
func (i *createdState) transition(s containerState) error {
	switch s.(type) {
	case *createdState:
		return nil
	case *runningState, *pausedState, *stoppedState:
		i.c.state = s
		return nil
	default:
		return newStateTransitionError(i, s)
	}
}

// destroy sends SIGKILL to the container's init process and then destroys
// the container. The result of the signal call is not checked.
func (i *createdState) destroy() error {
	i.c.initProcess.signal(syscall.SIGKILL)
	return destroy(i.c)
}
// pausedState represents a container that is currently paused. It cannot be destroyed in a
// paused state and must transition back to running first.
type pausedState struct {
@ -161,11 +186,11 @@ func (p *pausedState) transition(s containerState) error {
}
func (p *pausedState) destroy() error {
isRunning, err := p.c.isRunning()
t, err := p.c.runType()
if err != nil {
return err
}
if !isRunning {
if t != Running && t != Created {
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
return err
}
@ -175,7 +200,7 @@ func (p *pausedState) destroy() error {
}
// restoredState is the same as the running state but also has associated checkpoint
// information that maybe need destroyed when the container is stopped and destory is called.
// information that may need to be destroyed when the container is stopped and destroy is called.
type restoredState struct {
imageDir string
c *linuxContainer
@ -204,23 +229,23 @@ func (r *restoredState) destroy() error {
return destroy(r.c)
}
// createdState is used whenever a container is restored, loaded, or setting additional
// loadedState is used whenever a container is restored, loaded, or setting additional
// processes inside and it should not be destroyed when it is exiting.
type createdState struct {
type loadedState struct {
c *linuxContainer
s Status
}
func (n *createdState) status() Status {
func (n *loadedState) status() Status {
return n.s
}
func (n *createdState) transition(s containerState) error {
func (n *loadedState) transition(s containerState) error {
n.c.state = s
return nil
}
func (n *createdState) destroy() error {
func (n *loadedState) destroy() error {
if err := n.c.refreshState(); err != nil {
return err
}

View File

@ -0,0 +1,7 @@
package libcontainer
// Solaris - TODO

// Stats currently exposes only the network interface entries; the TODO above
// marks the Solaris implementation as unfinished.
type Stats struct {
	// Interfaces holds one entry per network interface.
	Interfaces []*NetworkInterface
}

View File

@ -100,17 +100,12 @@ func Setctty() error {
return nil
}
/*
* Detect whether we are currently running in a user namespace.
* Copied from github.com/lxc/lxd/shared/util.go
*/
// RunningInUserNS detects whether we are currently running in a user namespace.
// Copied from github.com/lxc/lxd/shared/util.go
func RunningInUserNS() bool {
file, err := os.Open("/proc/self/uid_map")
if err != nil {
/*
* This kernel-provided file only exists if user namespaces are
* supported
*/
// This kernel-provided file only exists if user namespaces are supported
return false
}
defer file.Close()

View File

@ -100,3 +100,22 @@ func SearchLabels(labels []string, query string) string {
}
return ""
}
// Annotations splits the libcontainer state labels into the bundle path and
// the user defined annotations. Every label is expected to look like
// "key=value"; labels without an "=" are ignored. The "bundle" key is a label
// added by libcontainer itself, so it is returned separately instead of being
// placed in the annotations map. The returned map is never nil.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
	userAnnotations = make(map[string]string)
	for _, label := range labels {
		// Split on the first "=" only, so values may themselves contain "=".
		sep := strings.Index(label, "=")
		if sep < 0 {
			continue
		}
		key, value := label[:sep], label[sep+1:]
		switch key {
		case "bundle":
			bundle = value
		default:
			userAnnotations[key] = value
		}
	}
	return bundle, userAnnotations
}