1
0
mirror of https://github.com/rancher/os.git synced 2025-09-05 16:52:20 +00:00

Bump libcompose and its dependencies

This commit is contained in:
Josh Curl
2016-05-23 17:22:40 -07:00
parent c18cd26e78
commit 50de80d09a
1109 changed files with 35052 additions and 125685 deletions

View File

@@ -1,91 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
var capabilityList = map[string]capability.Cap{
"SETPCAP": capability.CAP_SETPCAP,
"SYS_MODULE": capability.CAP_SYS_MODULE,
"SYS_RAWIO": capability.CAP_SYS_RAWIO,
"SYS_PACCT": capability.CAP_SYS_PACCT,
"SYS_ADMIN": capability.CAP_SYS_ADMIN,
"SYS_NICE": capability.CAP_SYS_NICE,
"SYS_RESOURCE": capability.CAP_SYS_RESOURCE,
"SYS_TIME": capability.CAP_SYS_TIME,
"SYS_TTY_CONFIG": capability.CAP_SYS_TTY_CONFIG,
"MKNOD": capability.CAP_MKNOD,
"AUDIT_WRITE": capability.CAP_AUDIT_WRITE,
"AUDIT_CONTROL": capability.CAP_AUDIT_CONTROL,
"MAC_OVERRIDE": capability.CAP_MAC_OVERRIDE,
"MAC_ADMIN": capability.CAP_MAC_ADMIN,
"NET_ADMIN": capability.CAP_NET_ADMIN,
"SYSLOG": capability.CAP_SYSLOG,
"CHOWN": capability.CAP_CHOWN,
"NET_RAW": capability.CAP_NET_RAW,
"DAC_OVERRIDE": capability.CAP_DAC_OVERRIDE,
"FOWNER": capability.CAP_FOWNER,
"DAC_READ_SEARCH": capability.CAP_DAC_READ_SEARCH,
"FSETID": capability.CAP_FSETID,
"KILL": capability.CAP_KILL,
"SETGID": capability.CAP_SETGID,
"SETUID": capability.CAP_SETUID,
"LINUX_IMMUTABLE": capability.CAP_LINUX_IMMUTABLE,
"NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
"NET_BROADCAST": capability.CAP_NET_BROADCAST,
"IPC_LOCK": capability.CAP_IPC_LOCK,
"IPC_OWNER": capability.CAP_IPC_OWNER,
"SYS_CHROOT": capability.CAP_SYS_CHROOT,
"SYS_PTRACE": capability.CAP_SYS_PTRACE,
"SYS_BOOT": capability.CAP_SYS_BOOT,
"LEASE": capability.CAP_LEASE,
"SETFCAP": capability.CAP_SETFCAP,
"WAKE_ALARM": capability.CAP_WAKE_ALARM,
"BLOCK_SUSPEND": capability.CAP_BLOCK_SUSPEND,
"AUDIT_READ": capability.CAP_AUDIT_READ,
}
func newCapWhitelist(caps []string) (*whitelist, error) {
l := []capability.Cap{}
for _, c := range caps {
v, ok := capabilityList[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
l = append(l, v)
}
pid, err := capability.NewPid(os.Getpid())
if err != nil {
return nil, err
}
return &whitelist{
keep: l,
pid: pid,
}, nil
}
type whitelist struct {
pid capability.Capabilities
keep []capability.Cap
}
// dropBoundingSet drops the capability bounding set to those specified in the whitelist.
func (w *whitelist) dropBoundingSet() error {
w.pid.Clear(capability.BOUNDS)
w.pid.Set(capability.BOUNDS, w.keep...)
return w.pid.Apply(capability.BOUNDS)
}
// drop drops all capabilities for the current process except those specified in the whitelist.
func (w *whitelist) drop() error {
w.pid.Clear(allCapabilityTypes)
w.pid.Set(allCapabilityTypes, w.keep...)
return w.pid.Apply(allCapabilityTypes)
}

View File

@@ -1,15 +0,0 @@
package libcontainer
import "io"
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriter
io.Closer
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
Fd() uintptr
}

View File

@@ -1,13 +0,0 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// newConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View File

@@ -1,145 +0,0 @@
package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/docker/libcontainer/label"
)
// newConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// newConsoleFromPath is an internal function returning an initialized console for use inside
// a container's MNT namespace.
func newConsoleFromPath(slavePath string) *linuxConsole {
return &linuxConsole{
slavePath: slavePath,
}
}
// linuxConsole is a linux psuedo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) Fd() uintptr {
return c.master.Fd()
}
func (c *linuxConsole) Path() string {
return c.slavePath
}
func (c *linuxConsole) Read(b []byte) (int, error) {
return c.master.Read(b)
}
func (c *linuxConsole) Write(b []byte) (int, error) {
return c.master.Write(b)
}
func (c *linuxConsole) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(syscall.O_RDWR)
if err != nil {
return err
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := syscall.Dup3(fd, i, 0); err != nil {
return err
}
}
return nil
}
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := syscall.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
Path: c.slavePath,
Err: e,
}
}
return os.NewFile(uintptr(r), c.slavePath), nil
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
var n int32
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}

View File

@@ -1,30 +0,0 @@
package libcontainer
// newConsole returns an initalized console that can be used within a container
func newConsole(uid, gid int) (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows psuedo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View File

@@ -1,162 +0,0 @@
// Libcontainer provides a native Go implementation for creating containers
// with namespaces, cgroups, capabilities, and filesystem access controls.
// It allows you to manage the lifecycle of the container performing additional operations
// after the container is created.
package libcontainer
import (
"github.com/docker/libcontainer/configs"
)
// The status of a container.
type Status int
const (
// The container exists and is running.
Running Status = iota + 1
// The container exists, it is in the process of being paused.
Pausing
// The container exists, but all its processes are paused.
Paused
// The container exists, but its state is saved on disk
Checkpointed
// The container does not exist.
Destroyed
)
// State represents a running container's state
type State struct {
// ID is the container ID.
ID string `json:"id"`
// InitProcessPid is the init process id in the parent namespace.
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time.
InitProcessStartTime string `json:"init_process_start"`
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
// with the value as the path.
CgroupPaths map[string]string `json:"cgroup_paths"`
// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
// with the value as the path.
NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"`
// Config is the container's configuration.
Config configs.Config `json:"config"`
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
// Returns the ID of the container
ID() string
// Returns the current status of the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// Systemerror - System error.
State() (*State, error)
// Returns the current config of the container.
Config() configs.Config
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
Processes() ([]int, error)
// Returns statistics for the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Stats() (*Stats, error)
// Set cgroup resources of container as configured
//
// We can use this to change resources when containers are running.
//
// errors:
// Systemerror - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
// start. You can track process lifecycle with passed Process structure.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// Systemerror - System error.
Start(process *Process) (err error)
// Checkpoint checkpoints the running container's state to disk using the criu(8) utility.
//
// errors:
// Systemerror - System error.
Checkpoint(criuOpts *CriuOpts) error
// Restore restores the checkpointed container to a running state using the criu(8) utiity.
//
// errors:
// Systemerror - System error.
Restore(process *Process, criuOpts *CriuOpts) error
// Destroys the container after killing all running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// errors:
// Systemerror - System error.
Destroy() error
// If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses
// the execution of any user processes. Asynchronously, when the container finished being paused the
// state is changed to PAUSED.
// If the Container state is PAUSED, do nothing.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Pause() error
// If the Container state is PAUSED, resumes the execution of any user processes in the
// Container before setting the Container state to RUNNING.
// If the Container state is RUNNING, do nothing.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Resume() error
// NotifyOOM returns a read-only channel signaling when the container receives an OOM notification.
//
// errors:
// Systemerror - System error.
NotifyOOM() (<-chan struct{}, error)
}

View File

@@ -1,779 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/criurpc"
"github.com/golang/protobuf/proto"
)
const stdioFdCount = 3
type linuxContainer struct {
id string
root string
config *configs.Config
cgroupManager cgroups.Manager
initPath string
initArgs []string
initProcess parentProcess
criuPath string
m sync.Mutex
}
// ID returns the container's unique ID
func (c *linuxContainer) ID() string {
return c.id
}
// Config returns the container's configuration
func (c *linuxContainer) Config() configs.Config {
return *c.config
}
func (c *linuxContainer) Status() (Status, error) {
c.m.Lock()
defer c.m.Unlock()
return c.currentStatus()
}
func (c *linuxContainer) State() (*State, error) {
c.m.Lock()
defer c.m.Unlock()
return c.currentState()
}
func (c *linuxContainer) Processes() ([]int, error) {
pids, err := c.cgroupManager.GetPids()
if err != nil {
return nil, newSystemError(err)
}
return pids, nil
}
func (c *linuxContainer) Stats() (*Stats, error) {
var (
err error
stats = &Stats{}
)
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemError(err)
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
if err != nil {
return stats, newSystemError(err)
}
stats.Interfaces = append(stats.Interfaces, istats)
}
}
return stats, nil
}
func (c *linuxContainer) Set(config configs.Config) error {
c.m.Lock()
defer c.m.Unlock()
c.config = &config
return c.cgroupManager.Set(c.config)
}
func (c *linuxContainer) Start(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
doInit := status == Destroyed
parent, err := c.newParentProcess(process, doInit)
if err != nil {
return newSystemError(err)
}
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := parent.terminate(); err != nil {
logrus.Warn(err)
}
return newSystemError(err)
}
process.ops = parent
if doInit {
c.updateState(parent)
}
return nil
}
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
parentPipe, childPipe, err := newPipe()
if err != nil {
return nil, newSystemError(err)
}
cmd, err := c.commandTemplate(p, childPipe)
if err != nil {
return nil, newSystemError(err)
}
if !doInit {
return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil
}
return c.newInitProcess(p, cmd, parentPipe, childPipe)
}
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
cmd := &exec.Cmd{
Path: c.initPath,
Args: c.initArgs,
}
cmd.Stdin = p.Stdin
cmd.Stdout = p.Stdout
cmd.Stderr = p.Stderr
cmd.Dir = c.config.Rootfs
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
if c.config.ParentDeathSignal > 0 {
cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
}
return cmd, nil
}
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
t := "_LIBCONTAINER_INITTYPE=standard"
cloneFlags := c.config.Namespaces.CloneFlags()
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil {
// user mappings are not supported
return nil, err
}
// Default to root user when user namespaces are enabled.
if cmd.SysProcAttr.Credential == nil {
cmd.SysProcAttr.Credential = &syscall.Credential{}
}
}
cmd.Env = append(cmd.Env, t)
cmd.SysProcAttr.Cloneflags = cloneFlags
return &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
}, nil
}
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess {
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()),
"_LIBCONTAINER_INITTYPE=setns",
)
if p.consolePath != "" {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath)
}
// TODO: set on container for process management
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
childPipe: childPipe,
parentPipe: parentPipe,
config: c.newInitConfig(p),
}
}
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
return &initConfig{
Config: c.config,
Args: process.Args,
Env: process.Env,
User: process.User,
Cwd: process.Cwd,
Console: process.consolePath,
Capabilities: process.Capabilities,
PassedFilesCount: len(process.ExtraFiles),
}
}
func newPipe() (parent *os.File, child *os.File, err error) {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
}
func (c *linuxContainer) Destroy() error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
if status != Destroyed {
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
}
if !c.config.Namespaces.Contains(configs.NEWPID) {
if err := killCgroupProcesses(c.cgroupManager); err != nil {
logrus.Warn(err)
}
}
err = c.cgroupManager.Destroy()
if rerr := os.RemoveAll(c.root); err == nil {
err = rerr
}
c.initProcess = nil
return err
}
func (c *linuxContainer) Pause() error {
c.m.Lock()
defer c.m.Unlock()
return c.cgroupManager.Freeze(configs.Frozen)
}
func (c *linuxContainer) Resume() error {
c.m.Lock()
defer c.m.Unlock()
return c.cgroupManager.Freeze(configs.Thawed)
}
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
return notifyOnOOM(c.cgroupManager.GetPaths())
}
// XXX debug support, remove when debugging done.
func addArgsFromEnv(evar string, args *[]string) {
if e := os.Getenv(evar); e != "" {
for _, f := range strings.Fields(e) {
*args = append(*args, f)
}
}
fmt.Printf(">>> criu %v\n", *args)
}
func (c *linuxContainer) checkCriuVersion() error {
var x, y, z int
out, err := exec.Command(c.criuPath, "-V").Output()
if err != nil {
return err
}
n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
if err != nil {
n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6
}
if n < 2 || err != nil {
return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
}
if x*10000+y*100+z < 10502 {
return fmt.Errorf("CRIU version must be 1.5.2 or higher")
}
return nil
}
const descriptors_filename = "descriptors.json"
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
if err := c.checkCriuVersion(); err != nil {
return err
}
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
defer imageDir.Close()
rpcOpts := criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String("dump.log"),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
}
// append optional criu opts, e.g., page-server and port
if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 {
rpcOpts.Ps = &criurpc.CriuPageServerInfo{
Address: proto.String(criuOpts.PageServer.Address),
Port: proto.Int32(criuOpts.PageServer.Port),
}
}
t := criurpc.CriuReqType_DUMP
req := criurpc.CriuReq{
Type: &t,
Opts: &rpcOpts,
}
for _, m := range c.config.Mounts {
if m.Device == "bind" {
mountDest := m.Destination
if strings.HasPrefix(mountDest, c.config.Rootfs) {
mountDest = mountDest[len(c.config.Rootfs):]
}
extMnt := new(criurpc.ExtMountMap)
extMnt.Key = proto.String(mountDest)
extMnt.Val = proto.String(mountDest)
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
}
// Write the FD info to a file in the image directory
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
if err != nil {
return err
}
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename), fdsJSON, 0655)
if err != nil {
return err
}
err = c.criuSwrk(nil, &req, criuOpts)
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
if err := c.checkCriuVersion(); err != nil {
return err
}
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
// Since a container can be C/R'ed multiple times,
// the work directory may already exist.
if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
defer imageDir.Close()
// CRIU has a few requirements for a root directory:
// * it must be a mount point
// * its parent must not be overmounted
// c.config.Rootfs is bind-mounted to a temporary directory
// to satisfy these requirements.
root := filepath.Join(c.root, "criu-root")
if err := os.Mkdir(root, 0755); err != nil {
return err
}
defer os.Remove(root)
root, err = filepath.EvalSymlinks(root)
if err != nil {
return err
}
err = syscall.Mount(c.config.Rootfs, root, "", syscall.MS_BIND|syscall.MS_REC, "")
if err != nil {
return err
}
defer syscall.Unmount(root, syscall.MNT_DETACH)
t := criurpc.CriuReqType_RESTORE
req := criurpc.CriuReq{
Type: &t,
Opts: &criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String("restore.log"),
RstSibling: proto.Bool(true),
Root: proto.String(root),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
},
}
for _, m := range c.config.Mounts {
if m.Device == "bind" {
mountDest := m.Destination
if strings.HasPrefix(mountDest, c.config.Rootfs) {
mountDest = mountDest[len(c.config.Rootfs):]
}
extMnt := new(criurpc.ExtMountMap)
extMnt.Key = proto.String(mountDest)
extMnt.Val = proto.String(m.Source)
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(iface.HostInterfaceName)
veth.IfIn = proto.String(iface.Name)
req.Opts.Veths = append(req.Opts.Veths, veth)
break
case "loopback":
break
}
}
var (
fds []string
fdJSON []byte
)
if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename)); err != nil {
return err
}
if err = json.Unmarshal(fdJSON, &fds); err != nil {
return err
}
for i := range fds {
if s := fds[i]; strings.Contains(s, "pipe:") {
inheritFd := new(criurpc.InheritFd)
inheritFd.Key = proto.String(s)
inheritFd.Fd = proto.Int32(int32(i))
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
}
}
err = c.criuSwrk(process, &req, criuOpts)
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts) error {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return err
}
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
defer criuClient.Close()
defer criuServer.Close()
args := []string{"swrk", "3"}
cmd := exec.Command(c.criuPath, args...)
if process != nil {
cmd.Stdin = process.Stdin
cmd.Stdout = process.Stdout
cmd.Stderr = process.Stderr
}
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
if err := cmd.Start(); err != nil {
return err
}
criuServer.Close()
defer func() {
criuClient.Close()
_, err := cmd.Process.Wait()
if err != nil {
return
}
}()
var extFds []string
if process != nil {
extFds, err = getPipeFds(cmd.Process.Pid)
if err != nil {
return err
}
}
data, err := proto.Marshal(req)
if err != nil {
return err
}
_, err = criuClient.Write(data)
if err != nil {
return err
}
buf := make([]byte, 10*4096)
for true {
n, err := criuClient.Read(buf)
if err != nil {
return err
}
if n == 0 {
return fmt.Errorf("unexpected EOF")
}
if n == len(buf) {
return fmt.Errorf("buffer is too small")
}
resp := new(criurpc.CriuResp)
err = proto.Unmarshal(buf[:n], resp)
if err != nil {
return err
}
if !resp.GetSuccess() {
return fmt.Errorf("criu failed: type %s errno %d", req.GetType().String(), resp.GetCrErrno())
}
t := resp.GetType()
switch {
case t == criurpc.CriuReqType_NOTIFY:
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
return err
}
t = criurpc.CriuReqType_NOTIFY
req = &criurpc.CriuReq{
Type: &t,
NotifySuccess: proto.Bool(true),
}
data, err = proto.Marshal(req)
if err != nil {
return err
}
n, err = criuClient.Write(data)
if err != nil {
return err
}
continue
case t == criurpc.CriuReqType_RESTORE:
case t == criurpc.CriuReqType_DUMP:
break
default:
return fmt.Errorf("unable to parse the response %s", resp.String())
}
break
}
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process.
st, err := cmd.Process.Wait()
if err != nil {
return err
}
if !st.Success() {
return fmt.Errorf("criu failed: %s", st.String())
}
return nil
}
// block any external network activity
func lockNetwork(config *configs.Config) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.detach(config); err != nil {
return err
}
}
return nil
}
func unlockNetwork(config *configs.Config) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err = strategy.attach(config); err != nil {
return err
}
}
return nil
}
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
notify := resp.GetNotify()
if notify == nil {
return fmt.Errorf("invalid response: %s", resp.String())
}
switch {
case notify.GetScript() == "post-dump":
if !opts.LeaveRunning {
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
if err != nil {
return err
}
f.Close()
}
break
case notify.GetScript() == "network-unlock":
if err := unlockNetwork(c.config); err != nil {
return err
}
break
case notify.GetScript() == "network-lock":
if err := lockNetwork(c.config); err != nil {
return err
}
break
case notify.GetScript() == "post-restore":
pid := notify.GetPid()
r, err := newRestoredProcess(int(pid), fds)
if err != nil {
return err
}
// TODO: crosbymichael restore previous process information by saving the init process information in
// the container's state file or separate process state files.
if err := c.updateState(r); err != nil {
return err
}
process.ops = r
break
}
return nil
}
func (c *linuxContainer) updateState(process parentProcess) error {
c.initProcess = process
state, err := c.currentState()
if err != nil {
return err
}
f, err := os.Create(filepath.Join(c.root, stateFilename))
if err != nil {
return err
}
defer f.Close()
os.Remove(filepath.Join(c.root, "checkpoint"))
return json.NewEncoder(f).Encode(state)
}
func (c *linuxContainer) currentStatus() (Status, error) {
if _, err := os.Stat(filepath.Join(c.root, "checkpoint")); err == nil {
return Checkpointed, nil
}
if c.initProcess == nil {
return Destroyed, nil
}
// return Running if the init process is alive
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
if err == syscall.ESRCH {
return Destroyed, nil
}
return 0, newSystemError(err)
}
if c.config.Cgroups != nil && c.config.Cgroups.Freezer == configs.Frozen {
return Paused, nil
}
return Running, nil
}
func (c *linuxContainer) currentState() (*State, error) {
status, err := c.currentStatus()
if err != nil {
return nil, err
}
if status == Destroyed {
return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists)
}
startTime, err := c.initProcess.startTime()
if err != nil {
return nil, newSystemError(err)
}
state := &State{
ID: c.ID(),
Config: *c.config,
InitProcessPid: c.initProcess.pid(),
InitProcessStartTime: startTime,
CgroupPaths: c.cgroupManager.GetPaths(),
NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: c.initProcess.externalDescriptors(),
}
for _, ns := range c.config.Namespaces {
state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
}
for _, nsType := range configs.NamespaceTypes() {
if _, ok := state.NamespacePaths[nsType]; !ok {
ns := configs.Namespace{Type: nsType}
state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
}
}
return state, nil
}

View File

@@ -1,212 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"testing"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
)
type mockCgroupManager struct {
pids []int
stats *cgroups.Stats
paths map[string]string
}
func (m *mockCgroupManager) GetPids() ([]int, error) {
return m.pids, nil
}
func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) {
return m.stats, nil
}
func (m *mockCgroupManager) Apply(pid int) error {
return nil
}
func (m *mockCgroupManager) Set(container *configs.Config) error {
return nil
}
func (m *mockCgroupManager) Destroy() error {
return nil
}
func (m *mockCgroupManager) GetPaths() map[string]string {
return m.paths
}
func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
return nil
}
type mockProcess struct {
_pid int
started string
}
func (m *mockProcess) terminate() error {
return nil
}
func (m *mockProcess) pid() int {
return m._pid
}
func (m *mockProcess) startTime() (string, error) {
return m.started, nil
}
func (m *mockProcess) start() error {
return nil
}
func (m *mockProcess) wait() (*os.ProcessState, error) {
return nil, nil
}
func (m *mockProcess) signal(_ os.Signal) error {
return nil
}
func (p *mockProcess) externalDescriptors() []string {
return []string{}
}
func (p *mockProcess) setExternalDescriptors(newFds []string) {
}
func TestGetContainerPids(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{pids: []int{1, 2, 3}},
}
pids, err := container.Processes()
if err != nil {
t.Fatal(err)
}
for i, expected := range []int{1, 2, 3} {
if pids[i] != expected {
t.Fatalf("expected pid %d but received %d", expected, pids[i])
}
}
}
func TestGetContainerStats(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
},
}
stats, err := container.Stats()
if err != nil {
t.Fatal(err)
}
if stats.CgroupStats == nil {
t.Fatal("cgroup stats are nil")
}
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
}
}
func TestGetContainerState(t *testing.T) {
var (
pid = os.Getpid()
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
expectedNetworkPath = "/networks/fd"
)
container := &linuxContainer{
id: "myid",
config: &configs.Config{
Namespaces: []configs.Namespace{
{Type: configs.NEWPID},
{Type: configs.NEWNS},
{Type: configs.NEWNET, Path: expectedNetworkPath},
{Type: configs.NEWUTS},
// emulate host for IPC
//{Type: configs.NEWIPC},
},
},
initProcess: &mockProcess{
_pid: pid,
started: "010",
},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
paths: map[string]string{
"memory": expectedMemoryPath,
},
},
}
state, err := container.State()
if err != nil {
t.Fatal(err)
}
if state.InitProcessPid != pid {
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
}
if state.InitProcessStartTime != "010" {
t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime)
}
paths := state.CgroupPaths
if paths == nil {
t.Fatal("cgroup paths should not be nil")
}
if memPath := paths["memory"]; memPath != expectedMemoryPath {
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
}
for _, ns := range container.config.Namespaces {
path := state.NamespacePaths[ns.Type]
if path == "" {
t.Fatalf("expected non nil namespace path for %s", ns.Type)
}
if ns.Type == configs.NEWNET {
if path != expectedNetworkPath {
t.Fatalf("expected path %q but received %q", expectedNetworkPath, path)
}
} else {
file := ""
switch ns.Type {
case configs.NEWNET:
file = "net"
case configs.NEWNS:
file = "mnt"
case configs.NEWPID:
file = "pid"
case configs.NEWIPC:
file = "ipc"
case configs.NEWUSER:
file = "user"
case configs.NEWUTS:
file = "uts"
}
expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file)
if expected != path {
t.Fatalf("expected path %q but received %q", expected, path)
}
}
}
}

View File

@@ -1,13 +0,0 @@
// +build !go1.4
package libcontainer
import (
"fmt"
"syscall"
)
// not available before go 1.4
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
return fmt.Errorf("User namespace is not supported in golang < 1.4")
}

View File

@@ -1,26 +0,0 @@
// +build go1.4
package libcontainer
import "syscall"
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
if c.config.UidMappings != nil {
sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings))
for i, um := range c.config.UidMappings {
sys.UidMappings[i].ContainerID = um.ContainerID
sys.UidMappings[i].HostID = um.HostID
sys.UidMappings[i].Size = um.Size
}
}
if c.config.GidMappings != nil {
sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings))
for i, gm := range c.config.GidMappings {
sys.GidMappings[i].ContainerID = gm.ContainerID
sys.GidMappings[i].HostID = gm.HostID
sys.GidMappings[i].Size = gm.Size
}
}
return nil
}

View File

@@ -1,16 +0,0 @@
package libcontainer
type CriuPageServerInfo struct {
Address string // IP address of CRIU page server
Port int32 // port number of CRIU page server
}
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs
PageServer CriuPageServerInfo // allow to dump to criu page server
}

View File

@@ -1,62 +0,0 @@
package libcontainer
import "io"
// API error code type.
type ErrorCode int
// API error codes.
const (
// Factory errors
IdInUse ErrorCode = iota
InvalidIdFormat
// Container errors
ContainerNotExists
ContainerPaused
ContainerNotStopped
ContainerNotRunning
// Process errors
ProcessNotExecuted
// Common errors
ConfigInvalid
SystemError
)
func (c ErrorCode) String() string {
switch c {
case IdInUse:
return "Id already in use"
case InvalidIdFormat:
return "Invalid format"
case ContainerPaused:
return "Container paused"
case ConfigInvalid:
return "Invalid configuration"
case SystemError:
return "System error"
case ContainerNotExists:
return "Container does not exist"
case ContainerNotStopped:
return "Container is not stopped"
case ContainerNotRunning:
return "Container is not running"
default:
return "Unknown error"
}
}
// API Error type.
type Error interface {
error
// Returns a verbose string including the error message
// and a representation of the stack trace suitable for
// printing.
Detail(w io.Writer) error
// Returns the error code for this error.
Code() ErrorCode
}

View File

@@ -1,20 +0,0 @@
package libcontainer
import "testing"
func TestErrorCode(t *testing.T) {
codes := map[ErrorCode]string{
IdInUse: "Id already in use",
InvalidIdFormat: "Invalid format",
ContainerPaused: "Container paused",
ConfigInvalid: "Invalid configuration",
SystemError: "System error",
ContainerNotExists: "Container does not exist",
}
for code, expected := range codes {
if actual := code.String(); actual != expected {
t.Fatalf("expected string %q but received %q", expected, actual)
}
}
}

View File

@@ -1,45 +0,0 @@
package libcontainer
import (
"github.com/docker/libcontainer/configs"
)
type Factory interface {
// Creates a new container with the given id and starts the initial process inside it.
// id must be a string containing only letters, digits and underscores and must contain
// between 1 and 1024 characters, inclusive.
//
// The id must not already be in use by an existing container. Containers created using
// a factory with the same path (and file system) must have distinct ids.
//
// Returns the new container with a running process.
//
// errors:
// IdInUse - id is already in use by a container
// InvalidIdFormat - id has incorrect format
// ConfigInvalid - config is invalid
// Systemerror - System error
//
// On error, any partially created container parts are cleaned up (the operation is atomic).
Create(id string, config *configs.Config) (Container, error)
// Load takes an ID for an existing container and returns the container information
// from the state. This presents a read only view of the container.
//
// errors:
// Path does not exist
// Container is stopped
// System error
Load(id string) (Container, error)
// StartInitialization is an internal API to libcontainer used during the reexec of the
// container.
//
// Errors:
// Pipe connection error
// System error
StartInitialization() error
// Type returns info string about factory type (e.g. lxc, libcontainer...)
Type() string
}

View File

@@ -1,263 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/configs/validate"
)
const (
stateFilename = "state.json"
)
var (
idRegex = regexp.MustCompile(`^[\w_]+$`)
maxIdLen = 1024
)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init arguments.
func InitArgs(args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
name := args[0]
if filepath.Base(name) == name {
if lp, err := exec.LookPath(name); err == nil {
name = lp
}
}
l.InitPath = name
l.InitArgs = append([]string{name}, args[1:]...)
return nil
}
}
// InitPath returns an options func to configure a LinuxFactory with the
// provided absolute path to the init binary and arguements.
func InitPath(path string, args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.InitPath = path
l.InitArgs = args
return nil
}
}
// SystemdCgroups is an options func to configure a LinuxFactory to return
// containers that use systemd to create and manage cgroups.
func SystemdCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &systemd.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// Cgroupfs is an options func to configure a LinuxFactory to return
// containers that use the native cgroups filesystem implementation to
// create and manage cgroups.
func Cgroupfs(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &fs.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
if err != nil {
return err
}
if !mounted {
if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
return err
}
}
return nil
}
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
if root != "" {
if err := os.MkdirAll(root, 0700); err != nil {
return nil, newGenericError(err, SystemError)
}
}
l := &LinuxFactory{
Root: root,
Validator: validate.New(),
CriuPath: "criu",
}
InitArgs(os.Args[0], "init")(l)
Cgroupfs(l)
for _, opt := range options {
if err := opt(l); err != nil {
return nil, err
}
}
return l, nil
}
// LinuxFactory implements the default factory interface for linux based systems.
type LinuxFactory struct {
// Root directory for the factory to store state.
Root string
// InitPath is the absolute path to the init binary.
InitPath string
// InitArgs are arguments for calling the init responsibilities for spawning
// a container.
InitArgs []string
// CriuPath is the path to the criu binary used for checkpoint and restore of
// containers.
CriuPath string
// Validator provides validation to container configurations.
Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
}
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
if err := l.validateID(id); err != nil {
return nil, err
}
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse)
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0700); err != nil {
return nil, newGenericError(err, SystemError)
}
return &linuxContainer{
id: id,
root: containerRoot,
config: config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}, nil
}
func (l *LinuxFactory) Load(id string) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
state, err := l.loadState(containerRoot)
if err != nil {
return nil, err
}
r := &nonChildProcess{
processPid: state.InitProcessPid,
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
return &linuxContainer{
initProcess: r,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
}, nil
}
func (l *LinuxFactory) Type() string {
return "libcontainer"
}
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
pipefd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_INITPIPE"))
if err != nil {
return err
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
defer func() {
// if we have an error during the initialization of the container's init then send it back to the
// parent process in the form of an initError.
if err != nil {
// ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(pipe)
if err := json.NewEncoder(pipe).Encode(newSystemError(err)); err != nil {
panic(err)
}
}
// ensure that this pipe is always closed
pipe.Close()
}()
i, err := newContainerInit(it, pipe)
if err != nil {
return err
}
return i.Init()
}
func (l *LinuxFactory) loadState(root string) (*State, error) {
f, err := os.Open(filepath.Join(root, stateFilename))
if err != nil {
if os.IsNotExist(err) {
return nil, newGenericError(err, ContainerNotExists)
}
return nil, newGenericError(err, SystemError)
}
defer f.Close()
var state *State
if err := json.NewDecoder(f).Decode(&state); err != nil {
return nil, newGenericError(err, SystemError)
}
return state, nil
}
func (l *LinuxFactory) validateID(id string) error {
if !idRegex.MatchString(id) {
return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat)
}
if len(id) > maxIdLen {
return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat)
}
return nil
}

View File

@@ -1,179 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/docker/docker/pkg/mount"
"github.com/docker/libcontainer/configs"
)
func newTestRoot() (string, error) {
dir, err := ioutil.TempDir("", "libcontainer")
if err != nil {
return "", err
}
return dir, nil
}
func TestFactoryNew(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
}
func TestFactoryNewTmpfs(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs, TmpfsRoot)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
mounted, err := mount.Mounted(lfactory.Root)
if err != nil {
t.Fatal(err)
}
if !mounted {
t.Fatalf("Factory Root is not mounted")
}
mounts, err := mount.GetMounts()
if err != nil {
t.Fatal(err)
}
var found bool
for _, m := range mounts {
if m.Mountpoint == lfactory.Root {
if m.Fstype != "tmpfs" {
t.Fatalf("Fstype of root: %s, expected %s", m.Fstype, "tmpfs")
}
if m.Source != "tmpfs" {
t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs")
}
found = true
}
}
if !found {
t.Fatalf("Factory Root is not listed in mounts list")
}
}
func TestFactoryLoadNotExists(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
_, err = factory.Load("nocontainer")
if err == nil {
t.Fatal("expected nil error loading non-existing container")
}
lerr, ok := err.(Error)
if !ok {
t.Fatal("expected libcontainer error type")
}
if lerr.Code() != ContainerNotExists {
t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code())
}
}
func TestFactoryLoadContainer(t *testing.T) {
root, err := newTestRoot()
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(root)
// setup default container config and state for mocking
var (
id = "1"
expectedConfig = &configs.Config{
Rootfs: "/mycontainer/root",
}
expectedState = &State{
InitProcessPid: 1024,
Config: *expectedConfig,
}
)
if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil {
t.Fatal(err)
}
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
t.Fatal(err)
}
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
container, err := factory.Load(id)
if err != nil {
t.Fatal(err)
}
if container.ID() != id {
t.Fatalf("expected container id %q but received %q", id, container.ID())
}
config := container.Config()
if config.Rootfs != expectedConfig.Rootfs {
t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs)
}
lcontainer, ok := container.(*linuxContainer)
if !ok {
t.Fatal("expected linux container on linux based systems")
}
if lcontainer.initProcess.pid() != expectedState.InitProcessPid {
t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, lcontainer.initProcess.pid())
}
}
func marshal(path string, v interface{}) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
return json.NewEncoder(f).Encode(v)
}

View File

@@ -1,74 +0,0 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
"github.com/docker/libcontainer/stacktrace"
)
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}
Message: {{.Message}}
{{end}}
Frames:{{range $i, $frame := .Stack.Frames}}
---
{{$i}}: {{$frame.Function}}
Package: {{$frame.Package}}
File: {{$frame.File}}@{{$frame.Line}}{{end}}
`))
func newGenericError(err error, c ErrorCode) Error {
if le, ok := err.(Error); ok {
return le
}
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: c,
Stack: stacktrace.Capture(1),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
func newSystemError(err error) Error {
if le, ok := err.(Error); ok {
return le
}
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Stack: stacktrace.Capture(1),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Message)
}
func (e *genericError) Code() ErrorCode {
return e.ECode
}
func (e *genericError) Detail(w io.Writer) error {
return errorTemplate.Execute(w, e)
}

View File

@@ -1,14 +0,0 @@
package libcontainer
import (
"fmt"
"io/ioutil"
"testing"
)
func TestErrorDetail(t *testing.T) {
err := newGenericError(fmt.Errorf("test error"), SystemError)
if derr := err.Detail(ioutil.Discard); derr != nil {
t.Fatal(derr)
}
}

View File

@@ -1,330 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"os"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/seccomp"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils"
)
type initType string
const (
initSetns initType = "setns"
initStandard initType = "standard"
)
type pid struct {
Pid int `json:"pid"`
}
// network is an internal struct used to setup container networks.
type network struct {
configs.Network
// TempVethPeerName is a unique tempory veth peer name that was placed into
// the container's namespace.
TempVethPeerName string `json:"temp_veth_peer_name"`
}
// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities []string `json:"capabilities"`
User string `json:"user"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
}
if err := populateProcessEnvironment(config.Env); err != nil {
return nil, err
}
switch t {
case initSetns:
return &linuxSetnsInit{
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
parentPid: syscall.Getppid(),
config: config,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
}
// populateProcessEnvironment loads the provided environment variables into the
// current processes's environment.
func populateProcessEnvironment(env []string) error {
for _, pair := range env {
p := strings.SplitN(pair, "=", 2)
if len(p) < 2 {
return fmt.Errorf("invalid environment '%v'", pair)
}
if err := os.Setenv(p[0], p[1]); err != nil {
return err
}
}
return nil
}
// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
func finalizeNamespace(config *initConfig) error {
// Ensure that all unwanted fds we may have accidentally
// inherited are marked close-on-exec so they stay out of the
// container
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
return err
}
capabilities := config.Config.Capabilities
if config.Capabilities != nil {
capabilities = config.Capabilities
}
w, err := newCapWhitelist(capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.dropBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
if err := system.SetKeepCaps(); err != nil {
return err
}
if err := setupUser(config); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil {
return err
}
// drop all other capabilities
if err := w.drop(); err != nil {
return err
}
if config.Cwd != "" {
if err := syscall.Chdir(config.Cwd); err != nil {
return err
}
}
return nil
}
// joinExistingNamespaces gets all the namespace paths specified for the container and
// does a setns on the namespace fd so that the current process joins the namespace.
func joinExistingNamespaces(namespaces []configs.Namespace) error {
for _, ns := range namespaces {
if ns.Path != "" {
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
if err != nil {
return err
}
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
f.Close()
if err != nil {
return err
}
}
}
return nil
}
// setupUser changes the groups, gid, and uid for the user inside the container
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}
var addGroups []int
if len(config.Config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return err
}
}
return nil
}
// setupNetwork sets up and initializes any network interface inside the container.
func setupNetwork(config *initConfig) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.initialize(config); err != nil {
return err
}
}
return nil
}
func setupRoute(config *configs.Config) error {
for _, config := range config.Routes {
if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
return err
}
}
return nil
}
func setupRlimits(config *configs.Config) error {
for _, rlimit := range config.Rlimits {
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
// killCgroupProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending a SIGKILL to each process then waiting for them to
// exit.
func killCgroupProcesses(m cgroups.Manager) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
}
pids, err := m.GetPids()
if err != nil {
m.Freeze(configs.Thawed)
return err
}
for _, pid := range pids {
if p, err := os.FindProcess(pid); err == nil {
procs = append(procs, p)
if err := p.Kill(); err != nil {
logrus.Warn(err)
}
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}
for _, p := range procs {
if _, err := p.Wait(); err != nil {
logrus.Warn(err)
}
}
return nil
}
func finalizeSeccomp(config *initConfig) error {
if config.Config.Seccomp == nil {
return nil
}
context := seccomp.New()
for _, s := range config.Config.Seccomp.Syscalls {
ss := &seccomp.Syscall{
Value: uint32(s.Value),
Action: seccompAction(s.Action),
}
if len(s.Args) > 0 {
ss.Args = seccompArgs(s.Args)
}
context.Add(ss)
}
return context.Load()
}
func seccompAction(a configs.Action) seccomp.Action {
switch a {
case configs.Kill:
return seccomp.Kill
case configs.Trap:
return seccomp.Trap
case configs.Allow:
return seccomp.Allow
}
return seccomp.Error(syscall.Errno(int(a)))
}
func seccompArgs(args []*configs.Arg) seccomp.Args {
var sa []seccomp.Arg
for _, a := range args {
sa = append(sa, seccomp.Arg{
Index: uint32(a.Index),
Op: seccompOperator(a.Op),
Value: uint(a.Value),
})
}
return seccomp.Args{sa}
}
func seccompOperator(o configs.Operator) seccomp.Operator {
switch o {
case configs.EqualTo:
return seccomp.EqualTo
case configs.NotEqualTo:
return seccomp.NotEqualTo
case configs.GreatherThan:
return seccomp.GreatherThan
case configs.LessThan:
return seccomp.LessThan
case configs.MaskEqualTo:
return seccomp.MaskEqualTo
}
return 0
}

View File

@@ -1,408 +0,0 @@
package netlink
import (
"net"
"strings"
"syscall"
"testing"
)
type testLink struct {
name string
linkType string
}
func addLink(t *testing.T, name string, linkType string) {
if err := NetworkLinkAdd(name, linkType); err != nil {
t.Fatalf("Unable to create %s link: %s", name, err)
}
}
func readLink(t *testing.T, name string) *net.Interface {
iface, err := net.InterfaceByName(name)
if err != nil {
t.Fatalf("Could not find %s interface: %s", name, err)
}
return iface
}
func deleteLink(t *testing.T, name string) {
if err := NetworkLinkDel(name); err != nil {
t.Fatalf("Unable to delete %s link: %s", name, err)
}
}
func upLink(t *testing.T, name string) {
iface := readLink(t, name)
if err := NetworkLinkUp(iface); err != nil {
t.Fatalf("Could not bring UP %#v interface: %s", iface, err)
}
}
func downLink(t *testing.T, name string) {
iface := readLink(t, name)
if err := NetworkLinkDown(iface); err != nil {
t.Fatalf("Could not bring DOWN %#v interface: %s", iface, err)
}
}
func ipAssigned(iface *net.Interface, ip net.IP) bool {
addrs, _ := iface.Addrs()
for _, addr := range addrs {
args := strings.SplitN(addr.String(), "/", 2)
if args[0] == ip.String() {
return true
}
}
return false
}
func TestNetworkLinkAddDel(t *testing.T) {
if testing.Short() {
return
}
testLinks := []testLink{
{"tstEth", "dummy"},
{"tstBr", "bridge"},
}
for _, tl := range testLinks {
addLink(t, tl.name, tl.linkType)
defer deleteLink(t, tl.name)
readLink(t, tl.name)
}
}
func TestNetworkLinkUpDown(t *testing.T) {
if testing.Short() {
return
}
tl := testLink{name: "tstEth", linkType: "dummy"}
addLink(t, tl.name, tl.linkType)
defer deleteLink(t, tl.name)
upLink(t, tl.name)
ifcAfterUp := readLink(t, tl.name)
if (ifcAfterUp.Flags & syscall.IFF_UP) != syscall.IFF_UP {
t.Fatalf("Could not bring UP %#v initerface", tl)
}
downLink(t, tl.name)
ifcAfterDown := readLink(t, tl.name)
if (ifcAfterDown.Flags & syscall.IFF_UP) == syscall.IFF_UP {
t.Fatalf("Could not bring DOWN %#v initerface", tl)
}
}
func TestNetworkSetMacAddress(t *testing.T) {
if testing.Short() {
return
}
tl := testLink{name: "tstEth", linkType: "dummy"}
macaddr := "22:ce:e0:99:63:6f"
addLink(t, tl.name, tl.linkType)
defer deleteLink(t, tl.name)
ifcBeforeSet := readLink(t, tl.name)
if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil {
t.Fatalf("Could not set %s MAC address on %#v interface: %s", macaddr, tl, err)
}
ifcAfterSet := readLink(t, tl.name)
if ifcAfterSet.HardwareAddr.String() != macaddr {
t.Fatalf("Could not set %s MAC address on %#v interface", macaddr, tl)
}
}
func TestNetworkSetMTU(t *testing.T) {
if testing.Short() {
return
}
tl := testLink{name: "tstEth", linkType: "dummy"}
mtu := 1400
addLink(t, tl.name, tl.linkType)
defer deleteLink(t, tl.name)
ifcBeforeSet := readLink(t, tl.name)
if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil {
t.Fatalf("Could not set %d MTU on %#v interface: %s", mtu, tl, err)
}
ifcAfterSet := readLink(t, tl.name)
if ifcAfterSet.MTU != mtu {
t.Fatalf("Could not set %d MTU on %#v interface", mtu, tl)
}
}
func TestNetworkSetMasterNoMaster(t *testing.T) {
if testing.Short() {
return
}
master := testLink{"tstBr", "bridge"}
slave := testLink{"tstEth", "dummy"}
testLinks := []testLink{master, slave}
for _, tl := range testLinks {
addLink(t, tl.name, tl.linkType)
defer deleteLink(t, tl.name)
upLink(t, tl.name)
}
masterIfc := readLink(t, master.name)
slaveIfc := readLink(t, slave.name)
if err := NetworkSetMaster(slaveIfc, masterIfc); err != nil {
t.Fatalf("Could not set %#v to be the master of %#v: %s", master, slave, err)
}
// Trying to figure out a way to test which will not break on RHEL6.
// We could check for existence of /sys/class/net/tstEth/upper_tstBr
// which should point to the ../tstBr which is the UPPER device i.e. network bridge
if err := NetworkSetNoMaster(slaveIfc); err != nil {
t.Fatalf("Could not UNset %#v master of %#v: %s", master, slave, err)
}
}
func TestNetworkChangeName(t *testing.T) {
if testing.Short() {
return
}
tl := testLink{"tstEth", "dummy"}
newName := "newTst"
addLink(t, tl.name, tl.linkType)
linkIfc := readLink(t, tl.name)
if err := NetworkChangeName(linkIfc, newName); err != nil {
deleteLink(t, tl.name)
t.Fatalf("Could not change %#v interface name to %s: %s", tl, newName, err)
}
readLink(t, newName)
deleteLink(t, newName)
}
func TestNetworkLinkAddVlan(t *testing.T) {
if testing.Short() {
return
}
tl := struct {
name string
id uint16
}{
name: "tstVlan",
id: 32,
}
masterLink := testLink{"tstEth", "dummy"}
addLink(t, masterLink.name, masterLink.linkType)
defer deleteLink(t, masterLink.name)
if err := NetworkLinkAddVlan(masterLink.name, tl.name, tl.id); err != nil {
t.Fatalf("Unable to create %#v VLAN interface: %s", tl, err)
}
readLink(t, tl.name)
}
func TestNetworkLinkAddMacVlan(t *testing.T) {
if testing.Short() {
return
}
tl := struct {
name string
mode string
}{
name: "tstVlan",
mode: "private",
}
masterLink := testLink{"tstEth", "dummy"}
addLink(t, masterLink.name, masterLink.linkType)
defer deleteLink(t, masterLink.name)
if err := NetworkLinkAddMacVlan(masterLink.name, tl.name, tl.mode); err != nil {
t.Fatalf("Unable to create %#v MAC VLAN interface: %s", tl, err)
}
readLink(t, tl.name)
}
func TestNetworkLinkAddMacVtap(t *testing.T) {
if testing.Short() {
return
}
tl := struct {
name string
mode string
}{
name: "tstVtap",
mode: "private",
}
masterLink := testLink{"tstEth", "dummy"}
addLink(t, masterLink.name, masterLink.linkType)
defer deleteLink(t, masterLink.name)
if err := NetworkLinkAddMacVtap(masterLink.name, tl.name, tl.mode); err != nil {
t.Fatalf("Unable to create %#v MAC VTAP interface: %s", tl, err)
}
readLink(t, tl.name)
}
func TestAddDelNetworkIp(t *testing.T) {
if testing.Short() {
return
}
ifaceName := "lo"
ip := net.ParseIP("127.0.1.1")
mask := net.IPv4Mask(255, 255, 255, 255)
ipNet := &net.IPNet{IP: ip, Mask: mask}
iface, err := net.InterfaceByName(ifaceName)
if err != nil {
t.Skip("No 'lo' interface; skipping tests")
}
if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil {
t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err)
}
if !ipAssigned(iface, ip) {
t.Fatalf("Could not locate address '%s' in lo address list.", ip.String())
}
if err := NetworkLinkDelIp(iface, ip, ipNet); err != nil {
t.Fatalf("Could not delete IP address %s from interface %#v: %s", ip.String(), iface, err)
}
if ipAssigned(iface, ip) {
t.Fatalf("Located address '%s' in lo address list after removal.", ip.String())
}
}
func TestAddRouteSourceSelection(t *testing.T) {
tstIp := "127.1.1.1"
tl := testLink{name: "tstEth", linkType: "dummy"}
addLink(t, tl.name, tl.linkType)
defer deleteLink(t, tl.name)
ip := net.ParseIP(tstIp)
mask := net.IPv4Mask(255, 255, 255, 255)
ipNet := &net.IPNet{IP: ip, Mask: mask}
iface, err := net.InterfaceByName(tl.name)
if err != nil {
t.Fatalf("Lost created link %#v", tl)
}
if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil {
t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err)
}
upLink(t, tl.name)
defer downLink(t, tl.name)
if err := AddRoute("127.0.0.0/8", tstIp, "", tl.name); err != nil {
t.Fatalf("Failed to add route with source address")
}
}
func TestCreateVethPair(t *testing.T) {
if testing.Short() {
return
}
var (
name1 = "veth1"
name2 = "veth2"
)
if err := NetworkCreateVethPair(name1, name2, 0); err != nil {
t.Fatalf("Could not create veth pair %s %s: %s", name1, name2, err)
}
defer NetworkLinkDel(name1)
readLink(t, name1)
readLink(t, name2)
}
//
// netlink package tests which do not use RTNETLINK
//
func TestCreateBridgeWithMac(t *testing.T) {
if testing.Short() {
return
}
name := "testbridge"
if err := CreateBridge(name, true); err != nil {
t.Fatal(err)
}
if _, err := net.InterfaceByName(name); err != nil {
t.Fatal(err)
}
// cleanup and tests
if err := DeleteBridge(name); err != nil {
t.Fatal(err)
}
if _, err := net.InterfaceByName(name); err == nil {
t.Fatalf("expected error getting interface because %s bridge was deleted", name)
}
}
func TestSetMacAddress(t *testing.T) {
if testing.Short() {
return
}
name := "testmac"
mac := randMacAddr()
if err := NetworkLinkAdd(name, "bridge"); err != nil {
t.Fatal(err)
}
defer NetworkLinkDel(name)
if err := SetMacAddress(name, mac); err != nil {
t.Fatal(err)
}
iface, err := net.InterfaceByName(name)
if err != nil {
t.Fatal(err)
}
if iface.HardwareAddr.String() != mac {
t.Fatalf("mac address %q does not match %q", iface.HardwareAddr, mac)
}
}

View File

@@ -1,248 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"net"
"path/filepath"
"strconv"
"strings"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/utils"
)
var strategies = map[string]networkStrategy{
"veth": &veth{},
"loopback": &loopback{},
}
// networkStrategy represents a specific network configuration for
// a container's networking stack
type networkStrategy interface {
create(*network, int) error
initialize(*network) error
detach(*configs.Network) error
attach(*configs.Network) error
}
// getStrategy returns the specific network strategy for the
// provided type.
func getStrategy(tpe string) (networkStrategy, error) {
s, exists := strategies[tpe]
if !exists {
return nil, fmt.Errorf("unknown strategy type %q", tpe)
}
return s, nil
}
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
out := &NetworkInterface{Name: interfaceName}
// This can happen if the network runtime information is missing - possible if the
// container was created by an old version of libcontainer.
if interfaceName == "" {
return out, nil
}
type netStatsPair struct {
// Where to write the output.
Out *uint64
// The network stats file to read.
File string
}
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
netStats := []netStatsPair{
{Out: &out.RxBytes, File: "tx_bytes"},
{Out: &out.RxPackets, File: "tx_packets"},
{Out: &out.RxErrors, File: "tx_errors"},
{Out: &out.RxDropped, File: "tx_dropped"},
{Out: &out.TxBytes, File: "rx_bytes"},
{Out: &out.TxPackets, File: "rx_packets"},
{Out: &out.TxErrors, File: "rx_errors"},
{Out: &out.TxDropped, File: "rx_dropped"},
}
for _, netStat := range netStats {
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
if err != nil {
return nil, err
}
*(netStat.Out) = data
}
return out, nil
}
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
if err != nil {
return 0, err
}
return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
}
// loopback is a network strategy that provides a basic loopback device
type loopback struct {
}
func (l *loopback) create(n *network, nspid int) error {
return nil
}
func (l *loopback) initialize(config *network) error {
iface, err := net.InterfaceByName("lo")
if err != nil {
return err
}
return netlink.NetworkLinkUp(iface)
}
func (l *loopback) attach(n *configs.Network) (err error) {
return nil
}
func (l *loopback) detach(n *configs.Network) (err error) {
return nil
}
// veth is a network strategy that uses a bridge and creates
// a veth pair, one that is attached to the bridge on the host and the other
// is placed inside the container's namespace
type veth struct {
}
func (v *veth) detach(n *configs.Network) (err error) {
bridge, err := net.InterfaceByName(n.Bridge)
if err != nil {
return err
}
host, err := net.InterfaceByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.DelFromBridge(host, bridge); err != nil {
return err
}
return nil
}
// attach a container network interface to an external network
func (v *veth) attach(n *configs.Network) (err error) {
bridge, err := net.InterfaceByName(n.Bridge)
if err != nil {
return err
}
host, err := net.InterfaceByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.AddToBridge(host, bridge); err != nil {
return err
}
if err := netlink.NetworkSetMTU(host, n.Mtu); err != nil {
return err
}
if n.HairpinMode {
if err := netlink.SetHairpinMode(host, true); err != nil {
return err
}
}
if err := netlink.NetworkLinkUp(host); err != nil {
return err
}
return nil
}
func (v *veth) create(n *network, nspid int) (err error) {
tmpName, err := v.generateTempPeerName()
if err != nil {
return err
}
n.TempVethPeerName = tmpName
defer func() {
if err != nil {
netlink.NetworkLinkDel(n.HostInterfaceName)
netlink.NetworkLinkDel(n.TempVethPeerName)
}
}()
if n.Bridge == "" {
return fmt.Errorf("bridge is not specified")
}
if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil {
return err
}
if err := v.attach(&n.Network); err != nil {
return err
}
child, err := net.InterfaceByName(n.TempVethPeerName)
if err != nil {
return err
}
return netlink.NetworkSetNsPid(child, nspid)
}
func (v *veth) generateTempPeerName() (string, error) {
return utils.GenerateRandomName("veth", 7)
}
func (v *veth) initialize(config *network) error {
peer := config.TempVethPeerName
if peer == "" {
return fmt.Errorf("peer is not specified")
}
child, err := net.InterfaceByName(peer)
if err != nil {
return err
}
if err := netlink.NetworkLinkDown(child); err != nil {
return err
}
if err := netlink.NetworkChangeName(child, config.Name); err != nil {
return err
}
// get the interface again after we changed the name as the index also changes.
if child, err = net.InterfaceByName(config.Name); err != nil {
return err
}
if config.MacAddress != "" {
if err := netlink.NetworkSetMacAddress(child, config.MacAddress); err != nil {
return err
}
}
ip, ipNet, err := net.ParseCIDR(config.Address)
if err != nil {
return err
}
if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
return err
}
if config.IPv6Address != "" {
if ip, ipNet, err = net.ParseCIDR(config.IPv6Address); err != nil {
return err
}
if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
return err
}
}
if err := netlink.NetworkSetMTU(child, config.Mtu); err != nil {
return err
}
if err := netlink.NetworkLinkUp(child); err != nil {
return err
}
if config.Gateway != "" {
if err := netlink.AddDefaultGw(config.Gateway, config.Name); err != nil {
return err
}
}
if config.IPv6Gateway != "" {
if err := netlink.AddDefaultGw(config.IPv6Gateway, config.Name); err != nil {
return err
}
}
return nil
}

View File

@@ -1,63 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
const oomCgroupName = "memory"
// notifyOnOOM returns channel on which you can expect event about OOM,
// if process died without OOM this channel will be closed.
// s is current *libcontainer.State for container.
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
dir := paths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
}
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
if err != nil {
return nil, err
}
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
oomControl.Close()
return nil, syserr
}
eventfd := os.NewFile(fd, "eventfd")
eventControlPath := filepath.Join(dir, "cgroup.event_control")
data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
eventfd.Close()
oomControl.Close()
return nil, err
}
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
oomControl.Close()
}()
buf := make([]byte, 8)
for {
if _, err := eventfd.Read(buf); err != nil {
return
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
return
}
ch <- struct{}{}
}
}()
return ch, nil
}

View File

@@ -1,96 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"testing"
"time"
)
func TestNotifyOnOOM(t *testing.T) {
memoryPath, err := ioutil.TempDir("", "testnotifyoom-")
if err != nil {
t.Fatal(err)
}
oomPath := filepath.Join(memoryPath, "memory.oom_control")
eventPath := filepath.Join(memoryPath, "cgroup.event_control")
if err := ioutil.WriteFile(oomPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
var eventFd, oomControlFd int
paths := map[string]string{
"memory": memoryPath,
}
ooms, err := notifyOnOOM(paths)
if err != nil {
t.Fatal("expected no error, got:", err)
}
data, err := ioutil.ReadFile(eventPath)
if err != nil {
t.Fatal("couldn't read event control file:", err)
}
if _, err := fmt.Sscanf(string(data), "%d %d", &eventFd, &oomControlFd); err != nil {
t.Fatalf("invalid control data %q: %s", data, err)
}
// re-open the eventfd
efd, err := syscall.Dup(eventFd)
if err != nil {
t.Fatal("unable to reopen eventfd:", err)
}
defer syscall.Close(efd)
if err != nil {
t.Fatal("unable to dup event fd:", err)
}
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, 1)
if _, err := syscall.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}
select {
case <-ooms:
case <-time.After(100 * time.Millisecond):
t.Fatal("no notification on oom channel after 100ms")
}
// simulate what happens when a cgroup is destroyed by cleaning up and then
// writing to the eventfd.
if err := os.RemoveAll(memoryPath); err != nil {
t.Fatal(err)
}
if _, err := syscall.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}
// give things a moment to shut down
select {
case _, ok := <-ooms:
if ok {
t.Fatal("expected no oom to be triggered")
}
case <-time.After(100 * time.Millisecond):
}
if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(oomControlFd), syscall.F_GETFD, 0); err != syscall.EBADF {
t.Error("expected oom control to be closed")
}
if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(eventFd), syscall.F_GETFD, 0); err != syscall.EBADF {
t.Error("expected event fd to be closed")
}
}

View File

@@ -1,89 +0,0 @@
package libcontainer
import (
"fmt"
"io"
"math"
"os"
)
type processOperations interface {
wait() (*os.ProcessState, error)
signal(sig os.Signal) error
pid() int
}
// Process specifies the configuration and IO for a process inside
// a container.
type Process struct {
// The command to be run followed by any arguments.
Args []string
// Env specifies the environment variables for the process.
Env []string
// User will set the uid and gid of the executing process running inside the container
// local to the container's user and group configuration.
User string
// Cwd will change the processes current working directory inside the container's rootfs.
Cwd string
// Stdin is a pointer to a reader which provides the standard input stream.
Stdin io.Reader
// Stdout is a pointer to a writer which receives the standard output stream.
Stdout io.Writer
// Stderr is a pointer to a writer which receives the standard error stream.
Stderr io.Writer
// ExtraFiles specifies additional open files to be inherited by the container
ExtraFiles []*os.File
// consolePath is the path to the console allocated to the container.
consolePath string
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
Capabilities []string
ops processOperations
}
// Wait waits for the process to exit.
// Wait releases any resources associated with the Process
func (p Process) Wait() (*os.ProcessState, error) {
if p.ops == nil {
return nil, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
}
return p.ops.wait()
}
// Pid returns the process ID
func (p Process) Pid() (int, error) {
// math.MinInt32 is returned here, because it's invalid value
// for the kill() system call.
if p.ops == nil {
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
}
return p.ops.pid(), nil
}
// Signal sends a signal to the Process.
func (p Process) Signal(sig os.Signal) error {
if p.ops == nil {
return newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
}
return p.ops.signal(sig)
}
// NewConsole creates new console for process and returns it
func (p *Process) NewConsole(rootuid int) (Console, error) {
console, err := newConsole(rootuid, rootuid)
if err != nil {
return nil, err
}
p.consolePath = console.Path()
return console, nil
}

View File

@@ -1,303 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"errors"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/system"
)
type parentProcess interface {
// pid returns the pid for the running process.
pid() int
// start starts the process execution.
start() error
// send a SIGKILL to the process and wait for the exit.
terminate() error
// wait waits on the process returning the process state.
wait() (*os.ProcessState, error)
// startTime return's the process start time.
startTime() (string, error)
signal(os.Signal) error
externalDescriptors() []string
setExternalDescriptors(fds []string)
}
type setnsProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
cgroupPaths map[string]string
config *initConfig
fds []string
}
func (p *setnsProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *setnsProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.cmd.Process.Pid, s)
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
if err = p.execSetns(); err != nil {
return newSystemError(err)
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.cmd.Process.Pid); err != nil {
return newSystemError(err)
}
}
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
return newSystemError(err)
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemError(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err)
}
if ierr != nil {
return newSystemError(ierr)
}
return nil
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
func (p *setnsProcess) execSetns() error {
err := p.cmd.Start()
p.childPipe.Close()
if err != nil {
return newSystemError(err)
}
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return newSystemError(err)
}
if !status.Success() {
p.cmd.Wait()
return newSystemError(&exec.ExitError{ProcessState: status})
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemError(err)
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
}
p.cmd.Process = process
return nil
}
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
// avoid the process becomming a zombie.
func (p *setnsProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *setnsProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
if err != nil {
return p.cmd.ProcessState, err
}
return p.cmd.ProcessState, nil
}
func (p *setnsProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *setnsProcess) externalDescriptors() []string {
return p.fds
}
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
config *initConfig
manager cgroups.Manager
container *linuxContainer
fds []string
}
func (p *initProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *initProcess) externalDescriptors() []string {
return p.fds
}
func (p *initProcess) start() error {
defer p.parentPipe.Close()
err := p.cmd.Start()
p.childPipe.Close()
if err != nil {
return newSystemError(err)
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
if err != nil {
return newSystemError(err)
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemError(err)
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return newSystemError(err)
}
if err := p.sendConfig(); err != nil {
return newSystemError(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err)
}
if ierr != nil {
return newSystemError(ierr)
}
return nil
}
func (p *initProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
if err != nil {
return p.cmd.ProcessState, err
}
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 {
killCgroupProcesses(p.manager)
}
return p.cmd.ProcessState, nil
}
func (p *initProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *initProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *initProcess) sendConfig() error {
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
return err
}
// shutdown writes for the parent side of the pipe
return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR)
}
func (p *initProcess) createNetworkInterfaces() error {
for _, config := range p.config.Config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
n := &network{
Network: *config,
}
if err := strategy.create(n, p.pid()); err != nil {
return err
}
p.config.Networks = append(p.config.Networks, n)
}
return nil
}
func (p *initProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.cmd.Process.Pid, s)
}
func (p *initProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func getPipeFds(pid int) ([]string, error) {
var fds []string
fds = make([]string, 3)
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
for i := 0; i < 3; i++ {
f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f)
if err != nil {
return fds, err
}
fds[i] = target
}
return fds, nil
}

View File

@@ -1,118 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/docker/libcontainer/system"
)
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
var (
err error
)
proc, err := os.FindProcess(pid)
if err != nil {
return nil, err
}
started, err := system.GetProcessStartTime(pid)
if err != nil {
return nil, err
}
return &restoredProcess{
proc: proc,
processStartTime: started,
fds: fds,
}, nil
}
type restoredProcess struct {
proc *os.Process
processStartTime string
fds []string
}
func (p *restoredProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *restoredProcess) pid() int {
return p.proc.Pid
}
func (p *restoredProcess) terminate() error {
err := p.proc.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *restoredProcess) wait() (*os.ProcessState, error) {
// TODO: how do we wait on the actual process?
// maybe use --exec-cmd in criu
st, err := p.proc.Wait()
if err != nil {
return nil, err
}
return st, nil
}
func (p *restoredProcess) startTime() (string, error) {
return p.processStartTime, nil
}
func (p *restoredProcess) signal(s os.Signal) error {
return p.proc.Signal(s)
}
func (p *restoredProcess) externalDescriptors() []string {
return p.fds
}
func (p *restoredProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
// nonChildProcess represents a process where the calling process is not
// the parent process. This process is created when a factory loads a container from
// a persisted state.
type nonChildProcess struct {
processPid int
processStartTime string
fds []string
}
func (p *nonChildProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *nonChildProcess) pid() int {
return p.processPid
}
func (p *nonChildProcess) terminate() error {
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
}
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *nonChildProcess) startTime() (string, error) {
return p.processStartTime, nil
}
func (p *nonChildProcess) signal(s os.Signal) error {
return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError)
}
func (p *nonChildProcess) externalDescriptors() []string {
return p.fds
}
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}

View File

@@ -1,462 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/docker/docker/pkg/symlink"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/label"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemError(err)
}
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
if err := mountCmd(precmd); err != nil {
return newSystemError(err)
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemError(err)
}
for _, postcmd := range m.PostmountCmds {
if err := mountCmd(postcmd); err != nil {
return newSystemError(err)
}
}
}
if err := createDevices(config); err != nil {
return newSystemError(err)
}
if err := setupPtmx(config, console); err != nil {
return newSystemError(err)
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemError(err)
}
if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemError(err)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
} else {
err = pivotRoot(config.Rootfs, config.PivotDir)
}
if err != nil {
return newSystemError(err)
}
if err := reOpenDevNull(config.Rootfs); err != nil {
return newSystemError(err)
}
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemError(err)
}
}
syscall.Umask(0022)
return nil
}
func mountCmd(cmd configs.Command) error {
command := exec.Command(cmd.Path, cmd.Args[:]...)
command.Env = cmd.Env
command.Dir = cmd.Dir
if out, err := command.CombinedOutput(); err != nil {
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
}
return nil
}
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
var (
dest = m.Destination
data = label.FormatMountLabel(m.Data, mountLabel)
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
switch m.Device {
case "proc", "sysfs":
if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) {
return err
}
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), "")
case "mqueue":
if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) {
return err
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), ""); err != nil {
return err
}
return label.SetFileLabel(dest, mountLabel)
case "tmpfs":
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) {
return err
}
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
return err
}
if stat != nil {
if err = os.Chmod(dest, stat.Mode()); err != nil {
return err
}
}
return nil
case "devpts":
if err := os.MkdirAll(dest, 0755); err != nil && !os.IsExist(err) {
return err
}
return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
case "bind":
stat, err := os.Stat(m.Source)
if err != nil {
// error out if the source of a bind mount does not exist as we will be
// unable to bind anything to it.
return err
}
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
if dest, err = symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
return err
}
if m.Flags&syscall.MS_RDONLY != 0 {
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
return err
}
}
if m.Relabel != "" {
if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil {
return err
}
}
if m.Flags&syscall.MS_PRIVATE != 0 {
if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
return err
}
}
case "cgroup":
mounts, err := cgroups.GetCgroupMounts()
if err != nil {
return err
}
var binds []*configs.Mount
for _, mm := range mounts {
dir, err := mm.GetThisCgroupDir()
if err != nil {
return err
}
binds = append(binds, &configs.Mount{
Device: "bind",
Source: filepath.Join(mm.Mountpoint, dir),
Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
Flags: syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY,
})
}
tmpfs := &configs.Mount{
Device: "tmpfs",
Destination: m.Destination,
Flags: syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV,
}
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
return err
}
for _, b := range binds {
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
return err
}
}
default:
return fmt.Errorf("unknown mount device %q to %q", m.Device, m.Destination)
}
return nil
}
// checkMountDestination checks to ensure that the mount destination is not over the
// top of /proc or /sys.
// dest is required to be an abs path and have any symlinks resolved before calling this function.
func checkMountDestination(rootfs, dest string) error {
if filepath.Clean(rootfs) == filepath.Clean(dest) {
return fmt.Errorf("mounting into / is prohibited")
}
invalidDestinations := []string{
"/proc",
}
for _, invalid := range invalidDestinations {
path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
if err != nil {
return err
}
if path == "." || !strings.HasPrefix(path, "..") {
return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
}
}
return nil
}
func setupDevSymlinks(rootfs string) error {
var links = [][2]string{
{"/proc/self/fd", "/dev/fd"},
{"/proc/self/fd/0", "/dev/stdin"},
{"/proc/self/fd/1", "/dev/stdout"},
{"/proc/self/fd/2", "/dev/stderr"},
}
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
// in /dev if it exists in /proc.
if _, err := os.Stat("/proc/kcore"); err == nil {
links = append(links, [2]string{"/proc/kcore", "/dev/kcore"})
}
for _, link := range links {
var (
src = link[0]
dst = filepath.Join(rootfs, link[1])
)
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
return fmt.Errorf("symlink %s %s %s", src, dst, err)
}
}
return nil
}
// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
// this method will make them point to `/dev/null` in this container's rootfs. This
// needs to be called after we chroot/pivot into the container's rootfs so that any
// symlinks are resolved locally.
func reOpenDevNull(rootfs string) error {
var stat, devNullStat syscall.Stat_t
file, err := os.Open("/dev/null")
if err != nil {
return fmt.Errorf("Failed to open /dev/null - %s", err)
}
defer file.Close()
if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
return err
}
for fd := 0; fd < 3; fd++ {
if err := syscall.Fstat(fd, &stat); err != nil {
return err
}
if stat.Rdev == devNullStat.Rdev {
// Close and re-open the fd.
if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil {
return err
}
}
}
return nil
}
// Create the device nodes in the container.
func createDevices(config *configs.Config) error {
oldMask := syscall.Umask(0000)
for _, node := range config.Devices {
// containers running in a user namespace are not allowed to mknod
// devices so we can just bind mount it from the host.
if err := createDeviceNode(config.Rootfs, node, config.Namespaces.Contains(configs.NEWUSER)); err != nil {
syscall.Umask(oldMask)
return err
}
}
syscall.Umask(oldMask)
return nil
}
// Creates the device node in the rootfs of the container.
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
dest := filepath.Join(rootfs, node.Path)
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
return err
}
if bind {
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "")
}
if err := mknodDevice(dest, node); err != nil {
if os.IsExist(err) {
return nil
}
return err
}
return nil
}
func mknodDevice(dest string, node *configs.Device) error {
fileMode := node.FileMode
switch node.Type {
case 'c':
fileMode |= syscall.S_IFCHR
case 'b':
fileMode |= syscall.S_IFBLK
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
return err
}
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
}
func prepareRoot(config *configs.Config) error {
flag := syscall.MS_SLAVE | syscall.MS_REC
if config.Privatefs {
flag = syscall.MS_PRIVATE | syscall.MS_REC
}
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
return err
}
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
}
func setReadonly() error {
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}
func setupPtmx(config *configs.Config, console *linuxConsole) error {
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != nil {
return console.mount(config.Rootfs, config.MountLabel, 0, 0)
}
return nil
}
func pivotRoot(rootfs, pivotBaseDir string) error {
if pivotBaseDir == "" {
pivotBaseDir = "/"
}
tmpDir := filepath.Join(rootfs, pivotBaseDir)
if err := os.MkdirAll(tmpDir, 0755); err != nil {
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
}
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
}
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
// path to pivot dir now changed, update
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
}
return os.Remove(pivotDir)
}
func msMoveRoot(rootfs string) error {
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
return err
}
if err := syscall.Chroot("."); err != nil {
return err
}
return syscall.Chdir("/")
}
// createIfNotExists creates a file or a directory only if it does not already exist.
func createIfNotExists(path string, isDir bool) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
if isDir {
return os.MkdirAll(path, 0755)
}
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return err
}
f, err := os.OpenFile(path, os.O_CREATE, 0755)
if err != nil {
return err
}
f.Close()
}
}
return nil
}
// remountReadonly will bind over the top of an existing path and ensure that it is read-only.
func remountReadonly(path string) error {
for i := 0; i < 5; i++ {
if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) {
switch err {
case syscall.EINVAL:
// Probably not a mountpoint, use bind-mount
if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
return err
}
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
case syscall.EBUSY:
time.Sleep(100 * time.Millisecond)
continue
default:
return err
}
}
return nil
}
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
}
// maskFile bind mounts /dev/null over the top of the specified path inside a container
// to avoid security issues from processes reading information from non-namespace aware mounts ( proc/kcore ).
func maskFile(path string) error {
if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
return err
}
return nil
}
// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
func writeSystemProperty(key, value string) error {
keyPath := strings.Replace(key, ".", "/", -1)
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
}

View File

@@ -1,37 +0,0 @@
// +build linux
package libcontainer
import "testing"
func TestCheckMountDestOnProc(t *testing.T) {
dest := "/rootfs/proc/"
err := checkMountDestination("/rootfs", dest)
if err == nil {
t.Fatal("destination inside proc should return an error")
}
}
func TestCheckMountDestInSys(t *testing.T) {
dest := "/rootfs//sys/fs/cgroup"
err := checkMountDestination("/rootfs", dest)
if err != nil {
t.Fatal("destination inside /sys should not return an error")
}
}
func TestCheckMountDestFalsePositive(t *testing.T) {
dest := "/rootfs/sysfiles/fs/cgroup"
err := checkMountDestination("/rootfs", dest)
if err != nil {
t.Fatal(err)
}
}
func TestCheckMountRoot(t *testing.T) {
dest := "/rootfs"
err := checkMountDestination("/rootfs", dest)
if err == nil {
t.Fatal(err)
}
}

View File

@@ -1,35 +0,0 @@
// +build linux
package libcontainer
import (
"os"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/system"
)
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
config *initConfig
}
func (l *linuxSetnsInit) Init() error {
if err := setupRlimits(l.config.Config); err != nil {
return err
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
return err
}
if l.config.Config.ProcessLabel != "" {
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
return err
}
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View File

@@ -1,106 +0,0 @@
// +build linux
package libcontainer
import (
"os"
"syscall"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/system"
)
type linuxStandardInit struct {
parentPid int
config *initConfig
}
func (l *linuxStandardInit) Init() error {
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
return err
}
var console *linuxConsole
if l.config.Console != "" {
console = newConsoleFromPath(l.config.Console)
if err := console.dupStdio(); err != nil {
return err
}
}
if _, err := syscall.Setsid(); err != nil {
return err
}
if console != nil {
if err := system.Setctty(); err != nil {
return err
}
}
if err := setupNetwork(l.config); err != nil {
return err
}
if err := setupRoute(l.config.Config); err != nil {
return err
}
if err := setupRlimits(l.config.Config); err != nil {
return err
}
label.Init()
// InitializeMountNamespace() can be executed only for a new mount namespace
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := setupRootfs(l.config.Config, console); err != nil {
return err
}
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := syscall.Sethostname([]byte(hostname)); err != nil {
return err
}
}
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
return err
}
for key, value := range l.config.Config.SystemProperties {
if err := writeSystemProperty(key, value); err != nil {
return err
}
}
for _, path := range l.config.Config.ReadonlyPaths {
if err := remountReadonly(path); err != nil {
return err
}
}
for _, path := range l.config.Config.MaskPaths {
if err := maskFile(path); err != nil {
return err
}
}
pdeath, err := system.GetParentDeathSignal()
if err != nil {
return err
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
// finalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := pdeath.Restore(); err != nil {
return err
}
// compare the parent from the inital start of the init process and make sure that it did not change.
// if the parent changes that means it died and we were reparened to something else so we should
// just kill ourself and not cause problems for someone else.
if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
}
if err := finalizeSeccomp(l.config); err != nil {
return err
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View File

@@ -1,15 +0,0 @@
package libcontainer
type NetworkInterface struct {
// Name is the name of the network interface.
Name string
RxBytes uint64
RxPackets uint64
RxErrors uint64
RxDropped uint64
TxBytes uint64
TxPackets uint64
TxErrors uint64
TxDropped uint64
}

View File

@@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View File

@@ -1,8 +0,0 @@
package libcontainer
import "github.com/docker/libcontainer/cgroups"
type Stats struct {
Interfaces []*NetworkInterface
CgroupStats *cgroups.Stats
}

View File

@@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}