1
0
mirror of https://github.com/rancher/os.git synced 2025-09-02 07:15:41 +00:00

Bump libcompose and its dependencies

This commit is contained in:
Josh Curl
2016-05-23 17:22:40 -07:00
parent c18cd26e78
commit 50de80d09a
1109 changed files with 35052 additions and 125685 deletions

View File

@@ -1,2 +1,4 @@
vendor/pkg
runc
/runc
Godeps/_workspace/src/github.com/opencontainers/runc
man/man8

3
vendor/github.com/opencontainers/runc/Dockerfile generated vendored Normal file
View File

@@ -0,0 +1,3 @@
FROM runc_test
ADD . /go/src/github.com/opencontainers/runc
RUN make

View File

@@ -4,4 +4,6 @@ Victor Marmol <vmarmol@google.com> (@vmarmol)
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
Alexander Morozov <lk4d4@docker.com> (@LK4D4)
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
Andrey Vagin <avagin@gmail.com> (@avagin)
Andrey Vagin <avagin@virtuozzo.com> (@avagin)
Qiang Huang <h.huangqiang@huawei.com> (@hqhq)
Aleksa Sarai <asarai@suse.de> (@cyphar)

View File

@@ -1,11 +1,24 @@
RUNC_IMAGE=runc_dev
RUNC_TEST_IMAGE=runc_test
PROJECT=github.com/opencontainers/runc
TEST_DOCKERFILE=script/test_Dockerfile
BUILDTAGS=seccomp
RUNC_BUILD_PATH=/go/src/github.com/opencontainers/runc/runc
RUNC_INSTANCE=runc_dev
COMMIT=$(shell git rev-parse HEAD 2> /dev/null || true)
RUNC_LINK=$(CURDIR)/Godeps/_workspace/src/github.com/opencontainers/runc
export GOPATH:=$(CURDIR)/Godeps/_workspace:$(GOPATH)
.PHONY=dbuild
all:
go build -tags "$(BUILDTAGS)" -o runc .
ifneq ($(RUNC_LINK), $(wildcard $(RUNC_LINK)))
ln -sfn $(CURDIR) $(RUNC_LINK)
endif
go build -ldflags "-X main.gitCommit=${COMMIT}" -tags "$(BUILDTAGS)" -o runc .
static:
CGO_ENABLED=1 go build -tags "$(BUILDTAGS) cgo static_build" -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT}" -o runc .
vet:
go get golang.org/x/tools/cmd/vet
@@ -18,17 +31,27 @@ runctestimage:
docker build -t $(RUNC_TEST_IMAGE) -f $(TEST_DOCKERFILE) .
test: runctestimage
docker run -e TESTFLAGS --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_TEST_IMAGE) make localtest
docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_TEST_IMAGE) make localtest
tests/sniffTest
localtest:
localtest: all
go test -tags "$(BUILDTAGS)" ${TESTFLAGS} -v ./...
dbuild: runctestimage
docker build -t $(RUNC_IMAGE) .
docker create --name=$(RUNC_INSTANCE) $(RUNC_IMAGE)
docker cp $(RUNC_INSTANCE):$(RUNC_BUILD_PATH) .
docker rm $(RUNC_INSTANCE)
install:
cp runc /usr/local/bin/runc
uninstall:
rm -f /usr/local/bin/runc
clean:
rm runc
rm -f runc
rm -f $(RUNC_LINK)
validate: vet
script/validate-gofmt

View File

@@ -1,3 +1,5 @@
[![Build Status](https://jenkins.dockerproject.org/buildStatus/icon?job=runc Master)](https://jenkins.dockerproject.org/job/runc Master)
## runc
`runc` is a CLI tool for spawning and running containers according to the OCF specification.
@@ -5,9 +7,8 @@
## State of the project
Currently `runc` is an implementation of the OCI specification. We are currently sprinting
to have a v1 of the spec out within a quick timeframe of a few weeks, ~July 2015,
so the `runc` config format will be constantly changing until
the spec is finalized. However, we encourage you to try out the tool and give feedback.
to have a v1 of the spec out. So the `runc` config format will be constantly changing until
the spec is finalized. However, we encourage you to try out the tool and give feedback.
### OCF
@@ -64,9 +65,11 @@ You can also run specific test cases by:
### Using:
To run a container, execute `runc start` in the bundle's root directory:
To run a container with the id "test", execute `runc start` with the containers id as arg one
in the bundle's root directory:
```bash
runc start
runc start test
/ $ ps
PID USER COMMAND
1 daemon sh
@@ -76,296 +79,10 @@ PID USER COMMAND
### OCI Container JSON Format:
Below are sample `config.json` and `runtime.json` configuration files. It assumes that
the file-system is found in a directory called `rootfs` and there is a
user with uid and gid of `0` defined within that file-system.
`config.json`:
```json
{
"version": "0.1.0",
"platform": {
"os": "linux",
"arch": "amd64"
},
"process": {
"terminal": true,
"user": {
"uid": 0,
"gid": 0,
"additionalGids": null
},
"args": [
"sh"
],
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm"
],
"cwd": ""
},
"root": {
"path": "rootfs",
"readonly": true
},
"hostname": "shell",
"mounts": [
{
"name": "proc",
"path": "/proc"
},
{
"name": "dev",
"path": "/dev"
},
{
"name": "devpts",
"path": "/dev/pts"
},
{
"name": "shm",
"path": "/dev/shm"
},
{
"name": "mqueue",
"path": "/dev/mqueue"
},
{
"name": "sysfs",
"path": "/sys"
},
{
"name": "cgroup",
"path": "/sys/fs/cgroup"
}
],
"linux": {
"capabilities": [
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE"
]
}
}
```
`runtime.json`:
```json
{
"mounts": {
"proc": {
"type": "proc",
"source": "proc",
"options": null
},
"dev": {
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
},
"devpts": {
"type": "devpts",
"source": "devpts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
]
},
"shm": {
"type": "tmpfs",
"source": "shm",
"options": [
"nosuid",
"noexec",
"nodev",
"mode=1777",
"size=65536k"
]
},
"mqueue": {
"type": "mqueue",
"source": "mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
"sysfs": {
"type": "sysfs",
"source": "sysfs",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
"cgroup": {
"type": "cgroup",
"source": "cgroup",
"options": [
"nosuid",
"noexec",
"nodev",
"relatime",
"ro"
]
}
},
"hooks": {
"prestart": null,
"poststop": null
},
"linux": {
"uidMappings": null,
"gidMappings": null,
"rlimits": [
{
"type": "RLIMIT_NOFILE",
"hard": 1024,
"soft": 1024
}
],
"sysctl": null,
"resources": {
"disableOOMKiller": false,
"memory": {
"limit": 0,
"reservation": 0,
"swap": 0,
"kernel": 0,
"swappiness": -1
},
"cpu": {
"shares": 0,
"quota": 0,
"period": 0,
"realtimeRuntime": 0,
"realtimePeriod": 0,
"cpus": "",
"mems": ""
},
"pids": {
"limit": 0
},
"blockIO": {
"blkioWeight": 0,
"blkioWeightDevice": "",
"blkioThrottleReadBpsDevice": "",
"blkioThrottleWriteBpsDevice": "",
"blkioThrottleReadIopsDevice": "",
"blkioThrottleWriteIopsDevice": ""
},
"hugepageLimits": null,
"network": {
"classId": "",
"priorities": null
}
},
"cgroupsPath": "",
"namespaces": [
{
"type": "pid",
"path": ""
},
{
"type": "network",
"path": ""
},
{
"type": "ipc",
"path": ""
},
{
"type": "uts",
"path": ""
},
{
"type": "mount",
"path": ""
}
],
"devices": [
{
"path": "/dev/null",
"type": 99,
"major": 1,
"minor": 3,
"permissions": "rwm",
"fileMode": 438,
"uid": 0,
"gid": 0
},
{
"path": "/dev/random",
"type": 99,
"major": 1,
"minor": 8,
"permissions": "rwm",
"fileMode": 438,
"uid": 0,
"gid": 0
},
{
"path": "/dev/full",
"type": 99,
"major": 1,
"minor": 7,
"permissions": "rwm",
"fileMode": 438,
"uid": 0,
"gid": 0
},
{
"path": "/dev/tty",
"type": 99,
"major": 5,
"minor": 0,
"permissions": "rwm",
"fileMode": 438,
"uid": 0,
"gid": 0
},
{
"path": "/dev/zero",
"type": 99,
"major": 1,
"minor": 5,
"permissions": "rwm",
"fileMode": 438,
"uid": 0,
"gid": 0
},
{
"path": "/dev/urandom",
"type": 99,
"major": 1,
"minor": 9,
"permissions": "rwm",
"fileMode": 438,
"uid": 0,
"gid": 0
}
],
"apparmorProfile": "",
"selinuxProcessLabel": "",
"seccomp": {
"defaultAction": "SCMP_ACT_ALLOW",
"syscalls": []
},
"rootfsPropagation": ""
}
}
```
OCI container JSON format is based on OCI [specs](https://github.com/opencontainers/specs).
You can generate JSON files by using `runc spec`.
It assumes that the file-system is found in a directory called
`rootfs` and there is a user with uid and gid of `0` defined within that file-system.
### Examples:
@@ -380,11 +97,10 @@ To test using Docker's `busybox` image follow these steps:
mkdir rootfs
tar -C rootfs -xf busybox.tar
```
* Create `config.json` and `runtime.json` using the example from above. You can also
generate a spec using `runc spec`, which will create those files for you.
* Create `config.json` by using `runc spec`.
* Execute `runc start` and you should be placed into a shell where you can run `ps`:
```
$ runc start
$ runc start test
/ # ps
PID USER COMMAND
1 root sh
@@ -393,6 +109,10 @@ PID USER COMMAND
#### Using runc with systemd
To use runc with systemd, you can create a unit file
`/usr/lib/systemd/system/minecraft.service` as below (edit your
own Description or WorkingDirectory or service name as you need).
```service
[Unit]
Description=Minecraft Build Server
@@ -402,10 +122,23 @@ After=network.target
[Service]
CPUQuota=200%
MemoryLimit=1536M
ExecStart=/usr/local/bin/runc
ExecStart=/usr/local/bin/runc start minecraft
Restart=on-failure
WorkingDirectory=/containers/minecraftbuild
[Install]
WantedBy=multi-user.target
```
Make sure you have the bundle's root directory and JSON configs in
your WorkingDirectory, then use systemd commands to start the service:
```bash
systemctl daemon-reload
systemctl start minecraft.service
```
Note that if you use JSON configs by `runc spec`, you need to modify
`config.json` and change `process.terminal` to false so runc won't
create tty, because we can't set terminal from the stdin when using
systemd service.

View File

@@ -1,90 +0,0 @@
// +build linux
package main
import (
"fmt"
"strconv"
"strings"
"github.com/codegangsta/cli"
"github.com/opencontainers/runc/libcontainer"
)
var checkpointCommand = cli.Command{
Name: "checkpoint",
Usage: "checkpoint a running container",
Flags: []cli.Flag{
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'."},
},
Action: func(context *cli.Context) {
container, err := getContainer(context)
if err != nil {
fatal(err)
}
options := criuOptions(context)
status, err := container.Status()
if err != nil {
fatal(err)
}
if status == libcontainer.Checkpointed {
fatal(fmt.Errorf("Container with id %s already checkpointed", context.GlobalString("id")))
}
// these are the mandatory criu options for a container
setPageServer(context, options)
setManageCgroupsMode(context, options)
if err := container.Checkpoint(options); err != nil {
fatal(err)
}
},
}
func getCheckpointImagePath(context *cli.Context) string {
imagePath := context.String("image-path")
if imagePath == "" {
imagePath = getDefaultImagePath(context)
}
return imagePath
}
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
// xxx following criu opts are optional
// The dump image can be sent to a criu page server
if psOpt := context.String("page-server"); psOpt != "" {
addressPort := strings.Split(psOpt, ":")
if len(addressPort) != 2 {
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
}
portInt, err := strconv.Atoi(addressPort[1])
if err != nil {
fatal(fmt.Errorf("Invalid port number"))
}
options.PageServer = libcontainer.CriuPageServerInfo{
Address: addressPort[0],
Port: int32(portInt),
}
}
}
func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) {
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
switch cgOpt {
case "soft":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
case "full":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
case "strict":
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
default:
fatal(fmt.Errorf("Invalid manage cgroups mode"))
}
}
}

View File

@@ -1,95 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"os"
"sync"
"time"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/opencontainers/runc/libcontainer"
)
// event struct for encoding the event data to json.
type event struct {
Type string `json:"type"`
ID string `json:"id"`
Data interface{} `json:"data,omitempty"`
}
var eventsCommand = cli.Command{
Name: "events",
Usage: "display container events such as OOM notifications and cpu, memeory, IO, and network stats",
Flags: []cli.Flag{
cli.DurationFlag{Name: "interval", Value: 5 * time.Second, Usage: "set the stats collection interval"},
cli.BoolFlag{Name: "stats", Usage: "display the container's stats then exit"},
},
Action: func(context *cli.Context) {
container, err := getContainer(context)
if err != nil {
logrus.Fatal(err)
}
var (
stats = make(chan *libcontainer.Stats, 1)
events = make(chan *event, 1024)
group = &sync.WaitGroup{}
)
group.Add(1)
go func() {
defer group.Done()
enc := json.NewEncoder(os.Stdout)
for e := range events {
if err := enc.Encode(e); err != nil {
logrus.Error(err)
}
}
}()
if context.Bool("stats") {
s, err := container.Stats()
if err != nil {
fatal(err)
}
events <- &event{Type: "stats", ID: container.ID(), Data: s}
close(events)
group.Wait()
return
}
go func() {
for range time.Tick(context.Duration("interval")) {
s, err := container.Stats()
if err != nil {
logrus.Error(err)
continue
}
stats <- s
}
}()
n, err := container.NotifyOOM()
if err != nil {
logrus.Fatal(err)
}
for {
select {
case _, ok := <-n:
if ok {
// this means an oom event was received, if it is !ok then
// the channel was closed because the container stopped and
// the cgroups no longer exist.
events <- &event{Type: "oom", ID: container.ID()}
} else {
n = nil
}
case s := <-stats:
events <- &event{Type: "stats", ID: container.ID(), Data: s}
}
if n == nil {
close(events)
break
}
}
group.Wait()
},
}

View File

@@ -1,72 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"os"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/opencontainers/specs"
)
var execCommand = cli.Command{
Name: "exec",
Usage: "execute new process inside the container",
Action: func(context *cli.Context) {
config, err := loadProcessConfig(context.Args().First())
if err != nil {
fatal(err)
}
if os.Geteuid() != 0 {
logrus.Fatal("runc should be run as root")
}
status, err := execProcess(context, config)
if err != nil {
logrus.Fatalf("exec failed: %v", err)
}
os.Exit(status)
},
}
func execProcess(context *cli.Context, config *specs.Process) (int, error) {
container, err := getContainer(context)
if err != nil {
return -1, err
}
process := newProcess(*config)
rootuid, err := container.Config().HostUID()
if err != nil {
return -1, err
}
tty, err := newTty(config.Terminal, process, rootuid)
if err != nil {
return -1, err
}
handler := newSignalHandler(tty)
defer handler.Close()
if err := container.Start(process); err != nil {
return -1, err
}
return handler.forward(process)
}
// loadProcessConfig loads the process configuration from the provided path.
func loadProcessConfig(path string) (*specs.Process, error) {
f, err := os.Open(path)
if err != nil {
if os.IsNotExist(err) {
return nil, fmt.Errorf("JSON configuration file for %s not found", path)
}
return nil, err
}
defer f.Close()
var s *specs.Process
if err := json.NewDecoder(f).Decode(&s); err != nil {
return nil, err
}
return s, nil
}

View File

@@ -1,87 +0,0 @@
// +build linux
package main
import (
"fmt"
"strconv"
"strings"
"syscall"
"github.com/codegangsta/cli"
)
var signalMap = map[string]syscall.Signal{
"ABRT": syscall.SIGABRT,
"ALRM": syscall.SIGALRM,
"BUS": syscall.SIGBUS,
"CHLD": syscall.SIGCHLD,
"CLD": syscall.SIGCLD,
"CONT": syscall.SIGCONT,
"FPE": syscall.SIGFPE,
"HUP": syscall.SIGHUP,
"ILL": syscall.SIGILL,
"INT": syscall.SIGINT,
"IO": syscall.SIGIO,
"IOT": syscall.SIGIOT,
"KILL": syscall.SIGKILL,
"PIPE": syscall.SIGPIPE,
"POLL": syscall.SIGPOLL,
"PROF": syscall.SIGPROF,
"PWR": syscall.SIGPWR,
"QUIT": syscall.SIGQUIT,
"SEGV": syscall.SIGSEGV,
"STKFLT": syscall.SIGSTKFLT,
"STOP": syscall.SIGSTOP,
"SYS": syscall.SIGSYS,
"TERM": syscall.SIGTERM,
"TRAP": syscall.SIGTRAP,
"TSTP": syscall.SIGTSTP,
"TTIN": syscall.SIGTTIN,
"TTOU": syscall.SIGTTOU,
"UNUSED": syscall.SIGUNUSED,
"URG": syscall.SIGURG,
"USR1": syscall.SIGUSR1,
"USR2": syscall.SIGUSR2,
"VTALRM": syscall.SIGVTALRM,
"WINCH": syscall.SIGWINCH,
"XCPU": syscall.SIGXCPU,
"XFSZ": syscall.SIGXFSZ,
}
var killCommand = cli.Command{
Name: "kill",
Usage: "kill sends the specified signal (default: SIGTERM) to the container's init process",
Action: func(context *cli.Context) {
container, err := getContainer(context)
if err != nil {
fatal(err)
}
sigstr := context.Args().First()
if sigstr == "" {
sigstr = "SIGTERM"
}
signal, err := parseSignal(sigstr)
if err != nil {
fatal(err)
}
if err := container.Signal(signal); err != nil {
fatal(err)
}
},
}
func parseSignal(rawSignal string) (syscall.Signal, error) {
s, err := strconv.Atoi(rawSignal)
if err == nil {
return syscall.Signal(s), nil
}
signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")]
if !ok {
return -1, fmt.Errorf("unknown signal %q", rawSignal)
}
return signal, nil
}

View File

@@ -10,80 +10,165 @@ host system and which is (optionally) isolated from other containers in the syst
#### Using libcontainer
To create a container you first have to initialize an instance of a factory
that will handle the creation and initialization for a container.
Because containers are spawned in a two step process you will need to provide
arguments to a binary that will be executed as the init process for the container.
To use the current binary that is spawning the containers and acting as the parent
you can use `os.Args[0]` and we have a command called `init` setup.
Because containers are spawned in a two step process you will need a binary that
will be executed as the init process for the container. In libcontainer, we use
the current binary (/proc/self/exe) to be executed as the init process, and use
arg "init", we call the first step process "bootstrap", so you always need a "init"
function as the entry of "bootstrap".
```go
root, err := libcontainer.New("/var/lib/container", libcontainer.InitArgs(os.Args[0], "init"))
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
logrus.Fatal(err)
}
panic("--this line should have never been executed, congratulations--")
}
}
```
Then to create a container you first have to initialize an instance of a factory
that will handle the creation and initialization for a container.
```go
factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
if err != nil {
log.Fatal(err)
logrus.Fatal(err)
return
}
```
Once you have an instance of the factory created we can create a configuration
struct describing how the container is to be created. A sample would look similar to this:
struct describing how the container is to be created. A sample would look similar to this:
```go
defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
config := &configs.Config{
Rootfs: rootfs,
Capabilities: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
{Type: configs.NEWPID},
{Type: configs.NEWNET},
}),
Cgroups: &configs.Cgroup{
Name: "test-container",
Parent: "system",
AllowAllDevices: false,
AllowedDevices: configs.DefaultAllowedDevices,
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "testing",
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: uint64(1024),
Soft: uint64(1024),
},
},
Rootfs: "/your/path/to/rootfs",
Capabilities: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
{Type: configs.NEWIPC},
{Type: configs.NEWPID},
{Type: configs.NEWUSER},
{Type: configs.NEWNET},
}),
Cgroups: &configs.Cgroup{
Name: "test-container",
Parent: "system",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: false,
AllowedDevices: configs.DefaultAllowedDevices,
},
},
MaskPaths: []string{
"/proc/kcore",
},
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Hostname: "testing",
Mounts: []*configs.Mount{
{
Source: "proc",
Destination: "/proc",
Device: "proc",
Flags: defaultMountFlags,
},
{
Source: "tmpfs",
Destination: "/dev",
Device: "tmpfs",
Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME,
Data: "mode=755",
},
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
{
Device: "tmpfs",
Source: "shm",
Destination: "/dev/shm",
Data: "mode=1777,size=65536k",
Flags: defaultMountFlags,
},
{
Source: "mqueue",
Destination: "/dev/mqueue",
Device: "mqueue",
Flags: defaultMountFlags,
},
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
},
UidMappings: []configs.IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 65536,
},
},
GidMappings: []configs.IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 65536,
},
},
Networks: []*configs.Network{
{
Type: "loopback",
Address: "127.0.0.1/0",
Gateway: "localhost",
},
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: uint64(1025),
Soft: uint64(1025),
},
},
}
```
Once you have the configuration populated you can create a container:
```go
container, err := root.Create("container-id", config)
container, err := factory.Create("container-id", config)
if err != nil {
logrus.Fatal(err)
return
}
```
To spawn bash as the initial process inside the container and have the
@@ -91,23 +176,25 @@ processes pid returned in order to wait, signal, or kill the process:
```go
process := &libcontainer.Process{
Args: []string{"/bin/bash"},
Env: []string{"PATH=/bin"},
User: "daemon",
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
Args: []string{"/bin/bash"},
Env: []string{"PATH=/bin"},
User: "daemon",
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
err := container.Start(process)
if err != nil {
log.Fatal(err)
logrus.Fatal(err)
container.Destroy()
return
}
// wait for the process to finish.
status, err := process.Wait()
_, err := process.Wait()
if err != nil {
log.Fatal(err)
logrus.Fatal(err)
}
// destroy the container.
@@ -124,7 +211,6 @@ processes, err := container.Processes()
// it's processes.
stats, err := container.Stats()
// pause all processes inside the container.
container.Pause()

View File

@@ -60,7 +60,7 @@ are required to be mounted within the rootfs that the runtime will setup.
After a container's filesystems are mounted within the newly created
mount namespace `/dev` will need to be populated with a set of device nodes.
It is expected that a rootfs does not need to have any device nodes specified
for `/dev` witin the rootfs as the container will setup the correct devices
for `/dev` within the rootfs as the container will setup the correct devices
that are required for executing a container's process.
| Path | Mode | Access |
@@ -97,12 +97,12 @@ that is local to the container's rootfs.
After the container has `/proc` mounted a few standard symlinks are setup
within `/dev/` for the io.
| Source | Destination |
| ------------ | ----------- |
| /proc/1/fd | /dev/fd |
| /proc/1/fd/0 | /dev/stdin |
| /proc/1/fd/1 | /dev/stdout |
| /proc/1/fd/2 | /dev/stderr |
| Source | Destination |
| --------------- | ----------- |
| /proc/self/fd | /dev/fd |
| /proc/self/fd/0 | /dev/stdin |
| /proc/self/fd/1 | /dev/stdout |
| /proc/self/fd/2 | /dev/stderr |
A `pivot_root` is used to change the root for the process, effectively
jailing the process inside the rootfs.
@@ -142,6 +142,7 @@ system resources like cpu, memory, and device access.
| perf_event | 1 |
| freezer | 1 |
| hugetlb | 1 |
| pids | 1 |
All cgroup subsystem are joined so that statistics can be collected from
@@ -199,7 +200,7 @@ provide a good default for security and flexibility for the applications.
| CAP_SYS_BOOT | 0 |
| CAP_LEASE | 0 |
| CAP_WAKE_ALARM | 0 |
| CAP_BLOCK_SUSPE | 0 |
| CAP_BLOCK_SUSPEND | 0 |
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)

View File

@@ -1,69 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"strings"
"github.com/syndtr/gocapability/capability"
)
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
var capabilityMap map[string]capability.Cap
func init() {
capabilityMap = make(map[string]capability.Cap)
last := capability.CAP_LAST_CAP
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
if last == capability.Cap(63) {
last = capability.CAP_BLOCK_SUSPEND
}
for _, cap := range capability.List() {
if cap > last {
continue
}
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
capabilityMap[capKey] = cap
}
}
func newCapWhitelist(caps []string) (*whitelist, error) {
l := []capability.Cap{}
for _, c := range caps {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
l = append(l, v)
}
pid, err := capability.NewPid(os.Getpid())
if err != nil {
return nil, err
}
return &whitelist{
keep: l,
pid: pid,
}, nil
}
type whitelist struct {
pid capability.Capabilities
keep []capability.Cap
}
// dropBoundingSet drops the capability bounding set to those specified in the whitelist.
func (w *whitelist) dropBoundingSet() error {
w.pid.Clear(capability.BOUNDS)
w.pid.Set(capability.BOUNDS, w.keep...)
return w.pid.Apply(capability.BOUNDS)
}
// drop drops all capabilities for the current process except those specified in the whitelist.
func (w *whitelist) drop() error {
w.pid.Clear(allCapabilityTypes)
w.pid.Set(allCapabilityTypes, w.keep...)
return w.pid.Apply(allCapabilityTypes)
}

View File

@@ -1,61 +0,0 @@
// +build linux
package cgroups
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Manager interface {
// Apply cgroup configuration to the process with the specified pid
Apply(pid int) error
// Returns the PIDs inside the cgroup set
GetPids() ([]int, error)
// Returns statistics for the cgroup set
GetStats() (*Stats, error)
// Toggles the freezer cgroup according with specified state
Freeze(state configs.FreezerState) error
// Destroys the cgroup set
Destroy() error
// NewCgroupManager() and LoadCgroupManager() require following attributes:
// Paths map[string]string
// Cgroups *cgroups.Cgroup
// Paths maps cgroup subsystem to path at which it is mounted.
// Cgroups specifies specific cgroup settings for the various subsystems
// Returns cgroup paths to save in a state file and to be able to
// restore the object later.
GetPaths() map[string]string
// Set the cgroup as configured.
Set(container *configs.Config) error
}
type NotFoundError struct {
Subsystem string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}
func NewNotFoundError(sub string) error {
return &NotFoundError{
Subsystem: sub,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}

View File

@@ -1,18 +0,0 @@
// +build linux
package cgroups
import (
"testing"
)
func TestParseCgroups(t *testing.T) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
t.Fatal(err)
}
if _, ok := cgroups["cpu"]; !ok {
t.Fail()
}
}

View File

@@ -1,3 +0,0 @@
// +build !linux
package cgroups

View File

@@ -1,92 +0,0 @@
// +build linux
package cgroups
type ThrottlingData struct {
// Number of periods with throttling active
Periods uint64 `json:"periods,omitempty"`
// Number of periods when the container hit its throttling limit.
ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
// Aggregate time the container was throttled for in nanoseconds.
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
// All CPU stats are aggregate since container inception.
type CpuUsage struct {
// Total CPU time consumed.
// Units: nanoseconds.
TotalUsage uint64 `json:"total_usage,omitempty"`
// Total CPU time consumed per core.
// Units: nanoseconds.
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
// Time spent by tasks of the cgroup in kernel mode.
// Units: nanoseconds.
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
// Time spent by tasks of the cgroup in user mode.
// Units: nanoseconds.
UsageInUsermode uint64 `json:"usage_in_usermode"`
}
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
}
type MemoryData struct {
Usage uint64 `json:"usage,omitempty"`
MaxUsage uint64 `json:"max_usage,omitempty"`
Failcnt uint64 `json:"failcnt"`
}
type MemoryStats struct {
// memory used for cache
Cache uint64 `json:"cache,omitempty"`
// usage of memory
Usage MemoryData `json:"usage,omitempty"`
// usage of memory + swap
SwapUsage MemoryData `json:"swap_usage,omitempty"`
// usafe of kernel memory
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
}
type BlkioStatEntry struct {
Major uint64 `json:"major,omitempty"`
Minor uint64 `json:"minor,omitempty"`
Op string `json:"op,omitempty"`
Value uint64 `json:"value,omitempty"`
}
type BlkioStats struct {
// number of bytes tranferred to and from the block device
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}
type HugetlbStats struct {
// current res_counter usage for hugetlb
Usage uint64 `json:"usage,omitempty"`
// maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"`
// number of times htgetlb usage allocation failure.
Failcnt uint64 `json:"failcnt"`
}
type Stats struct {
CpuStats CpuStats `json:"cpu_stats,omitempty"`
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
// the map is in the format "size of hugepage: stats of the hugepage"
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
}
func NewStats() *Stats {
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
hugetlbStats := make(map[string]HugetlbStats)
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
}

View File

@@ -1,347 +0,0 @@
// +build linux
package cgroups
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/units"
)
const cgroupNamePrefix = "name="
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError(subsystem)
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", "", err
}
return "", "", NewNotFoundError(subsystem)
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
text := scanner.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
if len(postSeparatorFields) < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
if postSeparatorFields[0] == "cgroup" {
return filepath.Dir(fields[4]), nil
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError("cgroup")
}
type Mount struct {
Mountpoint string
Root string
Subsystems []string
}
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
return getControllerPath(m.Subsystems[0], cgroups)
}
func GetCgroupMounts() ([]Mount, error) {
mounts, err := mount.GetMounts()
if err != nil {
return nil, err
}
all, err := GetAllSubsystems()
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for _, s := range all {
allMap[s] = true
}
res := []Mount{}
for _, mount := range mounts {
if mount.Fstype == "cgroup" {
m := Mount{Mountpoint: mount.Mountpoint, Root: mount.Root}
for _, opt := range strings.Split(mount.VfsOpts, ",") {
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
}
if allMap[opt] {
m.Subsystems = append(m.Subsystems, opt)
}
}
res = append(res, m)
}
}
return res, nil
}
// Returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
f, err := os.Open("/proc/cgroups")
if err != nil {
return nil, err
}
defer f.Close()
subsystems := []string{}
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text[0] != '#' {
parts := strings.Fields(text)
if len(parts) >= 4 && parts[3] != "0" {
subsystems = append(subsystems, parts[0])
}
}
}
return subsystems, nil
}
// Returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupDir(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
if err != nil {
return nil, err
}
defer f.Close()
var (
s = bufio.NewScanner(f)
out = []int{}
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, pid)
}
}
return out, nil
}
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
s := bufio.NewScanner(f)
cgroups := make(map[string]string)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
parts := strings.Split(text, ":")
for _, subs := range strings.Split(parts[1], ",") {
cgroups[subs] = parts[2]
}
}
return cgroups, nil
}
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
if p, ok := cgroups[subsystem]; ok {
return p, nil
}
if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
return p, nil
}
return "", NewNotFoundError(subsystem)
}
func PathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
return false
}
return true
}
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
return err
}
}
}
return nil
}
// RemovePaths iterates over the provided paths removing them.
// We trying to remove all paths five times with increasing delay between tries.
// If after all there are not removed cgroups - appropriate error will be
// returned.
func RemovePaths(paths map[string]string) (err error) {
delay := 10 * time.Millisecond
for i := 0; i < 5; i++ {
if i != 0 {
time.Sleep(delay)
delay *= 2
}
for s, p := range paths {
os.RemoveAll(p)
// TODO: here probably should be logging
_, err := os.Stat(p)
// We need this strange way of checking cgroups existence because
// RemoveAll almost always returns error, even on already removed
// cgroups
if os.IsNotExist(err) {
delete(paths, s)
}
}
if len(paths) == 0 {
return nil
}
}
return fmt.Errorf("Failed to remove paths: %s", paths)
}
func GetHugePageSize() ([]string, error) {
var pageSizes []string
sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
if err != nil {
return pageSizes, err
}
for _, st := range files {
nameArray := strings.Split(st.Name(), "-")
pageSize, err := units.RAMInBytes(nameArray[1])
if err != nil {
return []string{}, err
}
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
pageSizes = append(pageSizes, sizeString)
}
return pageSizes, nil
}
// GetPids returns all pids, that were added to cgroup at path and to all its
// subcgroups.
func GetPids(path string) ([]int, error) {
var pids []int
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
dir, file := filepath.Split(p)
if file != "cgroup.procs" {
return nil
}
if iErr != nil {
return iErr
}
cPids, err := readProcsFile(dir)
if err != nil {
return err
}
pids = append(pids, cPids...)
return nil
})
return pids, err
}

View File

@@ -1,10 +0,0 @@
// +build linux,!go1.5
package libcontainer
import "syscall"
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
// with earlier versions
func enableSetgroups(sys *syscall.SysProcAttr) {
}

View File

@@ -1,61 +0,0 @@
package configs
import "fmt"
// blockIODevice holds major:minor format supported in blkio cgroup
type blockIODevice struct {
// Major is the device's major number
Major int64 `json:"major"`
// Minor is the device's minor number
Minor int64 `json:"minor"`
}
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
type WeightDevice struct {
blockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
Weight uint16 `json:"weight"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
LeafWeight uint16 `json:"leafWeight"`
}
// NewWeightDevice returns a configured WeightDevice pointer
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
wd := &WeightDevice{}
wd.Major = major
wd.Minor = minor
wd.Weight = weight
wd.LeafWeight = leafWeight
return wd
}
// WeightString formats the struct to be writable to the cgroup specific file
func (wd *WeightDevice) WeightString() string {
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
}
// LeafWeightString formats the struct to be writable to the cgroup specific file
func (wd *WeightDevice) LeafWeightString() string {
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
}
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
type ThrottleDevice struct {
blockIODevice
// Rate is the IO rate limit per cgroup per device
Rate uint64 `json:"rate"`
}
// NewThrottleDevice returns a configured ThrottleDevice pointer
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
td := &ThrottleDevice{}
td.Major = major
td.Minor = minor
td.Rate = rate
return td
}
// String formats the struct to be writable to the cgroup specific file
func (td *ThrottleDevice) String() string {
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
}

View File

@@ -1,101 +0,0 @@
package configs
type FreezerState string
const (
Undefined FreezerState = ""
Frozen FreezerState = "FROZEN"
Thawed FreezerState = "THAWED"
)
// TODO Windows: This can be factored out in the future as Cgroups are not
// supported on the Windows platform.
type Cgroup struct {
Name string `json:"name"`
// name of parent cgroup or slice
Parent string `json:"parent"`
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
AllowAllDevices bool `json:"allow_all_devices"`
AllowedDevices []*Device `json:"allowed_devices"`
DeniedDevices []*Device `json:"denied_devices"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
// Memory reservation or soft_limit (in bytes)
MemoryReservation int64 `json:"memory_reservation"`
// Total memory usage (memory + swap); set `-1' to disable swap
MemorySwap int64 `json:"memory_swap"`
// Kernel memory limit (in bytes)
KernelMemory int64 `json:"kernel_memory"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
// MEM to use
CpusetMems string `json:"cpuset_mems"`
// Specifies per cgroup weight, range is from 10 to 1000.
BlkioWeight uint16 `json:"blkio_weight"`
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
// Weight per cgroup per device, can override BlkioWeight.
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
// IO read rate limit per cgroup per device, bytes per second.
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
// IO write rate limit per cgroup per divice, bytes per second.
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
// IO read rate limit per cgroup per device, IO per second.
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
// IO write rate limit per cgroup per device, IO per second.
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
// set the freeze value for the process
Freezer FreezerState `json:"freezer"`
// Hugetlb limit (in bytes)
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
// Parent slice to use for systemd TODO: remove in favor or parent
Slice string `json:"slice"`
// Whether to disable OOM Killer
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness int64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid string `json:"net_cls_classid"`
}

View File

@@ -1,248 +0,0 @@
package configs
import (
"bytes"
"encoding/json"
"os/exec"
)
type Rlimit struct {
Type int `json:"type"`
Hard uint64 `json:"hard"`
Soft uint64 `json:"soft"`
}
// IDMap represents UID/GID Mappings for User Namespaces.
type IDMap struct {
ContainerID int `json:"container_id"`
HostID int `json:"host_id"`
Size int `json:"size"`
}
// Seccomp represents syscall restrictions
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
type Seccomp struct {
DefaultAction Action `json:"default_action"`
Architectures []string `json:"architectures"`
Syscalls []*Syscall `json:"syscalls"`
}
// An action to be taken upon rule match in Seccomp
type Action int
const (
Kill Action = iota - 4
Errno
Trap
Allow
)
// A comparison operator to be used when matching syscall arguments in Seccomp
type Operator int
const (
EqualTo Operator = iota
NotEqualTo
GreaterThan
GreaterThanOrEqualTo
LessThan
LessThanOrEqualTo
MaskEqualTo
)
// A rule to match a specific syscall argument in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
ValueTwo uint64 `json:"value_two"`
Op Operator `json:"op"`
}
// An rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific.
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in ramdisk
NoPivotRoot bool `json:"no_pivot_root"`
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
PivotDir string `json:"pivot_dir"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writtable.
Readonlyfs bool `json:"readonlyfs"`
// Specifies the mount propagation flags to be applied to /.
RootPropagation int `json:"rootPropagation"`
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
Mounts []*Mount `json:"mounts"`
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
Devices []*Device `json:"devices"`
MountLabel string `json:"mount_label"`
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname"`
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
Capabilities []string `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
// Routes can be specified to create entries in the route table as the container is started
Routes []*Route `json:"routes"`
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *Cgroup `json:"cgroups"`
// AppArmorProfile specifies the profile to apply to the process running in the container and is
// change at the time the process is execed
AppArmorProfile string `json:"apparmor_profile"`
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
ProcessLabel string `json:"process_label"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits"`
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
// for a process. Valid values are between the range [-1000, '1000'], where processes with
// higher scores are preferred for being killed.
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj int `json:"oom_score_adj"`
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []string `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
// GidMappings is an array of Group ID mappings for User Namespaces
GidMappings []IDMap `json:"gid_mappings"`
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
// mount pointing to /dev/null as to prevent reads of the file.
MaskPaths []string `json:"mask_paths"`
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
// so that these files prevent any writes.
ReadonlyPaths []string `json:"readonly_paths"`
// Sysctl is a map of properties and their values. It is the equivalent of using
// sysctl -w my.property.name value in Linux.
Sysctl map[string]string `json:"sysctl"`
// Seccomp allows actions to be taken whenever a syscall is made within the container.
// A number of rules are given, each having an action to be taken if a syscall matches it.
// A default action to be taken if no rules match is also given.
Seccomp *Seccomp `json:"seccomp"`
// Hooks are a collection of actions to perform at various container lifecycle events.
// Hooks are not able to be marshaled to json but they are also not needed to.
Hooks *Hooks `json:"-"`
// Version is the version of opencontainer specification that is supported.
Version string `json:"version"`
}
type Hooks struct {
// Prestart commands are executed after the container namespaces are created,
// but before the user supplied command is executed from init.
Prestart []Hook
// Poststop commands are executed after the container init process exits.
Poststop []Hook
}
// HookState is the payload provided to a hook on execution.
type HookState struct {
Version string `json:"version"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
}
type Hook interface {
// Run executes the hook with the provided state.
Run(HookState) error
}
// NewFunctionHooks will call the provided function when the hook is run.
func NewFunctionHook(f func(HookState) error) FuncHook {
return FuncHook{
run: f,
}
}
type FuncHook struct {
run func(HookState) error
}
func (f FuncHook) Run(s HookState) error {
return f.run(s)
}
type Command struct {
Path string `json:"path"`
Args []string `json:"args"`
Env []string `json:"env"`
Dir string `json:"dir"`
}
// NewCommandHooks will execute the provided command when the hook is run.
func NewCommandHook(cmd Command) CommandHook {
return CommandHook{
Command: cmd,
}
}
type CommandHook struct {
Command
}
func (c Command) Run(s HookState) error {
b, err := json.Marshal(s)
if err != nil {
return err
}
cmd := exec.Cmd{
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
}
return cmd.Run()
}

View File

@@ -1,154 +0,0 @@
package configs
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"testing"
)
// Checks whether the expected capability is specified in the capabilities.
func contains(expected string, values []string) bool {
for _, v := range values {
if v == expected {
return true
}
}
return false
}
func containsDevice(expected *Device, values []*Device) bool {
for _, d := range values {
if d.Path == expected.Path &&
d.Permissions == expected.Permissions &&
d.FileMode == expected.FileMode &&
d.Major == expected.Major &&
d.Minor == expected.Minor &&
d.Type == expected.Type {
return true
}
}
return false
}
func loadConfig(name string) (*Config, error) {
f, err := os.Open(filepath.Join("../sample_configs", name))
if err != nil {
return nil, err
}
defer f.Close()
var container *Config
if err := json.NewDecoder(f).Decode(&container); err != nil {
return nil, err
}
// Check that a config doesn't contain extra fields
var configMap, abstractMap map[string]interface{}
if _, err := f.Seek(0, 0); err != nil {
return nil, err
}
if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
return nil, err
}
configData, err := json.Marshal(&container)
if err != nil {
return nil, err
}
if err := json.Unmarshal(configData, &configMap); err != nil {
return nil, err
}
for k := range configMap {
delete(abstractMap, k)
}
if len(abstractMap) != 0 {
return nil, fmt.Errorf("unknown fields: %s", abstractMap)
}
return container, nil
}
func TestRemoveNamespace(t *testing.T) {
ns := Namespaces{
{Type: NEWNET},
}
if !ns.Remove(NEWNET) {
t.Fatal("NEWNET was not removed")
}
if len(ns) != 0 {
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
}
}
func TestHostUIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostUID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected uid 0 with no USERNS but received %d", uid)
}
}
func TestHostUIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
UidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostUID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected uid 1000 with no USERNS but received %d", uid)
}
}
func TestHostGIDNoUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{},
}
uid, err := config.HostGID()
if err != nil {
t.Fatal(err)
}
if uid != 0 {
t.Fatalf("expected gid 0 with no USERNS but received %d", uid)
}
}
func TestHostGIDWithUSERNS(t *testing.T) {
config := &Config{
Namespaces: Namespaces{{Type: NEWUSER}},
GidMappings: []IDMap{
{
ContainerID: 0,
HostID: 1000,
Size: 1,
},
},
}
uid, err := config.HostGID()
if err != nil {
t.Fatal(err)
}
if uid != 1000 {
t.Fatalf("expected gid 1000 with no USERNS but received %d", uid)
}
}

View File

@@ -1,51 +0,0 @@
// +build freebsd linux
package configs
import "fmt"
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// Gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
}
return id, nil
}
// Return default root gid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View File

@@ -1,54 +0,0 @@
package configs
import (
"fmt"
"os"
)
const (
Wildcard = -1
)
// TODO Windows: This can be factored out in the future
type Device struct {
// Device type, block, char, etc.
Type rune `json:"type"`
// Path to the device.
Path string `json:"path"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// Cgroup permissions format, rwm.
Permissions string `json:"permissions"`
// FileMode permission bits for the device.
FileMode os.FileMode `json:"file_mode"`
// Uid of the device.
Uid uint32 `json:"uid"`
// Gid of the device.
Gid uint32 `json:"gid"`
}
func (d *Device) CgroupString() string {
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
}
func (d *Device) Mkdev() int {
return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
}
// deviceNumberString converts the device number to a string return result.
func deviceNumberString(number int64) string {
if number == Wildcard {
return "*"
}
return fmt.Sprint(number)
}

View File

@@ -1,139 +0,0 @@
// +build linux freebsd
package configs
var (
// These are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
Major: 1,
Minor: 7,
Permissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
Major: 5,
Minor: 0,
Permissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
Major: 1,
Minor: 9,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
Major: 1,
Minor: 8,
Permissions: "rwm",
FileMode: 0666,
},
}
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Type: 'b',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
Major: 5,
Minor: 1,
Permissions: "rwm",
},
{
Path: "/dev/tty0",
Type: 'c',
Major: 4,
Minor: 0,
Permissions: "rwm",
},
{
Path: "/dev/tty1",
Type: 'c',
Major: 4,
Minor: 1,
Permissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
Major: 136,
Minor: Wildcard,
Permissions: "rwm",
},
{
Path: "",
Type: 'c',
Major: 5,
Minor: 2,
Permissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
Major: 10,
Minor: 200,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{
{
// /dev/fuse is created but not allowed.
// This is to allow java to work. Because java
// Insists on there being a /dev/fuse
// https://github.com/docker/docker/issues/514
// https://github.com/docker/docker/issues/2393
//
Path: "/dev/fuse",
Type: 'c',
Major: 10,
Minor: 229,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
)

View File

@@ -1,9 +0,0 @@
package configs
type HugepageLimit struct {
// which type of hugepage to limit.
Pagesize string `json:"page_size"`
// usage limit for hugepage.
Limit uint64 `json:"limit"`
}

View File

@@ -1,14 +0,0 @@
package configs
import (
"fmt"
)
type IfPrioMap struct {
Interface string `json:"interface"`
Priority int64 `json:"priority"`
}
func (i *IfPrioMap) CgroupString() string {
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
}

View File

@@ -1,30 +0,0 @@
package configs
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
// Destination path for the mount inside the container.
Destination string `json:"destination"`
// Device the mount is for.
Device string `json:"device"`
// Mount flags.
Flags int `json:"flags"`
// Propagation Flags
PropagationFlags []int `json:"propagation_flags"`
// Mount data applied to the mount.
Data string `json:"data"`
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`
// Optional Command to be run after Source is mounted.
PostmountCmds []Command `json:"postmount_cmds"`
}

View File

@@ -1,5 +0,0 @@
package configs
type NamespaceType string
type Namespaces []Namespace

View File

@@ -1,31 +0,0 @@
// +build linux
package configs
import "syscall"
func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type]
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: syscall.CLONE_NEWNET,
NEWNS: syscall.CLONE_NEWNS,
NEWUSER: syscall.CLONE_NEWUSER,
NEWIPC: syscall.CLONE_NEWIPC,
NEWUTS: syscall.CLONE_NEWUTS,
NEWPID: syscall.CLONE_NEWPID,
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This functions returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
var flag int
for _, v := range *n {
if v.Path != "" {
continue
}
flag |= namespaceInfo[v.Type]
}
return uintptr(flag)
}

View File

@@ -1,15 +0,0 @@
// +build !linux,!windows
package configs
func (n *Namespace) Syscall() int {
panic("No namespace syscall support")
return 0
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This functions returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
return uintptr(0)
}

View File

@@ -1,89 +0,0 @@
// +build linux freebsd
package configs
import "fmt"
const (
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
)
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Type NamespaceType `json:"type"`
Path string `json:"path"`
}
func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
}
func (n *Namespace) file() string {
file := ""
switch n.Type {
case NEWNET:
file = "net"
case NEWNS:
file = "mnt"
case NEWPID:
file = "pid"
case NEWIPC:
file = "ipc"
case NEWUSER:
file = "user"
case NEWUTS:
file = "uts"
}
return file
}
func (n *Namespaces) Remove(t NamespaceType) bool {
i := n.index(t)
if i == -1 {
return false
}
*n = append((*n)[:i], (*n)[i+1:]...)
return true
}
func (n *Namespaces) Add(t NamespaceType, path string) {
i := n.index(t)
if i == -1 {
*n = append(*n, Namespace{Type: t, Path: path})
return
}
(*n)[i].Path = path
}
func (n *Namespaces) index(t NamespaceType) int {
for i, ns := range *n {
if ns.Type == t {
return i
}
}
return -1
}
func (n *Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1
}

View File

@@ -1,6 +0,0 @@
package configs
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
}

View File

@@ -1,72 +0,0 @@
package configs
// Network defines configuration for a container's networking stack
//
// The network configuration can be omitted from a container causing the
// container to be setup with the host's networking stack
type Network struct {
// Type sets the networks type, commonly veth and loopback
Type string `json:"type"`
// Name of the network interface
Name string `json:"name"`
// The bridge to use.
Bridge string `json:"bridge"`
// MacAddress contains the MAC address to set on the network interface
MacAddress string `json:"mac_address"`
// Address contains the IPv4 and mask to set on the network interface
Address string `json:"address"`
// Gateway sets the gateway address that is used as the default for the interface
Gateway string `json:"gateway"`
// IPv6Address contains the IPv6 and mask to set on the network interface
IPv6Address string `json:"ipv6_address"`
// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
IPv6Gateway string `json:"ipv6_gateway"`
// Mtu sets the mtu value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
Mtu int `json:"mtu"`
// TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
TxQueueLen int `json:"txqueuelen"`
// HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the
// container.
HostInterfaceName string `json:"host_interface_name"`
// HairpinMode specifies if hairpin NAT should be enabled on the virtual interface
// bridge port in the case of type veth
// Note: This is unsupported on some systems.
// Note: This does not apply to loopback interfaces.
HairpinMode bool `json:"hairpin_mode"`
}
// Routes can be specified to create entries in the route table as the container is started
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their
// IP family default for the route table. For IPv4 for example, setting the
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct {
// Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination"`
// Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source"`
// Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway"`
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name"`
}

View File

@@ -1,15 +0,0 @@
package libcontainer
import "io"
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriter
io.Closer
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
Fd() uintptr
}

View File

@@ -1,13 +0,0 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// newConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View File

@@ -1,145 +0,0 @@
package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/opencontainers/runc/libcontainer/label"
)
// newConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
console, err := ptsname(master)
if err != nil {
return nil, err
}
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// newConsoleFromPath is an internal function returning an initialized console for use inside
// a container's MNT namespace.
func newConsoleFromPath(slavePath string) *linuxConsole {
return &linuxConsole{
slavePath: slavePath,
}
}
// linuxConsole is a linux psuedo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) Fd() uintptr {
return c.master.Fd()
}
func (c *linuxConsole) Path() string {
return c.slavePath
}
func (c *linuxConsole) Read(b []byte) (int, error) {
return c.master.Read(b)
}
func (c *linuxConsole) Write(b []byte) (int, error) {
return c.master.Write(b)
}
func (c *linuxConsole) Close() error {
if m := c.master; m != nil {
return m.Close()
}
return nil
}
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount(rootfs, mountLabel string) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(syscall.O_RDWR)
if err != nil {
return err
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := syscall.Dup3(fd, i, 0); err != nil {
return err
}
}
return nil
}
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := syscall.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
Path: c.slavePath,
Err: e,
}
}
return os.NewFile(uintptr(r), c.slavePath), nil
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
}
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
var n int32
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}

View File

@@ -1,30 +0,0 @@
package libcontainer
// newConsole returns an initalized console that can be used within a container
func newConsole(uid, gid int) (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows psuedo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View File

@@ -1,170 +0,0 @@
// Libcontainer provides a native Go implementation for creating containers
// with namespaces, cgroups, capabilities, and filesystem access controls.
// It allows you to manage the lifecycle of the container performing additional operations
// after the container is created.
package libcontainer
import (
"os"
"github.com/opencontainers/runc/libcontainer/configs"
)
// The status of a container.
type Status int
const (
// The container exists and is running.
Running Status = iota + 1
// The container exists, it is in the process of being paused.
Pausing
// The container exists, but all its processes are paused.
Paused
// The container exists, but its state is saved on disk
Checkpointed
// The container does not exist.
Destroyed
)
// State represents a running container's state
type State struct {
// ID is the container ID.
ID string `json:"id"`
// InitProcessPid is the init process id in the parent namespace.
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time.
InitProcessStartTime string `json:"init_process_start"`
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
// with the value as the path.
CgroupPaths map[string]string `json:"cgroup_paths"`
// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
// with the value as the path.
NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"`
// Config is the container's configuration.
Config configs.Config `json:"config"`
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
// Returns the ID of the container
ID() string
// Returns the current status of the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// Systemerror - System error.
State() (*State, error)
// Returns the current config of the container.
Config() configs.Config
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
Processes() ([]int, error)
// Returns statistics for the container.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Stats() (*Stats, error)
// Set cgroup resources of container as configured
//
// We can use this to change resources when containers are running.
//
// errors:
// Systemerror - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
// start. You can track process lifecycle with passed Process structure.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// Systemerror - System error.
Start(process *Process) (err error)
// Checkpoint checkpoints the running container's state to disk using the criu(8) utility.
//
// errors:
// Systemerror - System error.
Checkpoint(criuOpts *CriuOpts) error
// Restore restores the checkpointed container to a running state using the criu(8) utiity.
//
// errors:
// Systemerror - System error.
Restore(process *Process, criuOpts *CriuOpts) error
// Destroys the container after killing all running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// errors:
// Systemerror - System error.
Destroy() error
// If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses
// the execution of any user processes. Asynchronously, when the container finished being paused the
// state is changed to PAUSED.
// If the Container state is PAUSED, do nothing.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Pause() error
// If the Container state is PAUSED, resumes the execution of any user processes in the
// Container before setting the Container state to RUNNING.
// If the Container state is RUNNING, do nothing.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// Systemerror - System error.
Resume() error
// NotifyOOM returns a read-only channel signaling when the container receives an OOM notification.
//
// errors:
// Systemerror - System error.
NotifyOOM() (<-chan struct{}, error)
// Signal sends the provided signal code to the container's initial process.
//
// errors:
// Systemerror - System error.
Signal(s os.Signal) error
}

View File

@@ -1,920 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/golang/protobuf/proto"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
)
const stdioFdCount = 3
type linuxContainer struct {
id string
root string
config *configs.Config
cgroupManager cgroups.Manager
initPath string
initArgs []string
initProcess parentProcess
criuPath string
m sync.Mutex
}
// ID returns the container's unique ID
func (c *linuxContainer) ID() string {
return c.id
}
// Config returns the container's configuration
func (c *linuxContainer) Config() configs.Config {
return *c.config
}
func (c *linuxContainer) Status() (Status, error) {
c.m.Lock()
defer c.m.Unlock()
return c.currentStatus()
}
func (c *linuxContainer) State() (*State, error) {
c.m.Lock()
defer c.m.Unlock()
return c.currentState()
}
func (c *linuxContainer) Processes() ([]int, error) {
pids, err := c.cgroupManager.GetPids()
if err != nil {
return nil, newSystemError(err)
}
return pids, nil
}
func (c *linuxContainer) Stats() (*Stats, error) {
var (
err error
stats = &Stats{}
)
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemError(err)
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
if err != nil {
return stats, newSystemError(err)
}
stats.Interfaces = append(stats.Interfaces, istats)
}
}
return stats, nil
}
func (c *linuxContainer) Set(config configs.Config) error {
c.m.Lock()
defer c.m.Unlock()
c.config = &config
return c.cgroupManager.Set(c.config)
}
func (c *linuxContainer) Start(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
doInit := status == Destroyed
parent, err := c.newParentProcess(process, doInit)
if err != nil {
return newSystemError(err)
}
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := parent.terminate(); err != nil {
logrus.Warn(err)
}
return newSystemError(err)
}
process.ops = parent
if doInit {
c.updateState(parent)
}
return nil
}
func (c *linuxContainer) Signal(s os.Signal) error {
if err := c.initProcess.signal(s); err != nil {
return newSystemError(err)
}
return nil
}
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
parentPipe, childPipe, err := newPipe()
if err != nil {
return nil, newSystemError(err)
}
cmd, err := c.commandTemplate(p, childPipe)
if err != nil {
return nil, newSystemError(err)
}
if !doInit {
return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil
}
return c.newInitProcess(p, cmd, parentPipe, childPipe)
}
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
cmd := &exec.Cmd{
Path: c.initPath,
Args: c.initArgs,
}
cmd.Stdin = p.Stdin
cmd.Stdout = p.Stdout
cmd.Stderr = p.Stderr
cmd.Dir = c.config.Rootfs
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
if c.config.ParentDeathSignal > 0 {
cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
}
return cmd, nil
}
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
t := "_LIBCONTAINER_INITTYPE=standard"
cloneFlags := c.config.Namespaces.CloneFlags()
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil {
// user mappings are not supported
return nil, err
}
enableSetgroups(cmd.SysProcAttr)
// Default to root user when user namespaces are enabled.
if cmd.SysProcAttr.Credential == nil {
cmd.SysProcAttr.Credential = &syscall.Credential{}
}
}
cmd.Env = append(cmd.Env, t)
cmd.SysProcAttr.Cloneflags = cloneFlags
return &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
container: c,
}, nil
}
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess {
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()),
"_LIBCONTAINER_INITTYPE=setns",
)
if p.consolePath != "" {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath)
}
// TODO: set on container for process management
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
childPipe: childPipe,
parentPipe: parentPipe,
config: c.newInitConfig(p),
}
}
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
return &initConfig{
Config: c.config,
Args: process.Args,
Env: process.Env,
User: process.User,
Cwd: process.Cwd,
Console: process.consolePath,
Capabilities: process.Capabilities,
PassedFilesCount: len(process.ExtraFiles),
}
}
func newPipe() (parent *os.File, child *os.File, err error) {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
}
func (c *linuxContainer) Destroy() error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
if status != Destroyed {
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
}
if !c.config.Namespaces.Contains(configs.NEWPID) {
if err := killCgroupProcesses(c.cgroupManager); err != nil {
logrus.Warn(err)
}
}
err = c.cgroupManager.Destroy()
if rerr := os.RemoveAll(c.root); err == nil {
err = rerr
}
c.initProcess = nil
if c.config.Hooks != nil {
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Root: c.config.Rootfs,
}
for _, hook := range c.config.Hooks.Poststop {
if err := hook.Run(s); err != nil {
return err
}
}
}
return err
}
func (c *linuxContainer) Pause() error {
c.m.Lock()
defer c.m.Unlock()
return c.cgroupManager.Freeze(configs.Frozen)
}
func (c *linuxContainer) Resume() error {
c.m.Lock()
defer c.m.Unlock()
return c.cgroupManager.Freeze(configs.Thawed)
}
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
return notifyOnOOM(c.cgroupManager.GetPaths())
}
// XXX debug support, remove when debugging done.
func addArgsFromEnv(evar string, args *[]string) {
if e := os.Getenv(evar); e != "" {
for _, f := range strings.Fields(e) {
*args = append(*args, f)
}
}
fmt.Printf(">>> criu %v\n", *args)
}
// check Criu version greater than or equal to min_version
func (c *linuxContainer) checkCriuVersion(min_version string) error {
var x, y, z, versionReq int
_, err := fmt.Sscanf(min_version, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
if err != nil {
_, err = fmt.Sscanf(min_version, "Version: %d.%d\n", &x, &y) // 1.6
}
versionReq = x*10000 + y*100 + z
out, err := exec.Command(c.criuPath, "-V").Output()
if err != nil {
return fmt.Errorf("Unable to execute CRIU command: %s", c.criuPath)
}
x = 0
y = 0
z = 0
if ep := strings.Index(string(out), "-"); ep >= 0 {
// criu Git version format
var version string
if sp := strings.Index(string(out), "GitID"); sp > 0 {
version = string(out)[sp:ep]
} else {
return fmt.Errorf("Unable to parse the CRIU version: %s", c.criuPath)
}
n, err := fmt.Sscanf(string(version), "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
if err != nil {
n, err = fmt.Sscanf(string(version), "GitID: v%d.%d", &x, &y) // 1.6
y++
} else {
z++
}
if n < 2 || err != nil {
return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err)
}
} else {
// criu release version format
n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
if err != nil {
n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6
}
if n < 2 || err != nil {
return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
}
}
if x*10000+y*100+z < versionReq {
return fmt.Errorf("CRIU version must be %s or higher", min_version)
}
return nil
}
const descriptorsFilename = "descriptors.json"
func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
mountDest := m.Destination
if strings.HasPrefix(mountDest, c.config.Rootfs) {
mountDest = mountDest[len(c.config.Rootfs):]
}
extMnt := &criurpc.ExtMountMap{
Key: proto.String(mountDest),
Val: proto.String(mountDest),
}
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
if err := c.checkCriuVersion("1.5.2"); err != nil {
return err
}
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
defer imageDir.Close()
rpcOpts := criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String("dump.log"),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
FileLocks: proto.Bool(criuOpts.FileLocks),
}
// append optional criu opts, e.g., page-server and port
if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 {
rpcOpts.Ps = &criurpc.CriuPageServerInfo{
Address: proto.String(criuOpts.PageServer.Address),
Port: proto.Int32(criuOpts.PageServer.Port),
}
}
// append optional manage cgroups mode
if criuOpts.ManageCgroupsMode != 0 {
if err := c.checkCriuVersion("1.7"); err != nil {
return err
}
rpcOpts.ManageCgroupsMode = proto.Uint32(uint32(criuOpts.ManageCgroupsMode))
}
t := criurpc.CriuReqType_DUMP
req := &criurpc.CriuReq{
Type: &t,
Opts: &rpcOpts,
}
for _, m := range c.config.Mounts {
switch m.Device {
case "bind":
c.addCriuDumpMount(req, m)
break
case "cgroup":
binds, err := getCgroupMounts(m)
if err != nil {
return err
}
for _, b := range binds {
c.addCriuDumpMount(req, b)
}
break
}
}
// Write the FD info to a file in the image directory
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
if err != nil {
return err
}
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
if err != nil {
return err
}
err = c.criuSwrk(nil, req, criuOpts, false)
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) {
mountDest := m.Destination
if strings.HasPrefix(mountDest, c.config.Rootfs) {
mountDest = mountDest[len(c.config.Rootfs):]
}
extMnt := &criurpc.ExtMountMap{
Key: proto.String(mountDest),
Val: proto.String(m.Source),
}
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
if err := c.checkCriuVersion("1.5.2"); err != nil {
return err
}
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
// Since a container can be C/R'ed multiple times,
// the work directory may already exist.
if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
defer imageDir.Close()
// CRIU has a few requirements for a root directory:
// * it must be a mount point
// * its parent must not be overmounted
// c.config.Rootfs is bind-mounted to a temporary directory
// to satisfy these requirements.
root := filepath.Join(c.root, "criu-root")
if err := os.Mkdir(root, 0755); err != nil {
return err
}
defer os.Remove(root)
root, err = filepath.EvalSymlinks(root)
if err != nil {
return err
}
err = syscall.Mount(c.config.Rootfs, root, "", syscall.MS_BIND|syscall.MS_REC, "")
if err != nil {
return err
}
defer syscall.Unmount(root, syscall.MNT_DETACH)
t := criurpc.CriuReqType_RESTORE
req := &criurpc.CriuReq{
Type: &t,
Opts: &criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String("restore.log"),
RstSibling: proto.Bool(true),
Root: proto.String(root),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
FileLocks: proto.Bool(criuOpts.FileLocks),
},
}
for _, m := range c.config.Mounts {
switch m.Device {
case "bind":
c.addCriuRestoreMount(req, m)
break
case "cgroup":
binds, err := getCgroupMounts(m)
if err != nil {
return err
}
for _, b := range binds {
c.addCriuRestoreMount(req, b)
}
break
}
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(iface.HostInterfaceName)
veth.IfIn = proto.String(iface.Name)
req.Opts.Veths = append(req.Opts.Veths, veth)
break
case "loopback":
break
}
}
for _, i := range criuOpts.VethPairs {
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(i.HostInterfaceName)
veth.IfIn = proto.String(i.ContainerInterfaceName)
req.Opts.Veths = append(req.Opts.Veths, veth)
}
// append optional manage cgroups mode
if criuOpts.ManageCgroupsMode != 0 {
if err := c.checkCriuVersion("1.7"); err != nil {
return err
}
req.Opts.ManageCgroupsMode = proto.Uint32(uint32(criuOpts.ManageCgroupsMode))
}
var (
fds []string
fdJSON []byte
)
if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
return err
}
if err = json.Unmarshal(fdJSON, &fds); err != nil {
return err
}
for i := range fds {
if s := fds[i]; strings.Contains(s, "pipe:") {
inheritFd := new(criurpc.InheritFd)
inheritFd.Key = proto.String(s)
inheritFd.Fd = proto.Int32(int32(i))
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
}
}
err = c.criuSwrk(process, req, criuOpts, true)
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
if err := c.cgroupManager.Apply(pid); err != nil {
return err
}
path := fmt.Sprintf("/proc/%d/cgroup", pid)
cgroupsPaths, err := cgroups.ParseCgroupFile(path)
if err != nil {
return err
}
for c, p := range cgroupsPaths {
cgroupRoot := &criurpc.CgroupRoot{
Ctrl: proto.String(c),
Path: proto.String(p),
}
req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot)
}
return nil
}
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return err
}
logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
defer criuClient.Close()
defer criuServer.Close()
args := []string{"swrk", "3"}
cmd := exec.Command(c.criuPath, args...)
if process != nil {
cmd.Stdin = process.Stdin
cmd.Stdout = process.Stdout
cmd.Stderr = process.Stderr
}
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
if err := cmd.Start(); err != nil {
return err
}
criuServer.Close()
defer func() {
criuClient.Close()
_, err := cmd.Process.Wait()
if err != nil {
return
}
}()
if applyCgroups {
err := c.criuApplyCgroups(cmd.Process.Pid, req)
if err != nil {
return err
}
}
var extFds []string
if process != nil {
extFds, err = getPipeFds(cmd.Process.Pid)
if err != nil {
return err
}
}
data, err := proto.Marshal(req)
if err != nil {
return err
}
_, err = criuClient.Write(data)
if err != nil {
return err
}
buf := make([]byte, 10*4096)
for true {
n, err := criuClient.Read(buf)
if err != nil {
return err
}
if n == 0 {
return fmt.Errorf("unexpected EOF")
}
if n == len(buf) {
return fmt.Errorf("buffer is too small")
}
resp := new(criurpc.CriuResp)
err = proto.Unmarshal(buf[:n], resp)
if err != nil {
return err
}
if !resp.GetSuccess() {
typeString := req.GetType().String()
return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
}
t := resp.GetType()
switch {
case t == criurpc.CriuReqType_NOTIFY:
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
return err
}
t = criurpc.CriuReqType_NOTIFY
req = &criurpc.CriuReq{
Type: &t,
NotifySuccess: proto.Bool(true),
}
data, err = proto.Marshal(req)
if err != nil {
return err
}
n, err = criuClient.Write(data)
if err != nil {
return err
}
continue
case t == criurpc.CriuReqType_RESTORE:
case t == criurpc.CriuReqType_DUMP:
break
default:
return fmt.Errorf("unable to parse the response %s", resp.String())
}
break
}
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process.
st, err := cmd.Process.Wait()
if err != nil {
return err
}
if !st.Success() {
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
}
return nil
}
// block any external network activity
func lockNetwork(config *configs.Config) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.detach(config); err != nil {
return err
}
}
return nil
}
func unlockNetwork(config *configs.Config) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err = strategy.attach(config); err != nil {
return err
}
}
return nil
}
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
notify := resp.GetNotify()
if notify == nil {
return fmt.Errorf("invalid response: %s", resp.String())
}
switch {
case notify.GetScript() == "post-dump":
if !opts.LeaveRunning {
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
if err != nil {
return err
}
f.Close()
}
break
case notify.GetScript() == "network-unlock":
if err := unlockNetwork(c.config); err != nil {
return err
}
break
case notify.GetScript() == "network-lock":
if err := lockNetwork(c.config); err != nil {
return err
}
break
case notify.GetScript() == "post-restore":
pid := notify.GetPid()
r, err := newRestoredProcess(int(pid), fds)
if err != nil {
return err
}
// TODO: crosbymichael restore previous process information by saving the init process information in
// the container's state file or separate process state files.
if err := c.updateState(r); err != nil {
return err
}
process.ops = r
break
}
return nil
}
func (c *linuxContainer) updateState(process parentProcess) error {
c.initProcess = process
state, err := c.currentState()
if err != nil {
return err
}
f, err := os.Create(filepath.Join(c.root, stateFilename))
if err != nil {
return err
}
defer f.Close()
os.Remove(filepath.Join(c.root, "checkpoint"))
return json.NewEncoder(f).Encode(state)
}
func (c *linuxContainer) currentStatus() (Status, error) {
if _, err := os.Stat(filepath.Join(c.root, "checkpoint")); err == nil {
return Checkpointed, nil
}
if c.initProcess == nil {
return Destroyed, nil
}
// return Running if the init process is alive
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
if err == syscall.ESRCH {
return Destroyed, nil
}
return 0, newSystemError(err)
}
if c.config.Cgroups != nil && c.config.Cgroups.Freezer == configs.Frozen {
return Paused, nil
}
return Running, nil
}
func (c *linuxContainer) currentState() (*State, error) {
status, err := c.currentStatus()
if err != nil {
return nil, err
}
if status == Destroyed {
return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists)
}
startTime, err := c.initProcess.startTime()
if err != nil {
return nil, newSystemError(err)
}
state := &State{
ID: c.ID(),
Config: *c.config,
InitProcessPid: c.initProcess.pid(),
InitProcessStartTime: startTime,
CgroupPaths: c.cgroupManager.GetPaths(),
NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: c.initProcess.externalDescriptors(),
}
for _, ns := range c.config.Namespaces {
state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
}
for _, nsType := range configs.NamespaceTypes() {
if _, ok := state.NamespacePaths[nsType]; !ok {
ns := configs.Namespace{Type: nsType}
state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
}
}
return state, nil
}

View File

@@ -1,212 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"testing"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type mockCgroupManager struct {
pids []int
stats *cgroups.Stats
paths map[string]string
}
func (m *mockCgroupManager) GetPids() ([]int, error) {
return m.pids, nil
}
func (m *mockCgroupManager) GetStats() (*cgroups.Stats, error) {
return m.stats, nil
}
func (m *mockCgroupManager) Apply(pid int) error {
return nil
}
func (m *mockCgroupManager) Set(container *configs.Config) error {
return nil
}
func (m *mockCgroupManager) Destroy() error {
return nil
}
func (m *mockCgroupManager) GetPaths() map[string]string {
return m.paths
}
func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
return nil
}
type mockProcess struct {
_pid int
started string
}
func (m *mockProcess) terminate() error {
return nil
}
func (m *mockProcess) pid() int {
return m._pid
}
func (m *mockProcess) startTime() (string, error) {
return m.started, nil
}
func (m *mockProcess) start() error {
return nil
}
func (m *mockProcess) wait() (*os.ProcessState, error) {
return nil, nil
}
func (m *mockProcess) signal(_ os.Signal) error {
return nil
}
func (p *mockProcess) externalDescriptors() []string {
return []string{}
}
func (p *mockProcess) setExternalDescriptors(newFds []string) {
}
func TestGetContainerPids(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{pids: []int{1, 2, 3}},
}
pids, err := container.Processes()
if err != nil {
t.Fatal(err)
}
for i, expected := range []int{1, 2, 3} {
if pids[i] != expected {
t.Fatalf("expected pid %d but received %d", expected, pids[i])
}
}
}
func TestGetContainerStats(t *testing.T) {
container := &linuxContainer{
id: "myid",
config: &configs.Config{},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
},
}
stats, err := container.Stats()
if err != nil {
t.Fatal(err)
}
if stats.CgroupStats == nil {
t.Fatal("cgroup stats are nil")
}
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
}
}
func TestGetContainerState(t *testing.T) {
var (
pid = os.Getpid()
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
expectedNetworkPath = "/networks/fd"
)
container := &linuxContainer{
id: "myid",
config: &configs.Config{
Namespaces: []configs.Namespace{
{Type: configs.NEWPID},
{Type: configs.NEWNS},
{Type: configs.NEWNET, Path: expectedNetworkPath},
{Type: configs.NEWUTS},
// emulate host for IPC
//{Type: configs.NEWIPC},
},
},
initProcess: &mockProcess{
_pid: pid,
started: "010",
},
cgroupManager: &mockCgroupManager{
pids: []int{1, 2, 3},
stats: &cgroups.Stats{
MemoryStats: cgroups.MemoryStats{
Usage: cgroups.MemoryData{
Usage: 1024,
},
},
},
paths: map[string]string{
"memory": expectedMemoryPath,
},
},
}
state, err := container.State()
if err != nil {
t.Fatal(err)
}
if state.InitProcessPid != pid {
t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid)
}
if state.InitProcessStartTime != "010" {
t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime)
}
paths := state.CgroupPaths
if paths == nil {
t.Fatal("cgroup paths should not be nil")
}
if memPath := paths["memory"]; memPath != expectedMemoryPath {
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
}
for _, ns := range container.config.Namespaces {
path := state.NamespacePaths[ns.Type]
if path == "" {
t.Fatalf("expected non nil namespace path for %s", ns.Type)
}
if ns.Type == configs.NEWNET {
if path != expectedNetworkPath {
t.Fatalf("expected path %q but received %q", expectedNetworkPath, path)
}
} else {
file := ""
switch ns.Type {
case configs.NEWNET:
file = "net"
case configs.NEWNS:
file = "mnt"
case configs.NEWPID:
file = "pid"
case configs.NEWIPC:
file = "ipc"
case configs.NEWUSER:
file = "user"
case configs.NEWUTS:
file = "uts"
}
expected := fmt.Sprintf("/proc/%d/ns/%s", pid, file)
if expected != path {
t.Fatalf("expected path %q but received %q", expected, path)
}
}
}
}

View File

@@ -1,13 +0,0 @@
// +build !go1.4
package libcontainer
import (
"fmt"
"syscall"
)
// not available before go 1.4
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
return fmt.Errorf("User namespace is not supported in golang < 1.4")
}

View File

@@ -1,26 +0,0 @@
// +build go1.4
package libcontainer
import "syscall"
// Converts IDMap to SysProcIDMap array and adds it to SysProcAttr.
func (c *linuxContainer) addUidGidMappings(sys *syscall.SysProcAttr) error {
if c.config.UidMappings != nil {
sys.UidMappings = make([]syscall.SysProcIDMap, len(c.config.UidMappings))
for i, um := range c.config.UidMappings {
sys.UidMappings[i].ContainerID = um.ContainerID
sys.UidMappings[i].HostID = um.HostID
sys.UidMappings[i].Size = um.Size
}
}
if c.config.GidMappings != nil {
sys.GidMappings = make([]syscall.SysProcIDMap, len(c.config.GidMappings))
for i, gm := range c.config.GidMappings {
sys.GidMappings[i].ContainerID = gm.ContainerID
sys.GidMappings[i].HostID = gm.HostID
sys.GidMappings[i].Size = gm.Size
}
}
return nil
}

View File

@@ -1,34 +0,0 @@
package libcontainer
// cgroup restoring strategy provided by criu
type cg_mode uint32
const (
CRIU_CG_MODE_SOFT cg_mode = 3 + iota // restore cgroup properties if only dir created by criu
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
)
type CriuPageServerInfo struct {
Address string // IP address of CRIU page server
Port int32 // port number of CRIU page server
}
type VethPairName struct {
ContainerInterfaceName string
HostInterfaceName string
}
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs
FileLocks bool // handle file locks, for safety
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cg_mode // dump or restore cgroup mode
}

View File

@@ -1,62 +0,0 @@
package libcontainer
import "io"
// API error code type.
type ErrorCode int
// API error codes.
const (
// Factory errors
IdInUse ErrorCode = iota
InvalidIdFormat
// Container errors
ContainerNotExists
ContainerPaused
ContainerNotStopped
ContainerNotRunning
// Process errors
ProcessNotExecuted
// Common errors
ConfigInvalid
SystemError
)
func (c ErrorCode) String() string {
switch c {
case IdInUse:
return "Id already in use"
case InvalidIdFormat:
return "Invalid format"
case ContainerPaused:
return "Container paused"
case ConfigInvalid:
return "Invalid configuration"
case SystemError:
return "System error"
case ContainerNotExists:
return "Container does not exist"
case ContainerNotStopped:
return "Container is not stopped"
case ContainerNotRunning:
return "Container is not running"
default:
return "Unknown error"
}
}
// API Error type.
type Error interface {
error
// Returns a verbose string including the error message
// and a representation of the stack trace suitable for
// printing.
Detail(w io.Writer) error
// Returns the error code for this error.
Code() ErrorCode
}

View File

@@ -1,20 +0,0 @@
package libcontainer
import "testing"
func TestErrorCode(t *testing.T) {
codes := map[ErrorCode]string{
IdInUse: "Id already in use",
InvalidIdFormat: "Invalid format",
ContainerPaused: "Container paused",
ConfigInvalid: "Invalid configuration",
SystemError: "System error",
ContainerNotExists: "Container does not exist",
}
for code, expected := range codes {
if actual := code.String(); actual != expected {
t.Fatalf("expected string %q but received %q", expected, actual)
}
}
}

View File

@@ -1,45 +0,0 @@
package libcontainer
import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type Factory interface {
// Creates a new container with the given id and starts the initial process inside it.
// id must be a string containing only letters, digits and underscores and must contain
// between 1 and 1024 characters, inclusive.
//
// The id must not already be in use by an existing container. Containers created using
// a factory with the same path (and file system) must have distinct ids.
//
// Returns the new container with a running process.
//
// errors:
// IdInUse - id is already in use by a container
// InvalidIdFormat - id has incorrect format
// ConfigInvalid - config is invalid
// Systemerror - System error
//
// On error, any partially created container parts are cleaned up (the operation is atomic).
Create(id string, config *configs.Config) (Container, error)
// Load takes an ID for an existing container and returns the container information
// from the state. This presents a read only view of the container.
//
// errors:
// Path does not exist
// Container is stopped
// System error
Load(id string) (Container, error)
// StartInitialization is an internal API to libcontainer used during the reexec of the
// container.
//
// Errors:
// Pipe connection error
// System error
StartInitialization() error
// Type returns info string about factory type (e.g. lxc, libcontainer...)
Type() string
}

View File

@@ -1,269 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
const (
stateFilename = "state.json"
)
var (
idRegex = regexp.MustCompile(`^[\w_-]+$`)
maxIdLen = 1024
)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init arguments.
func InitArgs(args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
name := args[0]
if filepath.Base(name) == name {
if lp, err := exec.LookPath(name); err == nil {
name = lp
}
} else {
abs, err := filepath.Abs(name)
if err != nil {
return err
}
name = abs
}
l.InitPath = "/proc/self/exe"
l.InitArgs = append([]string{name}, args[1:]...)
return nil
}
}
// InitPath returns an options func to configure a LinuxFactory with the
// provided absolute path to the init binary and arguements.
func InitPath(path string, args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
l.InitPath = path
l.InitArgs = args
return nil
}
}
// SystemdCgroups is an options func to configure a LinuxFactory to return
// containers that use systemd to create and manage cgroups.
func SystemdCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &systemd.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// Cgroupfs is an options func to configure a LinuxFactory to return
// containers that use the native cgroups filesystem implementation to
// create and manage cgroups.
func Cgroupfs(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &fs.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
if err != nil {
return err
}
if !mounted {
if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
return err
}
}
return nil
}
// New returns a linux based container factory based in the root directory and
// configures the factory with the provided option funcs.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
if root != "" {
if err := os.MkdirAll(root, 0700); err != nil {
return nil, newGenericError(err, SystemError)
}
}
l := &LinuxFactory{
Root: root,
Validator: validate.New(),
CriuPath: "criu",
}
InitArgs(os.Args[0], "init")(l)
Cgroupfs(l)
for _, opt := range options {
if err := opt(l); err != nil {
return nil, err
}
}
return l, nil
}
// LinuxFactory implements the default factory interface for linux based systems.
type LinuxFactory struct {
// Root directory for the factory to store state.
Root string
// InitPath is the absolute path to the init binary.
InitPath string
// InitArgs are arguments for calling the init responsibilities for spawning
// a container.
InitArgs []string
// CriuPath is the path to the criu binary used for checkpoint and restore of
// containers.
CriuPath string
// Validator provides validation to container configurations.
Validator validate.Validator
// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
}
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
if err := l.validateID(id); err != nil {
return nil, err
}
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("Container with id exists: %v", id), IdInUse)
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0700); err != nil {
return nil, newGenericError(err, SystemError)
}
return &linuxContainer{
id: id,
root: containerRoot,
config: config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}, nil
}
func (l *LinuxFactory) Load(id string) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
containerRoot := filepath.Join(l.Root, id)
state, err := l.loadState(containerRoot)
if err != nil {
return nil, err
}
r := &nonChildProcess{
processPid: state.InitProcessPid,
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
return &linuxContainer{
initProcess: r,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
}, nil
}
func (l *LinuxFactory) Type() string {
return "libcontainer"
}
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
pipefd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_INITPIPE"))
if err != nil {
return err
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
defer func() {
// if we have an error during the initialization of the container's init then send it back to the
// parent process in the form of an initError.
if err != nil {
// ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(pipe)
if err := json.NewEncoder(pipe).Encode(newSystemError(err)); err != nil {
panic(err)
}
}
// ensure that this pipe is always closed
pipe.Close()
}()
i, err := newContainerInit(it, pipe)
if err != nil {
return err
}
return i.Init()
}
func (l *LinuxFactory) loadState(root string) (*State, error) {
f, err := os.Open(filepath.Join(root, stateFilename))
if err != nil {
if os.IsNotExist(err) {
return nil, newGenericError(err, ContainerNotExists)
}
return nil, newGenericError(err, SystemError)
}
defer f.Close()
var state *State
if err := json.NewDecoder(f).Decode(&state); err != nil {
return nil, newGenericError(err, SystemError)
}
return state, nil
}
func (l *LinuxFactory) validateID(id string) error {
if !idRegex.MatchString(id) {
return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat)
}
if len(id) > maxIdLen {
return newGenericError(fmt.Errorf("Invalid id format: %v", id), InvalidIdFormat)
}
return nil
}

View File

@@ -1,181 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"testing"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/configs"
)
func newTestRoot() (string, error) {
dir, err := ioutil.TempDir("", "libcontainer")
if err != nil {
return "", err
}
return dir, nil
}
func TestFactoryNew(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
}
func TestFactoryNewTmpfs(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs, TmpfsRoot)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
mounted, err := mount.Mounted(lfactory.Root)
if err != nil {
t.Fatal(err)
}
if !mounted {
t.Fatalf("Factory Root is not mounted")
}
mounts, err := mount.GetMounts()
if err != nil {
t.Fatal(err)
}
var found bool
for _, m := range mounts {
if m.Mountpoint == lfactory.Root {
if m.Fstype != "tmpfs" {
t.Fatalf("Fstype of root: %s, expected %s", m.Fstype, "tmpfs")
}
if m.Source != "tmpfs" {
t.Fatalf("Source of root: %s, expected %s", m.Source, "tmpfs")
}
found = true
}
}
if !found {
t.Fatalf("Factory Root is not listed in mounts list")
}
defer syscall.Unmount(root, syscall.MNT_DETACH)
}
func TestFactoryLoadNotExists(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
_, err = factory.Load("nocontainer")
if err == nil {
t.Fatal("expected nil error loading non-existing container")
}
lerr, ok := err.(Error)
if !ok {
t.Fatal("expected libcontainer error type")
}
if lerr.Code() != ContainerNotExists {
t.Fatalf("expected error code %s but received %s", ContainerNotExists, lerr.Code())
}
}
func TestFactoryLoadContainer(t *testing.T) {
root, err := newTestRoot()
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(root)
// setup default container config and state for mocking
var (
id = "1"
expectedConfig = &configs.Config{
Rootfs: "/mycontainer/root",
}
expectedState = &State{
InitProcessPid: 1024,
Config: *expectedConfig,
}
)
if err := os.Mkdir(filepath.Join(root, id), 0700); err != nil {
t.Fatal(err)
}
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
t.Fatal(err)
}
factory, err := New(root, Cgroupfs)
if err != nil {
t.Fatal(err)
}
container, err := factory.Load(id)
if err != nil {
t.Fatal(err)
}
if container.ID() != id {
t.Fatalf("expected container id %q but received %q", id, container.ID())
}
config := container.Config()
if config.Rootfs != expectedConfig.Rootfs {
t.Fatalf("expected rootfs %q but received %q", expectedConfig.Rootfs, config.Rootfs)
}
lcontainer, ok := container.(*linuxContainer)
if !ok {
t.Fatal("expected linux container on linux based systems")
}
if lcontainer.initProcess.pid() != expectedState.InitProcessPid {
t.Fatalf("expected init pid %d but received %d", expectedState.InitProcessPid, lcontainer.initProcess.pid())
}
}
func marshal(path string, v interface{}) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
return json.NewEncoder(f).Encode(v)
}

View File

@@ -1,74 +0,0 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
"github.com/opencontainers/runc/libcontainer/stacktrace"
)
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}
Message: {{.Message}}
{{end}}
Frames:{{range $i, $frame := .Stack.Frames}}
---
{{$i}}: {{$frame.Function}}
Package: {{$frame.Package}}
File: {{$frame.File}}@{{$frame.Line}}{{end}}
`))
func newGenericError(err error, c ErrorCode) Error {
if le, ok := err.(Error); ok {
return le
}
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: c,
Stack: stacktrace.Capture(1),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
func newSystemError(err error) Error {
if le, ok := err.(Error); ok {
return le
}
gerr := &genericError{
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Stack: stacktrace.Capture(1),
}
if err != nil {
gerr.Message = err.Error()
}
return gerr
}
type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Message)
}
func (e *genericError) Code() ErrorCode {
return e.ECode
}
func (e *genericError) Detail(w io.Writer) error {
return errorTemplate.Execute(w, e)
}

View File

@@ -1,14 +0,0 @@
package libcontainer
import (
"fmt"
"io/ioutil"
"testing"
)
func TestErrorDetail(t *testing.T) {
err := newGenericError(fmt.Errorf("test error"), SystemError)
if derr := err.Detail(ioutil.Discard); derr != nil {
t.Fatal(derr)
}
}

View File

@@ -1,334 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"net"
"os"
"strconv"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink"
)
type initType string
const (
initSetns initType = "setns"
initStandard initType = "standard"
)
type pid struct {
Pid int `json:"pid"`
}
// network is an internal struct used to setup container networks.
type network struct {
configs.Network
// TempVethPeerName is a unique temporary veth peer name that was placed into
// the container's namespace.
TempVethPeerName string `json:"temp_veth_peer_name"`
}
// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities []string `json:"capabilities"`
User string `json:"user"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
}
if err := populateProcessEnvironment(config.Env); err != nil {
return nil, err
}
switch t {
case initSetns:
return &linuxSetnsInit{
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
parentPid: syscall.Getppid(),
config: config,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
}
// populateProcessEnvironment loads the provided environment variables into the
// current processes's environment.
func populateProcessEnvironment(env []string) error {
for _, pair := range env {
p := strings.SplitN(pair, "=", 2)
if len(p) < 2 {
return fmt.Errorf("invalid environment '%v'", pair)
}
if err := os.Setenv(p[0], p[1]); err != nil {
return err
}
}
return nil
}
// finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace
func finalizeNamespace(config *initConfig) error {
// Ensure that all unwanted fds we may have accidentally
// inherited are marked close-on-exec so they stay out of the
// container
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
return err
}
capabilities := config.Config.Capabilities
if config.Capabilities != nil {
capabilities = config.Capabilities
}
w, err := newCapWhitelist(capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.dropBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
if err := system.SetKeepCaps(); err != nil {
return err
}
if err := setupUser(config); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil {
return err
}
// drop all other capabilities
if err := w.drop(); err != nil {
return err
}
if config.Cwd != "" {
if err := syscall.Chdir(config.Cwd); err != nil {
return err
}
}
return nil
}
// joinExistingNamespaces gets all the namespace paths specified for the container and
// does a setns on the namespace fd so that the current process joins the namespace.
func joinExistingNamespaces(namespaces []configs.Namespace) error {
for _, ns := range namespaces {
if ns.Path != "" {
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
if err != nil {
return err
}
err = system.Setns(f.Fd(), uintptr(ns.Syscall()))
f.Close()
if err != nil {
return err
}
}
}
return nil
}
// setupUser changes the groups, gid, and uid for the user inside the container
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}
var addGroups []int
if len(config.Config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}
// before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
return err
}
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return err
}
}
return nil
}
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(u *user.ExecUser) error {
var null syscall.Stat_t
if err := syscall.Stat("/dev/null", &null); err != nil {
return err
}
for _, fd := range []uintptr{
os.Stdin.Fd(),
os.Stderr.Fd(),
os.Stdout.Fd(),
} {
var s syscall.Stat_t
if err := syscall.Fstat(int(fd), &s); err != nil {
return err
}
// skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev {
continue
}
if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
return err
}
}
return nil
}
// setupNetwork sets up and initializes any network interface inside the container.
func setupNetwork(config *initConfig) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.initialize(config); err != nil {
return err
}
}
return nil
}
func setupRoute(config *configs.Config) error {
for _, config := range config.Routes {
_, dst, err := net.ParseCIDR(config.Destination)
if err != nil {
return err
}
src := net.ParseIP(config.Source)
if src == nil {
return fmt.Errorf("Invalid source for route: %s", config.Source)
}
gw := net.ParseIP(config.Gateway)
if gw == nil {
return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
}
l, err := netlink.LinkByName(config.InterfaceName)
if err != nil {
return err
}
route := &netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
Dst: dst,
Src: src,
Gw: gw,
LinkIndex: l.Attrs().Index,
}
if err := netlink.RouteAdd(route); err != nil {
return err
}
}
return nil
}
func setupRlimits(config *configs.Config) error {
for _, rlimit := range config.Rlimits {
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
func setOomScoreAdj(oomScoreAdj int) error {
path := "/proc/self/oom_score_adj"
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0700)
}
// killCgroupProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending a SIGKILL to each process then waiting for them to
// exit.
func killCgroupProcesses(m cgroups.Manager) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
}
pids, err := m.GetPids()
if err != nil {
m.Freeze(configs.Thawed)
return err
}
for _, pid := range pids {
if p, err := os.FindProcess(pid); err == nil {
procs = append(procs, p)
if err := p.Kill(); err != nil {
logrus.Warn(err)
}
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}
for _, p := range procs {
if _, err := p.Wait(); err != nil {
logrus.Warn(err)
}
}
return nil
}

View File

@@ -1,259 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"net"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink"
)
var strategies = map[string]networkStrategy{
"veth": &veth{},
"loopback": &loopback{},
}
// networkStrategy represents a specific network configuration for
// a container's networking stack
type networkStrategy interface {
create(*network, int) error
initialize(*network) error
detach(*configs.Network) error
attach(*configs.Network) error
}
// getStrategy returns the specific network strategy for the
// provided type.
func getStrategy(tpe string) (networkStrategy, error) {
s, exists := strategies[tpe]
if !exists {
return nil, fmt.Errorf("unknown strategy type %q", tpe)
}
return s, nil
}
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
out := &NetworkInterface{Name: interfaceName}
// This can happen if the network runtime information is missing - possible if the
// container was created by an old version of libcontainer.
if interfaceName == "" {
return out, nil
}
type netStatsPair struct {
// Where to write the output.
Out *uint64
// The network stats file to read.
File string
}
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
netStats := []netStatsPair{
{Out: &out.RxBytes, File: "tx_bytes"},
{Out: &out.RxPackets, File: "tx_packets"},
{Out: &out.RxErrors, File: "tx_errors"},
{Out: &out.RxDropped, File: "tx_dropped"},
{Out: &out.TxBytes, File: "rx_bytes"},
{Out: &out.TxPackets, File: "rx_packets"},
{Out: &out.TxErrors, File: "rx_errors"},
{Out: &out.TxDropped, File: "rx_dropped"},
}
for _, netStat := range netStats {
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
if err != nil {
return nil, err
}
*(netStat.Out) = data
}
return out, nil
}
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
if err != nil {
return 0, err
}
return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
}
// loopback is a network strategy that provides a basic loopback device
type loopback struct {
}
func (l *loopback) create(n *network, nspid int) error {
return nil
}
func (l *loopback) initialize(config *network) error {
return netlink.LinkSetUp(&netlink.Device{netlink.LinkAttrs{Name: "lo"}})
}
func (l *loopback) attach(n *configs.Network) (err error) {
return nil
}
func (l *loopback) detach(n *configs.Network) (err error) {
return nil
}
// veth is a network strategy that uses a bridge and creates
// a veth pair, one that is attached to the bridge on the host and the other
// is placed inside the container's namespace
type veth struct {
}
func (v *veth) detach(n *configs.Network) (err error) {
return netlink.LinkSetMaster(&netlink.Device{netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
}
// attach a container network interface to an external network
func (v *veth) attach(n *configs.Network) (err error) {
brl, err := netlink.LinkByName(n.Bridge)
if err != nil {
return err
}
br, ok := brl.(*netlink.Bridge)
if !ok {
return fmt.Errorf("Wrong device type %T", brl)
}
host, err := netlink.LinkByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.LinkSetMaster(host, br); err != nil {
return err
}
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
return err
}
if n.HairpinMode {
if err := netlink.LinkSetHairpin(host, true); err != nil {
return err
}
}
if err := netlink.LinkSetUp(host); err != nil {
return err
}
return nil
}
func (v *veth) create(n *network, nspid int) (err error) {
tmpName, err := v.generateTempPeerName()
if err != nil {
return err
}
n.TempVethPeerName = tmpName
if n.Bridge == "" {
return fmt.Errorf("bridge is not specified")
}
veth := &netlink.Veth{
LinkAttrs: netlink.LinkAttrs{
Name: n.HostInterfaceName,
TxQLen: n.TxQueueLen,
},
PeerName: n.TempVethPeerName,
}
if err := netlink.LinkAdd(veth); err != nil {
return err
}
defer func() {
if err != nil {
netlink.LinkDel(veth)
}
}()
if err := v.attach(&n.Network); err != nil {
return err
}
child, err := netlink.LinkByName(n.TempVethPeerName)
if err != nil {
return err
}
return netlink.LinkSetNsPid(child, nspid)
}
func (v *veth) generateTempPeerName() (string, error) {
return utils.GenerateRandomName("veth", 7)
}
func (v *veth) initialize(config *network) error {
peer := config.TempVethPeerName
if peer == "" {
return fmt.Errorf("peer is not specified")
}
child, err := netlink.LinkByName(peer)
if err != nil {
return err
}
if err := netlink.LinkSetDown(child); err != nil {
return err
}
if err := netlink.LinkSetName(child, config.Name); err != nil {
return err
}
// get the interface again after we changed the name as the index also changes.
if child, err = netlink.LinkByName(config.Name); err != nil {
return err
}
if config.MacAddress != "" {
mac, err := net.ParseMAC(config.MacAddress)
if err != nil {
return err
}
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
return err
}
}
ip, err := netlink.ParseAddr(config.Address)
if err != nil {
return err
}
if err := netlink.AddrAdd(child, ip); err != nil {
return err
}
if config.IPv6Address != "" {
ip6, err := netlink.ParseAddr(config.IPv6Address)
if err != nil {
return err
}
if err := netlink.AddrAdd(child, ip6); err != nil {
return err
}
}
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
return err
}
if err := netlink.LinkSetUp(child); err != nil {
return err
}
if config.Gateway != "" {
gw := net.ParseIP(config.Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}
if config.IPv6Gateway != "" {
gw := net.ParseIP(config.IPv6Gateway)
if err := netlink.RouteAdd(&netlink.Route{
Scope: netlink.SCOPE_UNIVERSE,
LinkIndex: child.Attrs().Index,
Gw: gw,
}); err != nil {
return err
}
}
return nil
}

View File

@@ -1,63 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
)
const oomCgroupName = "memory"
// notifyOnOOM returns channel on which you can expect event about OOM,
// if process died without OOM this channel will be closed.
// s is current *libcontainer.State for container.
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
dir := paths[oomCgroupName]
if dir == "" {
return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
}
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
if err != nil {
return nil, err
}
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
oomControl.Close()
return nil, syserr
}
eventfd := os.NewFile(fd, "eventfd")
eventControlPath := filepath.Join(dir, "cgroup.event_control")
data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
eventfd.Close()
oomControl.Close()
return nil, err
}
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
oomControl.Close()
}()
buf := make([]byte, 8)
for {
if _, err := eventfd.Read(buf); err != nil {
return
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
return
}
ch <- struct{}{}
}
}()
return ch, nil
}

View File

@@ -1,96 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"syscall"
"testing"
"time"
)
func TestNotifyOnOOM(t *testing.T) {
memoryPath, err := ioutil.TempDir("", "testnotifyoom-")
if err != nil {
t.Fatal(err)
}
oomPath := filepath.Join(memoryPath, "memory.oom_control")
eventPath := filepath.Join(memoryPath, "cgroup.event_control")
if err := ioutil.WriteFile(oomPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil {
t.Fatal(err)
}
var eventFd, oomControlFd int
paths := map[string]string{
"memory": memoryPath,
}
ooms, err := notifyOnOOM(paths)
if err != nil {
t.Fatal("expected no error, got:", err)
}
data, err := ioutil.ReadFile(eventPath)
if err != nil {
t.Fatal("couldn't read event control file:", err)
}
if _, err := fmt.Sscanf(string(data), "%d %d", &eventFd, &oomControlFd); err != nil {
t.Fatalf("invalid control data %q: %s", data, err)
}
// re-open the eventfd
efd, err := syscall.Dup(eventFd)
if err != nil {
t.Fatal("unable to reopen eventfd:", err)
}
defer syscall.Close(efd)
if err != nil {
t.Fatal("unable to dup event fd:", err)
}
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, 1)
if _, err := syscall.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}
select {
case <-ooms:
case <-time.After(100 * time.Millisecond):
t.Fatal("no notification on oom channel after 100ms")
}
// simulate what happens when a cgroup is destroyed by cleaning up and then
// writing to the eventfd.
if err := os.RemoveAll(memoryPath); err != nil {
t.Fatal(err)
}
if _, err := syscall.Write(efd, buf); err != nil {
t.Fatal("unable to write to eventfd:", err)
}
// give things a moment to shut down
select {
case _, ok := <-ooms:
if ok {
t.Fatal("expected no oom to be triggered")
}
case <-time.After(100 * time.Millisecond):
}
if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(oomControlFd), syscall.F_GETFD, 0); err != syscall.EBADF {
t.Error("expected oom control to be closed")
}
if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(eventFd), syscall.F_GETFD, 0); err != syscall.EBADF {
t.Error("expected event fd to be closed")
}
}

View File

@@ -1,25 +0,0 @@
## nsenter
The `nsenter` package registers a special init constructor that is called before
the Go runtime has a chance to boot. This provides us the ability to `setns` on
existing namespaces and avoid the issues that the Go runtime has with multiple
threads. This constructor will be called if this package is registered,
imported, in your go application.
The `nsenter` package will `import "C"` and it uses [cgo](https://golang.org/cmd/cgo/)
package. In cgo, if the import of "C" is immediately preceded by a comment, that comment,
called the preamble, is used as a header when compiling the C parts of the package.
So every time we import package `nsenter`, the C code function `nsexec()` would be
called. And package `nsenter` is now only imported in Docker execdriver, so every time
before we call `execdriver.Exec()`, that C code would run.
`nsexec()` will first check the environment variable `_LIBCONTAINER_INITPID`
which will give the process of the container that should be joined. Namespaces fd will
be found from `/proc/[pid]/ns` and set by `setns` syscall.
And then get the pipe number from `_LIBCONTAINER_INITPIPE`, error message could
be transfered through it. If tty is added, `_LIBCONTAINER_CONSOLE_PATH` will
have value and start a console for output.
Finally, `nsexec()` will clone a child process , exit the parent process and let
the Go runtime take over.

View File

@@ -1,12 +0,0 @@
// +build linux,!gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
extern void nsexec();
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"

View File

@@ -1,25 +0,0 @@
// +build linux,gccgo
package nsenter
/*
#cgo CFLAGS: -Wall
extern void nsexec();
void __attribute__((constructor)) init(void) {
nsexec();
}
*/
import "C"
// AlwaysFalse is here to stay false
// (and be exported so the compiler doesn't optimize out its reference)
var AlwaysFalse bool
func init() {
if AlwaysFalse {
// by referencing this C init() in a noop test, it will ensure the compiler
// links in the C function.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
C.init()
}
}

View File

@@ -1,91 +0,0 @@
package nsenter
import (
"encoding/json"
"fmt"
"os"
"os/exec"
"strings"
"testing"
)
type pid struct {
Pid int `json:"Pid"`
}
func TestNsenterAlivePid(t *testing.T) {
args := []string{"nsenter-exec"}
r, w, err := os.Pipe()
if err != nil {
t.Fatalf("failed to create pipe %v", err)
}
cmd := &exec.Cmd{
Path: os.Args[0],
Args: args,
ExtraFiles: []*os.File{w},
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid()), "_LIBCONTAINER_INITPIPE=3"},
}
if err := cmd.Start(); err != nil {
t.Fatalf("nsenter failed to start %v", err)
}
w.Close()
decoder := json.NewDecoder(r)
var pid *pid
if err := decoder.Decode(&pid); err != nil {
t.Fatalf("%v", err)
}
if err := cmd.Wait(); err != nil {
t.Fatalf("nsenter exits with a non-zero exit status")
}
p, err := os.FindProcess(pid.Pid)
if err != nil {
t.Fatalf("%v", err)
}
p.Wait()
}
func TestNsenterInvalidPid(t *testing.T) {
args := []string{"nsenter-exec"}
cmd := &exec.Cmd{
Path: os.Args[0],
Args: args,
Env: []string{"_LIBCONTAINER_INITPID=-1"},
}
err := cmd.Run()
if err == nil {
t.Fatal("nsenter exits with a zero exit status")
}
}
func TestNsenterDeadPid(t *testing.T) {
dead_cmd := exec.Command("true")
if err := dead_cmd.Run(); err != nil {
t.Fatal(err)
}
args := []string{"nsenter-exec"}
cmd := &exec.Cmd{
Path: os.Args[0],
Args: args,
Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)},
}
err := cmd.Run()
if err == nil {
t.Fatal("nsenter exits with a zero exit status")
}
}
func init() {
if strings.HasPrefix(os.Args[0], "nsenter-") {
os.Exit(0)
}
return
}

View File

@@ -1,5 +0,0 @@
// +build !linux !cgo
package nsenter
import "C"

View File

@@ -1,206 +0,0 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <linux/limits.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <signal.h>
#include <setjmp.h>
#include <sched.h>
#include <signal.h>
/* All arguments should be above stack, because it grows down */
struct clone_arg {
/*
* Reserve some space for clone() to locate arguments
* and retcode in this place
*/
char stack[4096] __attribute__ ((aligned(16)));
char stack_ptr[0];
jmp_buf *env;
};
#define pr_perror(fmt, ...) fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__)
static int child_func(void *_arg)
{
struct clone_arg *arg = (struct clone_arg *)_arg;
longjmp(*arg->env, 1);
}
// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12)
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
#define _GNU_SOURCE
#include "syscall.h"
#if defined(__NR_setns) && !defined(SYS_setns)
#define SYS_setns __NR_setns
#endif
#ifdef SYS_setns
int setns(int fd, int nstype)
{
return syscall(SYS_setns, fd, nstype);
}
#endif
#endif
static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
static int clone_parent(jmp_buf * env)
{
struct clone_arg ca;
int child;
ca.env = env;
child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
return child;
}
void nsexec()
{
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" };
const int num = sizeof(namespaces) / sizeof(char *);
jmp_buf env;
char buf[PATH_MAX], *val;
int i, tfd, self_tfd, child, len, pipenum, consolefd = -1;
pid_t pid;
char *console;
val = getenv("_LIBCONTAINER_INITPID");
if (val == NULL)
return;
pid = atoi(val);
snprintf(buf, sizeof(buf), "%d", pid);
if (strcmp(val, buf)) {
pr_perror("Unable to parse _LIBCONTAINER_INITPID");
exit(1);
}
val = getenv("_LIBCONTAINER_INITPIPE");
if (val == NULL) {
pr_perror("Child pipe not found");
exit(1);
}
pipenum = atoi(val);
snprintf(buf, sizeof(buf), "%d", pipenum);
if (strcmp(val, buf)) {
pr_perror("Unable to parse _LIBCONTAINER_INITPIPE");
exit(1);
}
console = getenv("_LIBCONTAINER_CONSOLE_PATH");
if (console != NULL) {
consolefd = open(console, O_RDWR);
if (consolefd < 0) {
pr_perror("Failed to open console %s", console);
exit(1);
}
}
/* Check that the specified process exists */
snprintf(buf, PATH_MAX - 1, "/proc/%d/ns", pid);
tfd = open(buf, O_DIRECTORY | O_RDONLY);
if (tfd == -1) {
pr_perror("Failed to open \"%s\"", buf);
exit(1);
}
self_tfd = open("/proc/self/ns", O_DIRECTORY | O_RDONLY);
if (self_tfd == -1) {
pr_perror("Failed to open /proc/self/ns");
exit(1);
}
for (i = 0; i < num; i++) {
struct stat st;
struct stat self_st;
int fd;
/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
if (fstatat(tfd, namespaces[i], &st, 0) == -1) {
// Ignore nonexistent namespaces.
if (errno == ENOENT)
continue;
}
/* Skip namespaces we're already part of */
if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 &&
st.st_ino == self_st.st_ino) {
continue;
}
fd = openat(tfd, namespaces[i], O_RDONLY);
if (fd == -1) {
pr_perror("Failed to open ns file %s for ns %s", buf,
namespaces[i]);
exit(1);
}
// Set the namespace.
if (setns(fd, 0) == -1) {
pr_perror("Failed to setns for %s", namespaces[i]);
exit(1);
}
close(fd);
}
close(self_tfd);
close(tfd);
if (setjmp(env) == 1) {
// Child
if (setsid() == -1) {
pr_perror("setsid failed");
exit(1);
}
if (consolefd != -1) {
if (ioctl(consolefd, TIOCSCTTY, 0) == -1) {
pr_perror("ioctl TIOCSCTTY failed");
exit(1);
}
if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) {
pr_perror("Failed to dup 0");
exit(1);
}
if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) {
pr_perror("Failed to dup 1");
exit(1);
}
if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) {
pr_perror("Failed to dup 2");
exit(1);
}
}
// Finish executing, let the Go runtime take over.
return;
}
// Parent
// We must fork to actually enter the PID namespace, use CLONE_PARENT
// so the child can have the right parent, and we don't need to forward
// the child's exit code or resend its death signal.
child = clone_parent(&env);
if (child < 0) {
pr_perror("Unable to fork");
exit(1);
}
len = snprintf(buf, sizeof(buf), "{ \"pid\" : %d }\n", child);
if (write(pipenum, buf, len) != len) {
pr_perror("Unable to send a child pid");
kill(child, SIGKILL);
exit(1);
}
exit(0);
}

View File

@@ -1,89 +0,0 @@
package libcontainer
import (
"fmt"
"io"
"math"
"os"
)
type processOperations interface {
wait() (*os.ProcessState, error)
signal(sig os.Signal) error
pid() int
}
// Process specifies the configuration and IO for a process inside
// a container.
type Process struct {
// The command to be run followed by any arguments.
Args []string
// Env specifies the environment variables for the process.
Env []string
// User will set the uid and gid of the executing process running inside the container
// local to the container's user and group configuration.
User string
// Cwd will change the processes current working directory inside the container's rootfs.
Cwd string
// Stdin is a pointer to a reader which provides the standard input stream.
Stdin io.Reader
// Stdout is a pointer to a writer which receives the standard output stream.
Stdout io.Writer
// Stderr is a pointer to a writer which receives the standard error stream.
Stderr io.Writer
// ExtraFiles specifies additional open files to be inherited by the container
ExtraFiles []*os.File
// consolePath is the path to the console allocated to the container.
consolePath string
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
Capabilities []string
ops processOperations
}
// Wait waits for the process to exit.
// Wait releases any resources associated with the Process
func (p Process) Wait() (*os.ProcessState, error) {
if p.ops == nil {
return nil, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
}
return p.ops.wait()
}
// Pid returns the process ID
func (p Process) Pid() (int, error) {
// math.MinInt32 is returned here, because it's invalid value
// for the kill() system call.
if p.ops == nil {
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
}
return p.ops.pid(), nil
}
// Signal sends a signal to the Process.
func (p Process) Signal(sig os.Signal) error {
if p.ops == nil {
return newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
}
return p.ops.signal(sig)
}
// NewConsole creates new console for process and returns it
func (p *Process) NewConsole(rootuid int) (Console, error) {
console, err := newConsole(rootuid, rootuid)
if err != nil {
return nil, err
}
p.consolePath = console.Path()
return console, nil
}

View File

@@ -1,314 +0,0 @@
// +build linux
package libcontainer
import (
"encoding/json"
"errors"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"syscall"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
)
type parentProcess interface {
// pid returns the pid for the running process.
pid() int
// start starts the process execution.
start() error
// send a SIGKILL to the process and wait for the exit.
terminate() error
// wait waits on the process returning the process state.
wait() (*os.ProcessState, error)
// startTime return's the process start time.
startTime() (string, error)
signal(os.Signal) error
externalDescriptors() []string
setExternalDescriptors(fds []string)
}
type setnsProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
cgroupPaths map[string]string
config *initConfig
fds []string
}
func (p *setnsProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *setnsProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.pid(), s)
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
if err = p.execSetns(); err != nil {
return newSystemError(err)
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemError(err)
}
}
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
return newSystemError(err)
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemError(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err)
}
if ierr != nil {
p.wait()
return newSystemError(ierr)
}
return nil
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
func (p *setnsProcess) execSetns() error {
err := p.cmd.Start()
p.childPipe.Close()
if err != nil {
return newSystemError(err)
}
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return newSystemError(err)
}
if !status.Success() {
p.cmd.Wait()
return newSystemError(&exec.ExitError{ProcessState: status})
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemError(err)
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
}
p.cmd.Process = process
return nil
}
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
// avoid the process becomming a zombie.
func (p *setnsProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *setnsProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
// Return actual ProcessState even on Wait error
return p.cmd.ProcessState, err
}
func (p *setnsProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *setnsProcess) externalDescriptors() []string {
return p.fds
}
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
config *initConfig
manager cgroups.Manager
container *linuxContainer
fds []string
}
func (p *initProcess) pid() int {
return p.cmd.Process.Pid
}
func (p *initProcess) externalDescriptors() []string {
return p.fds
}
func (p *initProcess) start() (err error) {
defer p.parentPipe.Close()
err = p.cmd.Start()
p.childPipe.Close()
if err != nil {
return newSystemError(err)
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
if err != nil {
return newSystemError(err)
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemError(err)
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
}
}()
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
for _, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemError(err)
}
}
}
if err := p.createNetworkInterfaces(); err != nil {
return newSystemError(err)
}
if err := p.sendConfig(); err != nil {
return newSystemError(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemError(err)
}
if ierr != nil {
return newSystemError(ierr)
}
return nil
}
func (p *initProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
if err != nil {
return p.cmd.ProcessState, err
}
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.cmd.SysProcAttr.Cloneflags&syscall.CLONE_NEWPID == 0 {
killCgroupProcesses(p.manager)
}
return p.cmd.ProcessState, nil
}
func (p *initProcess) terminate() error {
if p.cmd.Process == nil {
return nil
}
err := p.cmd.Process.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *initProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
}
func (p *initProcess) sendConfig() error {
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
return err
}
// shutdown writes for the parent side of the pipe
return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR)
}
func (p *initProcess) createNetworkInterfaces() error {
for _, config := range p.config.Config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
n := &network{
Network: *config,
}
if err := strategy.create(n, p.pid()); err != nil {
return err
}
p.config.Networks = append(p.config.Networks, n)
}
return nil
}
func (p *initProcess) signal(sig os.Signal) error {
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.pid(), s)
}
func (p *initProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func getPipeFds(pid int) ([]string, error) {
fds := make([]string, 3)
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
for i := 0; i < 3; i++ {
f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f)
if err != nil {
return fds, err
}
fds[i] = target
}
return fds, nil
}

View File

@@ -1,122 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"os"
"github.com/opencontainers/runc/libcontainer/system"
)
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
var (
err error
)
proc, err := os.FindProcess(pid)
if err != nil {
return nil, err
}
started, err := system.GetProcessStartTime(pid)
if err != nil {
return nil, err
}
return &restoredProcess{
proc: proc,
processStartTime: started,
fds: fds,
}, nil
}
type restoredProcess struct {
proc *os.Process
processStartTime string
fds []string
}
func (p *restoredProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *restoredProcess) pid() int {
return p.proc.Pid
}
func (p *restoredProcess) terminate() error {
err := p.proc.Kill()
if _, werr := p.wait(); err == nil {
err = werr
}
return err
}
func (p *restoredProcess) wait() (*os.ProcessState, error) {
// TODO: how do we wait on the actual process?
// maybe use --exec-cmd in criu
st, err := p.proc.Wait()
if err != nil {
return nil, err
}
return st, nil
}
func (p *restoredProcess) startTime() (string, error) {
return p.processStartTime, nil
}
func (p *restoredProcess) signal(s os.Signal) error {
return p.proc.Signal(s)
}
func (p *restoredProcess) externalDescriptors() []string {
return p.fds
}
func (p *restoredProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
// nonChildProcess represents a process where the calling process is not
// the parent process. This process is created when a factory loads a container from
// a persisted state.
type nonChildProcess struct {
processPid int
processStartTime string
fds []string
}
func (p *nonChildProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *nonChildProcess) pid() int {
return p.processPid
}
func (p *nonChildProcess) terminate() error {
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
}
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *nonChildProcess) startTime() (string, error) {
return p.processStartTime, nil
}
func (p *nonChildProcess) signal(s os.Signal) error {
proc, err := os.FindProcess(p.processPid)
if err != nil {
return err
}
return proc.Signal(s)
}
func (p *nonChildProcess) externalDescriptors() []string {
return p.fds
}
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}

View File

@@ -1,665 +0,0 @@
// +build linux
package libcontainer
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/symlink"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/label"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemError(err)
}
setupDev := len(config.Devices) == 0
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
if err := mountCmd(precmd); err != nil {
return newSystemError(err)
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemError(err)
}
for _, postcmd := range m.PostmountCmds {
if err := mountCmd(postcmd); err != nil {
return newSystemError(err)
}
}
}
if !setupDev {
if err := createDevices(config); err != nil {
return newSystemError(err)
}
if err := setupPtmx(config, console); err != nil {
return newSystemError(err)
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemError(err)
}
}
if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemError(err)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
} else {
err = pivotRoot(config.Rootfs, config.PivotDir)
}
if err != nil {
return newSystemError(err)
}
if !setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemError(err)
}
}
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemError(err)
}
}
syscall.Umask(0022)
return nil
}
func mountCmd(cmd configs.Command) error {
command := exec.Command(cmd.Path, cmd.Args[:]...)
command.Env = cmd.Env
command.Dir = cmd.Dir
if out, err := command.CombinedOutput(); err != nil {
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
}
return nil
}
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
var (
dest = m.Destination
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
switch m.Device {
case "proc", "sysfs":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
// Selinux kernels do not support labeling of /proc or /sys
return mountPropagate(m, rootfs, "")
case "mqueue":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
// older kernels do not support labeling of /dev/mqueue
if err := mountPropagate(m, rootfs, ""); err != nil {
return err
}
}
return label.SetFileLabel(dest, mountLabel)
case "tmpfs":
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
if stat != nil {
if err = os.Chmod(dest, stat.Mode()); err != nil {
return err
}
}
return nil
case "devpts":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel)
case "securityfs":
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel)
case "bind":
stat, err := os.Stat(m.Source)
if err != nil {
// error out if the source of a bind mount does not exist as we will be
// unable to bind anything to it.
return err
}
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
if dest, err = symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
// bind mount won't change mount options, we need remount to make mount options effective.
// first check that we have non-default options required before attempting a remount
if m.Flags&^(syscall.MS_REC|syscall.MS_REMOUNT|syscall.MS_BIND) != 0 {
// only remount if unique mount options are set
if err := remount(m, rootfs); err != nil {
return err
}
}
if m.Relabel != "" {
if err := label.Validate(m.Relabel); err != nil {
return err
}
shared := label.IsShared(m.Relabel)
if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
return err
}
}
case "cgroup":
binds, err := getCgroupMounts(m)
if err != nil {
return err
}
var merged []string
for _, b := range binds {
ss := filepath.Base(b.Destination)
if strings.Contains(ss, ",") {
merged = append(merged, ss)
}
}
tmpfs := &configs.Mount{
Source: "tmpfs",
Device: "tmpfs",
Destination: m.Destination,
Flags: defaultMountFlags,
Data: "mode=755",
PropagationFlags: m.PropagationFlags,
}
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
return err
}
for _, b := range binds {
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
return err
}
}
// create symlinks for merged cgroups
cwd, err := os.Getwd()
if err != nil {
return err
}
if err := os.Chdir(filepath.Join(rootfs, m.Destination)); err != nil {
return err
}
for _, mc := range merged {
for _, ss := range strings.Split(mc, ",") {
if err := os.Symlink(mc, ss); err != nil {
// if cgroup already exists, then okay(it could have been created before)
if os.IsExist(err) {
continue
}
os.Chdir(cwd)
return err
}
}
}
if err := os.Chdir(cwd); err != nil {
return err
}
if m.Flags&syscall.MS_RDONLY != 0 {
// remount cgroup root as readonly
mcgrouproot := &configs.Mount{
Destination: m.Destination,
Flags: defaultMountFlags | syscall.MS_RDONLY,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
}
}
default:
return fmt.Errorf("unknown mount device %q to %q", m.Device, m.Destination)
}
return nil
}
func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
mounts, err := cgroups.GetCgroupMounts()
if err != nil {
return nil, err
}
cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
var binds []*configs.Mount
for _, mm := range mounts {
dir, err := mm.GetThisCgroupDir(cgroupPaths)
if err != nil {
return nil, err
}
relDir, err := filepath.Rel(mm.Root, dir)
if err != nil {
return nil, err
}
binds = append(binds, &configs.Mount{
Device: "bind",
Source: filepath.Join(mm.Mountpoint, relDir),
Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags,
PropagationFlags: m.PropagationFlags,
})
}
return binds, nil
}
// checkMountDestination checks to ensure that the mount destination is not over the
// top of /proc or /sys.
// dest is required to be an abs path and have any symlinks resolved before calling this function.
func checkMountDestination(rootfs, dest string) error {
if filepath.Clean(rootfs) == filepath.Clean(dest) {
return fmt.Errorf("mounting into / is prohibited")
}
invalidDestinations := []string{
"/proc",
}
for _, invalid := range invalidDestinations {
path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
if err != nil {
return err
}
if path == "." || !strings.HasPrefix(path, "..") {
return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
}
}
return nil
}
func setupDevSymlinks(rootfs string) error {
var links = [][2]string{
{"/proc/self/fd", "/dev/fd"},
{"/proc/self/fd/0", "/dev/stdin"},
{"/proc/self/fd/1", "/dev/stdout"},
{"/proc/self/fd/2", "/dev/stderr"},
}
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
// in /dev if it exists in /proc.
if _, err := os.Stat("/proc/kcore"); err == nil {
links = append(links, [2]string{"/proc/kcore", "/dev/kcore"})
}
for _, link := range links {
var (
src = link[0]
dst = filepath.Join(rootfs, link[1])
)
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
return fmt.Errorf("symlink %s %s %s", src, dst, err)
}
}
return nil
}
// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
// this method will make them point to `/dev/null` in this container's rootfs. This
// needs to be called after we chroot/pivot into the container's rootfs so that any
// symlinks are resolved locally.
func reOpenDevNull() error {
var stat, devNullStat syscall.Stat_t
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
if err != nil {
return fmt.Errorf("Failed to open /dev/null - %s", err)
}
defer file.Close()
if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
return err
}
for fd := 0; fd < 3; fd++ {
if err := syscall.Fstat(fd, &stat); err != nil {
return err
}
if stat.Rdev == devNullStat.Rdev {
// Close and re-open the fd.
if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil {
return err
}
}
}
return nil
}
// Create the device nodes in the container.
func createDevices(config *configs.Config) error {
oldMask := syscall.Umask(0000)
for _, node := range config.Devices {
// containers running in a user namespace are not allowed to mknod
// devices so we can just bind mount it from the host.
if err := createDeviceNode(config.Rootfs, node, config.Namespaces.Contains(configs.NEWUSER)); err != nil {
syscall.Umask(oldMask)
return err
}
}
syscall.Umask(oldMask)
return nil
}
// Creates the device node in the rootfs of the container.
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
dest := filepath.Join(rootfs, node.Path)
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
return err
}
if bind {
f, err := os.Create(dest)
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "")
}
if err := mknodDevice(dest, node); err != nil {
if os.IsExist(err) {
return nil
}
return err
}
return nil
}
func mknodDevice(dest string, node *configs.Device) error {
fileMode := node.FileMode
switch node.Type {
case 'c':
fileMode |= syscall.S_IFCHR
case 'b':
fileMode |= syscall.S_IFBLK
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
return err
}
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
}
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
for _, m := range mountinfo {
if m.Mountpoint == dir {
return m
}
}
return nil
}
// Get the parent mount point of directory passed in as argument. Also return
// optional fields.
func getParentMount(rootfs string) (string, string, error) {
var path string
mountinfos, err := mount.GetMounts()
if err != nil {
return "", "", err
}
mountinfo := getMountInfo(mountinfos, rootfs)
if mountinfo != nil {
return rootfs, mountinfo.Optional, nil
}
path = rootfs
for {
path = filepath.Dir(path)
mountinfo = getMountInfo(mountinfos, path)
if mountinfo != nil {
return path, mountinfo.Optional, nil
}
if path == "/" {
break
}
}
// If we are here, we did not find parent mount. Something is wrong.
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
}
// Make parent mount private if it was shared
func rootfsParentMountPrivate(config *configs.Config) error {
sharedMount := false
parentMount, optionalOpts, err := getParentMount(config.Rootfs)
if err != nil {
return err
}
optsSplit := strings.Split(optionalOpts, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
sharedMount = true
break
}
}
// Make parent mount PRIVATE if it was shared. It is needed for two
// reasons. First of all pivot_root() will fail if parent mount is
// shared. Secondly when we bind mount rootfs it will propagate to
// parent namespace and we don't want that to happen.
if sharedMount {
return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "")
}
return nil
}
func prepareRoot(config *configs.Config) error {
flag := syscall.MS_SLAVE | syscall.MS_REC
if config.RootPropagation != 0 {
flag = config.RootPropagation
}
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
return err
}
if err := rootfsParentMountPrivate(config); err != nil {
return err
}
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
}
func setReadonly() error {
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
}
func setupPtmx(config *configs.Config, console *linuxConsole) error {
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
}
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != nil {
return console.mount(config.Rootfs, config.MountLabel)
}
return nil
}
func pivotRoot(rootfs, pivotBaseDir string) error {
if pivotBaseDir == "" {
pivotBaseDir = "/"
}
tmpDir := filepath.Join(rootfs, pivotBaseDir)
if err := os.MkdirAll(tmpDir, 0755); err != nil {
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
}
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
}
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
// path to pivot dir now changed, update
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
// Make pivotDir rprivate to make sure any of the unmounts don't
// propagate to parent.
if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
return err
}
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
}
return os.Remove(pivotDir)
}
func msMoveRoot(rootfs string) error {
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
return err
}
if err := syscall.Chroot("."); err != nil {
return err
}
return syscall.Chdir("/")
}
// createIfNotExists creates a file or a directory only if it does not already exist.
func createIfNotExists(path string, isDir bool) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
if isDir {
return os.MkdirAll(path, 0755)
}
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return err
}
f, err := os.OpenFile(path, os.O_CREATE, 0755)
if err != nil {
return err
}
f.Close()
}
}
return nil
}
// remountReadonly will bind over the top of an existing path and ensure that it is read-only.
func remountReadonly(path string) error {
for i := 0; i < 5; i++ {
if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) {
switch err {
case syscall.EINVAL:
// Probably not a mountpoint, use bind-mount
if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
return err
}
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
case syscall.EBUSY:
time.Sleep(100 * time.Millisecond)
continue
default:
return err
}
}
return nil
}
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
}
// maskFile bind mounts /dev/null over the top of the specified path inside a container
// to avoid security issues from processes reading information from non-namespace aware mounts ( proc/kcore ).
func maskFile(path string) error {
if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
return err
}
return nil
}
// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
func writeSystemProperty(key, value string) error {
keyPath := strings.Replace(key, ".", "/", -1)
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
}
func remount(m *configs.Mount, rootfs string) error {
var (
dest = m.Destination
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
return err
}
return nil
}
// Do the mount operation followed by additional mounts required to take care
// of propagation flags.
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
var (
dest = m.Destination
data = label.FormatMountLabel(m.Data, mountLabel)
)
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
return err
}
for _, pflag := range m.PropagationFlags {
if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
return err
}
}
return nil
}

View File

@@ -1,37 +0,0 @@
// +build linux
package libcontainer
import "testing"
func TestCheckMountDestOnProc(t *testing.T) {
dest := "/rootfs/proc/"
err := checkMountDestination("/rootfs", dest)
if err == nil {
t.Fatal("destination inside proc should return an error")
}
}
func TestCheckMountDestInSys(t *testing.T) {
dest := "/rootfs//sys/fs/cgroup"
err := checkMountDestination("/rootfs", dest)
if err != nil {
t.Fatal("destination inside /sys should not return an error")
}
}
func TestCheckMountDestFalsePositive(t *testing.T) {
dest := "/rootfs/sysfiles/fs/cgroup"
err := checkMountDestination("/rootfs", dest)
if err != nil {
t.Fatal(err)
}
}
func TestCheckMountRoot(t *testing.T) {
dest := "/rootfs"
err := checkMountDestination("/rootfs", dest)
if err == nil {
t.Fatal(err)
}
}

View File

@@ -1,11 +0,0 @@
// +build linux,go1.5
package libcontainer
import "syscall"
// Set the GidMappingsEnableSetgroups member to true, so the process's
// setgroups proc entry wont be set to 'deny' if GidMappings are set
func enableSetgroups(sys *syscall.SysProcAttr) {
sys.GidMappingsEnableSetgroups = true
}

View File

@@ -1,44 +0,0 @@
// +build linux
package libcontainer
import (
"os"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
)
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
config *initConfig
}
func (l *linuxSetnsInit) Init() error {
if err := setupRlimits(l.config.Config); err != nil {
return err
}
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
return err
}
if l.config.Config.Seccomp != nil {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
return err
}
if l.config.Config.ProcessLabel != "" {
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
return err
}
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View File

@@ -1,113 +0,0 @@
// +build linux
package libcontainer
import (
"os"
"syscall"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
)
type linuxStandardInit struct {
parentPid int
config *initConfig
}
func (l *linuxStandardInit) Init() error {
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
return err
}
var console *linuxConsole
if l.config.Console != "" {
console = newConsoleFromPath(l.config.Console)
if err := console.dupStdio(); err != nil {
return err
}
}
if _, err := syscall.Setsid(); err != nil {
return err
}
if console != nil {
if err := system.Setctty(); err != nil {
return err
}
}
if err := setupNetwork(l.config); err != nil {
return err
}
if err := setupRoute(l.config.Config); err != nil {
return err
}
if err := setupRlimits(l.config.Config); err != nil {
return err
}
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
return err
}
label.Init()
// InitializeMountNamespace() can be executed only for a new mount namespace
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := setupRootfs(l.config.Config, console); err != nil {
return err
}
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := syscall.Sethostname([]byte(hostname)); err != nil {
return err
}
}
if err := apparmor.ApplyProfile(l.config.Config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.Config.ProcessLabel); err != nil {
return err
}
for key, value := range l.config.Config.Sysctl {
if err := writeSystemProperty(key, value); err != nil {
return err
}
}
for _, path := range l.config.Config.ReadonlyPaths {
if err := remountReadonly(path); err != nil {
return err
}
}
for _, path := range l.config.Config.MaskPaths {
if err := maskFile(path); err != nil {
return err
}
}
pdeath, err := system.GetParentDeathSignal()
if err != nil {
return err
}
if l.config.Config.Seccomp != nil {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil {
return err
}
// finalizeNamespace can change user/group which clears the parent death
// signal, so we restore it here.
if err := pdeath.Restore(); err != nil {
return err
}
// compare the parent from the inital start of the init process and make sure that it did not change.
// if the parent changes that means it died and we were reparened to something else so we should
// just kill ourself and not cause problems for someone else.
if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View File

@@ -1,15 +0,0 @@
package libcontainer
type NetworkInterface struct {
// Name is the name of the network interface.
Name string
RxBytes uint64
RxPackets uint64
RxErrors uint64
RxDropped uint64
TxBytes uint64
TxPackets uint64
TxErrors uint64
TxDropped uint64
}

View File

@@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View File

@@ -1,8 +0,0 @@
package libcontainer
import "github.com/opencontainers/runc/libcontainer/cgroups"
type Stats struct {
Interfaces []*NetworkInterface
CgroupStats *cgroups.Stats
}

View File

@@ -1,5 +0,0 @@
package libcontainer
type Stats struct {
Interfaces []*NetworkInterface
}

View File

@@ -1,77 +0,0 @@
// +build linux
package system
import (
"os/exec"
"syscall"
"unsafe"
)
type ParentDeathSignal int
func (p ParentDeathSignal) Restore() error {
if p == 0 {
return nil
}
current, err := GetParentDeathSignal()
if err != nil {
return err
}
if p == current {
return nil
}
return p.Set()
}
func (p ParentDeathSignal) Set() error {
return SetParentDeathSignal(uintptr(p))
}
func Execv(cmd string, args []string, env []string) error {
name, err := exec.LookPath(cmd)
if err != nil {
return err
}
return syscall.Exec(name, args, env)
}
func SetParentDeathSignal(sig uintptr) error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
return err
}
return nil
}
func GetParentDeathSignal() (ParentDeathSignal, error) {
var sig int
_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
if err != 0 {
return -1, err
}
return ParentDeathSignal(sig), nil
}
func SetKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
return err
}
return nil
}
func ClearKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
return err
}
return nil
}
func Setctty() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
return err
}
return nil
}

View File

@@ -1,27 +0,0 @@
package system
import (
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
// look in /proc to find the process start time so that we can verify
// that this pid has started after ourself
func GetProcessStartTime(pid int) (string, error) {
data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
if err != nil {
return "", err
}
parts := strings.Split(string(data), " ")
// the starttime is located at pos 22
// from the man page
//
// starttime %llu (was %lu before Linux 2.6)
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
// (divide by sysconf(_SC_CLK_TCK)).
return parts[22-1], nil // starts at 1
}

View File

@@ -1,40 +0,0 @@
package system
import (
"fmt"
"runtime"
"syscall"
)
// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
//
// We need different setns values for the different platforms and arch
// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
var setNsMap = map[string]uintptr{
"linux/386": 346,
"linux/arm64": 268,
"linux/amd64": 308,
"linux/arm": 375,
"linux/ppc": 350,
"linux/ppc64": 350,
"linux/ppc64le": 350,
"linux/s390x": 339,
}
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
func SysSetns() uint32 {
return uint32(sysSetns)
}
func Setns(fd uintptr, flags uintptr) error {
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
if !exists {
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
}
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
if err != 0 {
return err
}
return nil
}

View File

@@ -1,25 +0,0 @@
// +build linux,386
package system
import (
"syscall"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
return
}
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}
return
}

View File

@@ -1,25 +0,0 @@
// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x
package system
import (
"syscall"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
return
}
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}
return
}

View File

@@ -1,25 +0,0 @@
// +build linux,arm
package system
import (
"syscall"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
return
}
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}
return
}

View File

@@ -1,12 +0,0 @@
// +build cgo,linux cgo,freebsd
package system
/*
#include <unistd.h>
*/
import "C"
func GetClockTicks() int {
return int(C.sysconf(C._SC_CLK_TCK))
}

View File

@@ -1,15 +0,0 @@
// +build !cgo windows
package system
func GetClockTicks() int {
// TODO figure out a better alternative for platforms where we're missing cgo
//
// TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency().
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx
//
// An example of its usage can be found here.
// https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx
return 100
}

View File

@@ -1,99 +0,0 @@
package system
import (
"syscall"
"unsafe"
)
var _zero uintptr
// Returns the size of xattrs and nil error
// Requires path, takes allocated []byte or nil as last argument
func Llistxattr(path string, dest []byte) (size int, err error) {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return -1, err
}
var newpathBytes unsafe.Pointer
if len(dest) > 0 {
newpathBytes = unsafe.Pointer(&dest[0])
} else {
newpathBytes = unsafe.Pointer(&_zero)
}
_size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0)
size = int(_size)
if errno != 0 {
return -1, errno
}
return size, nil
}
// Returns a []byte slice if the xattr is set and nil otherwise
// Requires path and its attribute as arguments
func Lgetxattr(path string, attr string) ([]byte, error) {
var sz int
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return nil, err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return nil, err
}
// Start with a 128 length byte array
sz = 128
dest := make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
switch {
case errno == syscall.ENODATA:
return nil, errno
case errno == syscall.ENOTSUP:
return nil, errno
case errno == syscall.ERANGE:
// 128 byte array might just not be good enough,
// A dummy buffer is used ``uintptr(0)`` to get real size
// of the xattrs on disk
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
sz = int(_sz)
if sz < 0 {
return nil, errno
}
dest = make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
if errno != 0 {
return nil, errno
}
case errno != 0:
return nil, errno
}
sz = int(_sz)
return dest[:sz], nil
}
func Lsetxattr(path string, attr string, data []byte, flags int) error {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return err
}
var dataBytes unsafe.Pointer
if len(data) > 0 {
dataBytes = unsafe.Pointer(&data[0])
} else {
dataBytes = unsafe.Pointer(&_zero)
}
_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
if errno != 0 {
return errno
}
return nil
}

View File

@@ -2,13 +2,15 @@ package user
import (
"errors"
"fmt"
"syscall"
)
var (
// The current operating system does not provide the required data for user lookups.
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
// No matching entries found in file.
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
ErrNoGroupEntries = errors.New("no matching entries in group file")
)
func lookupUser(filter func(u User) bool) (User, error) {
@@ -27,7 +29,7 @@ func lookupUser(filter func(u User) bool) (User, error) {
// No user entries found.
if len(users) == 0 {
return User{}, fmt.Errorf("no matching entries in passwd file")
return User{}, ErrNoPasswdEntries
}
// Assume the first entry is the "correct" one.
@@ -75,7 +77,7 @@ func lookupGroup(filter func(g Group) bool) (Group, error) {
// No user entries found.
if len(groups) == 0 {
return Group{}, fmt.Errorf("no matching entries in group file")
return Group{}, ErrNoGroupEntries
}
// Assume the first entry is the "correct" one.

View File

@@ -15,7 +15,7 @@ const (
)
var (
ErrRange = fmt.Errorf("Uids and gids must be in range %d-%d", minId, maxId)
ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId)
)
type User struct {
@@ -42,29 +42,30 @@ func parseLine(line string, v ...interface{}) {
parts := strings.Split(line, ":")
for i, p := range parts {
// Ignore cases where we don't have enough fields to populate the arguments.
// Some configuration files like to misbehave.
if len(v) <= i {
// if we have more "parts" than we have places to put them, bail for great "tolerance" of naughty configuration files
break
}
// Use the type of the argument to figure out how to parse it, scanf() style.
// This is legit.
switch e := v[i].(type) {
case *string:
// "root", "adm", "/bin/bash"
*e = p
case *int:
// "0", "4", "1000"
// ignore string to int conversion errors, for great "tolerance" of naughty configuration files
// "numbers", with conversion errors ignored because of some misbehaving configuration files.
*e, _ = strconv.Atoi(p)
case *[]string:
// "", "root", "root,adm,daemon"
// Comma-separated lists.
if p != "" {
*e = strings.Split(p, ",")
} else {
*e = []string{}
}
default:
// panic, because this is a programming/logic error, not a runtime one
panic("parseLine expects only pointers! argument " + strconv.Itoa(i) + " is not a pointer!")
// Someone goof'd when writing code using this function. Scream so they can hear us.
panic(fmt.Sprintf("parseLine only accepts {*string, *int, *[]string} as arguments! %#v is not a pointer!", e))
}
}
}
@@ -106,8 +107,8 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
return nil, err
}
text := strings.TrimSpace(s.Text())
if text == "" {
line := strings.TrimSpace(s.Text())
if line == "" {
continue
}
@@ -117,10 +118,7 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
// root:x:0:0:root:/root:/bin/bash
// adm:x:3:4:adm:/var/adm:/bin/false
p := User{}
parseLine(
text,
&p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell,
)
parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell)
if filter == nil || filter(p) {
out = append(out, p)
@@ -135,6 +133,7 @@ func ParseGroupFile(path string) ([]Group, error) {
if err != nil {
return nil, err
}
defer group.Close()
return ParseGroup(group)
}
@@ -178,10 +177,7 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
// root:x:0:root
// adm:x:4:root,adm,daemon
p := Group{}
parseLine(
text,
&p.Name, &p.Pass, &p.Gid, &p.List,
)
parseLine(text, &p.Name, &p.Pass, &p.Gid, &p.List)
if filter == nil || filter(p) {
out = append(out, p)
@@ -192,9 +188,10 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
}
type ExecUser struct {
Uid, Gid int
Sgids []int
Home string
Uid int
Gid int
Sgids []int
Home string
}
// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the
@@ -235,12 +232,12 @@ func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath
// * "uid:gid
// * "user:gid"
// * "uid:group"
//
// It should be noted that if you specify a numeric user or group id, they will
// not be evaluated as usernames (only the metadata will be filled). So attempting
// to parse a user with user.Name = "1337" will produce the user with a UID of
// 1337.
func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
var (
userArg, groupArg string
name string
)
if defaults == nil {
defaults = new(ExecUser)
}
@@ -258,87 +255,113 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
user.Sgids = []int{}
}
// allow for userArg to have either "user" syntax, or optionally "user:group" syntax
// Allow for userArg to have either "user" syntax, or optionally "user:group" syntax
var userArg, groupArg string
parseLine(userSpec, &userArg, &groupArg)
// Convert userArg and groupArg to be numeric, so we don't have to execute
// Atoi *twice* for each iteration over lines.
uidArg, uidErr := strconv.Atoi(userArg)
gidArg, gidErr := strconv.Atoi(groupArg)
// Find the matching user.
users, err := ParsePasswdFilter(passwd, func(u User) bool {
if userArg == "" {
// Default to current state of the user.
return u.Uid == user.Uid
}
return u.Name == userArg || strconv.Itoa(u.Uid) == userArg
if uidErr == nil {
// If the userArg is numeric, always treat it as a UID.
return uidArg == u.Uid
}
return u.Name == userArg
})
// If we can't find the user, we have to bail.
if err != nil && passwd != nil {
if userArg == "" {
userArg = strconv.Itoa(user.Uid)
}
return nil, fmt.Errorf("Unable to find user %v: %v", userArg, err)
return nil, fmt.Errorf("unable to find user %s: %v", userArg, err)
}
haveUser := users != nil && len(users) > 0
if haveUser {
// if we found any user entries that matched our filter, let's take the first one as "correct"
name = users[0].Name
var matchedUserName string
if len(users) > 0 {
// First match wins, even if there's more than one matching entry.
matchedUserName = users[0].Name
user.Uid = users[0].Uid
user.Gid = users[0].Gid
user.Home = users[0].Home
} else if userArg != "" {
// we asked for a user but didn't find them... let's check to see if we wanted a numeric user
user.Uid, err = strconv.Atoi(userArg)
if err != nil {
// not numeric - we have to bail
return nil, fmt.Errorf("Unable to find user %v", userArg)
// If we can't find a user with the given username, the only other valid
// option is if it's a numeric username with no associated entry in passwd.
if uidErr != nil {
// Not numeric.
return nil, fmt.Errorf("unable to find user %s: %v", userArg, ErrNoPasswdEntries)
}
user.Uid = uidArg
// Must be inside valid uid range.
if user.Uid < minId || user.Uid > maxId {
return nil, ErrRange
}
// if userArg couldn't be found in /etc/passwd but is numeric, just roll with it - this is legit
// Okay, so it's numeric. We can just roll with this.
}
if groupArg != "" || name != "" {
// On to the groups. If we matched a username, we need to do this because of
// the supplementary group IDs.
if groupArg != "" || matchedUserName != "" {
groups, err := ParseGroupFilter(group, func(g Group) bool {
// Explicit group format takes precedence.
if groupArg != "" {
return g.Name == groupArg || strconv.Itoa(g.Gid) == groupArg
}
// Check if user is a member.
for _, u := range g.List {
if u == name {
return true
// If the group argument isn't explicit, we'll just search for it.
if groupArg == "" {
// Check if user is a member of this group.
for _, u := range g.List {
if u == matchedUserName {
return true
}
}
return false
}
return false
if gidErr == nil {
// If the groupArg is numeric, always treat it as a GID.
return gidArg == g.Gid
}
return g.Name == groupArg
})
if err != nil && group != nil {
return nil, fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err)
return nil, fmt.Errorf("unable to find groups for spec %v: %v", matchedUserName, err)
}
haveGroup := groups != nil && len(groups) > 0
// Only start modifying user.Gid if it is in explicit form.
if groupArg != "" {
if haveGroup {
// if we found any group entries that matched our filter, let's take the first one as "correct"
if len(groups) > 0 {
// First match wins, even if there's more than one matching entry.
user.Gid = groups[0].Gid
} else {
// we asked for a group but didn't find id... let's check to see if we wanted a numeric group
user.Gid, err = strconv.Atoi(groupArg)
if err != nil {
// not numeric - we have to bail
return nil, fmt.Errorf("Unable to find group %v", groupArg)
}
} else if groupArg != "" {
// If we can't find a group with the given name, the only other valid
// option is if it's a numeric group name with no associated entry in group.
// Ensure gid is inside gid range.
if gidErr != nil {
// Not numeric.
return nil, fmt.Errorf("unable to find group %s: %v", groupArg, ErrNoGroupEntries)
}
user.Gid = gidArg
// Must be inside valid gid range.
if user.Gid < minId || user.Gid > maxId {
return nil, ErrRange
}
// if groupArg couldn't be found in /etc/group but is numeric, just roll with it - this is legit
// Okay, so it's numeric. We can just roll with this.
}
} else if haveGroup {
// If implicit group format, fill supplementary gids.
} else if len(groups) > 0 {
// Supplementary group ids only make sense if in the implicit form.
user.Sgids = make([]int, len(groups))
for i, group := range groups {
user.Sgids[i] = group.Gid

View File

@@ -1,472 +0,0 @@
package user
import (
"io"
"reflect"
"sort"
"strconv"
"strings"
"testing"
)
func TestUserParseLine(t *testing.T) {
var (
a, b string
c []string
d int
)
parseLine("", &a, &b)
if a != "" || b != "" {
t.Fatalf("a and b should be empty ('%v', '%v')", a, b)
}
parseLine("a", &a, &b)
if a != "a" || b != "" {
t.Fatalf("a should be 'a' and b should be empty ('%v', '%v')", a, b)
}
parseLine("bad boys:corny cows", &a, &b)
if a != "bad boys" || b != "corny cows" {
t.Fatalf("a should be 'bad boys' and b should be 'corny cows' ('%v', '%v')", a, b)
}
parseLine("", &c)
if len(c) != 0 {
t.Fatalf("c should be empty (%#v)", c)
}
parseLine("d,e,f:g:h:i,j,k", &c, &a, &b, &c)
if a != "g" || b != "h" || len(c) != 3 || c[0] != "i" || c[1] != "j" || c[2] != "k" {
t.Fatalf("a should be 'g', b should be 'h', and c should be ['i','j','k'] ('%v', '%v', '%#v')", a, b, c)
}
parseLine("::::::::::", &a, &b, &c)
if a != "" || b != "" || len(c) != 0 {
t.Fatalf("a, b, and c should all be empty ('%v', '%v', '%#v')", a, b, c)
}
parseLine("not a number", &d)
if d != 0 {
t.Fatalf("d should be 0 (%v)", d)
}
parseLine("b:12:c", &a, &d, &b)
if a != "b" || b != "c" || d != 12 {
t.Fatalf("a should be 'b' and b should be 'c', and d should be 12 ('%v', '%v', %v)", a, b, d)
}
}
func TestUserParsePasswd(t *testing.T) {
users, err := ParsePasswdFilter(strings.NewReader(`
root:x:0:0:root:/root:/bin/bash
adm:x:3:4:adm:/var/adm:/bin/false
this is just some garbage data
`), nil)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if len(users) != 3 {
t.Fatalf("Expected 3 users, got %v", len(users))
}
if users[0].Uid != 0 || users[0].Name != "root" {
t.Fatalf("Expected users[0] to be 0 - root, got %v - %v", users[0].Uid, users[0].Name)
}
if users[1].Uid != 3 || users[1].Name != "adm" {
t.Fatalf("Expected users[1] to be 3 - adm, got %v - %v", users[1].Uid, users[1].Name)
}
}
func TestUserParseGroup(t *testing.T) {
groups, err := ParseGroupFilter(strings.NewReader(`
root:x:0:root
adm:x:4:root,adm,daemon
this is just some garbage data
`), nil)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if len(groups) != 3 {
t.Fatalf("Expected 3 groups, got %v", len(groups))
}
if groups[0].Gid != 0 || groups[0].Name != "root" || len(groups[0].List) != 1 {
t.Fatalf("Expected groups[0] to be 0 - root - 1 member, got %v - %v - %v", groups[0].Gid, groups[0].Name, len(groups[0].List))
}
if groups[1].Gid != 4 || groups[1].Name != "adm" || len(groups[1].List) != 3 {
t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List))
}
}
func TestValidGetExecUser(t *testing.T) {
const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
defaultExecUser := ExecUser{
Uid: 8888,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
}
tests := []struct {
ref string
expected ExecUser
}{
{
ref: "root",
expected: ExecUser{
Uid: 0,
Gid: 0,
Sgids: []int{0, 1234},
Home: "/root",
},
},
{
ref: "adm",
expected: ExecUser{
Uid: 42,
Gid: 43,
Sgids: []int{1234},
Home: "/var/adm",
},
},
{
ref: "root:adm",
expected: ExecUser{
Uid: 0,
Gid: 43,
Sgids: defaultExecUser.Sgids,
Home: "/root",
},
},
{
ref: "adm:1234",
expected: ExecUser{
Uid: 42,
Gid: 1234,
Sgids: defaultExecUser.Sgids,
Home: "/var/adm",
},
},
{
ref: "42:1234",
expected: ExecUser{
Uid: 42,
Gid: 1234,
Sgids: defaultExecUser.Sgids,
Home: "/var/adm",
},
},
{
ref: "1337:1234",
expected: ExecUser{
Uid: 1337,
Gid: 1234,
Sgids: defaultExecUser.Sgids,
Home: defaultExecUser.Home,
},
},
{
ref: "1337",
expected: ExecUser{
Uid: 1337,
Gid: defaultExecUser.Gid,
Sgids: defaultExecUser.Sgids,
Home: defaultExecUser.Home,
},
},
{
ref: "",
expected: ExecUser{
Uid: defaultExecUser.Uid,
Gid: defaultExecUser.Gid,
Sgids: defaultExecUser.Sgids,
Home: defaultExecUser.Home,
},
},
}
for _, test := range tests {
passwd := strings.NewReader(passwdContent)
group := strings.NewReader(groupContent)
execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group)
if err != nil {
t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error())
t.Fail()
continue
}
if !reflect.DeepEqual(test.expected, *execUser) {
t.Logf("got: %#v", execUser)
t.Logf("expected: %#v", test.expected)
t.Fail()
continue
}
}
}
func TestInvalidGetExecUser(t *testing.T) {
const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
tests := []string{
// No such user/group.
"notuser",
"notuser:notgroup",
"root:notgroup",
"notuser:adm",
"8888:notgroup",
"notuser:8888",
// Invalid user/group values.
"-1:0",
"0:-3",
"-5:-2",
}
for _, test := range tests {
passwd := strings.NewReader(passwdContent)
group := strings.NewReader(groupContent)
execUser, err := GetExecUser(test, nil, passwd, group)
if err == nil {
t.Logf("got unexpected success when parsing '%s': %#v", test, execUser)
t.Fail()
continue
}
}
}
func TestGetExecUserNilSources(t *testing.T) {
const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
defaultExecUser := ExecUser{
Uid: 8888,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
}
tests := []struct {
ref string
passwd, group bool
expected ExecUser
}{
{
ref: "",
passwd: false,
group: false,
expected: ExecUser{
Uid: 8888,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
},
},
{
ref: "root",
passwd: true,
group: false,
expected: ExecUser{
Uid: 0,
Gid: 0,
Sgids: []int{8888},
Home: "/root",
},
},
{
ref: "0",
passwd: false,
group: false,
expected: ExecUser{
Uid: 0,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
},
},
{
ref: "0:0",
passwd: false,
group: false,
expected: ExecUser{
Uid: 0,
Gid: 0,
Sgids: []int{8888},
Home: "/8888",
},
},
}
for _, test := range tests {
var passwd, group io.Reader
if test.passwd {
passwd = strings.NewReader(passwdContent)
}
if test.group {
group = strings.NewReader(groupContent)
}
execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group)
if err != nil {
t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error())
t.Fail()
continue
}
if !reflect.DeepEqual(test.expected, *execUser) {
t.Logf("got: %#v", execUser)
t.Logf("expected: %#v", test.expected)
t.Fail()
continue
}
}
}
func TestGetAdditionalGroups(t *testing.T) {
const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
adm:x:4343:root,adm-duplicate
this is just some garbage data
`
tests := []struct {
groups []string
expected []int
hasError bool
}{
{
// empty group
groups: []string{},
expected: []int{},
},
{
// single group
groups: []string{"adm"},
expected: []int{43},
},
{
// multiple groups
groups: []string{"adm", "grp"},
expected: []int{43, 1234},
},
{
// invalid group
groups: []string{"adm", "grp", "not-exist"},
expected: nil,
hasError: true,
},
{
// group with numeric id
groups: []string{"43"},
expected: []int{43},
},
{
// group with unknown numeric id
groups: []string{"adm", "10001"},
expected: []int{43, 10001},
},
{
// groups specified twice with numeric and name
groups: []string{"adm", "43"},
expected: []int{43},
},
{
// groups with too small id
groups: []string{"-1"},
expected: nil,
hasError: true,
},
{
// groups with too large id
groups: []string{strconv.Itoa(1 << 31)},
expected: nil,
hasError: true,
},
}
for _, test := range tests {
group := strings.NewReader(groupContent)
gids, err := GetAdditionalGroups(test.groups, group)
if test.hasError && err == nil {
t.Errorf("Parse(%#v) expects error but has none", test)
continue
}
if !test.hasError && err != nil {
t.Errorf("Parse(%#v) has error %v", test, err)
continue
}
sort.Sort(sort.IntSlice(gids))
if !reflect.DeepEqual(gids, test.expected) {
t.Errorf("Gids(%v), expect %v from groups %v", gids, test.expected, test.groups)
}
}
}
func TestGetAdditionalGroupsNumeric(t *testing.T) {
tests := []struct {
groups []string
expected []int
hasError bool
}{
{
// numeric groups only
groups: []string{"1234", "5678"},
expected: []int{1234, 5678},
},
{
// numeric and alphabetic
groups: []string{"1234", "fake"},
expected: nil,
hasError: true,
},
}
for _, test := range tests {
gids, err := GetAdditionalGroups(test.groups, nil)
if test.hasError && err == nil {
t.Errorf("Parse(%#v) expects error but has none", test)
continue
}
if !test.hasError && err != nil {
t.Errorf("Parse(%#v) has error %v", test, err)
continue
}
sort.Sort(sort.IntSlice(gids))
if !reflect.DeepEqual(gids, test.expected) {
t.Errorf("Gids(%v), expect %v from groups %v", gids, test.expected, test.groups)
}
}
}

View File

@@ -1,91 +0,0 @@
package main
import (
"os"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/opencontainers/specs"
)
const (
version = "0.3"
usage = `Open Container Initiative runtime
runc is a command line client for running applications packaged according to
the Open Container Format (OCF) and is a compliant implementation of the
Open Container Initiative specification.
runc integrates well with existing process supervisors to provide a production
container runtime environment for applications. It can be used with your
existing process monitoring tools and the container will be spawned as a
direct child of the process supervisor.
After creating config files for your root filesystem with runc, you can execute a
container in your shell by running:
# cd /mycontainer
# runc start [ -c spec-config-file ] [ -r runtime-config-file ]
If not specified, the default value for the 'spec-config-file' is 'config.json',
and the default value for the 'runtime-config-file' is 'runtime.json'.`
)
func main() {
app := cli.NewApp()
app.Name = "runc"
app.Usage = usage
app.Version = version
app.Flags = []cli.Flag{
cli.StringFlag{
Name: "id",
Value: getDefaultID(),
Usage: "specify the ID to be used for the container",
},
cli.BoolFlag{
Name: "debug",
Usage: "enable debug output for logging",
},
cli.StringFlag{
Name: "log",
Usage: "set the log file path where internal debug information is written",
},
cli.StringFlag{
Name: "root",
Value: specs.LinuxStateDirectory,
Usage: "root directory for storage of container state (this should be located in tmpfs)",
},
cli.StringFlag{
Name: "criu",
Value: "criu",
Usage: "path to the criu binary used for checkpoint and restore",
},
}
app.Commands = []cli.Command{
startCommand,
checkpointCommand,
eventsCommand,
restoreCommand,
killCommand,
specCommand,
pauseCommand,
resumeCommand,
execCommand,
}
app.Before = func(context *cli.Context) error {
if context.GlobalBool("debug") {
logrus.SetLevel(logrus.DebugLevel)
}
if path := context.GlobalString("log"); path != "" {
f, err := os.Create(path)
if err != nil {
return err
}
logrus.SetOutput(f)
}
return nil
}
if err := app.Run(os.Args); err != nil {
logrus.Fatal(err)
}
}

View File

@@ -1,5 +0,0 @@
// +build linux
package main
import _ "github.com/opencontainers/runc/libcontainer/nsenter"

View File

@@ -1,24 +0,0 @@
// +build !linux
package main
import (
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
)
func getDefaultID() string {
return ""
}
var (
checkpointCommand cli.Command
eventsCommand cli.Command
restoreCommand cli.Command
specCommand cli.Command
killCommand cli.Command
)
func runAction(*cli.Context) {
logrus.Fatal("Current OS is not supported yet")
}

View File

@@ -1,33 +0,0 @@
// +build linux
package main
import "github.com/codegangsta/cli"
var pauseCommand = cli.Command{
Name: "pause",
Usage: "pause suspends all processes inside the container",
Action: func(context *cli.Context) {
container, err := getContainer(context)
if err != nil {
fatal(err)
}
if err := container.Pause(); err != nil {
fatal(err)
}
},
}
var resumeCommand = cli.Command{
Name: "resume",
Usage: "resume resumes all processes that have been previously paused",
Action: func(context *cli.Context) {
container, err := getContainer(context)
if err != nil {
fatal(err)
}
if err := container.Resume(); err != nil {
fatal(err)
}
},
}

View File

@@ -1,169 +0,0 @@
// +build linux
package main
import (
"fmt"
"os"
"path/filepath"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/specs"
)
var restoreCommand = cli.Command{
Name: "restore",
Usage: "restore a container from a previous checkpoint",
Flags: []cli.Flag{
cli.StringFlag{
Name: "image-path",
Value: "",
Usage: "path to criu image files for restoring",
},
cli.StringFlag{
Name: "work-path",
Value: "",
Usage: "path for saving work files and logs",
},
cli.BoolFlag{
Name: "tcp-established",
Usage: "allow open tcp connections",
},
cli.BoolFlag{
Name: "ext-unix-sk",
Usage: "allow external unix sockets",
},
cli.BoolFlag{
Name: "shell-job",
Usage: "allow shell jobs",
},
cli.BoolFlag{
Name: "file-locks",
Usage: "handle file locks, for safety",
},
cli.StringFlag{
Name: "manage-cgroups-mode",
Value: "",
Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'.",
},
cli.StringFlag{
Name: "config-file, c",
Value: "config.json",
Usage: "path to spec file for writing",
},
cli.StringFlag{
Name: "runtime-file, r",
Value: "runtime.json",
Usage: "path for runtime file for writing",
},
},
Action: func(context *cli.Context) {
imagePath := context.String("image-path")
if imagePath == "" {
imagePath = getDefaultImagePath(context)
}
spec, rspec, err := loadSpec(context.String("config-file"), context.String("runtime-file"))
if err != nil {
fatal(err)
}
config, err := createLibcontainerConfig(context.GlobalString("id"), spec, rspec)
if err != nil {
fatal(err)
}
status, err := restoreContainer(context, spec, config, imagePath)
if err != nil {
fatal(err)
}
os.Exit(status)
},
}
func restoreContainer(context *cli.Context, spec *specs.LinuxSpec, config *configs.Config, imagePath string) (code int, err error) {
rootuid := 0
factory, err := loadFactory(context)
if err != nil {
return -1, err
}
container, err := factory.Load(context.GlobalString("id"))
if err != nil {
container, err = factory.Create(context.GlobalString("id"), config)
if err != nil {
return -1, err
}
}
options := criuOptions(context)
status, err := container.Status()
if err != nil {
logrus.Error(err)
}
if status == libcontainer.Running {
fatal(fmt.Errorf("Container with id %s already running", context.GlobalString("id")))
}
setManageCgroupsMode(context, options)
// ensure that the container is always removed if we were the process
// that created it.
defer func() {
if err != nil {
return
}
status, err := container.Status()
if err != nil {
logrus.Error(err)
}
if status != libcontainer.Checkpointed {
if err := container.Destroy(); err != nil {
logrus.Error(err)
}
if err := os.RemoveAll(options.ImagesDirectory); err != nil {
logrus.Error(err)
}
}
}()
process := &libcontainer.Process{
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
tty, err := newTty(spec.Process.Terminal, process, rootuid)
if err != nil {
return -1, err
}
handler := newSignalHandler(tty)
defer handler.Close()
if err := container.Restore(process, options); err != nil {
cstatus, cerr := container.Status()
if cerr != nil {
logrus.Error(cerr)
}
if cstatus == libcontainer.Destroyed {
dest := filepath.Join(context.GlobalString("root"), context.GlobalString("id"))
if errVal := os.RemoveAll(dest); errVal != nil {
logrus.Error(errVal)
}
}
return -1, err
}
return handler.forward(process)
}
func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
imagePath := getCheckpointImagePath(context)
if err := os.MkdirAll(imagePath, 0655); err != nil {
fatal(err)
}
return &libcontainer.CriuOpts{
ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"),
LeaveRunning: context.Bool("leave-running"),
TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"),
FileLocks: context.Bool("file-locks"),
}
}

View File

@@ -1,49 +0,0 @@
package main
import "fmt"
const (
RLIMIT_CPU = iota // CPU time in sec
RLIMIT_FSIZE // Maximum filesize
RLIMIT_DATA // max data size
RLIMIT_STACK // max stack size
RLIMIT_CORE // max core file size
RLIMIT_RSS // max resident set size
RLIMIT_NPROC // max number of processes
RLIMIT_NOFILE // max number of open files
RLIMIT_MEMLOCK // max locked-in-memory address space
RLIMIT_AS // address space limit
RLIMIT_LOCKS // maximum file locks held
RLIMIT_SIGPENDING // max number of pending signals
RLIMIT_MSGQUEUE // maximum bytes in POSIX mqueues
RLIMIT_NICE // max nice prio allowed to raise to
RLIMIT_RTPRIO // maximum realtime priority
RLIMIT_RTTIME // timeout for RT tasks in us
)
var rlimitMap = map[string]int{
"RLIMIT_CPU": RLIMIT_CPU,
"RLIMIT_FSIZE": RLIMIT_FSIZE,
"RLIMIT_DATA": RLIMIT_DATA,
"RLIMIT_STACK": RLIMIT_STACK,
"RLIMIT_CORE": RLIMIT_CORE,
"RLIMIT_RSS": RLIMIT_RSS,
"RLIMIT_NPROC": RLIMIT_NPROC,
"RLIMIT_NOFILE": RLIMIT_NOFILE,
"RLIMIT_MEMLOCK": RLIMIT_MEMLOCK,
"RLIMIT_AS": RLIMIT_AS,
"RLIMIT_LOCKS": RLIMIT_LOCKS,
"RLIMIT_SGPENDING": RLIMIT_SIGPENDING,
"RLIMIT_MSGQUEUE": RLIMIT_MSGQUEUE,
"RLIMIT_NICE": RLIMIT_NICE,
"RLIMIT_RTPRIO": RLIMIT_RTPRIO,
"RLIMIT_RTTIME": RLIMIT_RTTIME,
}
func strToRlimit(key string) (int, error) {
rl, ok := rlimitMap[key]
if !ok {
return 0, fmt.Errorf("Wrong rlimit value: %s", key)
}
return rl, nil
}

View File

@@ -1,113 +0,0 @@
// +build linux
package main
import (
"os"
"os/signal"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/utils"
)
const signalBufferSize = 2048
// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals
// while still forwarding all other signals to the process.
func newSignalHandler(tty *tty) *signalHandler {
// ensure that we have a large buffer size so that we do not miss any signals
// incase we are not processing them fast enough.
s := make(chan os.Signal, signalBufferSize)
// handle all signals for the process.
signal.Notify(s)
return &signalHandler{
tty: tty,
signals: s,
}
}
// exit models a process exit status with the pid and
// exit status.
type exit struct {
pid int
status int
}
type signalHandler struct {
signals chan os.Signal
tty *tty
}
// forward handles the main signal event loop forwarding, resizing, or reaping depending
// on the signal received.
func (h *signalHandler) forward(process *libcontainer.Process) (int, error) {
// make sure we know the pid of our main process so that we can return
// after it dies.
pid1, err := process.Pid()
if err != nil {
return -1, err
}
// perform the initial tty resize.
h.tty.resize()
for s := range h.signals {
switch s {
case syscall.SIGWINCH:
h.tty.resize()
case syscall.SIGCHLD:
exits, err := h.reap()
if err != nil {
logrus.Error(err)
}
for _, e := range exits {
logrus.WithFields(logrus.Fields{
"pid": e.pid,
"status": e.status,
}).Debug("process exited")
if e.pid == pid1 {
// call Wait() on the process even though we already have the exit
// status because we must ensure that any of the go specific process
// fun such as flushing pipes are complete before we return.
process.Wait()
return e.status, nil
}
}
default:
logrus.Debugf("sending signal to process %s", s)
if err := syscall.Kill(pid1, s.(syscall.Signal)); err != nil {
logrus.Error(err)
}
}
}
return -1, nil
}
// reap runs wait4 in a loop until we have finished processing any existing exits
// then returns all exits to the main event loop for further processing.
func (h *signalHandler) reap() (exits []exit, err error) {
var (
ws syscall.WaitStatus
rus syscall.Rusage
)
for {
pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, &rus)
if err != nil {
if err == syscall.ECHILD {
return exits, nil
}
return nil, err
}
if pid <= 0 {
return exits, nil
}
exits = append(exits, exit{
pid: pid,
status: utils.ExitStatus(ws),
})
}
}
func (h *signalHandler) Close() error {
return h.tty.Close()
}

View File

@@ -1,712 +0,0 @@
// +build linux
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/specs"
)
var specCommand = cli.Command{
Name: "spec",
Usage: "create a new specification file",
Flags: []cli.Flag{
cli.StringFlag{
Name: "config-file, c",
Value: "config.json",
Usage: "path to spec config file for writing",
},
cli.StringFlag{
Name: "runtime-file, r",
Value: "runtime.json",
Usage: "path to runtime config file for writing",
},
},
Action: func(context *cli.Context) {
spec := specs.LinuxSpec{
Spec: specs.Spec{
Version: specs.Version,
Platform: specs.Platform{
OS: runtime.GOOS,
Arch: runtime.GOARCH,
},
Root: specs.Root{
Path: "rootfs",
Readonly: true,
},
Process: specs.Process{
Terminal: true,
User: specs.User{},
Args: []string{
"sh",
},
Env: []string{
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM=xterm",
},
},
Hostname: "shell",
Mounts: []specs.MountPoint{
{
Name: "proc",
Path: "/proc",
},
{
Name: "dev",
Path: "/dev",
},
{
Name: "devpts",
Path: "/dev/pts",
},
{
Name: "shm",
Path: "/dev/shm",
},
{
Name: "mqueue",
Path: "/dev/mqueue",
},
{
Name: "sysfs",
Path: "/sys",
},
{
Name: "cgroup",
Path: "/sys/fs/cgroup",
},
},
},
Linux: specs.Linux{
Capabilities: []string{
"CAP_AUDIT_WRITE",
"CAP_KILL",
"CAP_NET_BIND_SERVICE",
},
},
}
rspec := specs.LinuxRuntimeSpec{
RuntimeSpec: specs.RuntimeSpec{
Mounts: map[string]specs.Mount{
"proc": {
Type: "proc",
Source: "proc",
Options: nil,
},
"dev": {
Type: "tmpfs",
Source: "tmpfs",
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
},
"devpts": {
Type: "devpts",
Source: "devpts",
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
},
"shm": {
Type: "tmpfs",
Source: "shm",
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
},
"mqueue": {
Type: "mqueue",
Source: "mqueue",
Options: []string{"nosuid", "noexec", "nodev"},
},
"sysfs": {
Type: "sysfs",
Source: "sysfs",
Options: []string{"nosuid", "noexec", "nodev"},
},
"cgroup": {
Type: "cgroup",
Source: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
},
},
},
Linux: specs.LinuxRuntime{
Namespaces: []specs.Namespace{
{
Type: "pid",
},
{
Type: "network",
},
{
Type: "ipc",
},
{
Type: "uts",
},
{
Type: "mount",
},
},
Rlimits: []specs.Rlimit{
{
Type: "RLIMIT_NOFILE",
Hard: uint64(1024),
Soft: uint64(1024),
},
},
Devices: []specs.Device{
{
Type: 'c',
Path: "/dev/null",
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
UID: 0,
GID: 0,
},
{
Type: 'c',
Path: "/dev/random",
Major: 1,
Minor: 8,
Permissions: "rwm",
FileMode: 0666,
UID: 0,
GID: 0,
},
{
Type: 'c',
Path: "/dev/full",
Major: 1,
Minor: 7,
Permissions: "rwm",
FileMode: 0666,
UID: 0,
GID: 0,
},
{
Type: 'c',
Path: "/dev/tty",
Major: 5,
Minor: 0,
Permissions: "rwm",
FileMode: 0666,
UID: 0,
GID: 0,
},
{
Type: 'c',
Path: "/dev/zero",
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
UID: 0,
GID: 0,
},
{
Type: 'c',
Path: "/dev/urandom",
Major: 1,
Minor: 9,
Permissions: "rwm",
FileMode: 0666,
UID: 0,
GID: 0,
},
},
Resources: &specs.Resources{
Memory: specs.Memory{
Swappiness: -1,
},
},
Seccomp: specs.Seccomp{
DefaultAction: "SCMP_ACT_ALLOW",
Syscalls: []*specs.Syscall{},
},
},
}
checkNoFile := func(name string) error {
_, err := os.Stat(name)
if err == nil {
return fmt.Errorf("File %s exists. Remove it first", name)
}
if !os.IsNotExist(err) {
return err
}
return nil
}
cName := context.String("config-file")
rName := context.String("runtime-file")
if err := checkNoFile(cName); err != nil {
logrus.Fatal(err)
}
if err := checkNoFile(rName); err != nil {
logrus.Fatal(err)
}
data, err := json.MarshalIndent(&spec, "", "\t")
if err != nil {
logrus.Fatal(err)
}
if err := ioutil.WriteFile(cName, data, 0666); err != nil {
logrus.Fatal(err)
}
rdata, err := json.MarshalIndent(&rspec, "", "\t")
if err != nil {
logrus.Fatal(err)
}
if err := ioutil.WriteFile(rName, rdata, 0666); err != nil {
logrus.Fatal(err)
}
},
}
var namespaceMapping = map[specs.NamespaceType]configs.NamespaceType{
specs.PIDNamespace: configs.NEWPID,
specs.NetworkNamespace: configs.NEWNET,
specs.MountNamespace: configs.NEWNS,
specs.UserNamespace: configs.NEWUSER,
specs.IPCNamespace: configs.NEWIPC,
specs.UTSNamespace: configs.NEWUTS,
}
var mountPropagationMapping = map[string]int{
"rprivate": syscall.MS_PRIVATE | syscall.MS_REC,
"private": syscall.MS_PRIVATE,
"rslave": syscall.MS_SLAVE | syscall.MS_REC,
"slave": syscall.MS_SLAVE,
"rshared": syscall.MS_SHARED | syscall.MS_REC,
"shared": syscall.MS_SHARED,
"": syscall.MS_PRIVATE | syscall.MS_REC,
}
// loadSpec loads the specification from the provided path.
// If the path is empty then the default path will be "config.json"
func loadSpec(cPath, rPath string) (spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, err error) {
cf, err := os.Open(cPath)
if err != nil {
if os.IsNotExist(err) {
return nil, nil, fmt.Errorf("JSON specification file at %s not found", cPath)
}
return
}
defer cf.Close()
rf, err := os.Open(rPath)
if err != nil {
if os.IsNotExist(err) {
return nil, nil, fmt.Errorf("JSON runtime config file at %s not found", rPath)
}
return
}
defer rf.Close()
if err = json.NewDecoder(cf).Decode(&spec); err != nil {
return
}
if err = json.NewDecoder(rf).Decode(&rspec); err != nil {
return
}
return spec, rspec, checkSpecVersion(spec)
}
// checkSpecVersion makes sure that the spec version matches runc's while we are in the initial
// development period. It is better to hard fail than have missing fields or options in the spec.
func checkSpecVersion(s *specs.LinuxSpec) error {
if s.Version != specs.Version {
return fmt.Errorf("spec version is not compatible with implemented version %q: spec %q", specs.Version, s.Version)
}
return nil
}
func createLibcontainerConfig(cgroupName string, spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) (*configs.Config, error) {
cwd, err := os.Getwd()
if err != nil {
return nil, err
}
rootfsPath := spec.Root.Path
if !filepath.IsAbs(rootfsPath) {
rootfsPath = filepath.Join(cwd, rootfsPath)
}
config := &configs.Config{
Rootfs: rootfsPath,
Capabilities: spec.Linux.Capabilities,
Readonlyfs: spec.Root.Readonly,
Hostname: spec.Hostname,
}
exists := false
if config.RootPropagation, exists = mountPropagationMapping[rspec.Linux.RootfsPropagation]; !exists {
return nil, fmt.Errorf("rootfsPropagation=%v is not supported", rspec.Linux.RootfsPropagation)
}
for _, ns := range rspec.Linux.Namespaces {
t, exists := namespaceMapping[ns.Type]
if !exists {
return nil, fmt.Errorf("namespace %q does not exist", ns)
}
config.Namespaces.Add(t, ns.Path)
}
if config.Namespaces.Contains(configs.NEWNET) {
config.Networks = []*configs.Network{
{
Type: "loopback",
},
}
}
for _, mp := range spec.Mounts {
m, ok := rspec.Mounts[mp.Name]
if !ok {
return nil, fmt.Errorf("Mount with Name %q not found in runtime config", mp.Name)
}
config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, mp.Path, m))
}
if err := createDevices(rspec, config); err != nil {
return nil, err
}
if err := setupUserNamespace(rspec, config); err != nil {
return nil, err
}
for _, rlimit := range rspec.Linux.Rlimits {
rl, err := createLibContainerRlimit(rlimit)
if err != nil {
return nil, err
}
config.Rlimits = append(config.Rlimits, rl)
}
c, err := createCgroupConfig(cgroupName, rspec, config.Devices)
if err != nil {
return nil, err
}
config.Cgroups = c
if config.Readonlyfs {
setReadonly(config)
config.MaskPaths = []string{
"/proc/kcore",
}
config.ReadonlyPaths = []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
}
}
seccomp, err := setupSeccomp(&rspec.Linux.Seccomp)
if err != nil {
return nil, err
}
config.Seccomp = seccomp
config.Sysctl = rspec.Linux.Sysctl
config.ProcessLabel = rspec.Linux.SelinuxProcessLabel
config.AppArmorProfile = rspec.Linux.ApparmorProfile
for _, g := range spec.Process.User.AdditionalGids {
config.AdditionalGroups = append(config.AdditionalGroups, strconv.FormatUint(uint64(g), 10))
}
createHooks(rspec, config)
config.Version = specs.Version
return config, nil
}
func createLibcontainerMount(cwd, dest string, m specs.Mount) *configs.Mount {
flags, pgflags, data := parseMountOptions(m.Options)
source := m.Source
if m.Type == "bind" {
if !filepath.IsAbs(source) {
source = filepath.Join(cwd, m.Source)
}
}
return &configs.Mount{
Device: m.Type,
Source: source,
Destination: dest,
Data: data,
Flags: flags,
PropagationFlags: pgflags,
}
}
func createCgroupConfig(name string, spec *specs.LinuxRuntimeSpec, devices []*configs.Device) (*configs.Cgroup, error) {
myCgroupPath, err := cgroups.GetThisCgroupDir("devices")
if err != nil {
return nil, err
}
c := &configs.Cgroup{
Name: name,
Parent: myCgroupPath,
AllowedDevices: append(devices, allowedDevices...),
}
r := spec.Linux.Resources
c.Memory = r.Memory.Limit
c.MemoryReservation = r.Memory.Reservation
c.MemorySwap = r.Memory.Swap
c.KernelMemory = r.Memory.Kernel
c.MemorySwappiness = r.Memory.Swappiness
c.CpuShares = r.CPU.Shares
c.CpuQuota = r.CPU.Quota
c.CpuPeriod = r.CPU.Period
c.CpuRtRuntime = r.CPU.RealtimeRuntime
c.CpuRtPeriod = r.CPU.RealtimePeriod
c.CpusetCpus = r.CPU.Cpus
c.CpusetMems = r.CPU.Mems
c.BlkioWeight = r.BlockIO.Weight
c.BlkioLeafWeight = r.BlockIO.LeafWeight
for _, wd := range r.BlockIO.WeightDevice {
weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, wd.Weight, wd.LeafWeight)
c.BlkioWeightDevice = append(c.BlkioWeightDevice, weightDevice)
}
for _, td := range r.BlockIO.ThrottleReadBpsDevice {
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate)
c.BlkioThrottleReadBpsDevice = append(c.BlkioThrottleReadBpsDevice, throttleDevice)
}
for _, td := range r.BlockIO.ThrottleWriteBpsDevice {
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate)
c.BlkioThrottleWriteBpsDevice = append(c.BlkioThrottleWriteBpsDevice, throttleDevice)
}
for _, td := range r.BlockIO.ThrottleReadIOPSDevice {
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate)
c.BlkioThrottleReadIOPSDevice = append(c.BlkioThrottleReadIOPSDevice, throttleDevice)
}
for _, td := range r.BlockIO.ThrottleWriteIOPSDevice {
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, td.Rate)
c.BlkioThrottleWriteIOPSDevice = append(c.BlkioThrottleWriteIOPSDevice, throttleDevice)
}
for _, l := range r.HugepageLimits {
c.HugetlbLimit = append(c.HugetlbLimit, &configs.HugepageLimit{
Pagesize: l.Pagesize,
Limit: l.Limit,
})
}
c.OomKillDisable = r.DisableOOMKiller
c.NetClsClassid = r.Network.ClassID
for _, m := range r.Network.Priorities {
c.NetPrioIfpriomap = append(c.NetPrioIfpriomap, &configs.IfPrioMap{
Interface: m.Name,
Priority: m.Priority,
})
}
return c, nil
}
func createDevices(spec *specs.LinuxRuntimeSpec, config *configs.Config) error {
for _, d := range spec.Linux.Devices {
device := &configs.Device{
Type: d.Type,
Path: d.Path,
Major: d.Major,
Minor: d.Minor,
Permissions: d.Permissions,
FileMode: d.FileMode,
Uid: d.UID,
Gid: d.GID,
}
config.Devices = append(config.Devices, device)
}
return nil
}
func setReadonly(config *configs.Config) {
for _, m := range config.Mounts {
if m.Device == "sysfs" {
m.Flags |= syscall.MS_RDONLY
}
}
}
func setupUserNamespace(spec *specs.LinuxRuntimeSpec, config *configs.Config) error {
if len(spec.Linux.UIDMappings) == 0 {
return nil
}
config.Namespaces.Add(configs.NEWUSER, "")
create := func(m specs.IDMapping) configs.IDMap {
return configs.IDMap{
HostID: int(m.HostID),
ContainerID: int(m.ContainerID),
Size: int(m.Size),
}
}
for _, m := range spec.Linux.UIDMappings {
config.UidMappings = append(config.UidMappings, create(m))
}
for _, m := range spec.Linux.GIDMappings {
config.GidMappings = append(config.GidMappings, create(m))
}
rootUID, err := config.HostUID()
if err != nil {
return err
}
rootGID, err := config.HostGID()
if err != nil {
return err
}
for _, node := range config.Devices {
node.Uid = uint32(rootUID)
node.Gid = uint32(rootGID)
}
return nil
}
func createLibContainerRlimit(rlimit specs.Rlimit) (configs.Rlimit, error) {
rl, err := strToRlimit(rlimit.Type)
if err != nil {
return configs.Rlimit{}, err
}
return configs.Rlimit{
Type: rl,
Hard: uint64(rlimit.Hard),
Soft: uint64(rlimit.Soft),
}, nil
}
// parseMountOptions parses the string and returns the flags, propagation
// flags and any mount data that it contains.
func parseMountOptions(options []string) (int, []int, string) {
var (
flag int
pgflag []int
data []string
)
flags := map[string]struct {
clear bool
flag int
}{
"async": {true, syscall.MS_SYNCHRONOUS},
"atime": {true, syscall.MS_NOATIME},
"bind": {false, syscall.MS_BIND},
"defaults": {false, 0},
"dev": {true, syscall.MS_NODEV},
"diratime": {true, syscall.MS_NODIRATIME},
"dirsync": {false, syscall.MS_DIRSYNC},
"exec": {true, syscall.MS_NOEXEC},
"mand": {false, syscall.MS_MANDLOCK},
"noatime": {false, syscall.MS_NOATIME},
"nodev": {false, syscall.MS_NODEV},
"nodiratime": {false, syscall.MS_NODIRATIME},
"noexec": {false, syscall.MS_NOEXEC},
"nomand": {true, syscall.MS_MANDLOCK},
"norelatime": {true, syscall.MS_RELATIME},
"nostrictatime": {true, syscall.MS_STRICTATIME},
"nosuid": {false, syscall.MS_NOSUID},
"rbind": {false, syscall.MS_BIND | syscall.MS_REC},
"relatime": {false, syscall.MS_RELATIME},
"remount": {false, syscall.MS_REMOUNT},
"ro": {false, syscall.MS_RDONLY},
"rw": {true, syscall.MS_RDONLY},
"strictatime": {false, syscall.MS_STRICTATIME},
"suid": {true, syscall.MS_NOSUID},
"sync": {false, syscall.MS_SYNCHRONOUS},
}
propagationFlags := map[string]struct {
clear bool
flag int
}{
"private": {false, syscall.MS_PRIVATE},
"shared": {false, syscall.MS_SHARED},
"slave": {false, syscall.MS_SLAVE},
"unbindable": {false, syscall.MS_UNBINDABLE},
"rprivate": {false, syscall.MS_PRIVATE | syscall.MS_REC},
"rshared": {false, syscall.MS_SHARED | syscall.MS_REC},
"rslave": {false, syscall.MS_SLAVE | syscall.MS_REC},
"runbindable": {false, syscall.MS_UNBINDABLE | syscall.MS_REC},
}
for _, o := range options {
// If the option does not exist in the flags table or the flag
// is not supported on the platform,
// then it is a data value for a specific fs type
if f, exists := flags[o]; exists && f.flag != 0 {
if f.clear {
flag &= ^f.flag
} else {
flag |= f.flag
}
} else if f, exists := propagationFlags[o]; exists && f.flag != 0 {
pgflag = append(pgflag, f.flag)
} else {
data = append(data, o)
}
}
return flag, pgflag, strings.Join(data, ",")
}
func setupSeccomp(config *specs.Seccomp) (*configs.Seccomp, error) {
if config == nil {
return nil, nil
}
// No default action specified, no syscalls listed, assume seccomp disabled
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
return nil, nil
}
newConfig := new(configs.Seccomp)
newConfig.Syscalls = []*configs.Syscall{}
// Convert default action from string representation
newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction))
if err != nil {
return nil, err
}
newConfig.DefaultAction = newDefaultAction
// Loop through all syscall blocks and convert them to libcontainer format
for _, call := range config.Syscalls {
newAction, err := seccomp.ConvertStringToAction(string(call.Action))
if err != nil {
return nil, err
}
newCall := configs.Syscall{
Name: call.Name,
Action: newAction,
Args: []*configs.Arg{},
}
// Loop through all the arguments of the syscall and convert them
for _, arg := range call.Args {
newOp, err := seccomp.ConvertStringToOperator(string(arg.Op))
if err != nil {
return nil, err
}
newArg := configs.Arg{
Index: arg.Index,
Value: arg.Value,
ValueTwo: arg.ValueTwo,
Op: newOp,
}
newCall.Args = append(newCall.Args, &newArg)
}
newConfig.Syscalls = append(newConfig.Syscalls, &newCall)
}
return newConfig, nil
}
func createHooks(rspec *specs.LinuxRuntimeSpec, config *configs.Config) {
config.Hooks = &configs.Hooks{}
for _, h := range rspec.Hooks.Prestart {
cmd := configs.Command{
Path: h.Path,
Args: h.Args,
Env: h.Env,
}
config.Hooks.Prestart = append(config.Hooks.Prestart, configs.NewCommandHook(cmd))
}
for _, h := range rspec.Hooks.Poststop {
cmd := configs.Command{
Path: h.Path,
Args: h.Args,
Env: h.Env,
}
config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(cmd))
}
}

View File

@@ -1,155 +0,0 @@
// +build linux
package main
import (
"fmt"
"os"
"runtime"
"strconv"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/specs"
)
const SD_LISTEN_FDS_START = 3
// default action is to start a container
var startCommand = cli.Command{
Name: "start",
Usage: "create and run a container",
Flags: []cli.Flag{
cli.StringFlag{
Name: "config-file, c",
Value: "config.json",
Usage: "path to spec config file",
},
cli.StringFlag{
Name: "runtime-file, r",
Value: "runtime.json",
Usage: "path to runtime config file",
},
},
Action: func(context *cli.Context) {
spec, rspec, err := loadSpec(context.String("config-file"), context.String("runtime-file"))
if err != nil {
fatal(err)
}
notifySocket := os.Getenv("NOTIFY_SOCKET")
if notifySocket != "" {
setupSdNotify(spec, rspec, notifySocket)
}
listenFds := os.Getenv("LISTEN_FDS")
listenPid := os.Getenv("LISTEN_PID")
if listenFds != "" && listenPid == strconv.Itoa(os.Getpid()) {
setupSocketActivation(spec, listenFds)
}
if os.Geteuid() != 0 {
logrus.Fatal("runc should be run as root")
}
status, err := startContainer(context, spec, rspec)
if err != nil {
logrus.Fatalf("Container start failed: %v", err)
}
// exit with the container's exit status so any external supervisor is
// notified of the exit with the correct exit status.
os.Exit(status)
},
}
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
fatal(err)
}
panic("--this line should have never been executed, congratulations--")
}
}
func startContainer(context *cli.Context, spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec) (int, error) {
config, err := createLibcontainerConfig(context.GlobalString("id"), spec, rspec)
if err != nil {
return -1, err
}
if _, err := os.Stat(config.Rootfs); err != nil {
if os.IsNotExist(err) {
return -1, fmt.Errorf("Rootfs (%q) does not exist", config.Rootfs)
}
return -1, err
}
rootuid, err := config.HostUID()
if err != nil {
return -1, err
}
factory, err := loadFactory(context)
if err != nil {
return -1, err
}
container, err := factory.Create(context.GlobalString("id"), config)
if err != nil {
return -1, err
}
// ensure that the container is always removed if we were the process
// that created it.
defer destroy(container)
process := newProcess(spec.Process)
// Support on-demand socket activation by passing file descriptors into the container init process.
if os.Getenv("LISTEN_FDS") != "" {
listenFdsInt, err := strconv.Atoi(os.Getenv("LISTEN_FDS"))
if err != nil {
return -1, err
}
for i := SD_LISTEN_FDS_START; i < (listenFdsInt + SD_LISTEN_FDS_START); i++ {
process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), ""))
}
}
tty, err := newTty(spec.Process.Terminal, process, rootuid)
if err != nil {
return -1, err
}
handler := newSignalHandler(tty)
defer handler.Close()
if err := container.Start(process); err != nil {
return -1, err
}
return handler.forward(process)
}
// If systemd is supporting sd_notify protocol, this function will add support
// for sd_notify protocol from within the container.
func setupSdNotify(spec *specs.LinuxSpec, rspec *specs.LinuxRuntimeSpec, notifySocket string) {
mountName := "sdNotify"
spec.Mounts = append(spec.Mounts, specs.MountPoint{Name: mountName, Path: notifySocket})
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket))
rspec.Mounts[mountName] = specs.Mount{Type: "bind", Source: notifySocket, Options: []string{"bind"}}
}
// If systemd is supporting on-demand socket activation, this function will add support
// for on-demand socket activation for the containerized service.
func setupSocketActivation(spec *specs.LinuxSpec, listenFds string) {
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("LISTEN_FDS=%s", listenFds), "LISTEN_PID=1")
}
func destroy(container libcontainer.Container) {
status, err := container.Status()
if err != nil {
logrus.Error(err)
}
if status != libcontainer.Checkpointed {
if err := container.Destroy(); err != nil {
logrus.Error(err)
}
}
}

View File

@@ -1,112 +0,0 @@
// +build linux
package main
import (
"fmt"
"io"
"os"
"syscall"
"github.com/docker/docker/pkg/term"
"github.com/opencontainers/runc/libcontainer"
)
// newTty creates a new tty for use with the container. If a tty is not to be
// created for the process, pipes are created so that the TTY of the parent
// process are not inherited by the container.
func newTty(create bool, p *libcontainer.Process, rootuid int) (*tty, error) {
if create {
return createTty(p, rootuid)
}
return createStdioPipes(p, rootuid)
}
// setup standard pipes so that the TTY of the calling runc process
// is not inherited by the container.
func createStdioPipes(p *libcontainer.Process, rootuid int) (*tty, error) {
var (
t = &tty{}
fds []int
)
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
fds = append(fds, int(r.Fd()), int(w.Fd()))
go io.Copy(w, os.Stdin)
t.closers = append(t.closers, w)
p.Stdin = r
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, int(r.Fd()), int(w.Fd()))
go io.Copy(os.Stdout, r)
p.Stdout = w
t.closers = append(t.closers, r)
if r, w, err = os.Pipe(); err != nil {
return nil, err
}
fds = append(fds, int(r.Fd()), int(w.Fd()))
go io.Copy(os.Stderr, r)
p.Stderr = w
t.closers = append(t.closers, r)
// change the ownership of the pipe fds incase we are in a user namespace.
for _, fd := range fds {
if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
return nil, err
}
}
return t, nil
}
func createTty(p *libcontainer.Process, rootuid int) (*tty, error) {
console, err := p.NewConsole(rootuid)
if err != nil {
return nil, err
}
go io.Copy(console, os.Stdin)
go io.Copy(os.Stdout, console)
state, err := term.SetRawTerminal(os.Stdin.Fd())
if err != nil {
return nil, fmt.Errorf("failed to set the terminal from the stdin: %v", err)
}
t := &tty{
console: console,
state: state,
closers: []io.Closer{
console,
},
}
p.Stderr = nil
p.Stdout = nil
p.Stdin = nil
return t, nil
}
type tty struct {
console libcontainer.Console
state *term.State
closers []io.Closer
}
func (t *tty) Close() error {
for _, c := range t.closers {
c.Close()
}
if t.state != nil {
term.RestoreTerminal(os.Stdin.Fd(), t.state)
}
return nil
}
func (t *tty) resize() error {
if t.console == nil {
return nil
}
ws, err := term.GetWinsize(os.Stdin.Fd())
if err != nil {
return err
}
return term.SetWinsize(t.console.Fd(), ws)
}

View File

@@ -1,177 +0,0 @@
// +build linux
package main
import (
"fmt"
"os"
"path/filepath"
"github.com/codegangsta/cli"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/specs"
)
const wildcard = -1
var allowedDevices = []*configs.Device{
// allow mknod for any device
{
Type: 'c',
Major: wildcard,
Minor: wildcard,
Permissions: "m",
},
{
Type: 'b',
Major: wildcard,
Minor: wildcard,
Permissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
Major: 5,
Minor: 1,
Permissions: "rwm",
},
{
Path: "/dev/tty0",
Type: 'c',
Major: 4,
Minor: 0,
Permissions: "rwm",
},
{
Path: "/dev/tty1",
Type: 'c',
Major: 4,
Minor: 1,
Permissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
Major: 136,
Minor: wildcard,
Permissions: "rwm",
},
{
Path: "",
Type: 'c',
Major: 5,
Minor: 2,
Permissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
Major: 10,
Minor: 200,
Permissions: "rwm",
},
}
var container libcontainer.Container
func containerPreload(context *cli.Context) error {
c, err := getContainer(context)
if err != nil {
return err
}
container = c
return nil
}
var factory libcontainer.Factory
func factoryPreload(context *cli.Context) error {
f, err := loadFactory(context)
if err != nil {
return err
}
factory = f
return nil
}
// loadFactory returns the configured factory instance for execing containers.
func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
root := context.GlobalString("root")
abs, err := filepath.Abs(root)
if err != nil {
return nil, err
}
return libcontainer.New(abs, libcontainer.Cgroupfs, func(l *libcontainer.LinuxFactory) error {
l.CriuPath = context.GlobalString("criu")
return nil
})
}
// getContainer returns the specified container instance by loading it from state
// with the default factory.
func getContainer(context *cli.Context) (libcontainer.Container, error) {
factory, err := loadFactory(context)
if err != nil {
return nil, err
}
container, err := factory.Load(context.GlobalString("id"))
if err != nil {
return nil, err
}
return container, nil
}
// fatal prints the error's details if it is a libcontainer specific error type
// then exits the program with an exit status of 1.
func fatal(err error) {
if lerr, ok := err.(libcontainer.Error); ok {
lerr.Detail(os.Stderr)
os.Exit(1)
}
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
// fatalf formats the errror string with the specified template then exits the
// program with an exit status of 1.
func fatalf(t string, v ...interface{}) {
fmt.Fprintf(os.Stderr, t, v...)
os.Exit(1)
}
// getDefaultID returns a string to be used as the container id based on the
// current working directory of the runc process. This function panics
// if the cwd is unable to be found based on a system error.
func getDefaultID() string {
cwd, err := os.Getwd()
if err != nil {
panic(err)
}
return filepath.Base(cwd)
}
func getDefaultImagePath(context *cli.Context) string {
cwd, err := os.Getwd()
if err != nil {
panic(err)
}
return filepath.Join(cwd, "checkpoint")
}
// newProcess returns a new libcontainer Process with the arguments from the
// spec and stdio from the current process.
func newProcess(p specs.Process) *libcontainer.Process {
return &libcontainer.Process{
Args: p.Args,
Env: p.Env,
// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
Cwd: p.Cwd,
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
}