diff --git a/docs/yaml.md b/docs/yaml.md index 9b3233a23..094fb38f1 100644 --- a/docs/yaml.md +++ b/docs/yaml.md @@ -181,6 +181,50 @@ bind mounted into a container. There are experimental `userns`, `uidMappings` and `gidMappings` options for user namespaces but these are not yet supported, and may have permissions issues in use. +In addition to the parts of the specification above used to generate the OCI spec, there is a `runtime` section in the image specification +which specifies some actions to take place when the container is being started. +- `mkdir` takes a list of directories to create at runtime, in the root mount namespace. These are created before the container is started, so they can be used to create + directories for bind mounts, for example in `/tmp` or `/run` which would otherwise be empty. +- `interfaces` defines a list of actions to perform on a network interface: + - `name` specifies the name of an interface. An existing interface with this name will be moved into the container's network namespace. + - `add` specifies a type of interface to be created in the container's namespace, with the specified name. + - `createInRoot` is a boolean which specifies that the interface being `add`ed should be created in the root namespace first, then moved. This is needed for `wireguard` interfaces. + - `peer` specifies the name of the other end when creating a `veth` interface. This end will remain in the root namespace, where it can be attached to a bridge. Specifying this implies `add: veth`. +- `bindNS` specifies a namespace type and a path where the namespace from the container being created will be bound. This allows a namespace to be set up in an `onboot` container, and then + used later by a `service` container that sets `net: path` to join that network namespace. 
+ +An example of using the `runtime` config to configure a network namespace with `wireguard` and then run `nginx` in that namespace is shown below: +``` +onboot: + - name: dhcpcd + image: linuxkit/dhcpcd: + command: ["/sbin/dhcpcd", "--nobackground", "-f", "/dhcpcd.conf", "-1"] + - name: wg + image: linuxkit/ip: + net: new + binds: + - /etc/wireguard:/etc/wireguard + command: ["sh", "-c", "ip link set dev wg0 up; ip address add dev wg0 192.168.2.1 peer 192.168.2.2; wg setconf wg0 /etc/wireguard/wg0.conf; wg show wg0"] + runtime: + interfaces: + - name: wg0 + add: wireguard + createInRoot: true + bindNS: + net: /run/netns/wg +services: + - name: nginx + image: nginx:alpine + net: /run/netns/wg + capabilities: + - CAP_NET_BIND_SERVICE + - CAP_CHOWN + - CAP_SETUID + - CAP_SETGID + - CAP_DAC_OVERRIDE +``` + + ### Mount Options When mounting filesystem paths into a container - whether as part of `onboot` or `services` - there are several options of which you need to be aware. Using them properly is necessary for your containers to function properly. 
diff --git a/src/moby/build.go b/src/moby/build.go index 6ac138fe3..f98d01e75 100644 --- a/src/moby/build.go +++ b/src/moby/build.go @@ -123,7 +123,7 @@ func enforceContentTrust(fullImageName string, config *TrustConfig) bool { func outputImage(image Image, section string, prefix string, m Moby, idMap map[string]uint32, pull bool, iw *tar.Writer) error { log.Infof(" Create OCI config for %s", image.Image) useTrust := enforceContentTrust(image.Image, &m.Trust) - oci, err := ConfigToOCI(image, useTrust, idMap) + oci, runtime, err := ConfigToOCI(image, useTrust, idMap) if err != nil { return fmt.Errorf("Failed to create OCI spec for %s: %v", image.Image, err) } @@ -131,9 +131,13 @@ func outputImage(image Image, section string, prefix string, m Moby, idMap map[s if err != nil { return fmt.Errorf("Failed to create config for %s: %v", image.Image, err) } + runtimeConfig, err := json.MarshalIndent(runtime, "", " ") + if err != nil { + return fmt.Errorf("Failed to create runtime config for %s: %v", image.Image, err) + } path := path.Join("containers", section, prefix+image.Name) readonly := oci.Root.Readonly - err = ImageBundle(path, image.Image, config, iw, useTrust, pull, readonly) + err = ImageBundle(path, image.Image, config, runtimeConfig, iw, useTrust, pull, readonly) if err != nil { return fmt.Errorf("Failed to extract root filesystem for %s: %v", image.Image, err) } diff --git a/src/moby/config.go b/src/moby/config.go index 39603f1e0..7945966f4 100644 --- a/src/moby/config.go +++ b/src/moby/config.go @@ -88,6 +88,33 @@ type Image struct { Rlimits *[]string `yaml:"rlimits" json:"rlimits,omitempty"` UIDMappings *[]specs.LinuxIDMapping `yaml:"uidMappings" json:"uidMappings,omitempty"` GIDMappings *[]specs.LinuxIDMapping `yaml:"gidMappings" json:"gidMappings,omitempty"` + Runtime *Runtime `yaml:"runtime" json:"runtime,omitempty"` +} + +// Runtime is the type of config processed at runtime, not used to build the OCI spec +type Runtime struct { + Mkdir []string 
`yaml:"mkdir" json:"mkdir,omitempty"` + Interfaces []Interface `yaml:"interfaces" json:"interfaces,omitempty"` + BindNS *Namespaces `yaml:"bindNS" json:"bindNS,omitempty"` +} + +// Namespaces is the type for configuring paths to bind namespaces +type Namespaces struct { + Cgroup string `yaml:"cgroup" json:"cgroup,omitempty"` + Ipc string `yaml:"ipc" json:"ipc,omitempty"` + Mnt string `yaml:"mnt" json:"mnt,omitempty"` + Net string `yaml:"net" json:"net,omitempty"` + Pid string `yaml:"pid" json:"pid,omitempty"` + User string `yaml:"user" json:"user,omitempty"` + Uts string `yaml:"uts" json:"uts,omitempty"` +} + +// Interface is the runtime config for network interfaces +type Interface struct { + Name string `yaml:"name" json:"name,omitempty"` + Add string `yaml:"add" json:"add,omitempty"` + Peer string `yaml:"peer" json:"peer,omitempty"` + CreateInRoot bool `yaml:"createInRoot" json:"createInRoot"` } // github.com/go-yaml/yaml treats map keys as interface{} while encoding/json @@ -261,26 +288,26 @@ func NewImage(config []byte) (Image, error) { return mi, nil } -// ConfigToOCI converts a config specification to an OCI config file -func ConfigToOCI(image Image, trust bool, idMap map[string]uint32) (specs.Spec, error) { +// ConfigToOCI converts a config specification to an OCI config file and a runtime config +func ConfigToOCI(image Image, trust bool, idMap map[string]uint32) (specs.Spec, Runtime, error) { // TODO pass through same docker client to all functions cli, err := dockerClient() if err != nil { - return specs.Spec{}, err + return specs.Spec{}, Runtime{}, err } inspect, err := dockerInspectImage(cli, image.Image, trust) if err != nil { - return specs.Spec{}, err + return specs.Spec{}, Runtime{}, err } - oci, err := ConfigInspectToOCI(image, inspect, idMap) + oci, runtime, err := ConfigInspectToOCI(image, inspect, idMap) if err != nil { - return specs.Spec{}, err + return specs.Spec{}, Runtime{}, err } - return oci, nil + return oci, runtime, nil } func 
defaultMountpoint(tp string) string { @@ -471,6 +498,17 @@ func assignResources(v1, v2 *specs.LinuxResources) specs.LinuxResources { return specs.LinuxResources{} } +// assignRuntime does ordered overrides from Runtime +func assignRuntime(v1, v2 *Runtime) Runtime { + if v2 != nil { + return *v2 + } + if v1 != nil { + return *v1 + } + return Runtime{} +} + // assignStringEmpty does ordered overrides if strings are empty, for // values where there is always an explicit override eg "none" func assignStringEmpty(v1, v2 string) string { @@ -570,8 +608,9 @@ func idNumeric(v interface{}, idMap map[string]uint32) (uint32, error) { } // ConfigInspectToOCI converts a config and the output of image inspect to an OCI config -func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string]uint32) (specs.Spec, error) { +func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string]uint32) (specs.Spec, Runtime, error) { oci := specs.Spec{} + runtime := Runtime{} var inspectConfig container.Config if inspect.Config != nil { @@ -585,7 +624,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string var err error label, err = NewImage([]byte(labelString)) if err != nil { - return oci, err + return oci, runtime, err } } @@ -627,7 +666,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string for _, t := range assignStrings(label.Tmpfs, yaml.Tmpfs) { parts := strings.Split(t, ":") if len(parts) > 2 { - return oci, fmt.Errorf("Cannot parse tmpfs, too many ':': %s", t) + return oci, runtime, fmt.Errorf("Cannot parse tmpfs, too many ':': %s", t) } dest := parts[0] opts := []string{} @@ -639,10 +678,10 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string for _, b := range assignStrings(label.Binds, yaml.Binds) { parts := strings.Split(b, ":") if len(parts) < 2 { - return oci, fmt.Errorf("Cannot parse bind, missing ':': %s", b) + return oci, runtime, fmt.Errorf("Cannot 
parse bind, missing ':': %s", b) } if len(parts) > 3 { - return oci, fmt.Errorf("Cannot parse bind, too many ':': %s", b) + return oci, runtime, fmt.Errorf("Cannot parse bind, too many ':': %s", b) } src := parts[0] dest := parts[1] @@ -667,7 +706,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string tp = "tmpfs" } if tp == "" { - return oci, fmt.Errorf("Mount for destination %s is missing type", dest) + return oci, runtime, fmt.Errorf("Mount for destination %s is missing type", dest) } if src == "" { // usually sane, eg proc, tmpfs etc @@ -677,7 +716,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string dest = defaultMountpoint(tp) } if dest == "" { - return oci, fmt.Errorf("Mount type %s is missing destination", tp) + return oci, runtime, fmt.Errorf("Mount type %s is missing destination", tp) } mounts[dest] = specs.Mount{Destination: dest, Type: tp, Source: src, Options: opts} } @@ -753,7 +792,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string } for _, capability := range caps { if !capCheck[capability] { - return oci, fmt.Errorf("unknown capability: %s", capability) + return oci, runtime, fmt.Errorf("unknown capability: %s", capability) } boundingSet[capability] = true } @@ -768,7 +807,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string } for _, capability := range ambient { if !capCheck[capability] { - return oci, fmt.Errorf("unknown capability: %s", capability) + return oci, runtime, fmt.Errorf("unknown capability: %s", capability) } boundingSet[capability] = true } @@ -797,7 +836,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string var err error soft, err = strconv.ParseUint(softString, 10, 64) if err != nil { - return oci, fmt.Errorf("Cannot parse %s as uint64: %v", softString, err) + return oci, runtime, fmt.Errorf("Cannot parse %s as uint64: %v", softString, err) } } hardString := 
strings.TrimSpace(rs[2]) @@ -807,7 +846,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string var err error hard, err = strconv.ParseUint(hardString, 10, 64) if err != nil { - return oci, fmt.Errorf("Cannot parse %s as uint64: %v", hardString, err) + return oci, runtime, fmt.Errorf("Cannot parse %s as uint64: %v", hardString, err) } } switch limit { @@ -830,10 +869,10 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string "RLIMIT_RTTIME": rlimits = append(rlimits, specs.POSIXRlimit{Type: limit, Soft: soft, Hard: hard}) default: - return oci, fmt.Errorf("Unknown limit: %s", origLimit) + return oci, runtime, fmt.Errorf("Unknown limit: %s", origLimit) } default: - return oci, fmt.Errorf("Cannot parse rlimit: %s", rlimitsString) + return oci, runtime, fmt.Errorf("Cannot parse rlimit: %s", rlimitsString) } } @@ -843,17 +882,17 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string agIf := assignInterfaceArray(label.AdditionalGids, yaml.AdditionalGids) uid, err := idNumeric(uidIf, idMap) if err != nil { - return oci, err + return oci, runtime, err } gid, err := idNumeric(gidIf, idMap) if err != nil { - return oci, err + return oci, runtime, err } additionalGroups := []uint32{} for _, id := range agIf { ag, err := idNumeric(id, idMap) if err != nil { - return oci, err + return oci, runtime, err } additionalGroups = append(additionalGroups, ag) } @@ -912,5 +951,7 @@ func ConfigInspectToOCI(yaml Image, inspect types.ImageInspect, idMap map[string // IntelRdt } - return oci, nil + runtime = assignRuntime(label.Runtime, yaml.Runtime) + + return oci, runtime, nil } diff --git a/src/moby/config_test.go b/src/moby/config_test.go index 7e99aae92..4aa27878f 100644 --- a/src/moby/config_test.go +++ b/src/moby/config_test.go @@ -44,7 +44,7 @@ func TestOverrides(t *testing.T) { inspect := setupInspect(t, label) - oci, err := ConfigInspectToOCI(yaml, inspect, idMap) + oci, _, err := 
ConfigInspectToOCI(yaml, inspect, idMap) if err != nil { t.Error(err) } @@ -72,7 +72,7 @@ func TestInvalidCap(t *testing.T) { inspect := setupInspect(t, label) - _, err := ConfigInspectToOCI(yaml, inspect, idMap) + _, _, err := ConfigInspectToOCI(yaml, inspect, idMap) if err == nil { t.Error("expected error, got valid OCI config") } @@ -95,7 +95,7 @@ func TestIdMap(t *testing.T) { inspect := setupInspect(t, label) - oci, err := ConfigInspectToOCI(yaml, inspect, idMap) + oci, _, err := ConfigInspectToOCI(yaml, inspect, idMap) if err != nil { t.Error(err) } diff --git a/src/moby/image.go b/src/moby/image.go index cc0272504..ce5706c8e 100644 --- a/src/moby/image.go +++ b/src/moby/image.go @@ -189,8 +189,8 @@ func ImageTar(image, prefix string, tw tarWriter, trust bool, pull bool, resolv } // ImageBundle produces an OCI bundle at the given path in a tarball, given an image and a config.json -func ImageBundle(prefix string, image string, config []byte, tw tarWriter, trust bool, pull bool, readonly bool) error { - log.Debugf("image bundle: %s %s cfg: %s", prefix, image, string(config)) +func ImageBundle(prefix string, image string, config []byte, runtimeConfig []byte, tw tarWriter, trust bool, pull bool, readonly bool) error { + log.Debugf("image bundle: %s %s cfg: %s runtime: %s", prefix, image, string(config), string(runtimeConfig)) // if read only, just unpack in rootfs/ but otherwise set up for overlay rootfs := "rootfs" @@ -213,6 +213,23 @@ func ImageBundle(prefix string, image string, config []byte, tw tarWriter, trust if _, err := io.Copy(tw, buf); err != nil { return err } + + // do not write an empty runtime config + if string(runtimeConfig) != "{}" { + hdr = &tar.Header{ + Name: path.Join(prefix, "runtime.json"), + Mode: 0644, + Size: int64(len(runtimeConfig)), + } + if err := tw.WriteHeader(hdr); err != nil { + return err + } + buf = bytes.NewBuffer(runtimeConfig) + if _, err := io.Copy(tw, buf); err != nil { + return err + } + } + if !readonly { // add a tmp 
directory to be used as a mount point for tmpfs for upper, work hdr = &tar.Header{ diff --git a/src/moby/schema.go b/src/moby/schema.go index 043c88d9e..2fd65e838 100644 --- a/src/moby/schema.go +++ b/src/moby/schema.go @@ -208,6 +208,42 @@ var schema = string(` "network": {"$ref": "#/definitions/network"} } }, + "interfaces": { + "type": "array", + "items": {"$ref": "#/definitions/interface"} + }, + "interface": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": {"type": "string"}, + "add": {"type": "string"}, + "peer": {"type": "string"}, + "createInRoot": {"type": "boolean"} + } + }, + "namespaces": { + "type": "object", + "additionalProperties": false, + "properties": { + "cgroup": {"type": "string"}, + "ipc": {"type": "string"}, + "mnt": {"type": "string"}, + "net": {"type": "string"}, + "pid": {"type": "string"}, + "user": {"type": "string"}, + "uts": {"type": "string"} + } + }, + "runtime": { + "type": "object", + "additionalProperties": false, + "properties": { + "mkdir": {"$ref": "#/definitions/strings"}, + "interfaces": {"$ref": "#/definitions/interfaces"}, + "bindNS": {"$ref": "#/definitions/namespaces"} + } + }, "image": { "type": "object", "additionalProperties": false, @@ -249,7 +285,8 @@ var schema = string(` }, "rlimits": { "$ref": "#/definitions/strings" }, "uidMappings": { "$ref": "#/definitions/idmappings" }, - "gidMappings": { "$ref": "#/definitions/idmappings" } + "gidMappings": { "$ref": "#/definitions/idmappings" }, + "runtime": {"$ref": "#/definitions/runtime"} } }, "images": {