From b953d1781cc4f59a5795a051be24774c74a685b9 Mon Sep 17 00:00:00 2001 From: Avi Deitcher Date: Tue, 16 Jul 2024 15:05:50 +0300 Subject: [PATCH] add support for volumes Signed-off-by: Avi Deitcher --- docs/yaml.md | 77 ++++++++++++++++--- examples/volumes.yml | 45 ++++++++++++ src/cmd/linuxkit/moby/build/build.go | 67 ++++++++++++++++- src/cmd/linuxkit/moby/build/image.go | 19 +++-- src/cmd/linuxkit/moby/build/volume.go | 102 ++++++++++++++++++++++++++ src/cmd/linuxkit/moby/config.go | 88 ++++++++++++++++++++++ src/cmd/linuxkit/moby/const.go | 1 + src/cmd/linuxkit/moby/schema.go | 16 +++- 8 files changed, 396 insertions(+), 19 deletions(-) create mode 100644 examples/volumes.yml create mode 100644 src/cmd/linuxkit/moby/build/volume.go diff --git a/docs/yaml.md b/docs/yaml.md index c04ad17e0..b04e13e81 100644 --- a/docs/yaml.md +++ b/docs/yaml.md @@ -3,7 +3,7 @@ The `linuxkit build` command assembles a set of containerised components into in image. The simplest type of image is just a `tar` file of the contents (useful for debugging) but more useful outputs add a `Dockerfile` to build a container, or build a full disk image that can be -booted as a linuxKit VM. The main use case is to build an assembly that includes +booted as a linuxkit VM. The main use case is to build an assembly that includes `containerd` to run a set of containers, but the tooling is very generic. The yaml configuration specifies the components used to build up an image . All components @@ -16,8 +16,10 @@ The Docker images are optionally verified with Docker Content Trust. For private registries or private repositories on a registry credentials provided via `docker login` are re-used. +## Sections + The configuration file is processed in the order `kernel`, `init`, `onboot`, `onshutdown`, -`services`, `files`. Each section adds files to the root file system. Sections may be omitted. +`services`, `files`, `volumes`. Each section adds files to the root file system. Sections may be omitted. 
Each container that is specified is allocated a unique `uid` and `gid` that it may use if it wishes to run as an isolated user (or user namespace). Anywhere you specify a `uid` or `gid` @@ -40,7 +42,7 @@ files: mode: "0600" ``` -## `kernel` +### `kernel` The `kernel` section is only required if booting a VM. The files will be put into the `boot/` directory, where they are used to build bootable images. @@ -57,7 +59,7 @@ Kernel packages may also contain a cpio archive containing CPU microcode which n the initrd. To select this option, recommended when booting on bare metal, add `ucode: intel-ucode.cpio` to the kernel section. -## `init` +### `init` The `init` section is a list of images that are used for the `init` system and are unpacked directly into the root filesystem. This should bring up `containerd`, start the system and daemon containers, @@ -65,14 +67,14 @@ and set up basic filesystem mounts. in the case of a LinuxKit system. For ease o modification `runc` and `containerd` images, which just contain these programs are added here rather than bundled into the `init` container. -## `onboot` +### `onboot` The `onboot` section is a list of images. These images are run before any other images. They are run sequentially and each must exit before the next one is run. These images can be used to configure one shot settings. See [Image specification](#image-specification) for a list of supported fields. -## `onshutdown` +### `onshutdown` This is a list of images to run on a clean shutdown. Note that you must not rely on these being run at all, as machines may be be powered off or shut down without having time to run @@ -81,18 +83,67 @@ run and when they are not. Most systems are likely to be "crash only" and not ha but you can attempt to deregister cleanly from a network service here, rather than relying on timeouts, for example. -## `services` +### `services` The `services` section is a list of images for long running services which are run with `containerd`. 
Startup order is undefined, so containers should wait on any resources, such as networking, that they need. See [Image specification](#image-specification) for a list of supported fields. -## `files` +### `volumes` + +The volumes section is a list of named volumes that can be used by other containers, +including those in `services`, `onboot` and `onshutdown`. The volumes are created in a directory +chosen by linuxkit at build-time. The volumes then can be referenced by other containers and +mounted into them. + +Volumes normally are blank directories. If an image is provided, the contents of that image +will be used to populate the volume. + +The `volumes` section can declare a volume to be read-write or read-only. If the volume is read-write, +a volume that is mounted into a container can be mounted read-only or read-write. If the volume is read-only, +it can only be mounted into a container read-only; attempting to mount it read-write will generate a build-time error. +By default, volumes are created read-write, and are mounted read-write. + +Volume names **must** be unique, and must contain only lower-case alphanumeric characters, hyphens, and +underscores. + +Sample `volumes` section: + +```yml +volumes: +- name: vola + image: alpine:latest + readonly: true +- name: volb + image: alpine:latest + readonly: false +- name: volc + readonly: false +``` + +In the above example: + +* `vola` is populated by the contents of `alpine:latest` and is read-only. +* `volb` is populated by the contents of `alpine:latest` and is read-write. +* `volc` is an empty volume and is read-write. + +Sample usage of volumes in `services` section: + +```yml +services: +- name: myservice + image: alpine:latest + binds: + - vola:/mnt/vola:ro + - volb:/mnt/volb +``` + +### `files` The files section can be used to add files inline in the config, or from an external file. -``` +```yml files: - path: dir directory: true @@ -118,7 +169,8 @@ user's home directory. 
In addition there is a `metadata` option that will generate the file. Currently the only value supported here is `"yaml"` which will output the yaml used to generate the image into the specified file: -``` + +```yml - path: etc/linuxkit.yml metadata: yaml ``` @@ -130,7 +182,7 @@ Because a `tmpfs` is mounted onto `/var`, `/run`, and `/tmp` by default, the `tm ## Image specification -Entries in the `onboot` and `services` sections specify an OCI image and +Entries in the `onboot`, `onshutdown`, `volumes` and `services` sections specify an OCI image and options. Default values may be specified using the `org.mobyproject.config` image label. For more details see the [OCI specification](https://github.com/opencontainers/runtime-spec/blob/master/spec.md). @@ -205,7 +257,8 @@ which specifies some actions to take place when the container is being started. - `namespace` overrides the LinuxKit default containerd namespace to put the container in; only applicable to services. An example of using the `runtime` config to configure a network namespace with `wireguard` and then run `nginx` in that namespace is shown below: -``` + +```yml onboot: - name: dhcpcd image: linuxkit/dhcpcd: diff --git a/examples/volumes.yml b/examples/volumes.yml new file mode 100644 index 000000000..7ef2b1f11 --- /dev/null +++ b/examples/volumes.yml @@ -0,0 +1,45 @@ +# example with volumes, both blank and populated +kernel: + image: linuxkit/kernel:6.6.13 + cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" +init: + - linuxkit/init:8a7b6cdb89197dc94eb6db69ef9dc90b750db598 + - linuxkit/runc:6062483d748609d505f2bcde4e52ee64a3329f5f + - linuxkit/containerd:e7a92d9f3282039eac5fb1b07cac2b8664cbf0ad + - linuxkit/ca-certificates:5aaa343474e5ac3ac01f8b917e82efb1063d80ff +onboot: + - name: sysctl + image: linuxkit/sysctl:5a374e4bf3e5a7deeacff6571d0f30f7ea8f56db + - name: dhcpcd + image: linuxkit/dhcpcd:e9e3580f2de00e73e7b316a007186d22fea056ee + command: ["/sbin/dhcpcd", "--nobackground", "-f", 
"/dhcpcd.conf", "-1"] +onshutdown: + - name: shutdown + image: busybox:latest + command: ["/bin/echo", "so long and thanks for all the fish"] +services: + - name: getty + image: linuxkit/getty:5d86a2ce2d890c14ab66b13638dcadf74f29218b + env: + - INSECURE=true + - name: rngd + image: linuxkit/rngd:cdb919e4aee49fed0bf6075f0a104037cba83c39 + - name: nginx + image: nginx:1.19.5-alpine + capabilities: + - CAP_NET_BIND_SERVICE + - CAP_CHOWN + - CAP_SETUID + - CAP_SETGID + - CAP_DAC_OVERRIDE + binds: + - /etc/resolv.conf:/etc/resolv.conf + - blank:/blank + - alpine:/alpine +volumes: +- name: blank # blank volume +- name: alpine # populated volume + image: alpine:3.19 +files: + - path: etc/linuxkit-config + metadata: yaml diff --git a/src/cmd/linuxkit/moby/build/build.go b/src/cmd/linuxkit/moby/build/build.go index 10c8fde00..bc01af963 100644 --- a/src/cmd/linuxkit/moby/build/build.go +++ b/src/cmd/linuxkit/moby/build/build.go @@ -99,7 +99,13 @@ func outputImage(image *moby.Image, section string, index int, prefix string, m if err != nil { return fmt.Errorf("failed to retrieve config for %s: %v", image.Image, err) } - oci, runtime, err := moby.ConfigToOCI(image, configRaw, idMap) + // use a modified version of onboot which replaces volume names with paths + imageWithVolPaths, err := updateMountsAndBindsFromVolumes(image, m) + if err != nil { + return fmt.Errorf("failed update image %s from volumes: %w", image.Image, err) + } + + oci, runtime, err := moby.ConfigToOCI(imageWithVolPaths, configRaw, idMap) if err != nil { return fmt.Errorf("failed to create OCI spec for %s: %v", image.Image, err) } @@ -256,6 +262,65 @@ func Build(m moby.Moby, w io.Writer, opts BuildOpts) error { return err } + if len(m.Volumes) != 0 { + log.Infof("Add volumes:") + } + + for i, vol := range m.Volumes { + log.Infof("Process volume image: %s", vol.Name) + // there is an Image, so we need to extract it, either from inputTar or from the image + if oldConfig != nil && len(oldConfig.Volumes) > i && 
oldConfig.Volumes[i].Image == vol.Image { + if err := extractPackageFilesFromTar(in, iw, vol.Image, fmt.Sprintf("volumes[%d]", i)); err != nil { + return err + } + continue + } + location := fmt.Sprintf("volume[%d]", i) + lower, tmpDir, merged := vol.LowerDir(), vol.TmpDir(), vol.MergedDir() + lowerPath := strings.TrimPrefix(lower, "/") + "/" + + // get volume tarball from container + if err := ImageTar(location, vol.ImageRef(), lowerPath, apkTar, resolvconfSymlink, opts); err != nil { + return fmt.Errorf("failed to build volume tarball from %s: %v", vol.Name, err) + } + // make upper and merged dirs which will be used for mounting + // no need to make lower dir, as it is made automatically by ImageTar() + // the existence of an upper dir indicates that it is read-write and should be overlayfs. + if !vol.ReadOnly { + // need the tmp dir where work gets done, since the whole thing fs is read-only + tmpPath := strings.TrimPrefix(tmpDir, "/") + "/" + tmphdr := &tar.Header{ + Name: tmpPath, + Mode: 0755, + Typeflag: tar.TypeDir, + ModTime: defaultModTime, + Format: tar.FormatPAX, + PAXRecords: map[string]string{ + moby.PaxRecordLinuxkitSource: "linuxkit.volumes", + moby.PaxRecordLinuxkitLocation: location, + }, + } + if err := apkTar.WriteHeader(tmphdr); err != nil { + return err + } + } + mergedPath := strings.TrimPrefix(merged, "/") + "/" + mhdr := &tar.Header{ + Name: mergedPath, + Mode: 0755, + Typeflag: tar.TypeDir, + ModTime: defaultModTime, + Format: tar.FormatPAX, + PAXRecords: map[string]string{ + moby.PaxRecordLinuxkitSource: "linuxkit.volumes", + moby.PaxRecordLinuxkitLocation: location, + }, + } + if err := apkTar.WriteHeader(mhdr); err != nil { + return err + } + } + if len(m.Onboot) != 0 { log.Infof("Add onboot containers:") } diff --git a/src/cmd/linuxkit/moby/build/image.go b/src/cmd/linuxkit/moby/build/image.go index 6715c0f4a..b7476231a 100644 --- a/src/cmd/linuxkit/moby/build/image.go +++ b/src/cmd/linuxkit/moby/build/image.go @@ -142,7 +142,7 @@ var 
touch = map[string]tar.Header{ // tarPrefix creates the leading directories for a path // path is the path to prefix, location is where this appears in the linuxkit.yaml file -func tarPrefix(path, location string, ref *reference.Spec, tw tarWriter) error { +func tarPrefix(path, location, refName string, tw tarWriter) error { if path == "" { return nil } @@ -163,7 +163,7 @@ func tarPrefix(path, location string, ref *reference.Spec, tw tarWriter) error { Typeflag: tar.TypeDir, Format: tar.FormatPAX, PAXRecords: map[string]string{ - moby.PaxRecordLinuxkitSource: ref.String(), + moby.PaxRecordLinuxkitSource: refName, moby.PaxRecordLinuxkitLocation: location, }, } @@ -178,16 +178,25 @@ func tarPrefix(path, location string, ref *reference.Spec, tw tarWriter) error { // ImageTar takes a Docker image and outputs it to a tar stream // location is where it is in the linuxkit.yaml file func ImageTar(location string, ref *reference.Spec, prefix string, tw tarWriter, resolv string, opts BuildOpts) (e error) { - log.Debugf("image tar: %s %s", ref, prefix) + refName := "empty" + if ref != nil { + refName = ref.String() + } + log.Debugf("image tar: %s %s", refName, prefix) if prefix != "" && prefix[len(prefix)-1] != '/' { return fmt.Errorf("prefix does not end with /: %s", prefix) } - err := tarPrefix(prefix, location, ref, tw) + err := tarPrefix(prefix, location, refName, tw) if err != nil { return err } + // if the image is blank, we do not need to do any more + if ref == nil { + return nil + } + // pullImage first checks in the cache, then pulls the image. // If pull==true, then it always tries to pull from registry. 
src, err := imagePull(ref, opts.Pull, opts.CacheDir, opts.DockerCache, opts.Arch) @@ -364,7 +373,7 @@ func ImageBundle(prefix, location string, ref *reference.Spec, config []byte, ru } dupMap[ref.String()] = root } else { - if err := tarPrefix(prefix+"/", location, ref, tw); err != nil { + if err := tarPrefix(prefix+"/", location, ref.String(), tw); err != nil { return err } root = dupMap[ref.String()] diff --git a/src/cmd/linuxkit/moby/build/volume.go b/src/cmd/linuxkit/moby/build/volume.go new file mode 100644 index 000000000..006ee3e5a --- /dev/null +++ b/src/cmd/linuxkit/moby/build/volume.go @@ -0,0 +1,102 @@ +package build + +import ( + "fmt" + "strings" + + "github.com/linuxkit/linuxkit/src/cmd/linuxkit/moby" +) + +func updateMountsAndBindsFromVolumes(image *moby.Image, m moby.Moby) (*moby.Image, error) { + // clean image to send back + img := *image + if img.Mounts != nil { + for i, mount := range *img.Mounts { + // only care about type bind + if mount.Type != "bind" { + continue + } + // starts with / = not a volume + if strings.HasPrefix(mount.Source, "/") { + continue + } + vol := m.VolByName(mount.Source) + if vol == nil { + return nil, fmt.Errorf("volume %s not found in onboot image mount %d", mount.Source, i) + } + merged := vol.MergedDir() + // make sure it is not read-write if the underlying volume is read-only + if vol.ReadOnly { + var foundReadOnly bool + for _, opt := range mount.Options { + if opt == "rw" { + foundReadOnly = false + break + } + if opt == "ro" { + foundReadOnly = true + break + } + } + if !foundReadOnly { + return nil, fmt.Errorf("volume %s is read-only, but attempting to write into container read-write", mount.Source) + } + } + mount.Source = merged + } + } + if img.Binds != nil { + var newBinds []string + for i, bind := range *img.Binds { + parts := strings.Split(bind, ":") + // starts with / = not a volume + if strings.HasPrefix(parts[0], "/") { + newBinds = append(newBinds, bind) + continue + } + source := parts[0] + // split 
+ vol := m.VolByName(source) + if vol == nil { + return nil, fmt.Errorf("volume %s not found in onboot image bin %d", source, i) + } + merged := vol.MergedDir() + if vol.ReadOnly { + if len(parts) < 3 || parts[2] != "ro" { + return nil, fmt.Errorf("volume %s is read-only, but attempting to write into container read-write", source) + } + } + parts[0] = merged + newBinds = append(newBinds, strings.Join(parts, ":")) + } + img.Binds = &newBinds + } + if img.BindsAdd != nil { + var newBinds []string + for i, bind := range *img.BindsAdd { + parts := strings.Split(bind, ":") + // starts with / = not a volume + if strings.HasPrefix(parts[0], "/") { + newBinds = append(newBinds, bind) + continue + } + source := parts[0] + vol := m.VolByName(source) + // split + if vol == nil { + return nil, fmt.Errorf("volume %s not found in onboot image bin %d", parts[0], i) + } + merged := vol.MergedDir() + if vol.ReadOnly { + if len(parts) < 3 || parts[2] != "ro" { + return nil, fmt.Errorf("volume %s is read-only, but attempting to write into container read-write", source) + } + } + parts[0] = merged + newBinds = append(newBinds, strings.Join(parts, ":")) + } + img.BindsAdd = &newBinds + } + + return &img, nil +} diff --git a/src/cmd/linuxkit/moby/config.go b/src/cmd/linuxkit/moby/config.go index 5e7ac0029..179b7abb5 100644 --- a/src/cmd/linuxkit/moby/config.go +++ b/src/cmd/linuxkit/moby/config.go @@ -4,6 +4,8 @@ import ( "bytes" "fmt" "os" + "path" + "regexp" "sort" "strconv" "strings" @@ -19,6 +21,8 @@ import ( "gopkg.in/yaml.v3" ) +var nameRE = regexp.MustCompile(`^[a-z0-9_-]*$`) + // Moby is the type of a Moby config file type Moby struct { Kernel KernelConfig `kernel:"cmdline,omitempty" json:"kernel,omitempty"` @@ -27,14 +31,20 @@ type Moby struct { Onshutdown []*Image `yaml:"onshutdown" json:"onshutdown"` Services []*Image `yaml:"services" json:"services"` Files []File `yaml:"files" json:"files"` + Volumes []*Volume `yaml:"volumes" json:"volumes"` initRefs []*reference.Spec + vols 
map[string]*Volume } func (m Moby) InitRefs() []*reference.Spec { return m.initRefs } +func (m Moby) VolByName(name string) *Volume { + return m.vols[name] +} + // KernelConfig is the type of the config for a kernel type KernelConfig struct { Image string `yaml:"image" json:"image"` @@ -64,6 +74,34 @@ type File struct { GID interface{} `yaml:"gid,omitempty" json:"gid,omitempty"` } +// Volume is the type of a volume specification +type Volume struct { + Name string `yaml:"name" json:"name"` + Image string `yaml:"image,omitempty" json:"image,omitempty"` + ReadOnly bool `yaml:"readonly,omitempty" json:"readonly,omitempty"` + ref *reference.Spec +} + +func (v Volume) ImageRef() *reference.Spec { + return v.ref +} + +func (v Volume) BaseDir() string { + return volumeBaseDir(v.Name) +} + +func (v Volume) LowerDir() string { + return volumeLowerDir(v.Name) +} + +func (v Volume) TmpDir() string { + return volumeTmpDir(v.Name) +} + +func (v Volume) MergedDir() string { + return volumeMergedDir(v.Name) +} + // Image is the type of an image config type Image struct { Name string `yaml:"name" json:"name"` @@ -210,6 +248,37 @@ func uniqueServices(m Moby) error { return nil } +func uniqueVolumes(m *Moby) error { + // volume names must be unique + m.vols = map[string]*Volume{} + for _, v := range m.Volumes { + if !nameRE.MatchString(v.Name) { + return fmt.Errorf("invalid volume name: %s", v.Name) + } + if _, ok := m.vols[v.Name]; ok { + return fmt.Errorf("duplicate volume name: %s", v.Name) + } + m.vols[v.Name] = v + } + return nil +} + +func volumeBaseDir(name string) string { + return path.Join(allVolumesBaseDir, name) +} + +func volumeLowerDir(name string) string { + return path.Join(volumeBaseDir(name), "lower") +} + +func volumeTmpDir(name string) string { + return path.Join(volumeBaseDir(name), "tmp") +} + +func volumeMergedDir(name string) string { + return path.Join(volumeBaseDir(name), "merged") +} + func extractReferences(m *Moby) error { if m.Kernel.Image != "" { r, 
err := reference.Parse(util.ReferenceExpand(m.Kernel.Image)) @@ -246,6 +315,16 @@ func extractReferences(m *Moby) error { } image.ref = &r } + for _, image := range m.Volumes { + if image.Image == "" { + continue + } + r, err := reference.Parse(util.ReferenceExpand(image.Image)) + if err != nil { + return fmt.Errorf("extract volume image reference: %v", err) + } + image.ref = &r + } return nil } @@ -318,6 +397,10 @@ func NewConfig(config []byte, packageFinder spec.PackageResolver) (Moby, error) return m, err } + if err := uniqueVolumes(&m); err != nil { + return m, err + } + if err := extractReferences(&m); err != nil { return m, err } @@ -352,6 +435,11 @@ func AppendConfig(m0, m1 Moby) (Moby, error) { moby.Services = append(moby.Services, m1.Services...) moby.Files = append(moby.Files, m1.Files...) moby.initRefs = append(moby.initRefs, m1.initRefs...) + moby.Volumes = append(moby.Volumes, m1.Volumes...) + moby.vols = map[string]*Volume{} + for k, v := range m1.vols { + moby.vols[k] = v + } return moby, uniqueServices(moby) } diff --git a/src/cmd/linuxkit/moby/const.go b/src/cmd/linuxkit/moby/const.go index d2b786def..e2fd3681c 100644 --- a/src/cmd/linuxkit/moby/const.go +++ b/src/cmd/linuxkit/moby/const.go @@ -6,4 +6,5 @@ const ( // PaxRecordLinuxkitLocation report the location of the file in the linuxkit.yaml // that led to this file being in this location PaxRecordLinuxkitLocation = "LINUXKIT.location" + allVolumesBaseDir = "/containers/volumes" ) diff --git a/src/cmd/linuxkit/moby/schema.go b/src/cmd/linuxkit/moby/schema.go index fd91d0be4..d8af60ddb 100644 --- a/src/cmd/linuxkit/moby/schema.go +++ b/src/cmd/linuxkit/moby/schema.go @@ -37,6 +37,19 @@ var schema = ` "type": "array", "items": { "$ref": "#/definitions/file" } }, + "volume": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": {"type": "string"}, + "image": {"type": "string"}, + "readonly": {"type": "boolean"} + } + }, + "volumes": { + "type": "array", + "items": { 
"$ref": "#/definitions/volume" } + }, "trust": { "type": "object", "additionalProperties": false, @@ -333,7 +346,8 @@ var schema = ` "onshutdown": { "$ref": "#/definitions/images" }, "services": { "$ref": "#/definitions/images" }, "trust": { "$ref": "#/definitions/trust" }, - "files": { "$ref": "#/definitions/files" } + "files": { "$ref": "#/definitions/files" }, + "volumes": { "$ref": "#/definitions/volumes" } } } `