From 8bcb2c9e0e224f68aae750c2ece5791987a16ce8 Mon Sep 17 00:00:00 2001 From: Justin Cormack Date: Sun, 2 Apr 2017 15:15:18 +0100 Subject: [PATCH 1/2] Add a formatting container This is based on the code we used for Docker Editions, and will format an external drive, to be used for example for `/var` for Docker image persistence. It does not `mount` the drive yet, as splitting format and mount gives better modularity. Example yaml fragment: ``` - name: format image: "mobylinux/format:097d4f22b20f976b1f89d8f0b8a5d074d35b856c" binds: - /dev:/dev capabilities: - CAP_SYS_ADMIN - CAP_MKNOD ``` Signed-off-by: Justin Cormack --- pkg/format/Dockerfile | 13 +++++ pkg/format/Makefile | 29 +++++++++++ pkg/format/format.sh | 113 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 pkg/format/Dockerfile create mode 100644 pkg/format/Makefile create mode 100755 pkg/format/format.sh diff --git a/pkg/format/Dockerfile b/pkg/format/Dockerfile new file mode 100644 index 000000000..124e4daec --- /dev/null +++ b/pkg/format/Dockerfile @@ -0,0 +1,13 @@ +FROM alpine:3.5 + +RUN \ + apk update && apk upgrade -a && \ + apk add --no-cache \ + e2fsprogs \ + e2fsprogs-extra \ + jq \ + sfdisk \ + && true + +COPY . 
./ +CMD ["/bin/sh", "/format.sh"] diff --git a/pkg/format/Makefile b/pkg/format/Makefile new file mode 100644 index 000000000..b11e09f67 --- /dev/null +++ b/pkg/format/Makefile @@ -0,0 +1,29 @@ +.PHONY: tag push + +BASE=alpine:3.5 +IMAGE=format + +default: push + +hash: Dockerfile format.sh + DOCKER_CONTENT_TRUST=1 docker pull $(BASE) + tar cf - $^ | docker build --no-cache -t $(IMAGE):build - + docker run --rm --entrypoint /bin/sh $(IMAGE):build -c "cat $^ /lib/apk/db/installed | sha1sum" | sed 's/ .*//' > $@ + +push: hash + docker pull mobylinux/$(IMAGE):$(shell cat hash) || \ + (docker tag $(IMAGE):build mobylinux/$(IMAGE):$(shell cat hash) && \ + docker push mobylinux/$(IMAGE):$(shell cat hash)) + docker rmi $(IMAGE):build + rm -f hash + +tag: hash + docker pull mobylinux/$(IMAGE):$(shell cat hash) || \ + docker tag $(IMAGE):build mobylinux/$(IMAGE):$(shell cat hash) + docker rmi $(IMAGE):build + rm -f hash + +clean: + rm -f hash + +.DELETE_ON_ERROR: diff --git a/pkg/format/format.sh b/pkg/format/format.sh new file mode 100755 index 000000000..bd5bcd89e --- /dev/null +++ b/pkg/format/format.sh @@ -0,0 +1,113 @@ +#!/bin/sh + +# this script assumes anything on the disk can be removed if corrupted +# other use cases may need different scripts. + +# currently only supports ext4 but should be expanded + +do_fsck() +{ + # preen + /sbin/e2fsck -p $* + EXIT_CODE=$? 
+	# exit code 1 is errors corrected
+	[ "${EXIT_CODE}" -eq 1 ] && EXIT_CODE=0
+	# exit code 2 or 3 means need to reboot
+	[ "${EXIT_CODE}" -eq 2 -o "${EXIT_CODE}" -eq 3 ] && /sbin/reboot
+	# exit code 4 or over is fatal
+	[ "${EXIT_CODE}" -lt 4 ] && return "${EXIT_CODE}"
+
+	# try harder; re-read the status, otherwise the checks below reuse the preen result
+	/sbin/e2fsck -y $*; EXIT_CODE=$?
+	# exit code 1 is errors corrected
+	[ "${EXIT_CODE}" -eq 1 ] && EXIT_CODE=0
+	# exit code 2 or 3 means need to reboot
+	[ "${EXIT_CODE}" -eq 2 -o "${EXIT_CODE}" -eq 3 ] && /sbin/reboot
+	# exit code 4 or over is fatal
+	[ "${EXIT_CODE}" -ge 4 ] && printf "Filesystem unrecoverably corrupted, will reformat\n"
+
+	return "${EXIT_CODE}"
+}
+
+do_fsck_extend_mount()
+{
+	DRIVE="$1"
+	DATA="$2"
+
+	do_fsck "$DATA" || return 1
+
+	# only try to extend if there is a single partition and free space
+	PARTITIONS=$(sfdisk -J "$DRIVE" | jq '.partitiontable.partitions | length')
+
+	if [ "$PARTITIONS" -eq 1 ] && \
+		sfdisk -F "$DRIVE" | grep -q 'Unpartitioned space' &&
+		! sfdisk -F "$DRIVE" | grep -q '0 B, 0 bytes, 0 sectors'
+	then
+		SPACE=$(sfdisk -F "$DRIVE" | grep 'Unpartitioned space')
+		printf "Resizing disk partition: $SPACE\n"
+
+		START=$(sfdisk -J "$DRIVE" | jq -e '.partitiontable.partitions | map(select(.type=="83")) | .[0].start')
+
+		sfdisk -q --delete "$DRIVE" 2> /dev/null
+		echo "${START},,83;" | sfdisk -q "$DRIVE"
+
+		# set bootable flag
+		sfdisk -A "$DRIVE" 1
+
+		# update status; re-read the table of the drive passed as $1 (diskdev is only set in do_mkfs)
+		blockdev --rereadpt "$DRIVE" 2> /dev/null
+		mdev -s
+
+		# wait for device
+		for i in $(seq 1 50); do test -b "$DATA" && break || sleep .1; mdev -s; done
+
+		# resize2fs fails unless we use -f here
+		do_fsck -f "$DATA" || return 1
+		resize2fs "$DATA"
+
+		do_fsck "$DATA" || return 1
+	fi
+}
+
+do_mkfs()
+{
+	diskdev="$1"
+
+	# new disks do not have a DOS signature in sector 0;
+	# this makes sfdisk complain. We can work around this by letting
+	# fdisk create that DOS signature, by just doing a "w", a write.
+ # http://bugs.alpinelinux.org/issues/145 + echo "w" | fdisk $diskdev >/dev/null + + # format one large partition + echo ";" | sfdisk --quiet $diskdev + + # update status + blockdev --rereadpt $diskdev 2> /dev/null + mdev -s + + FSOPTS="-O resize_inode,has_journal,extent,huge_file,flex_bg,uninit_bg,64bit,dir_nlink,extra_isize" + + mkfs.ext4 -q -F $FSOPTS ${diskdev}1 +} + +# TODO fix for multiple disks, cdroms etc +DEV="$(find /dev -maxdepth 1 -type b ! -name 'loop*' | grep -v '[0-9]$' | sed 's@.*/dev/@@' | sort | head -1 )" + +[ -z "${DEV}" ] && exit 1 + +DRIVE="/dev/${DEV}" + +# see if it has a partition table already +if sfdisk -d "${DRIVE}" >/dev/null 2>/dev/null +then + DATA=$(sfdisk -J "$DRIVE" | jq -e -r '.partitiontable.partitions | map(select(.type=="83")) | .[0].node') + if [ $? -eq 0 ] + then + do_fsck_extend_mount "$DRIVE" "$DATA" || do_mkfs "$DRIVE" + else + do_mkfs "$DRIVE" + fi +else + do_mkfs "$DRIVE" +fi From cf7b952995ef729566c22ef38281031199d518d2 Mon Sep 17 00:00:00 2001 From: Justin Cormack Date: Thu, 6 Apr 2017 13:51:00 +0100 Subject: [PATCH 2/2] Add persistent drive support to Docker container This works and runs containers now, if you eg `runc exec` into it. Needs a few tweaks for rlimits, but will pull and run containers. Will integrate better with ssh/dev containers to make more usable. 
For a simple test use ``` ./bin/moby build examples/docker.yml ./bin/moby run hyperkit -disk-size 100 docker ``` Signed-off-by: Justin Cormack --- examples/docker.yml | 56 ++++++++++++++++++++++++++++++++++++++++ pkg/docker-ce/Dockerfile | 14 +++++++--- pkg/docker-ce/Makefile | 2 +- pkg/docker-ce/docker.sh | 38 +++++++++++++++++++++++++++ pkg/format/format.sh | 5 +++- 5 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 examples/docker.yml create mode 100755 pkg/docker-ce/docker.sh diff --git a/examples/docker.yml b/examples/docker.yml new file mode 100644 index 000000000..a21bb42fb --- /dev/null +++ b/examples/docker.yml @@ -0,0 +1,56 @@ +kernel: + image: "mobylinux/kernel:4.9.x" + cmdline: "console=ttyS0 console=tty0 page_poison=1" +init: "mobylinux/init:c0007f0cdf1ef821a981fcc676e3f1c2dd9ab5b1" +system: + - name: sysctl + image: "mobylinux/sysctl:2cf2f9d5b4d314ba1bfc22b2fe931924af666d8c" + net: host + pid: host + ipc: host + capabilities: + - CAP_SYS_ADMIN + readonly: true + - name: binfmt + image: "mobylinux/binfmt:bdb754f25a5d851b4f5f8d185a43dfcbb3c22d01" + binds: + - /proc/sys/fs/binfmt_misc:/binfmt_misc + readonly: true + - name: format + image: "mobylinux/format:53748000acf515549d398e6ae68545c26c0f3a2e" + binds: + - /dev:/dev + capabilities: + - CAP_SYS_ADMIN + - CAP_MKNOD +daemon: + - name: rngd + image: "mobylinux/rngd:3dad6dd43270fa632ac031e99d1947f20b22eec9@sha256:1c93c1db7196f6f71f8e300bc1d15f0376dd18e8891c8789d77c8ff19f3a9a92" + capabilities: + - CAP_SYS_ADMIN + oomScoreAdj: -800 + readonly: true + - name: dhcpcd + image: "mobylinux/dhcpcd:57a8ef29d3a910645b2b24c124f9ce9ef53ce703" + binds: + - /var:/var + - /tmp/etc:/etc + capabilities: + - CAP_NET_ADMIN + - CAP_NET_BIND_SERVICE + - CAP_NET_RAW + net: host + oomScoreAdj: -800 + - name: docker + image: "mobylinux/docker-ce:f6505961df89ca6b5d024f1ac5a6b986359786d1" + capabilities: + - all + net: host + mounts: + - type: cgroup + options: ["rw","nosuid","noexec","nodev","relatime"] + 
binds: + - /dev:/dev + - /lib/modules:/lib/modules +outputs: + - format: kernel+initrd diff --git a/pkg/docker-ce/Dockerfile b/pkg/docker-ce/Dockerfile index 34969e431..45a44e176 100644 --- a/pkg/docker-ce/Dockerfile +++ b/pkg/docker-ce/Dockerfile @@ -2,26 +2,31 @@ FROM alpine:3.5 # Docker daemon only minimal Alpine install +# set up Docker group # set up subuid/subgid so that "--userns-remap=default" works out-of-the-box RUN set -x \ + && addgroup -S docker \ && addgroup -S dockremap \ && adduser -S -G dockremap dockremap \ && echo 'dockremap:165536:65536' >> /etc/subuid \ && echo 'dockremap:165536:65536' >> /etc/subgid # https://github.com/docker/docker/blob/master/project/PACKAGERS.md#runtime-dependencies +# sfdisk and jq used by disk mounting code at present RUN apk add --no-cache \ ca-certificates \ curl \ iptables \ - xz + xz \ + sfdisk \ + jq # removed xfsprogs e2fs btrfs as we do not support dm or btrfs yet # removed openssl as I do not think server needs it ENV DOCKER_BUCKET get.docker.com -ENV DOCKER_VERSION 17.03.0-ce -ENV DOCKER_SHA256 4a9766d99c6818b2d54dc302db3c9f7b352ad0a80a2dc179ec164a3ba29c2d3e +ENV DOCKER_VERSION 17.04.0-ce +ENV DOCKER_SHA256 c52cff62c4368a978b52e3d03819054d87bcd00d15514934ce2e0e09b99dd100 # we could avoid installing client here I suppose RUN set -x \ @@ -35,4 +40,5 @@ RUN set -x \ COPY . 
./ -ENTRYPOINT ["/usr/bin/docker-init", "/usr/bin/dockerd"] +# use the Docker copy of tini as our init for zombie reaping +ENTRYPOINT ["/usr/bin/docker-init", "/bin/sh", "/docker.sh"] diff --git a/pkg/docker-ce/Makefile b/pkg/docker-ce/Makefile index d3fcb0869..1cca96610 100644 --- a/pkg/docker-ce/Makefile +++ b/pkg/docker-ce/Makefile @@ -5,7 +5,7 @@ IMAGE=docker-ce default: push -hash: Dockerfile +hash: Dockerfile docker.sh DOCKER_CONTENT_TRUST=1 docker pull $(BASE) tar cf - $^ | docker build --no-cache -t $(IMAGE):build - docker run --entrypoint /bin/sh --rm $(IMAGE):build -c 'cat $^ /lib/apk/db/installed | sha1sum' | sed 's/ .*//' > $@ diff --git a/pkg/docker-ce/docker.sh b/pkg/docker-ce/docker.sh new file mode 100755 index 000000000..5ab37b090 --- /dev/null +++ b/pkg/docker-ce/docker.sh @@ -0,0 +1,38 @@ +#!/bin/sh + +set -x + +mount_drive() +{ + MOUNTPOINT=/var/lib/docker + + mkdir -p "$MOUNTPOINT" + + # TODO fix for multiple disks, cdroms etc + DEVS="$(find /dev -maxdepth 1 -type b ! -name 'loop*' ! -name 'nbd*' | grep -v '[0-9]$' | sed 's@.*/dev/@@' | sort)" + + for DEV in $DEVS + do + DRIVE="/dev/${DEV}" + + # see if it has a partition table + if sfdisk -d "${DRIVE}" >/dev/null 2>/dev/null + then + # 83 is Linux partition identifier + DATA=$(sfdisk -J "$DRIVE" | jq -e -r '.partitiontable.partitions | map(select(.type=="83")) | .[0].node') + if [ $? 
-eq 0 ]
+			then
+				mount "$DATA" "$MOUNTPOINT" && return
+			fi
+		fi
+	done
+
+	echo "WARNING: Failed to mount a persistent volume (is there one?)"
+
+	# not sure if we want to fatally bail here, in some debug situations it is ok
+	# exit 1
+}
+
+mount_drive
+
+exec /usr/bin/dockerd
diff --git a/pkg/format/format.sh b/pkg/format/format.sh
index bd5bcd89e..6ae912bdb 100755
--- a/pkg/format/format.sh
+++ b/pkg/format/format.sh
@@ -46,6 +46,7 @@ do_fsck_extend_mount()
 		SPACE=$(sfdisk -F "$DRIVE" | grep 'Unpartitioned space')
 		printf "Resizing disk partition: $SPACE\n"
 
+		# 83 is Linux partition id
 		START=$(sfdisk -J "$DRIVE" | jq -e '.partitiontable.partitions | map(select(.type=="83")) | .[0].start')
 
 		sfdisk -q --delete "$DRIVE" 2> /dev/null
@@ -84,7 +85,9 @@ do_mkfs()
 
 	# update status
 	blockdev --rereadpt $diskdev 2> /dev/null
-	mdev -s
+
+	# wait for the new partition node (do_mkfs only receives the drive; DATA may be unset or stale)
+	for i in $(seq 1 50); do test -b "${diskdev}1" && break || sleep .1; mdev -s; done
 
 	FSOPTS="-O resize_inode,has_journal,extent,huge_file,flex_bg,uninit_bg,64bit,dir_nlink,extra_isize"