Containerize binfmt_misc

- statically make containerd symlinks so rootfs can be read only
- run binfmt_misc in a containerd container
- ship arm, aarch64, ppc64le qemu static versions that always "just work" as this is supported in Linux 4.8

fix #53

Signed-off-by: Justin Cormack <justin.cormack@docker.com>
This commit is contained in:
Justin Cormack 2016-11-29 17:40:30 +00:00
parent 1f2f77f1e9
commit 8d3691fabb
15 changed files with 390 additions and 33 deletions

View File

@ -10,6 +10,11 @@ RUN \
COPY . .
RUN rm Dockerfile
RUN cd /usr/bin && \
ln -s docker-runc runc && \
ln -s docker-containerd-shim containerd-shim && \
ln -s docker-containerd-ctr containerd-ctr && \
ln -s docker-containerd containerd
RUN \
rc-update add sysctl boot && \
@ -43,7 +48,6 @@ RUN \
rc-update add transfused default && \
rc-update add automount sysinit && \
rc-update add diagnostics default && \
rc-update add binfmt default && \
rc-update add hostsettings default && \
rc-update add windowsnet boot && \
rc-update add hv_kvp_daemon default && \

View File

@ -13,6 +13,7 @@ endif
initrd.img: Dockerfile mkinitrd.sh init $(ETCFILES)
$(MAKE) -C kernel
$(MAKE) -j -C packages
$(MAKE) -j -C containers
printf $(TAG)$(DIRTY) > etc/moby-commit
BUILD=$$( tar cf - \
Dockerfile etc usr init mkinitrd.sh \
@ -23,7 +24,6 @@ initrd.img: Dockerfile mkinitrd.sh init $(ETCFILES)
-C packages/docker usr etc -C ../.. \
-C packages/diagnostics usr etc -C ../.. \
-C packages/automount etc -C ../.. \
-C packages/binfmt_misc etc -C ../.. \
-C packages/hostsettings etc -C ../.. \
-C packages/chronyd etc -C ../.. \
-C packages/userns etc -C ../.. \
@ -38,6 +38,7 @@ initrd.img: Dockerfile mkinitrd.sh init $(ETCFILES)
-C packages/containerd etc -C ../.. \
-C packages/aws etc -C ../.. \
-C packages/azure etc -C ../.. \
containers/binfmt/rootfs containers/binfmt/config.json \
| \
docker build -q - ) && [ -n "$$BUILD" ] && echo "Built $$BUILD" && \
docker run --net=none --log-driver=none --rm $$BUILD > $@
@ -53,7 +54,8 @@ mobylinux-bios.iso: initrd.img kernel/x86_64/vmlinuz64
common: initrd.img
$(MAKE) -C kernel
$(MAKE) -C packages
$(MAKE) -j -C packages
$(MAKE) -j -C containers
ami: common
tar cf - \
@ -160,6 +162,7 @@ clean:
docker images -q moby-azure:raw2vhd | xargs docker rmi -f || true
docker volume rm vhdartifact || true
$(MAKE) -C packages clean
$(MAKE) -C containers clean
$(MAKE) -C kernel clean
.DELETE_ON_ERROR:

View File

@ -0,0 +1,3 @@
FROM debian:testing
RUN apt-get update && apt-get -y upgrade && apt-get install -y qemu-user-static

View File

@ -0,0 +1,29 @@
.PHONY: tag push
BASE=debian:testing
IMAGE=qemu-user-static
default: push
hash: Dockerfile
docker pull $(BASE)
tar cf - $^ | docker build --no-cache -t $(IMAGE):build -
docker run --rm $(IMAGE):build sh -c 'apt list --installed 2>/dev/null | sha1sum' | sed 's/ .*//' > hash
push: hash
docker pull mobylinux/$(IMAGE):$(shell cat hash) || \
(docker tag $(IMAGE):build mobylinux/$(IMAGE):$(shell cat hash) && \
docker push mobylinux/$(IMAGE):$(shell cat hash))
docker rmi $(IMAGE):build
rm -f hash
tag: hash
docker pull mobylinux/$(IMAGE):$(shell cat hash) || \
docker tag $(IMAGE):build mobylinux/$(IMAGE):$(shell cat hash)
docker rmi $(IMAGE):build
rm -f hash
clean:
rm -f hash
.DELETE_ON_ERROR:

View File

@ -0,0 +1,10 @@
DIRS=$(wildcard */)
.PHONY: clean $(DIRS)
default: $(DIRS)
$(DIRS):
$(MAKE) -C $@
clean:
for f in $(DIRS); do $(MAKE) -C $$f clean; done

2
alpine/containers/binfmt/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
rootfs
qemu-*

View File

@ -0,0 +1,3 @@
:qemu-aarch64:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-aarch64-static:CF
:qemu-arm:M:0:\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-arm-static:CF
:qemu-ppc64le:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:/usr/bin/qemu-ppc64le-static:CF

View File

@ -0,0 +1,19 @@
FROM mobylinux/alpine-build-go:78d9a136c632c66f0767e11ce3c661989d706f21
COPY *.go /go/src/binfmt/
WORKDIR /go/src/binfmt
RUN go install --ldflags '-extldflags "-fno-PIC"'
WORKDIR /rootfs
RUN mkdir -p usr/bin binfmt_misc dev etc/binfmt.d proc sys
RUN cp /go/bin/binfmt usr/bin
COPY qemu* usr/bin/
COPY 00_moby.conf etc/binfmt.d/
RUN printf 'FROM scratch\nCOPY . ./\nCMD ["/usr/bin/binfmt", "-dir", "/etc/binfmt.d/", "-mount", "/binfmt_misc"]\n' > Dockerfile
CMD ["tar", "cf", "-", "."]

View File

@ -0,0 +1,27 @@
QEMU_IMAGE=mobylinux/qemu-user-static:7a07de557d7f6ae3d72873c32bfb4c51c7687d03
QEMU_BINARIES=qemu-arm-static qemu-aarch64-static qemu-ppc64le-static
default: rootfs
$(QEMU_BINARIES):
docker run --rm --net=none $(QEMU_IMAGE) tar cf - -C /usr/bin $@ | tar xf -
DEPS=Dockerfile main.go 00_moby.conf $(QEMU_BINARIES)
rootfs: $(DEPS) $(QEMU_BINARIES)
mkdir -p $@
BUILD=$$( tar cf - $(DEPS) | docker build -q - ) && \
[ -n "$$BUILD" ] && \
echo "Built $$BUILD" && \
IMAGE=$$( docker run --rm --net=none $$BUILD | docker build -q - ) && \
[ -n "$$IMAGE" ] && \
echo "Built $$IMAGE" && \
CONTAINER=$$( docker create $$IMAGE /dev/null ) && \
docker export $$CONTAINER | tar -xf - -C $@ && \
docker rm $$CONTAINER && \
( cd $@ && rm -rf .dockerenv Dockerfile dev/* etc/hostname etc/hosts etc/mtab etc/resolv.conf )
clean:
rm -rf rootfs $(QEMU_BINARIES)
.DELETE_ON_ERROR:

View File

@ -0,0 +1,168 @@
{
"ociVersion": "1.0.0-rc2-dev",
"platform": {
"os": "linux",
"arch": "amd64"
},
"process": {
"terminal": false,
"user": {
"uid": 0,
"gid": 0
},
"args": [
"/usr/bin/binfmt",
"-dir",
"/etc/binfmt.d/",
"-mount",
"/binfmt_misc"
],
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
],
"cwd": "/",
"capabilities": [],
"rlimits": [
{
"type": "RLIMIT_NOFILE",
"hard": 1024,
"soft": 1024
}
],
"noNewPrivileges": true
},
"root": {
"path": "rootfs",
"readonly": true
},
"hostname": "elegant_albattani",
"mounts": [
{
"destination": "/proc",
"type": "proc",
"source": "proc"
},
{
"destination": "/dev",
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
},
{
"destination": "/dev/pts",
"type": "devpts",
"source": "devpts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
]
},
{
"destination": "/dev/shm",
"type": "tmpfs",
"source": "shm",
"options": [
"nosuid",
"noexec",
"nodev",
"mode=1777",
"size=65536k"
]
},
{
"destination": "/dev/mqueue",
"type": "mqueue",
"source": "mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/sys",
"type": "sysfs",
"source": "sysfs",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/sys/fs/cgroup",
"type": "cgroup",
"source": "cgroup",
"options": [
"nosuid",
"noexec",
"nodev",
"relatime",
"ro"
]
},
{
"destination": "/binfmt_misc",
"type": "bind",
"source": "/proc/sys/fs/binfmt_misc",
"options": [
"rw",
"rbind",
"rprivate"
]
}
],
"hooks": {},
"linux": {
"resources": {
"devices": [
{
"allow": false,
"access": "rwm"
}
]
},
"namespaces": [
{
"type": "pid"
},
{
"type": "network"
},
{
"type": "ipc"
},
{
"type": "uts"
},
{
"type": "mount"
}
],
"maskedPaths": [
"/proc/kcore",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware"
],
"readonlyPaths": [
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
}
}

View File

@ -0,0 +1,93 @@
package main
import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"
"syscall"
)
var (
dir string
mount string
)
func init() {
flag.StringVar(&dir, "dir", "/etc/binfmt.d", "directory with config files")
flag.StringVar(&mount, "mount", "/proc/sys/fs/binfmt_misc", "binfmt_misc mount point")
}
func binfmt(line []byte) error {
register := filepath.Join(mount, "register")
file, err := os.OpenFile(register, os.O_WRONLY, 0)
if err != nil {
e, ok := err.(*os.PathError)
if ok && e.Err == syscall.ENOENT {
return fmt.Errorf("ENOENT opening %s is it mounted?", register)
}
if ok && e.Err == syscall.EPERM {
return fmt.Errorf("EPERM opening %s check permissions?", register)
}
return fmt.Errorf("Cannot open %s: %s", register, err)
}
defer file.Close()
// short writes should not occur on sysfs, cannot usefully recover
_, err = file.Write(line)
if err != nil {
e, ok := err.(*os.PathError)
if ok && e.Err == syscall.EEXIST {
// clear existing entry
split := bytes.SplitN(line[1:], []byte(":"), 2)
if len(split) == 0 {
return fmt.Errorf("Cannot determine arch from: %s", line)
}
arch := filepath.Join(mount, string(split[0]))
clear, err := os.OpenFile(arch, os.O_WRONLY, 0)
if err != nil {
return fmt.Errorf("Cannot open %s: %s", arch, err)
}
defer clear.Close()
_, err = clear.Write([]byte("-1"))
if err != nil {
return fmt.Errorf("Cannot write to %s: %s", arch, err)
}
_, err = file.Write(line)
if err != nil {
return fmt.Errorf("Cannot write to %s: %s", register, err)
}
return nil
}
return fmt.Errorf("Cannot write to %s: %s", register, err)
}
return nil
}
func main() {
flag.Parse()
files, err := ioutil.ReadDir(dir)
if err != nil {
log.Fatalf("Cannot read directory %s: %s", dir, err)
}
for _, file := range files {
contents, err := ioutil.ReadFile(filepath.Join(dir, file.Name()))
if err != nil {
log.Fatalf("Cannot read file %s: %s", file.Name(), err)
}
lines := bytes.Split(contents, []byte("\n"))
for _, line := range lines {
if len(line) == 0 {
continue
}
err = binfmt(line)
if err != nil {
log.Fatal(err)
}
}
}
}

View File

@ -0,0 +1,13 @@
#!/bin/sh
# FOR REFERENCE ONLY
# needs adjusting for real use, riddler needs some updates
set -e
printf "FROM scratch\nCOPY . ./\n" > rootfs/Dockerfile
IMAGE=$(docker build -q rootfs)
CONTAINER=$(docker create --net=none --security-opt apparmor=unconfined --cap-drop all --read-only -v /proc/sys/fs/binfmt_misc:/binfmt_misc $IMAGE /usr/bin/binfmt -dir /etc/binfmt.d/ -mount /binfmt_misc)
rm rootfs/Dockerfile
docker run -v $PWD:/conf -v /var/run/docker.sock:/var/run/docker.sock --rm jess/riddler -f -bundle /conf $CONTAINER
docker rm $CONTAINER

View File

@ -1,21 +0,0 @@
:qemu-aarch64:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-aarch64-static:OC
:qemu-alpha:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-alpha-static:OC
:qemu-arm:M:0:\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-arm-static:OC
:qemu-armeb:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-armeb-static:OC
:qemu-cris:M:0:\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x4c\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-cris-static:OC
:qemu-m68k:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x04:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-m68k-static:OC
:qemu-microblaze:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\xba\xab:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-microblaze-static:OC
:qemu-mips:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-mips-static:OC
:qemu-mips64:M:0:\x7f\x45\x4c\x46\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08:\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-mips64-static:OC
:qemu-mips64el:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-mips64el-static:OC
:qemu-mipsel:M:0:\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x08\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xfe\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-mipsel-static:OC
:qemu-ppc:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x14:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-ppc-static:OC
:qemu-ppc64:M:0:\x7f\x45\x4c\x46\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-ppc64-static:OC
:qemu-ppc64abi32:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-ppc64abi32-static:OC
:qemu-ppc64le:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:/usr/bin/qemu-ppc64le-static:OC
:qemu-s390x:M:0:\x7f\x45\x4c\x46\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-s390x-static:OC
:qemu-sh4:M:0:\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-sh4-static:OC
:qemu-sh4eb:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2a:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-sh4eb-static:OC
:qemu-sparc:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-sparc-static:OC
:qemu-sparc32plus:M:0:\x7f\x45\x4c\x46\x01\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x12:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-sparc32plus-static:OC
:qemu-sparc64:M:0:\x7f\x45\x4c\x46\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x2b:\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-sparc64-static:OC

View File

@ -2,7 +2,7 @@
depend()
{
after docker
before docker
}
start()
@ -13,17 +13,19 @@ start()
ulimit -n 1048576
ulimit -p unlimited
[ ! -e /usr/bin/containerd ] && \
( cd /usr/bin && \
ln -s docker-runc runc && \
ln -s docker-containerd-shim containerd-shim && \
ln -s docker-containerd-ctr containerd-ctr && \
ln -s docker-containerd containerd \
)
/usr/bin/containerd 1>&2 2>/var/log/containerd.log &
ewaitfile 5 /var/run/containerd/containerd.sock
eend $? "Failed to start system containerd"
ebegin "Running system containers"
for f in /containers/*
do
# note we attach, so will be synchronous for now
containerd-ctr containers start --no-pivot --attach "$(basename $f)" "$f"
done
eend $? "Failed to start system containers"
}

View File

@ -9,6 +9,8 @@ docker info
docker ps
docker pull alpine
docker run alpine true
docker pull armhf/alpine
docker run armhf/alpine uname -a
docker run --name webserver -d -p 80:80 alpine httpd -f -h /etc
wget -O - -q localhost/hostname
docker kill webserver