diff --git a/alpine/Dockerfile b/alpine/Dockerfile
index 02f038d39..70a46ce87 100644
--- a/alpine/Dockerfile
+++ b/alpine/Dockerfile
@@ -54,6 +54,8 @@ COPY packages/userns/etc /etc/
 COPY packages/userns/groupadd /usr/sbin
 COPY packages/userns/useradd /usr/sbin
 COPY packages/nc-vsock/nc-vsock /usr/bin
+COPY packages/vsudd/vsudd /sbin
+COPY packages/vsudd/etc /etc
 
 RUN \
   rc-update add swap boot && \
@@ -90,7 +92,8 @@ RUN \
   rc-update add hupper default && \
   rc-update add hv_fcopy_daemon default && \
   rc-update add hv_kvp_daemon default && \
-  rc-update add hv_vss_daemon default
+  rc-update add hv_vss_daemon default && \
+  rc-update add vsudd default
 
 COPY init /
 
diff --git a/alpine/packages/Makefile b/alpine/packages/Makefile
index 7931d1709..fa194a708 100644
--- a/alpine/packages/Makefile
+++ b/alpine/packages/Makefile
@@ -6,6 +6,7 @@ all:
 	$(MAKE) -C hvtools OS=linux
 	$(MAKE) -C docker OS=Linux
 	$(MAKE) -C nc-vsock OS=linux
+	$(MAKE) -C vsudd OS=linux
 
 arm:
 	$(MAKE) -C proxy OS=linux ARCH=arm
@@ -14,6 +15,7 @@ arm:
 	$(MAKE) -C hupper OS=linux ARCH=arm
 	$(MAKE) -C docker OS=Linux ARCH=arm
 	$(MAKE) -C nc-vsock OS=linux ARCH=arm
+	$(MAKE) -C vsudd OS=linux ARCH=arm
 
 clean:
 	$(MAKE) -C proxy clean
@@ -23,3 +25,4 @@ clean:
 	$(MAKE) -C hupper clean
 	$(MAKE) -C hvtools clean
 	$(MAKE) -C nc-vsock clean
+	$(MAKE) -C vsudd clean
diff --git a/alpine/packages/vsudd/.gitignore b/alpine/packages/vsudd/.gitignore
new file mode 100644
index 000000000..e4461fbd4
--- /dev/null
+++ b/alpine/packages/vsudd/.gitignore
@@ -0,0 +1 @@
+/vsudd
diff --git a/alpine/packages/vsudd/9pudc b/alpine/packages/vsudd/9pudc
new file mode 100755
index 000000000..8b8c67921
Binary files /dev/null and b/alpine/packages/vsudd/9pudc differ
diff --git a/alpine/packages/vsudd/Dockerfile b/alpine/packages/vsudd/Dockerfile
new file mode 100644
index 000000000..a2c1d6f36
--- /dev/null
+++ b/alpine/packages/vsudd/Dockerfile
@@ -0,0 +1,15 @@
+FROM golang:alpine
+
+RUN apk update && apk add alpine-sdk
+
+RUN mkdir -p /go/src/vsudd
+WORKDIR /go/src/vsudd
+
+COPY . /go/src/vsudd/
+
+ARG GOARCH
+ARG GOOS
+
+RUN go install --ldflags '-extldflags "-fno-PIC"'
+
+RUN [ -f /go/bin/*/vsudd ] && mv /go/bin/*/vsudd /go/bin/ || true
diff --git a/alpine/packages/vsudd/Makefile b/alpine/packages/vsudd/Makefile
new file mode 100644
index 000000000..43c2ecb95
--- /dev/null
+++ b/alpine/packages/vsudd/Makefile
@@ -0,0 +1,10 @@
+all: vsudd
+
+vsudd: Dockerfile main.go
+	docker build --build-arg GOOS=$(OS) --build-arg GOARCH=$(ARCH) -t vsudd:build .
+	docker run --rm vsudd:build cat /go/bin/vsudd > vsudd
+	chmod 755 vsudd
+
+clean:
+	rm -f vsudd
+	docker images -q vsudd:build | xargs docker rmi -f
diff --git a/alpine/packages/vsudd/etc/init.d/vsudd b/alpine/packages/vsudd/etc/init.d/vsudd
new file mode 100755
index 000000000..90fe4a88d
--- /dev/null
+++ b/alpine/packages/vsudd/etc/init.d/vsudd
@@ -0,0 +1,33 @@
+#!/sbin/openrc-run
+
+description="vsock socket proxy client"
+
+# PIDFILE and LOGFILE are kept if already non-empty; otherwise they
+# default to /var/run/vsudd.pid and /var/log/vsudd.log.
+start()
+{
+	ebegin "Starting docker socket vsock passthrough"
+
+	[ -n "${PIDFILE}" ] || PIDFILE=/var/run/vsudd.pid
+	[ -n "${LOGFILE}" ] || LOGFILE=/var/log/vsudd.log
+
+	start-stop-daemon --start --quiet \
+		--background \
+		--exec /sbin/vsudd \
+		--make-pidfile --pidfile "${PIDFILE}" \
+		--stderr "${LOGFILE}" --stdout "${LOGFILE}" \
+		-- -port 2376 -sock /var/run/docker.sock
+
+	eend $? "Failed to start vsudd"
+}
+
+stop()
+{
+	ebegin "Stopping docker socket passthrough"
+
+	[ -n "${PIDFILE}" ] || PIDFILE=/var/run/vsudd.pid
+
+	start-stop-daemon --stop --quiet --pidfile "${PIDFILE}"
+
+	eend $? "Failed to stop vsudd"
+}
diff --git a/alpine/packages/vsudd/include/uapi/linux/vm_sockets.h b/alpine/packages/vsudd/include/uapi/linux/vm_sockets.h
new file mode 100644
index 000000000..41934a185
--- /dev/null
+++ b/alpine/packages/vsudd/include/uapi/linux/vm_sockets.h
@@ -0,0 +1,161 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _UAPI_VM_SOCKETS_H
+#define _UAPI_VM_SOCKETS_H
+
+#ifdef __KERNEL__
+#include <linux/socket.h>
+#else
+#define __kernel_sa_family_t sa_family_t
+#include <sys/socket.h>
+#endif
+
+/* Option name for STREAM socket buffer size. Use as the option name in
+ * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
+ * specifies the size of the buffer underlying a vSockets STREAM socket.
+ * Value is clamped to the MIN and MAX.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_SIZE 0
+
+/* Option name for STREAM socket minimum buffer size. Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
+ * specifies the minimum size allowed for the buffer underlying a vSockets
+ * STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1
+
+/* Option name for STREAM socket maximum buffer size. Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long
+ * that specifies the maximum size allowed for the buffer underlying a
+ * vSockets STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2
+
+/* Option name for socket peer's host-specific VM ID. Use as the option name
+ * in getsockopt(3) to get a host-specific identifier for the peer endpoint's
+ * VM. The identifier is a signed integer.
+ * Only available for hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
+
+/* Option name for determining if a socket is trusted. Use as the option name
+ * in getsockopt(3) to determine if a socket is trusted. The value is a
+ * signed integer.
+ */
+
+#define SO_VM_SOCKETS_TRUSTED 5
+
+/* Option name for STREAM socket connection timeout. Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get the connection
+ * timeout for a STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
+
+/* Option name for using non-blocking send/receive. Use as the option name
+ * for setsockopt(3) or getsockopt(3) to set or get the non-blocking
+ * transmit/receive flag for a STREAM socket. This flag determines whether
+ * send() and recv() can be called in non-blocking contexts for the given
+ * socket. The value is a signed integer.
+ *
+ * This option is only relevant to kernel endpoints, where descheduling the
+ * thread of execution is not allowed, for example, while holding a spinlock.
+ * It is not to be confused with conventional non-blocking socket operations.
+ *
+ * Only available for hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_NONBLOCK_TXRX 7
+
+/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of
+ * sockaddr_vm and indicates the context ID of the current endpoint.
+ */
+
+#define VMADDR_CID_ANY -1U
+
+/* Bind to any available port. Works for the svm_port field of
+ * sockaddr_vm.
+ */
+
+#define VMADDR_PORT_ANY -1U
+
+/* Use this as the destination CID in an address when referring to the
+ * hypervisor. VMCI relies on it being 0, but this would be useful for other
+ * transports too.
+ */
+
+#define VMADDR_CID_HYPERVISOR 0
+
+/* This CID is specific to VMCI and can be considered reserved (even VMCI
+ * doesn't use it anymore, it's a legacy value from an older release).
+ */
+
+#define VMADDR_CID_RESERVED 1
+
+/* Use this as the destination CID in an address when referring to the host
+ * (any process other than the hypervisor). VMCI relies on it being 2, but
+ * this would be useful for other transports too.
+ */
+
+#define VMADDR_CID_HOST 2
+
+/* Invalid vSockets version. */
+
+#define VM_SOCKETS_INVALID_VERSION -1U
+
+/* The epoch (first) component of the vSockets version. A single byte
+ * representing the epoch component of the vSockets version.
+ */
+
+#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24)
+
+/* The major (second) component of the vSockets version. A single byte
+ * representing the major component of the vSockets version. Typically
+ * changes for every major release of a product.
+ */
+
+#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16)
+
+/* The minor (third) component of the vSockets version. Two bytes representing
+ * the minor component of the vSockets version.
+ */
+
+#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
+
+/* Address structure for vSockets. The address family should be set to
+ * AF_VSOCK. The structure members should all align on their natural
+ * boundaries without resorting to compiler packing directives. The total size
+ * of this structure should be exactly the same as that of struct sockaddr.
+ */
+
+struct sockaddr_vm {
+	__kernel_sa_family_t svm_family;
+	unsigned short svm_reserved1;
+	unsigned int svm_port;
+	unsigned int svm_cid;
+	unsigned char svm_zero[sizeof(struct sockaddr) -
+			       sizeof(sa_family_t) -
+			       sizeof(unsigned short) -
+			       sizeof(unsigned int) - sizeof(unsigned int)];
+};
+
+#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9)
+
+#endif /* _UAPI_VM_SOCKETS_H */
diff --git a/alpine/packages/vsudd/main.go b/alpine/packages/vsudd/main.go
new file mode 100644
index 000000000..6427ffde3
--- /dev/null
+++ b/alpine/packages/vsudd/main.go
@@ -0,0 +1,208 @@
+// vsudd accepts connections on an AF_VSOCK port and proxies each one to a
+// local Unix domain socket (by default the Docker API socket).
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net"
+	"os"
+	"syscall"
+	"time"
+)
+
+/* No way to teach net or syscall about vsock sockaddr, so go right to C */
+
+/*
+#include <sys/socket.h>
+#include "include/uapi/linux/vm_sockets.h"
+int bind_sockaddr_vm(int fd, const struct sockaddr_vm *sa_vm) {
+    return bind(fd, (const struct sockaddr*)sa_vm, sizeof(*sa_vm));
+}
+int connect_sockaddr_vm(int fd, const struct sockaddr_vm *sa_vm) {
+    return connect(fd, (const struct sockaddr*)sa_vm, sizeof(*sa_vm));
+}
+int accept_vm(int fd) {
+    return accept(fd, 0, 0);
+}
+*/
+import "C"
+
+const (
+	AF_VSOCK      = 40
+	VSOCK_CID_ANY = 4294967295 /* 2^32-1 */
+
+	// The Unix socket is dialled dialAttempts times, dialInterval apart,
+	// i.e. for up to 10s, before a connection is given up on.
+	dialAttempts = 200
+	dialInterval = 50 * time.Millisecond
+)
+
+var (
+	port   uint
+	sock   string
+	detach bool
+)
+
+func init() {
+	flag.UintVar(&port, "port", 2376, "vsock port to forward")
+	flag.StringVar(&sock, "sock", "/var/run/docker.sock", "path of the local Unix domain socket to forward to")
+	flag.BoolVar(&detach, "detach", false, "detach from terminal")
+}
+
+// main parses the flags, optionally detaches stdio, then accepts vsock
+// connections forever, serving each one on its own goroutine.
+func main() {
+	log.SetFlags(0)
+	flag.Parse()
+
+	if detach {
+		logFile, err := os.Create("/var/log/vsudd.log")
+		if err != nil {
+			log.Fatalln("Failed to open log file", err)
+		}
+		log.SetOutput(logFile)
+		null, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
+		if err != nil {
+			log.Fatalln("Failed to open /dev/null", err)
+		}
+		// Point stdin, stdout and stderr at /dev/null; a failed dup2 is
+		// fatal rather than silently leaving the terminal attached.
+		for _, std := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
+			if err := syscall.Dup2(int(null.Fd()), int(std.Fd())); err != nil {
+				log.Fatalln("Failed to redirect", std.Name(), "to /dev/null", err)
+			}
+		}
+	}
+
+	listenFD, err := listenVsock(port)
+	if err != nil {
+		log.Fatalln(err)
+	}
+
+	log.Printf("Starting at %s", time.Now().Format(time.RFC822))
+	log.Printf("Listening on fd %d", listenFD)
+
+	connid := 0
+	for {
+		fd, err := C.accept_vm(C.int(listenFD))
+		if fd < 0 {
+			// Success is decided by accept(2)'s return value; errno is only
+			// meaningful once it has returned -1.
+			if errno, ok := err.(syscall.Errno); ok && (errno == syscall.EINTR || errno == syscall.ECONNABORTED) {
+				// Transient: the peer went away or a signal arrived. Keep serving.
+				continue
+			}
+			log.Fatalln("Error accepting connection", err)
+		}
+		connid++
+		go handleOne(connid, int(fd))
+	}
+}
+
+// listenVsock creates an AF_VSOCK stream socket, binds it to port and starts
+// listening. It returns the raw descriptor; on failure the socket is closed
+// and the error says which step failed.
+func listenVsock(port uint) (int, error) {
+	fd, err := syscall.Socket(AF_VSOCK, syscall.SOCK_STREAM, 0)
+	if err != nil {
+		return -1, fmt.Errorf("creating vsock socket: %v", err)
+	}
+
+	sa := C.struct_sockaddr_vm{}
+	sa.svm_family = AF_VSOCK
+	sa.svm_port = C.uint(port)
+	sa.svm_cid = 3 // NOTE(review): presumably this guest's CID; VSOCK_CID_ANY may be intended - confirm
+
+	if ret, err := C.bind_sockaddr_vm(C.int(fd), &sa); ret != 0 {
+		syscall.Close(fd)
+		return -1, fmt.Errorf("failed bind vsock connection to %08x.%08x, returned %d: %v", sa.svm_cid, sa.svm_port, ret, err)
+	}
+	if err := syscall.Listen(fd, syscall.SOMAXCONN); err != nil {
+		syscall.Close(fd)
+		return -1, fmt.Errorf("listening on vsock: %v", err)
+	}
+	return fd, nil
+}
+
+// dialUnix connects to the Unix domain socket at path, retrying to cope with
+// the server socket appearing up to 10s later. It returns the last dial
+// error if no attempt succeeds.
+func dialUnix(path string) (*net.UnixConn, error) {
+	addr := &net.UnixAddr{Name: path, Net: "unix"}
+	var err error
+	for i := 0; i < dialAttempts; i++ {
+		var conn *net.UnixConn
+		if conn, err = net.DialUnix("unix", nil, addr); err == nil {
+			return conn, nil
+		}
+		time.Sleep(dialInterval)
+	}
+	return nil, err
+}
+
+// handleOne proxies a single accepted vsock connection, identified in the log
+// by connid, to the Unix domain socket named by the -sock flag. Data is
+// copied in both directions until each side reaches EOF; fd is closed before
+// returning.
+func handleOne(connid int, fd int) {
+	vsock := os.NewFile(uintptr(fd), "vsock connection")
+	log.Println(connid, "Accepted connection on fd", fd)
+
+	// Close via the os.File, which now owns fd. Closing the raw descriptor
+	// instead would leave the File's finalizer to close the same number a
+	// second time, possibly after it has been reused by a newer connection.
+	defer vsock.Close()
+
+	docker, err := dialUnix(sock)
+	if err != nil {
+		// If the forwarding program has broken then close and continue
+		log.Println(connid, "Failed to connect to Unix domain socket after 10s", sock, err)
+		return
+	}
+	// Only deferred once the dial has succeeded: docker is nil otherwise.
+	defer docker.Close()
+
+	w := make(chan int64)
+	go func() {
+		n, err := io.Copy(vsock, docker)
+		if err != nil {
+			log.Println(connid, "error copying from docker to vsock:", err)
+		}
+		log.Println(connid, "copying from docker to vsock: ", n, "bytes done")
+
+		// Docker has nothing more to say: half-close both sockets in the
+		// docker->vsock direction so the peer sees EOF.
+		err = docker.CloseRead()
+		if err != nil {
+			log.Println(connid, "error CloseRead on docker socket:", err)
+		}
+		err = syscall.Shutdown(fd, syscall.SHUT_WR)
+		if err != nil {
+			log.Println(connid, "error SHUT_WR on vsock:", err)
+		}
+		w <- n
+	}()
+
+	n, err := io.Copy(docker, vsock)
+	if err != nil {
+		log.Println(connid, "error copying from vsock to docker:", err)
+	}
+	log.Println(connid, "copying from vsock to docker: ", n, "bytes done")
+	totalRead := n
+
+	err = docker.CloseWrite()
+	if err != nil {
+		log.Println(connid, "error CloseWrite on docker socket:", err)
+	}
+	err = syscall.Shutdown(fd, syscall.SHUT_RD)
+	if err != nil {
+		log.Println(connid, "error SHUT_RD on vsock:", err)
+	}
+
+	// Wait for the docker->vsock goroutine before the deferred closes run.
+	totalWritten := <-w
+	log.Println(connid, "Done. read:", totalRead, "written:", totalWritten)
+}