vsudd: New daemon to proxy vsock to Unix domain sock

Used to proxy vsock 2376 <-> /var/run/docker.sock in the Moby VM.

Signed-off-by: Ian Campbell <ian.campbell@docker.com>
This commit is contained in:
Ian Campbell 2016-04-01 17:09:30 +01:00
parent c2546d2319
commit eac9d655c6
9 changed files with 389 additions and 1 deletions

View File

@ -54,6 +54,8 @@ COPY packages/userns/etc /etc/
COPY packages/userns/groupadd /usr/sbin
COPY packages/userns/useradd /usr/sbin
COPY packages/nc-vsock/nc-vsock /usr/bin
COPY packages/vsudd/vsudd /sbin
COPY packages/vsudd/etc /etc
RUN \
rc-update add swap boot && \
@ -90,7 +92,8 @@ RUN \
rc-update add hupper default && \
rc-update add hv_fcopy_daemon default && \
rc-update add hv_kvp_daemon default && \
rc-update add hv_vss_daemon default
rc-update add hv_vss_daemon default && \
rc-update add vsudd default
COPY init /

View File

@ -6,6 +6,7 @@ all:
$(MAKE) -C hvtools OS=linux
$(MAKE) -C docker OS=Linux
$(MAKE) -C nc-vsock OS=linux
$(MAKE) -C vsudd OS=linux
arm:
$(MAKE) -C proxy OS=linux ARCH=arm
@ -14,6 +15,7 @@ arm:
$(MAKE) -C hupper OS=linux ARCH=arm
$(MAKE) -C docker OS=Linux ARCH=arm
$(MAKE) -C nc-vsock OS=linux ARCH=arm
$(MAKE) -C vsudd OS=linux ARCH=arm
clean:
$(MAKE) -C proxy clean
@ -23,3 +25,4 @@ clean:
$(MAKE) -C hupper clean
$(MAKE) -C hvtools clean
$(MAKE) -C nc-vsock clean
$(MAKE) -C vsudd clean

1
alpine/packages/vsudd/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/vsudd

BIN
alpine/packages/vsudd/9pudc Executable file

Binary file not shown.

View File

@ -0,0 +1,15 @@
# Build image for vsudd: compiles the Go sources from this directory inside golang:alpine.
FROM golang:alpine
# alpine-sdk is installed before the build.
# NOTE(review): presumably for the C toolchain needed by the cgo code in main.go -- confirm.
RUN apk update && apk add alpine-sdk
RUN mkdir -p /go/src/vsudd
WORKDIR /go/src/vsudd
COPY . /go/src/vsudd/
# GOARCH/GOOS are build arguments; the package Makefile passes them with --build-arg.
ARG GOARCH
ARG GOOS
RUN go install --ldflags '-extldflags "-fno-PIC"'
# If the binary ended up in a subdirectory of /go/bin (presumably /go/bin/<os>_<arch>
# when cross-compiling -- confirm), move it up to /go/bin. The trailing `|| true`
# keeps the build going when no such subdirectory binary exists.
RUN [ -f /go/bin/*/vsudd ] && mv /go/bin/*/vsudd /go/bin/ || true

View File

@ -0,0 +1,10 @@
# Builds the vsudd binary inside a Docker image and copies it into this directory.
# `all` and `clean` never create files with those names, so mark them phony;
# otherwise a stray file called `all` or `clean` would silently disable them.
.PHONY: all clean

all: vsudd

# Build the vsudd:build image (GOOS/GOARCH are taken from the caller's OS/ARCH
# variables), then stream the compiled binary out of a throwaway container.
vsudd: Dockerfile main.go
	docker build --build-arg GOOS=$(OS) --build-arg GOARCH=$(ARCH) -t vsudd:build .
	docker run --rm vsudd:build cat /go/bin/vsudd > vsudd
	chmod 755 vsudd

# Remove the extracted binary and the intermediate build image.
clean:
	rm -f vsudd
	docker images -q vsudd:build | xargs docker rmi -f

View File

@ -0,0 +1,31 @@
#!/sbin/openrc-run
description="vsock socket proxy client"
# start launches /sbin/vsudd in the background through start-stop-daemon,
# forwarding vsock port 2376 to /var/run/docker.sock. The daemon's stdout and
# stderr are both sent to LOGFILE and its pid is recorded in PIDFILE.
start()
{
	ebegin "Starting docker socket vsock passthrough"

	# Use the default locations unless PIDFILE/LOGFILE are already set and non-empty.
	: "${PIDFILE:=/var/run/vsudd.pid}"
	: "${LOGFILE:=/var/log/vsudd.log}"

	# Every expansion is quoted so a path containing whitespace or glob
	# characters is passed to start-stop-daemon as a single argument.
	start-stop-daemon --start --quiet \
		--background \
		--exec /sbin/vsudd \
		--make-pidfile --pidfile "${PIDFILE}" \
		--stderr "${LOGFILE}" --stdout "${LOGFILE}" \
		-- -port 2376 -sock /var/run/docker.sock
	eend $? "Failed to start vsudd"
}
# stop asks start-stop-daemon to stop the process whose pid is recorded in PIDFILE.
stop()
{
	ebegin "Stopping docker socket passthrough"

	# Use the default location unless PIDFILE is already set and non-empty.
	: "${PIDFILE:=/var/run/vsudd.pid}"

	# Quoted so the pidfile path is always passed as a single argument.
	start-stop-daemon --stop --quiet --pidfile "${PIDFILE}"
	eend $? "Failed to stop vsudd"
}

View File

@ -0,0 +1,161 @@
/*
* VMware vSockets Driver
*
* Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation version 2 and no later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _UAPI_VM_SOCKETS_H
#define _UAPI_VM_SOCKETS_H
#ifdef __KERNEL__
#include <linux/socket.h>
#else
#define __kernel_sa_family_t sa_family_t
#include <sys/socket.h>
#endif
/* Option name for STREAM socket buffer size. Use as the option name in
* setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the size of the buffer underlying a vSockets STREAM socket.
* Value is clamped to the MIN and MAX.
*/
#define SO_VM_SOCKETS_BUFFER_SIZE 0
/* Option name for STREAM socket minimum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the minimum size allowed for the buffer underlying a vSockets
* STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1
/* Option name for STREAM socket maximum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long
* that specifies the maximum size allowed for the buffer underlying a
* vSockets STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2
/* Option name for socket peer's host-specific VM ID. Use as the option name
* in getsockopt(3) to get a host-specific identifier for the peer endpoint's
* VM. The identifier is a signed integer.
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
/* Option name for determining if a socket is trusted. Use as the option name
* in getsockopt(3) to determine if a socket is trusted. The value is a
* signed integer.
*/
#define SO_VM_SOCKETS_TRUSTED 5
/* Option name for STREAM socket connection timeout. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get the connection
* timeout for a STREAM socket.
*/
#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
/* Option name for using non-blocking send/receive. Use as the option name
* for setsockopt(3) or getsockopt(3) to set or get the non-blocking
* transmit/receive flag for a STREAM socket. This flag determines whether
* send() and recv() can be called in non-blocking contexts for the given
* socket. The value is a signed integer.
*
* This option is only relevant to kernel endpoints, where descheduling the
* thread of execution is not allowed, for example, while holding a spinlock.
* It is not to be confused with conventional non-blocking socket operations.
*
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_NONBLOCK_TXRX 7
/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of
* sockaddr_vm and indicates the context ID of the current endpoint.
*/
#define VMADDR_CID_ANY -1U
/* Bind to any available port. Works for the svm_port field of
* sockaddr_vm.
*/
#define VMADDR_PORT_ANY -1U
/* Use this as the destination CID in an address when referring to the
* hypervisor. VMCI relies on it being 0, but this would be useful for other
* transports too.
*/
#define VMADDR_CID_HYPERVISOR 0
/* This CID is specific to VMCI and can be considered reserved (even VMCI
* doesn't use it anymore, it's a legacy value from an older release).
*/
#define VMADDR_CID_RESERVED 1
/* Use this as the destination CID in an address when referring to the host
* (any process other than the hypervisor). VMCI relies on it being 2, but
* this would be useful for other transports too.
*/
#define VMADDR_CID_HOST 2
/* Invalid vSockets version. */
#define VM_SOCKETS_INVALID_VERSION -1U
/* The epoch (first) component of the vSockets version. A single byte
* representing the epoch component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24)
/* The major (second) component of the vSockets version. A single byte
* representing the major component of the vSockets version. Typically
* changes for every major release of a product.
*/
#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16)
/* The minor (third) component of the vSockets version. Two bytes representing
* the minor component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
/* Address structure for vSockets. The address family should be set to
 * AF_VSOCK. The structure members should all align on their natural
 * boundaries without resorting to compiler packing directives. The total size
 * of this structure should be exactly the same as that of struct sockaddr.
 */
struct sockaddr_vm {
	/* Address family; should be AF_VSOCK. */
	__kernel_sa_family_t svm_family;
	/* NOTE(review): reserved by name; presumably must be left zero -- confirm. */
	unsigned short svm_reserved1;
	/* Port; VMADDR_PORT_ANY binds to any available port. */
	unsigned int svm_port;
	/* Context ID; VMADDR_CID_ANY indicates the context ID of the current endpoint. */
	unsigned int svm_cid;
	/* Padding sized so the whole structure is as large as struct sockaddr:
	 * sizeof(struct sockaddr) minus the sizes of the four members above.
	 */
	unsigned char svm_zero[sizeof(struct sockaddr) -
			       sizeof(sa_family_t) -
			       sizeof(unsigned short) -
			       sizeof(unsigned int) - sizeof(unsigned int)];
};
/* ioctl request number, built with _IO(7, 0xb9).
 * NOTE(review): by its name this queries the local context ID -- confirm
 * against the driver before relying on it.
 */
#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9)
#endif /* _UAPI_VM_SOCKETS_H */

View File

@ -0,0 +1,164 @@
package main
import (
"flag"
"fmt"
"io"
"log"
"net"
"os"
"syscall"
"time"
)
/* No way to teach net or syscall about vsock sockaddr, so go right to C */
/*
#include <sys/socket.h>
#include "include/uapi/linux/vm_sockets.h"

// bind_sockaddr_vm calls bind(2) on fd with a vsock address, doing the cast to
// struct sockaddr and supplying the address length. Returns bind's result.
int bind_sockaddr_vm(int fd, const struct sockaddr_vm *sa_vm) {
    return bind(fd, (const struct sockaddr*)sa_vm, sizeof(*sa_vm));
}

// connect_sockaddr_vm is the connect(2) counterpart of bind_sockaddr_vm.
// Returns connect's result.
int connect_sockaddr_vm(int fd, const struct sockaddr_vm *sa_vm) {
    return connect(fd, (const struct sockaddr*)sa_vm, sizeof(*sa_vm));
}

// accept_vm calls accept(2) on fd without asking for the peer address.
// Returns accept's result.
int accept_vm(int fd) {
    return accept(fd, 0, 0);
}
*/
import "C"
const (
	// AF_VSOCK is the address family number passed to socket() and stored in
	// sockaddr_vm.svm_family for vsock sockets.
	AF_VSOCK = 40
	// VSOCK_CID_ANY is the wildcard context ID (2^32-1), the same value as
	// VMADDR_CID_ANY (-1U) in vm_sockets.h.
	VSOCK_CID_ANY = 4294967295 /* 2^32-1 */
)
// Command-line settings: registered as flags in init and filled in by
// flag.Parse in main.
var (
	// port is the vsock port to listen on (-port).
	port uint
	// sock is the path of the Unix domain socket connections are forwarded to (-sock).
	sock string
	// detach makes main log to /var/log/vsudd.log and point stdio at /dev/null (-detach).
	detach bool
)
func init() {
flag.UintVar(&port, "port", 2376, "vsock port to forward")
flag.StringVar(&sock, "sock", "/var/run/docker.sock", "path of the local Unix domain socket to forward to")
flag.BoolVar(&detach, "detach", false, "detach from terminal")
}
// main parses the flags, optionally redirects logging and stdio (-detach),
// then binds a vsock listening socket on the configured port and hands every
// accepted connection to handleOne in its own goroutine. It only exits
// through log.Fatal* on an unrecoverable error.
func main() {
	log.SetFlags(0)
	flag.Parse()

	if detach {
		logFile, err := os.Create("/var/log/vsudd.log")
		if err != nil {
			log.Fatalln("Failed to open log file", err)
		}
		log.SetOutput(logFile)

		null, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
		if err != nil {
			log.Fatalln("Failed to open /dev/null", err)
		}
		// Point stdin/stdout/stderr at /dev/null. Log output already goes to
		// logFile, so a failure here can still be reported; it is treated as
		// non-fatal because the daemon can run without the redirection.
		nullFD := int(null.Fd())
		for _, std := range []*os.File{os.Stdin, os.Stdout, os.Stderr} {
			if err := syscall.Dup2(nullFD, int(std.Fd())); err != nil {
				log.Println("Failed to redirect", std.Name(), "to /dev/null", err)
			}
		}
		// The duplicates are independent descriptors, so the original can be
		// released. Closing it here (rather than leaving it to the garbage
		// collector) also keeps null alive while nullFD is in use above.
		null.Close()
	}

	acceptFD, err := syscall.Socket(AF_VSOCK, syscall.SOCK_STREAM, 0)
	if err != nil {
		log.Fatal(err)
	}

	sa := C.struct_sockaddr_vm{}
	sa.svm_family = AF_VSOCK
	sa.svm_port = C.uint(port)
	// NOTE(review): the context ID is hard-coded to 3 while VSOCK_CID_ANY is
	// declared but unused -- confirm whether binding to any CID was intended.
	sa.svm_cid = 3

	if ret := C.bind_sockaddr_vm(C.int(acceptFD), &sa); ret != 0 {
		log.Fatal(fmt.Sprintf("failed bind vsock connection to %08x.%08x, returned %d", sa.svm_cid, sa.svm_port, ret))
	}

	err = syscall.Listen(acceptFD, syscall.SOMAXCONN)
	if err != nil {
		log.Fatalln("Failed to listen to VSOCK", err)
	}
	log.Printf("Starting at %s", time.Now().Format(time.RFC822))
	log.Printf("Listening on fd %d", acceptFD)

	connid := 0
	for {
		// Decide success from accept's return value: the second result cgo
		// hands back is just errno, which is only meaningful when the call
		// actually failed.
		fd, err := C.accept_vm(C.int(acceptFD))
		if fd < 0 {
			if err == syscall.EINTR || err == syscall.ECONNABORTED {
				// Transient conditions: the listening socket is still usable,
				// so retry instead of taking the whole daemon down.
				continue
			}
			log.Fatalln("Error accepting connection", err)
		}
		connid++
		go handleOne(connid, int(fd))
	}
}
// handleOne proxies one accepted vsock connection (fd) to the Unix domain
// socket at sock, copying data in both directions until each side reaches
// EOF or fails. connid is only used to tag log lines. The vsock descriptor
// and the Unix socket connection are both closed before it returns.
func handleOne(connid int, fd int) {
	vsock := os.NewFile(uintptr(fd), "vsock connection")
	log.Println(connid, "Accepted connection on fd", fd)
	// Close through the os.File rather than syscall.Close(fd): the File owns
	// the descriptor, and closing the raw fd behind its back would let the
	// File's finalizer later close an unrelated connection that happened to
	// reuse the same descriptor number. The deferred call also keeps vsock
	// alive while the raw fd is used with syscall.Shutdown below.
	defer vsock.Close()

	var docker *net.UnixConn
	var err error

	// Cope with the server socket appearing up to 10s later
	// (200 attempts, 50ms apart).
	for i := 0; i < 200; i++ {
		docker, err = net.DialUnix("unix", nil, &net.UnixAddr{Name: sock, Net: "unix"})
		if err == nil {
			break
		}
		time.Sleep(50 * time.Millisecond)
	}
	if err != nil {
		// If the forwarding program has broken then close and continue
		log.Println(connid, "Failed to connect to Unix domain socket after 10s", sock, err)
		return
	}
	// Only register the close once the dial is known to have succeeded;
	// docker is nil on the failure path above.
	defer docker.Close()

	// w carries the docker->vsock byte count back from the copier goroutine
	// and doubles as its completion signal.
	w := make(chan int64)
	go func() {
		n, err := io.Copy(vsock, docker)
		if err != nil {
			log.Println(connid, "error copying from docker to vsock:", err)
		}
		log.Println(connid, "copying from docker to vsock: ", n, "bytes done")

		// Half-close both ends of this direction so each peer sees EOF.
		err = docker.CloseRead()
		if err != nil {
			log.Println(connid, "error CloseRead on docker socket:", err)
		}
		err = syscall.Shutdown(fd, syscall.SHUT_WR)
		if err != nil {
			log.Println(connid, "error SHUT_WR on vsock:", err)
		}
		w <- n
	}()

	n, err := io.Copy(docker, vsock)
	if err != nil {
		log.Println(connid, "error copying from vsock to docker:", err)
	}
	log.Println(connid, "copying from vsock to docker: ", n, "bytes done")
	totalRead := n

	err = docker.CloseWrite()
	if err != nil {
		log.Println(connid, "error CloseWrite on docker socket:", err)
	}
	err = syscall.Shutdown(fd, syscall.SHUT_RD)
	if err != nil {
		log.Println(connid, "error SHUT_RD on vsock:", err)
	}

	// Wait for the other direction to finish before the deferred closes run.
	totalWritten := <-w
	log.Println(connid, "Done. read:", totalRead, "written:", totalWritten)
}