transfused: use vsock transport instead of 9p

Signed-off-by: David Sheets <dsheets@docker.com>
This commit is contained in:
David Sheets 2016-04-12 16:04:20 +01:00
parent 0390b2ba4d
commit cb08ad76d3
10 changed files with 555 additions and 255 deletions

View File

@ -7,11 +7,8 @@ DEV=$(mount | grep '/dev/[sxv]da. on /var type')
[ $? -eq 0 ] && printf "✓ Drive mounted: $DEV\n" || printf "✗ No drive mounted\n" [ $? -eq 0 ] && printf "✓ Drive mounted: $DEV\n" || printf "✗ No drive mounted\n"
INET=$(ifconfig eth0 2> /dev/null | grep 'inet addr') INET=$(ifconfig eth0 2> /dev/null | grep 'inet addr')
[ $? -eq 0 ] && printf "✓ Network connected: $INET\n" || printf "✗ No network connection\n" [ $? -eq 0 ] && printf "✓ Network connected: $INET\n" || printf "✗ No network connection\n"
if [ -d /Transfuse ]
then
FUSE=$(ps -eo args | grep '^/sbin/transfused') FUSE=$(ps -eo args | grep '^/sbin/transfused')
[ $? -eq 0 ] && printf "✓ Process transfused running\n" || printf "✗ No transfused process\n" [ $? -eq 0 ] && printf "✓ Process transfused running\n" || printf "✗ No transfused process\n"
fi
MDNS=$(ps -eo args | grep '^/sbin/mdnstool') MDNS=$(ps -eo args | grep '^/sbin/mdnstool')
[ $? -eq 0 ] && printf "✓ Process mdnstool running: $MDNS\n" || printf "✗ No mdnstool process\n" [ $? -eq 0 ] && printf "✓ Process mdnstool running: $MDNS\n" || printf "✗ No mdnstool process\n"
HUPPER=$(ps -eo args | grep '^/bin/hupper') HUPPER=$(ps -eo args | grep '^/bin/hupper')

View File

@ -1,7 +1,7 @@
.PHONY: all .PHONY: all
HDR=transfused.h transfused_log.h HDR=transfused.h transfused_log.h transfused_vsock.h
SRC=transfused.c transfused_log.c SRC=transfused.c transfused_log.c transfused_vsock.c
DEPS=$(HDR) $(SRC) DEPS=$(HDR) $(SRC)

View File

@ -6,9 +6,7 @@ start()
{ {
ebegin "Starting FUSE socket passthrough" ebegin "Starting FUSE socket passthrough"
mkdir -p /Transfuse
mkdir -p /Mac mkdir -p /Mac
mount -t 9p -o trans=virtio,dfltuid=1001,dfltgid=50,version=9p2000,msize=32768 fuse /Transfuse
if cat /proc/cmdline | grep -q 'com.docker.driverDir' if cat /proc/cmdline | grep -q 'com.docker.driverDir'
then then
@ -38,8 +36,6 @@ start()
stop() stop()
{ {
[ -d /Transfuse ] || exit 0
ebegin "Stopping FUSE socket passthrough" ebegin "Stopping FUSE socket passthrough"
PIDFILE=/var/run/transfused.pid PIDFILE=/var/run/transfused.pid

View File

@ -0,0 +1,161 @@
/*
* VMware vSockets Driver
*
* Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation version 2 and no later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _UAPI_VM_SOCKETS_H
#define _UAPI_VM_SOCKETS_H
#ifdef __KERNEL__
#include <linux/socket.h>
#else
#define __kernel_sa_family_t sa_family_t
#include <sys/socket.h>
#endif
/* Option name for STREAM socket buffer size. Use as the option name in
* setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the size of the buffer underlying a vSockets STREAM socket.
* Value is clamped to the MIN and MAX.
*/
#define SO_VM_SOCKETS_BUFFER_SIZE 0
/* Option name for STREAM socket minimum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the minimum size allowed for the buffer underlying a vSockets
* STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1
/* Option name for STREAM socket maximum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long
* that specifies the maximum size allowed for the buffer underlying a
* vSockets STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2
/* Option name for socket peer's host-specific VM ID. Use as the option name
* in getsockopt(3) to get a host-specific identifier for the peer endpoint's
* VM. The identifier is a signed integer.
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
/* Option name for determining if a socket is trusted. Use as the option name
* in getsockopt(3) to determine if a socket is trusted. The value is a
* signed integer.
*/
#define SO_VM_SOCKETS_TRUSTED 5
/* Option name for STREAM socket connection timeout. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get the connection
* timeout for a STREAM socket.
*/
#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
/* Option name for using non-blocking send/receive. Use as the option name
* for setsockopt(3) or getsockopt(3) to set or get the non-blocking
* transmit/receive flag for a STREAM socket. This flag determines whether
* send() and recv() can be called in non-blocking contexts for the given
* socket. The value is a signed integer.
*
* This option is only relevant to kernel endpoints, where descheduling the
* thread of execution is not allowed, for example, while holding a spinlock.
* It is not to be confused with conventional non-blocking socket operations.
*
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_NONBLOCK_TXRX 7
/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of
* sockaddr_vm and indicates the context ID of the current endpoint.
*/
#define VMADDR_CID_ANY -1U
/* Bind to any available port. Works for the svm_port field of
* sockaddr_vm.
*/
#define VMADDR_PORT_ANY -1U
/* Use this as the destination CID in an address when referring to the
* hypervisor. VMCI relies on it being 0, but this would be useful for other
* transports too.
*/
#define VMADDR_CID_HYPERVISOR 0
/* This CID is specific to VMCI and can be considered reserved (even VMCI
* doesn't use it anymore, it's a legacy value from an older release).
*/
#define VMADDR_CID_RESERVED 1
/* Use this as the destination CID in an address when referring to the host
* (any process other than the hypervisor). VMCI relies on it being 2, but
* this would be useful for other transports too.
*/
#define VMADDR_CID_HOST 2
/* Invalid vSockets version. */
#define VM_SOCKETS_INVALID_VERSION -1U
/* The epoch (first) component of the vSockets version. A single byte
* representing the epoch component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24)
/* The major (second) component of the vSockets version. A single byte
* representing the major component of the vSockets version. Typically
* changes for every major release of a product.
*/
#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16)
/* The minor (third) component of the vSockets version. Two bytes representing
* the minor component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
/* Address structure for vSockets. The address family should be set to
* AF_VSOCK. The structure members should all align on their natural
* boundaries without resorting to compiler packing directives. The total size
* of this structure should be exactly the same as that of struct sockaddr.
*/
struct sockaddr_vm {
__kernel_sa_family_t svm_family;
unsigned short svm_reserved1;
unsigned int svm_port;
unsigned int svm_cid;
unsigned char svm_zero[sizeof(struct sockaddr) -
sizeof(sa_family_t) -
sizeof(unsigned short) -
sizeof(unsigned int) - sizeof(unsigned int)];
};
#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9)
#endif /* _UAPI_VM_SOCKETS_H */

View File

@ -21,16 +21,15 @@
#include <sys/resource.h> #include <sys/resource.h>
#include "transfused_log.h" #include "transfused_log.h"
#include "transfused_vsock.h"
#define COPY_BUFSZ 65536 #define IN_BUFSZ ((1 << 20) + 16)
// The Linux 9p driver/xhyve virtio-9p device have bugs in the #define OUT_BUFSZ ((1 << 20) + 64)
// zero-copy code path which is triggered by I/O of more than 1024 #define EVENT_BUFSZ 4096
// bytes. For an unknown reason, these defects are unusually prominent
// in the event channel use pattern.
#define EVENT_BUFSZ 1024
#define DEFAULT_FUSERMOUNT "/bin/fusermount" #define DEFAULT_FUSERMOUNT "/bin/fusermount"
#define DEFAULT_SOCKET9P_ROOT "/Transfuse" #define DEFAULT_SOCKET "v:_:1525"
#define DEFAULT_SERVER "v:2:1524"
#define RMDIR_SYSCALL 0 #define RMDIR_SYSCALL 0
#define UNLINK_SYSCALL 1 #define UNLINK_SYSCALL 1
@ -41,12 +40,14 @@
// these could be turned into an enum probably but... C standard nausea // these could be turned into an enum probably but... C standard nausea
char * default_fusermount = DEFAULT_FUSERMOUNT; char * default_fusermount = DEFAULT_FUSERMOUNT;
char * default_socket9p_root = DEFAULT_SOCKET9P_ROOT; char * default_socket = DEFAULT_SOCKET;
char * default_server = DEFAULT_SERVER;
char * usage = char * usage =
"usage: transfused [-p pidfile] [-9 socket9p_root] [-f fusermount]\n" "usage: transfused [-p pidfile] [-d server] [-s socket] [-f fusermount]\n"
" [-l logfile] [-m mount_trigger] [-t triggerlog]\n" " [-l logfile] [-m mount_trigger] [-t triggerlog]\n"
" -p pidfile\tthe path at which to write the pid of the process\n" " -p pidfile\tthe path at which to write the pid of the process\n"
" -9 " DEFAULT_SOCKET9P_ROOT "\tthe root of the 9p socket file system\n" " -d " DEFAULT_SERVER "\tthe server address to use ('v:addr:port')\n"
" -s " DEFAULT_SOCKET "\tthe socket address to use ('v:addr:port')\n"
" -f " DEFAULT_FUSERMOUNT "\tthe fusermount executable to use\n" " -f " DEFAULT_FUSERMOUNT "\tthe fusermount executable to use\n"
" -l logfile\tthe log file to use before the mount trigger\n" " -l logfile\tthe log file to use before the mount trigger\n"
" -m mount_trigger\tthe mountpoint to use to trigger log switchover\n" " -m mount_trigger\tthe mountpoint to use to trigger log switchover\n"
@ -58,7 +59,6 @@ pthread_attr_t detached;
typedef struct { typedef struct {
char * descr; char * descr;
long connection;
char * tag; char * tag;
int from; int from;
int to; int to;
@ -101,24 +101,55 @@ void unlock(char *const descr, pthread_mutex_t * mutex) {
die(1, "", "unlock %s: ", descr); die(1, "", "unlock %s: ", descr);
} }
int bind_socket(const char * socket) {
int sock;
if (socket[0] == 0)
die(2, NULL, "Socket family required");
if (socket[1] != ':')
die(2, NULL, "Socket address required");
switch (socket[0]) {
case 'v':
sock = bind_vsock(socket + 2);
break;
default:
die(2, NULL, "Unknown socket family '%c'", socket[0]);
}
return sock;
}
int connect_socket(const char * socket) {
int sock;
if (socket[0] == 0)
die(2, NULL, "Socket family required");
if (socket[1] != ':')
die(2, NULL, "Scoket address required");
switch (socket[0]) {
case 'v':
sock = connect_vsock(socket + 2);
break;
default:
die(2, NULL, "Unknown socket family '%c'", socket[0]);
}
return sock;
}
char ** read_opts(connection_t * connection, char * buf) { char ** read_opts(connection_t * connection, char * buf) {
int read_fd;
char * read_path;
int read_count; int read_count;
int optc = 1; int optc = 1;
char ** optv; char ** optv;
size_t mount_len;
if (asprintf(&read_path, "%s/connections/%ld/read", // TODO: deal with socket read conditions e.g. EAGAIN
connection->params->socket9p_root, connection->id) == -1) read_count = read(connection->sock, buf, EVENT_BUFSZ - 1);
die(1, "Couldn't allocate read path", ""); if (read_count < 0) die(1, "read_opts error reading", "");
read_fd = open(read_path, O_RDONLY);
if (read_fd == -1)
die(1, "couldn't open read path", "For connection %ld, ", connection->id);
read_count = read(read_fd, buf, COPY_BUFSZ - 1);
if (read_count == -1) die(1, "read_opts error reading", "");
// TODO: protocol should deal with short read
buf[read_count] = 0x0; buf[read_count] = 0x0;
for (int i = 0; i < read_count; i++) { for (int i = 0; i < read_count; i++) {
@ -137,7 +168,10 @@ char ** read_opts(connection_t * connection, char * buf) {
} }
} }
free(read_path); mount_len = strnlen(optv[optc - 1], 4096) + 1;
connection->mount_point = must_malloc("mount point string", mount_len);
strncpy(connection->mount_point, optv[optc - 1], mount_len - 1);
connection->mount_point[mount_len - 1] = '\0';
return optv; return optv;
} }
@ -146,48 +180,54 @@ uint64_t message_id(uint64_t * message) {
return message[1]; return message[1];
} }
void copy(copy_thread_state * copy_state) { int read_message(char * descr, int fd, char * buf) {
int read_count;
size_t nbyte;
uint32_t len;
// TODO: socket read conditions e.g. EAGAIN
read_count = read(fd, buf, 4);
if (read_count != 4) {
if (read_count < 0) die(1, "", "copy %s: error reading: ", descr);
if (read_count == 0) die(1, NULL, "copy %s: EOF reading length", descr);
die(1, NULL, "copy %s: short read length %d", descr, read_count);
}
len = *((uint32_t *) buf);
if (len > IN_BUFSZ)
die(1, NULL, "copy %s: message size %d exceeds buffer capacity %d",
len, IN_BUFSZ);
nbyte = (size_t) (len - 4);
buf += 4;
do {
// TODO: socket read conditions e.g. EAGAIN
read_count = read(fd, buf, nbyte);
if (read_count < 0) die(1, "", "copy %s: error reading: ", descr);
if (read_count == 0) die(1, NULL, "copy %s: EOF reading", descr);
nbyte -= read_count;
buf += read_count;
} while (nbyte != 0);
return (int) len;
}
void copy_into_fuse(copy_thread_state * copy_state) {
int from = copy_state->from; int from = copy_state->from;
int to = copy_state->to; int to = copy_state->to;
char * descr = copy_state->descr; char * descr = copy_state->descr;
int read_count, write_count; int read_count, write_count;
long connection = copy_state->connection;
char * tag = copy_state->tag;
void * buf; void * buf;
buf = must_malloc(descr, COPY_BUFSZ); buf = must_malloc(descr, IN_BUFSZ);
while(1) { while(1) {
read_count = read(from, buf, COPY_BUFSZ); read_count = read_message(descr, from, (char *) buf);
if (read_count == -1) die(1, "", "copy %s: error reading: ", descr);
if (debug) {
int trace_fd;
char * trace_path;
if (asprintf(&trace_path, "/tmp/transfused.%ld.%s.%llu",
connection, tag, message_id(buf)) == -1)
die(1, "Couldn't allocate trace packet path", "");
trace_fd = open(trace_path, O_WRONLY | O_CREAT, 0600);
if (trace_fd == -1)
die(1, "couldn't open trace packet path", "For %s, ", descr);
write_count = write(trace_fd, buf, read_count);
if (write_count == -1)
die(1, "", "copy %s trace: error writing %s: ", descr, trace_path);
if (write_count != read_count)
die(1, NULL, "copy %s trace: read %d but only wrote %d",
descr, read_count, write_count);
close(trace_fd);
free(trace_path);
}
write_count = write(to, buf, read_count); write_count = write(to, buf, read_count);
if (write_count == -1) die(1, "", "copy %s: error writing: ", descr); if (write_count < 0) die(1, "", "copy %s: error writing: ", descr);
// /dev/fuse accepts only complete writes
if (write_count != read_count) if (write_count != read_count)
die(1, NULL, "copy %s: read %d but only wrote %d", die(1, NULL, "copy %s: read %d but only wrote %d",
descr, read_count, write_count); descr, read_count, write_count);
@ -196,8 +236,42 @@ void copy(copy_thread_state * copy_state) {
free(buf); free(buf);
} }
void * copy_clean_from(copy_thread_state * copy_state) { void write_exactly(char * descr, int fd, char * buf, size_t nbyte) {
copy(copy_state); int write_count;
do {
// TODO: socket write conditions e.g. EAGAIN
write_count = write(fd, buf, nbyte);
if (write_count < 0) die(1, "", "copy %s: error writing: ", descr);
if (write_count == 0) die(1, "", "copy %s: 0 write: ", descr);
nbyte -= write_count;
buf += write_count;
} while (nbyte != 0);
}
void copy_outof_fuse(copy_thread_state * copy_state) {
int from = copy_state->from;
int to = copy_state->to;
char * descr = copy_state->descr;
int read_count;
void * buf;
buf = must_malloc(descr, OUT_BUFSZ);
while(1) {
// /dev/fuse only returns complete reads
read_count = read(from, buf, OUT_BUFSZ);
if (read_count < 0) die(1, "", "copy %s: error reading: ", descr);
write_exactly(descr, to, (char *) buf, read_count);
}
free(buf);
}
void * copy_clean_into_fuse(copy_thread_state * copy_state) {
copy_into_fuse(copy_state);
close(copy_state->from); close(copy_state->from);
@ -207,12 +281,12 @@ void * copy_clean_from(copy_thread_state * copy_state) {
return NULL; return NULL;
} }
void * copy_clean_from_thread(void * copy_state) { void * copy_clean_into_fuse_thread(void * copy_state) {
return (copy_clean_from((copy_thread_state *) copy_state)); return (copy_clean_into_fuse((copy_thread_state *) copy_state));
} }
void * copy_clean_to(copy_thread_state * copy_state) { void * copy_clean_outof_fuse(copy_thread_state * copy_state) {
copy(copy_state); copy_outof_fuse(copy_state);
close(copy_state->to); close(copy_state->to);
@ -222,8 +296,8 @@ void * copy_clean_to(copy_thread_state * copy_state) {
return NULL; return NULL;
} }
void * copy_clean_to_thread(void * copy_state) { void * copy_clean_outof_fuse_thread(void * copy_state) {
return (copy_clean_to((copy_thread_state *) copy_state)); return (copy_clean_outof_fuse((copy_thread_state *) copy_state));
} }
int recv_fd(int sock) { int recv_fd(int sock) {
@ -326,57 +400,35 @@ int get_fuse_sock(connection_t * conn, int optc, char *const optv[]) {
} }
void start_reader(connection_t * connection, int fuse) { void start_reader(connection_t * connection, int fuse) {
int read_fd;
char * read_path;
pthread_t child; pthread_t child;
copy_thread_state * copy_state; copy_thread_state * copy_state;
if (asprintf(&read_path, "%s/connections/%ld/read",
connection->params->socket9p_root, connection->id) == -1)
die(1, "Couldn't allocate read path", "");
read_fd = open(read_path, O_RDONLY);
if (read_fd == -1)
die(1, "couldn't open read path", "For connection %ld, ", connection->id);
copy_state = (copy_thread_state *) must_malloc("start_reader copy_state", copy_state = (copy_thread_state *) must_malloc("start_reader copy_state",
sizeof(copy_thread_state)); sizeof(copy_thread_state));
copy_state->descr = read_path; copy_state->descr = connection->mount_point;
copy_state->connection = connection->id;
copy_state->tag = "read"; copy_state->tag = "read";
copy_state->from = read_fd; copy_state->from = connection->sock;
copy_state->to = fuse; copy_state->to = fuse;
if ((errno = pthread_create(&child, &detached, if ((errno = pthread_create(&child, &detached,
copy_clean_from_thread, copy_state))) copy_clean_into_fuse_thread, copy_state)))
die(1, "", "couldn't create read copy thread for connection %ld: ", die(1, "", "couldn't create read copy thread for mount %s: ",
connection->id); connection->mount_point);
} }
void start_writer(connection_t * connection, int fuse) { void start_writer(connection_t * connection, int fuse) {
int write_fd;
char * write_path;
pthread_t child; pthread_t child;
copy_thread_state * copy_state; copy_thread_state * copy_state;
if (asprintf(&write_path, "%s/connections/%ld/write",
connection->params->socket9p_root, connection->id) == -1)
die(1, "Couldn't allocate write path", "");
write_fd = open(write_path, O_WRONLY);
if (write_fd == -1)
die(1, "couldn't open write path", "For connection %ld, ", connection->id);
copy_state = (copy_thread_state *) must_malloc("do_write copy_state", copy_state = (copy_thread_state *) must_malloc("do_write copy_state",
sizeof(copy_thread_state)); sizeof(copy_thread_state));
copy_state->descr = write_path; copy_state->descr = connection->mount_point;
copy_state->connection = connection->id;
copy_state->tag = "write"; copy_state->tag = "write";
copy_state->from = fuse; copy_state->from = fuse;
copy_state->to = write_fd; copy_state->to = connection->sock;
if ((errno = pthread_create(&child, &detached, if ((errno = pthread_create(&child, &detached,
copy_clean_to_thread, copy_state))) copy_clean_outof_fuse_thread, copy_state)))
die(1, "", "Couldn't create write copy thread for connection %ld: ", die(1, "", "Couldn't create write copy thread for mount %s: ",
connection->id); connection->mount_point);
} }
void * mount_connection(connection_t * conn) { void * mount_connection(connection_t * conn) {
@ -388,7 +440,7 @@ void * mount_connection(connection_t * conn) {
pthread_cond_t copy_halt; pthread_cond_t copy_halt;
int should_halt = 0; int should_halt = 0;
buf = (char *) must_malloc("read_opts packet malloc", COPY_BUFSZ); buf = (char *) must_malloc("read_opts packet malloc", EVENT_BUFSZ);
optv = read_opts(conn, buf); optv = read_opts(conn, buf);
@ -425,8 +477,8 @@ void * mount_connection(connection_t * conn) {
lock("copy lock", &copy_lock); lock("copy lock", &copy_lock);
while (!should_halt) while (!should_halt)
if ((errno = pthread_cond_wait(&copy_halt, &copy_lock))) if ((errno = pthread_cond_wait(&copy_halt, &copy_lock)))
die(1, "", "Couldn't wait for copy halt for connection %ld: ", die(1, "", "Couldn't wait for copy halt for mount %s: ",
conn->id); conn->mount_point);
unlock("copy lock", &copy_lock); unlock("copy lock", &copy_lock);
free(conn); free(conn);
@ -439,36 +491,26 @@ void * mount_thread(void * connection) {
} }
void write_pid(connection_t * connection) { void write_pid(connection_t * connection) {
int write_fd;
char * write_path;
pid_t pid = gettid(); pid_t pid = gettid();
char * pid_s; char * pid_s;
int pid_s_len, write_count; int pid_s_len, write_count;
if (asprintf(&write_path, "%s/connections/%ld/write",
connection->params->socket9p_root, connection->id) == -1)
die(1, "Couldn't allocate write path", "");
write_fd = open(write_path, O_WRONLY);
if (write_fd == -1)
die(1, "couldn't open write path", "For connection %ld, ", connection->id);
if (asprintf(&pid_s, "%lld", (long long) pid) == -1) if (asprintf(&pid_s, "%lld", (long long) pid) == -1)
die(1, "Couldn't allocate pid string", ""); die(1, "Couldn't allocate pid string", "");
pid_s_len = strlen(pid_s); pid_s_len = strlen(pid_s);
write_count = write(write_fd, pid_s, pid_s_len); // TODO: check for socket write conditions e.g. EAGAIN
if (write_count == -1) write_count = write(connection->sock, pid_s, pid_s_len);
if (write_count < 0)
die(1, "Error writing pid", ""); die(1, "Error writing pid", "");
// TODO: handle short writes
if (write_count != pid_s_len) if (write_count != pid_s_len)
die(1, NULL, "Error writing pid %s to socket: only wrote %d bytes", die(1, NULL, "Error writing pid %s to socket: only wrote %d bytes",
pid_s, write_count); pid_s, write_count);
close(write_fd);
free(pid_s); free(pid_s);
free(write_path);
} }
void perform_syscall(connection_t * conn, uint8_t syscall, char path[]) { void perform_syscall(connection_t * conn, uint8_t syscall, char path[]) {
@ -517,8 +559,6 @@ void perform_syscall(connection_t * conn, uint8_t syscall, char path[]) {
} }
void * event_thread(void * connection_ptr) { void * event_thread(void * connection_ptr) {
char * read_path;
int read_fd;
int read_count, event_len, path_len; int read_count, event_len, path_len;
void * buf; void * buf;
connection_t * connection = connection_ptr; connection_t * connection = connection_ptr;
@ -527,32 +567,25 @@ void * event_thread(void * connection_ptr) {
uint8_t syscall; uint8_t syscall;
void * msg; void * msg;
// This thread registers with the mounted file system as being an // This thread registers with the file system server as being an
// fsnotify event actuator. Other mounted file system interactions // fsnotify event actuator. Other mounted file system interactions
// (such as self-logging) SHOULD NOT occur on this thread. // (such as self-logging) SHOULD NOT occur on this thread.
write_pid(connection); write_pid(connection);
if (asprintf(&read_path, "%s/connections/%ld/read",
connection->params->socket9p_root, connection->id) == -1)
die(1, "Couldn't allocate read path", "");
read_fd = open(read_path, O_RDONLY);
if (read_fd == -1)
die(1, "couldn't open read path", "For connection %ld, ", connection->id);
buf = must_malloc("incoming event buffer", EVENT_BUFSZ); buf = must_malloc("incoming event buffer", EVENT_BUFSZ);
while(1) { while(1) {
read_count = read(read_fd, buf, EVENT_BUFSZ); read_count = read(connection->sock, buf, EVENT_BUFSZ);
if (read_count == -1) die(1, "event thread: error reading", ""); if (read_count < 0) die(1, "event thread: error reading", "");
event_len = (int) ntohs(*((uint16_t *) buf)); event_len = (int) ntohs(*((uint16_t *) buf));
if (debug) if (debug)
thread_log_time(connection, "read %d bytes from connection %ld\n", thread_log_time(connection, "read %d bytes from event connection\n",
read_count, connection->id); read_count);
// TODO: handle short read
if (read_count != event_len) { if (read_count != event_len) {
thread_log_time(connection, "event thread: only read %d of %d\n", thread_log_time(connection, "event thread: only read %d of %d\n",
read_count, event_len); read_count, event_len);
@ -578,9 +611,67 @@ void * event_thread(void * connection_ptr) {
perform_syscall(connection, syscall, path); perform_syscall(connection, syscall, path);
} }
close(read_fd);
free(buf); free(buf);
free(read_path); // TODO: close connection
return NULL;
}
void write_pidfile(char * pidfile) {
int fd;
pid_t pid = getpid();
char * pid_s;
int pid_s_len, write_count;
if (asprintf(&pid_s, "%lld", (long long) pid) == -1)
die(1, "Couldn't allocate pidfile string", "");
pid_s_len = strlen(pid_s);
fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
if (fd == -1)
die(1, "", "Couldn't open pidfile path %s: ", pidfile);
write_count = write(fd, pid_s, pid_s_len);
if (write_count == -1)
die(1, "", "Error writing pidfile %s: ", pidfile);
if (write_count != pid_s_len)
die(1, NULL, "Error writing %s to pidfile %s: only wrote %d bytes",
pid_s, pidfile, write_count);
close(fd);
free(pid_s);
}
void * init_thread(void * params_ptr) {
parameters * params = params_ptr;
int init_sock = connect_socket(params->server);
int write_count, read_count;
char init_msg[6] = { '\6', '\0', '\0', '\0', '\0', '\0' };
void * buf;
// TODO: handle short write/socket conditions
write_count = write(init_sock, init_msg, sizeof(init_msg));
if (write_count < 0) die(1, "init thread: couldn't write init", "");
if (write_count != sizeof(init_msg))
die(1, "init thread: incomplete write", "");
buf = must_malloc("incoming init buffer", EVENT_BUFSZ);
// TODO: handle short read/socket conditions
read_count = read(init_sock, buf, EVENT_BUFSZ);
if (read_count < 0) die(1, "init thread: error reading", "");
// TODO: handle other messages
if (read_count != 6) die(1, "init thread: response not 6", "");
for (int i = 0; i < sizeof(init_msg); i++)
if (((char *)buf)[i] != init_msg[i])
die(1, "init thread: unexpected message", "");
// we've gotten Continue so write the pidfile
if (params->pidfile != NULL)
write_pidfile(params->pidfile);
// TODO: handle more messages
return NULL; return NULL;
} }
@ -601,7 +692,7 @@ void parse_parameters(int argc, char * argv[], parameters * params) {
int errflg = 0; int errflg = 0;
params->pidfile = NULL; params->pidfile = NULL;
params->socket9p_root = NULL; params->socket = NULL;
params->fusermount = NULL; params->fusermount = NULL;
params->logfile = NULL; params->logfile = NULL;
params->logfile_fd = 0; params->logfile_fd = 0;
@ -610,15 +701,19 @@ void parse_parameters(int argc, char * argv[], parameters * params) {
params->trigger_fd = 0; params->trigger_fd = 0;
lock_init("fd_lock", &params->fd_lock, NULL); lock_init("fd_lock", &params->fd_lock, NULL);
while ((c = getopt(argc, argv, ":p:9:f:l:m:t:")) != -1) { while ((c = getopt(argc, argv, ":p:d:s:f:l:m:t:")) != -1) {
switch(c) { switch(c) {
case 'p': case 'p':
params->pidfile = optarg; params->pidfile = optarg;
break; break;
case '9': case 'd':
params->socket9p_root = optarg; params->server = optarg;
break;
case 's':
params->socket = optarg;
break; break;
case 'f': case 'f':
@ -675,15 +770,11 @@ void parse_parameters(int argc, char * argv[], parameters * params) {
exit(2); exit(2);
} }
if (params->socket9p_root == NULL) if (params->socket == NULL)
params->socket9p_root = default_socket9p_root; params->socket = default_socket;
if (access(params->socket9p_root, X_OK)) {
fprintf(stderr, if (params->server == NULL)
"-9 %s path to socket 9p root directory must be executable: ", params->server = default_server;
params->socket9p_root);
perror("");
exit(2);
}
if (params->logfile != NULL && access(params->logfile, W_OK)) if (params->logfile != NULL && access(params->logfile, W_OK))
if (errno != ENOENT) { if (errno != ENOENT) {
@ -702,76 +793,35 @@ void parse_parameters(int argc, char * argv[], parameters * params) {
} }
} }
void write_pidfile(char * pidfile) { void serve(parameters * params) {
int fd; ssize_t read_count;
pid_t pid = getpid(); char subproto_selector;
char * pid_s;
int pid_s_len, write_count;
if (asprintf(&pid_s, "%lld", (long long) pid) == -1)
die(1, "Couldn't allocate pidfile string", "");
pid_s_len = strlen(pid_s);
fd = open(pidfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
if (fd == -1)
die(1, "", "Couldn't open pidfile path %s: ", pidfile);
write_count = write(fd, pid_s, pid_s_len);
if (write_count == -1)
die(1, "", "Error writing pidfile %s: ", pidfile);
if (write_count != pid_s_len)
die(1, NULL, "Error writing %s to pidfile %s: only wrote %d bytes",
pid_s, pidfile, write_count);
close(fd);
free(pid_s);
}
#define ID_LEN 512
void process_events(char * events_path, int events, parameters * params) {
char buf[ID_LEN];
int read_count;
long conn_id;
pthread_t child; pthread_t child;
connection_t * conn; connection_t * conn;
void * (*connection_handler_thread)(void *); void * (*connection_handler_thread)(void *);
if (listen(params->sock, 16))
die(1, "listen", "");
if ((errno = pthread_create(&child, &detached, init_thread, params)))
die(1, "", "Couldn't create initialization thread: ");
while (1) { while (1) {
conn = (connection_t *) must_malloc("connection state", conn = (connection_t *) must_malloc("connection state",
sizeof(connection_t)); sizeof(connection_t));
conn->params = params; conn->params = params;
conn->id = 0; conn->mount_point = "";
read_count = read(events, buf, ID_LEN - 1); conn->sock = accept(params->sock, &conn->sa_client, &conn->socklen_client);
if (read_count == -1) { if (conn->sock < 0)
die(1, "", "Error reading events path %s: ", events_path); die(1, "accept", "");
} else if (read_count == 0) {
// TODO: this is probably the 9p server's fault due to
// not dropping the read 0 to force short read if
// the real read is flushed
log_time(conn, "read 0 from event stream %s\n", events_path);
free(conn);
continue;
}
buf[read_count] = 0x0; // TODO: check for socket read conditions e.g. EAGAIN
read_count = read(conn->sock, &subproto_selector, 1);
if (read_count <= 0)
die(1, "read subprotocol selector", "");
if (read_count < 2) { switch (subproto_selector) {
die(1, NULL, "Event connection id isn't long enough");
}
errno = 0;
conn_id = strtol(buf + 1, NULL, 10);
if (errno) die(1, "failed", "Connection id of string '%s'", buf);
conn->id = conn_id;
if (debug) log_time(conn, "handle connection %ld\n", conn_id);
switch (buf[0]) {
case 'm': case 'm':
conn->type_descr = "mount"; conn->type_descr = "mount";
connection_handler_thread = mount_thread; connection_handler_thread = mount_thread;
@ -781,22 +831,20 @@ void process_events(char * events_path, int events, parameters * params) {
connection_handler_thread = event_thread; connection_handler_thread = event_thread;
break; break;
default: default:
die(1, NULL, "Unknown connection type '%c'", buf[0]); die(1, NULL, "Unknown subprotocol type '%c'", subproto_selector);
} }
if ((errno = pthread_create(&child, &detached, if ((errno = pthread_create(&child, &detached,
connection_handler_thread, conn))) connection_handler_thread, conn)))
die(1, "", "Couldn't create thread for %s connection '%ld': ", die(1, "", "Couldn't create thread for %s connection: ",
conn->type_descr, conn_id); conn->type_descr);
if (debug) log_time(conn, "thread spawned\n"); if (debug) log_time(conn, "thread spawned\n");
} }
} }
int main(int argc, char * argv[]) { int main(int argc, char * argv[]) {
int events;
parameters params; parameters params;
char * events_path;
struct rlimit core_limit; struct rlimit core_limit;
core_limit.rlim_cur = RLIM_INFINITY; core_limit.rlim_cur = RLIM_INFINITY;
@ -813,25 +861,14 @@ int main(int argc, char * argv[]) {
PTHREAD_CREATE_DETACHED))) PTHREAD_CREATE_DETACHED)))
die(1, "Couldn't set pthread detach state", ""); die(1, "Couldn't set pthread detach state", "");
if (params.pidfile != NULL) write_pidfile(params.pidfile);
if (params.logfile != NULL) { if (params.logfile != NULL) {
params.logfile_fd = open(params.logfile, O_WRONLY | O_APPEND | O_CREAT); params.logfile_fd = open(params.logfile, O_WRONLY | O_APPEND | O_CREAT);
if (params.logfile_fd == -1) if (params.logfile_fd == -1)
die(1, "", "Couldn't open log file %s: ", params.logfile); die(1, "", "Couldn't open log file %s: ", params.logfile);
} }
if (asprintf(&events_path, "%s/events", params.socket9p_root) == -1) params.sock = bind_socket(params.socket);
die(1, "Couldn't allocate events path", ""); serve(&params);
events = open(events_path, O_RDONLY | O_CLOEXEC); return 0;
if (events != -1) process_events(events_path, events, &params);
connection_t top;
top.params = &params;
top.id = 0;
log_time(&top, "Failed to open events path %s: %s\n",
events_path, strerror(errno));
free(events_path);
return 1;
} }

View File

@ -1,12 +1,15 @@
#include <pthread.h> #include <pthread.h>
#include <sys/socket.h>
typedef struct { typedef struct {
char * socket9p_root; char * server;
char * socket;
char * fusermount; char * fusermount;
char * pidfile; char * pidfile;
char * logfile; char * logfile;
char * mount_trigger; char * mount_trigger;
char * trigger_log; char * trigger_log;
int sock;
pthread_mutex_t fd_lock; pthread_mutex_t fd_lock;
int logfile_fd; int logfile_fd;
int trigger_fd; int trigger_fd;
@ -14,8 +17,11 @@ typedef struct {
typedef struct { typedef struct {
parameters * params; parameters * params;
long id;
char * type_descr; char * type_descr;
char * mount_point;
struct sockaddr sa_client;
socklen_t socklen_client;
int sock;
} connection_t; } connection_t;
pthread_attr_t detached; pthread_attr_t detached;

View File

@ -129,8 +129,8 @@ void thread_log_time(connection_t * conn, const char * fmt, ...) {
// log demand to be low. // log demand to be low.
if ((errno = pthread_create(&logger, &detached, log_time_thread, log_state))) if ((errno = pthread_create(&logger, &detached, log_time_thread, log_state)))
die(1, "", "Couldn't create log thread for %s connection '%ld': ", die(1, "", "Couldn't create log thread for %s connection %s: ",
conn->type_descr, conn->id); conn->type_descr, conn->mount_point);
} }
void log_continue_locked(connection_t * connection, const char * fmt, ...) { void log_continue_locked(connection_t * connection, const char * fmt, ...) {

View File

@ -1,3 +1,5 @@
#include <stdarg.h>
#include "transfused.h" #include "transfused.h"
void die(int exit_code, const char * perror_arg, const char * fmt, ...); void die(int exit_code, const char * perror_arg, const char * fmt, ...);

View File

@ -0,0 +1,99 @@
#include <stddef.h>
#include <stdlib.h>
#include <sys/socket.h>
#include "include/uapi/linux/vm_sockets.h"
#include "transfused_log.h"
long parse_cid(const char * address) {
char * end = NULL;
long cid = strtol(address, &end, 10);
if (address == end || *end != ':') {
*end = 0;
die(2, NULL, "Invalid vsock cid: %s", address);
}
return cid;
}
long parse_port(const char * port_str) {
char * end = NULL;
long port = strtol(port_str, &end, 10);
if (port_str == end || *end != '\0') {
*end = 0;
die(2, NULL, "Invalid vsock port: %s", port_str);
}
return port;
}
int find_colon(const char * address) {
int colon = 0;
while (address[colon] != '\0')
if (address[colon] == ':') break;
else colon++;
if (address[colon] == '\0')
die(2, NULL, "Missing port in vsock address %s", address);
return colon;
}
int bind_vsock(const char * address) {
long cid, port;
int colon;
struct sockaddr_vm sa_listen = {
.svm_family = AF_VSOCK,
};
int sock_fd;
colon = find_colon(address);
if (address[0] == '_' && colon == 1) cid = VMADDR_CID_ANY;
else cid = parse_cid(address);
port = parse_port(address + colon + 1);
sa_listen.svm_cid = cid;
sa_listen.svm_port = port;
sock_fd = socket(AF_VSOCK, SOCK_STREAM, 0);
if (sock_fd < 0)
die(1, "socket(AF_VSOCK)", "");
if (bind(sock_fd, (struct sockaddr *) &sa_listen, sizeof(sa_listen)))
die(1, "bind(AF_VSOCK)", "");
return sock_fd;
}
int connect_vsock(const char * address) {
long cid, port;
int colon;
struct sockaddr_vm sa_connect = {
.svm_family = AF_VSOCK,
};
int sock_fd;
colon = find_colon(address);
cid = parse_cid(address);
port = parse_port(address + colon + 1);
sa_connect.svm_cid = cid;
sa_connect.svm_port = port;
sock_fd = socket(AF_VSOCK, SOCK_STREAM, 0);
if (sock_fd < 0)
die(1, "socket(AF_VSOCK)", "");
if (connect(sock_fd, (struct sockaddr *) &sa_connect, sizeof(sa_connect)))
die(1, "connect(AF_VSOCK)", "");
return sock_fd;
}

View File

@ -0,0 +1,2 @@
int bind_vsock(const char * address);
int connect_vsock(const char * address);