Merge pull request #69 from ijc25/vsock

Add frontend support for virtio-vsock
This commit is contained in:
Justin Cormack 2016-04-06 15:55:41 +01:00
commit f4d5a1c14e
30 changed files with 4384 additions and 4 deletions
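For context (not part of the diff): with these changes in place, a process in the guest can talk to the host over AF_VSOCK sockets. A minimal guest-side client sketch, assuming the host end is listening on an arbitrary example port 5000:

/* Illustrative only; port 5000 is an example, not something this PR defines. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40   /* older libcs may not define it yet */
#endif

int main(void)
{
    struct sockaddr_vm addr = {
        .svm_family = AF_VSOCK,
        .svm_cid    = VMADDR_CID_HOST,  /* CID 2: the host/hypervisor */
        .svm_port   = 5000,             /* example port */
    };
    int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

    if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("vsock connect");
        return 1;
    }
    write(fd, "hello\n", 6);
    close(fd);
    return 0;
}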

View File

@@ -22,6 +22,7 @@ COPY etc /etc/
ADD kernel/aufs-utils.tar /
COPY mkinitrd.sh /bin/
COPY kernel/kernel-source-info /etc/
ADD kernel/kernel-patches.tar /etc/kernel-patches
COPY packages/proxy/proxy /sbin/
COPY packages/transfused/transfused /sbin/
@@ -52,6 +53,9 @@ COPY packages/hvtools/hv_set_ifconfig /sbin/
COPY packages/userns/etc /etc/
COPY packages/userns/groupadd /usr/sbin
COPY packages/userns/useradd /usr/sbin
COPY packages/nc-vsock/nc-vsock /usr/bin
COPY packages/vsudd/vsudd /sbin
COPY packages/vsudd/etc /etc
RUN \
rc-update add swap boot && \
@@ -88,7 +92,8 @@ RUN \
rc-update add hupper default && \
rc-update add hv_fcopy_daemon default && \
rc-update add hv_kvp_daemon default && \
rc-update add hv_vss_daemon default
rc-update add hv_vss_daemon default && \
rc-update add vsudd default
COPY init /

View File

@@ -1,4 +1,5 @@
vmlinuz64
zImage
aufs-utils.tar
kernel-patches.tar
kernel-source-info

View File

@@ -57,6 +57,17 @@ RUN git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /aufs && \
COPY kernel_config /linux/arch/x86/configs/x86_64_defconfig
COPY kernel_config.arm /linux/arch/arm/configs/versatile_defconfig
# Apply local patches
COPY patches /patches
RUN cd /linux && \
mkdir /etc/kernel-patches && \
set -e && for patch in /patches/*.patch; do \
echo "Applying $patch"; \
cp "$patch" /etc/kernel-patches; \
patch -p1 < "$patch"; \
done && \
cd /etc/kernel-patches && tar cf /kernel-patches.tar .
RUN jobs=$(nproc); \
cd /linux && \
make ARCH=$ARCH defconfig && \

View File

@@ -5,6 +5,7 @@ vmlinuz64: kernel_config Dockerfile
docker run --rm mobykernel:build cat /linux/arch/x86_64/boot/bzImage > $@
docker run --rm mobykernel:build cat /aufs-utils.tar > aufs-utils.tar
docker run --rm mobykernel:build cat /kernel-source-info > kernel-source-info
docker run --rm mobykernel:build cat /kernel-patches.tar > kernel-patches.tar
arm: zImage
@@ -13,8 +14,9 @@ zImage: kernel_config.arm Dockerfile
docker run --rm mobyarmkernel:build cat /linux/arch/arm/boot/zImage > $@
docker run --rm mobyarmkernel:build cat /aufs-utils.tar > aufs-utils.tar
docker run --rm mobykernel:build cat /kernel-source-info > kernel-source-info
docker run --rm mobykernel:build cat /kernel-patches.tar > kernel-patches.tar
clean:
rm -f zImage vmlinuz64 aufs-utils.tar kernel-source-info
rm -f zImage vmlinuz64 aufs-utils.tar kernel-source-info kernel-patches.tar
docker images -q mobykernel:build | xargs docker rmi -f
docker images -q mobyarmkernel:build | xargs docker rmi -f

View File

@@ -1194,7 +1194,9 @@ CONFIG_DNS_RESOLVER=y
# CONFIG_BATMAN_ADV is not set
CONFIG_OPENVSWITCH=y
CONFIG_OPENVSWITCH_VXLAN=y
# CONFIG_VSOCKETS is not set
CONFIG_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_NETLINK_MMAP=y
CONFIG_NETLINK_DIAG=y
CONFIG_MPLS=y

View File

@@ -1183,7 +1183,9 @@ CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=y
CONFIG_OPENVSWITCH_GRE=y
CONFIG_OPENVSWITCH_VXLAN=y
# CONFIG_VSOCKETS is not set
CONFIG_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS_COMMON=y
# CONFIG_NETLINK_MMAP is not set
CONFIG_NETLINK_DIAG=y
CONFIG_MPLS=y
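
Taken together, the two config hunks above flip the vsock core from disabled to built-in (CONFIG_VSOCKETS=y) and enable the new virtio transport (CONFIG_VIRTIO_VSOCKETS=y, CONFIG_VIRTIO_VSOCKETS_COMMON=y) for both the x86_64 and ARM kernels. A quick way to confirm the transport is active in a booted guest is to read the local context ID through /dev/vsock (illustrative sketch, not part of this change):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
    unsigned int cid;
    int fd = open("/dev/vsock", O_RDONLY);

    /* A CID greater than 2 means the host assigned this guest its own CID. */
    if (fd < 0 || ioctl(fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid) < 0) {
        perror("vsock");
        return 1;
    }
    printf("local CID: %u\n", cid);
    close(fd);
    return 0;
}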

View File

@@ -0,0 +1,219 @@
From d8f7730e3211cdb16cd9d26143121aeb05f22509 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 17 Dec 2015 16:53:43 +0800
Subject: [PATCH 1/9] virtio: make find_vqs() checkpatch.pl-friendly
checkpatch.pl wants arrays of strings declared as follows:
static const char * const names[] = { "vq-1", "vq-2", "vq-3" };
Currently the find_vqs() function takes a const char *names[] argument
so passing checkpatch.pl's const char * const names[] results in a
compiler error due to losing the second const.
This patch adjusts the find_vqs() prototype and updates all virtio
transports. This makes it possible for virtio_balloon.c, virtio_input.c,
virtgpu_kms.c, and virtio_rpmsg_bus.c to use the checkpatch.pl-friendly
type.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
(cherry picked from commit f7ad26ff952b3ca2702d7da03aad0ab1f6c01d7c)
---
drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +-
drivers/misc/mic/card/mic_virtio.c | 2 +-
drivers/remoteproc/remoteproc_virtio.c | 2 +-
drivers/rpmsg/virtio_rpmsg_bus.c | 2 +-
drivers/s390/virtio/kvm_virtio.c | 2 +-
drivers/s390/virtio/virtio_ccw.c | 2 +-
drivers/virtio/virtio_balloon.c | 2 +-
drivers/virtio/virtio_input.c | 2 +-
drivers/virtio/virtio_mmio.c | 2 +-
drivers/virtio/virtio_pci_common.c | 4 ++--
drivers/virtio/virtio_pci_common.h | 2 +-
drivers/virtio/virtio_pci_modern.c | 2 +-
include/linux/virtio_config.h | 2 +-
13 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 06496a1..4150873 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
static vq_callback_t *callbacks[] = {
virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack
};
- static const char *names[] = { "control", "cursor" };
+ static const char * const names[] = { "control", "cursor" };
struct virtio_gpu_device *vgdev;
/* this will expand later */
diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c
index e486a0c..f6ed57d 100644
--- a/drivers/misc/mic/card/mic_virtio.c
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -311,7 +311,7 @@ unmap:
static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct mic_vdev *mvdev = to_micvdev(vdev);
struct mic_device_ctrl __iomem *dc = mvdev->dc;
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index e1a1023..e44872f 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev)
static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct rproc *rproc = vdev_to_rproc(vdev);
int i, ret;
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 73354ee..1fcd27c 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len,
static int rpmsg_probe(struct virtio_device *vdev)
{
vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done };
- const char *names[] = { "input", "output" };
+ static const char * const names[] = { "input", "output" };
struct virtqueue *vqs[2];
struct virtproc_info *vrp;
void *bufs_va;
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 53fb975..1d060fd 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev)
static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct kvm_device *kdev = to_kvmdev(vdev);
int i;
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 1b83159..bf2d130 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -635,7 +635,7 @@ out:
static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
unsigned long *indicatorp = NULL;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7d3e5d0..0c3691f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -388,7 +388,7 @@ static int init_vqs(struct virtio_balloon *vb)
{
struct virtqueue *vqs[3];
vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- const char *names[] = { "inflate", "deflate", "stats" };
+ static const char * const names[] = { "inflate", "deflate", "stats" };
int err, nvqs;
/*
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index c96944b..350a2a5 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
struct virtqueue *vqs[2];
vq_callback_t *cbs[] = { virtinput_recv_events,
virtinput_recv_status };
- static const char *names[] = { "events", "status" };
+ static const char * const names[] = { "events", "status" };
int err;
err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index f499d9d..745c6ee 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -482,7 +482,7 @@ error_available:
static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 2046a68..f6bed86 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev)
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[],
+ const char * const names[],
bool use_msix,
bool per_vq_vectors)
{
@@ -376,7 +376,7 @@ error_find:
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
int err;
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index b976d96..2cc2522 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev);
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[]);
+ const char * const names[]);
const char *vp_bus_name(struct virtio_device *vdev);
/* Setup the affinity for a virtqueue:
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 8e5cf19..c0c11fa 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -418,7 +418,7 @@ err_new_queue:
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index e5ce8ab..6e6cb0c 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -70,7 +70,7 @@ struct virtio_config_ops {
int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[]);
+ const char * const names[]);
void (*del_vqs)(struct virtio_device *);
u64 (*get_features)(struct virtio_device *vdev);
int (*finalize_features)(struct virtio_device *vdev);
--
2.8.0.rc3
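
To make the const-correctness point above concrete, a standalone sketch (not kernel code) of why the old prototype rejects a checkpatch-style array:

/* "rx"/"tx"/"event" are arbitrary example names. */
static const char * const names[] = { "rx", "tx", "event" };

void takes_old(const char *n[]);          /* pre-patch find_vqs() parameter type  */
void takes_new(const char * const n[]);   /* post-patch parameter type            */

void example(void)
{
    takes_new(names);   /* fine */
    takes_old(names);   /* compiler complains: the second const is silently lost */
}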

View File

@@ -0,0 +1,77 @@
From ba1dc9346ba14b24f22ea04db21011f2874c3a67 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Sat, 21 Nov 2015 18:39:17 +0100
Subject: [PATCH 2/9] VSOCK: constify vmci_transport_notify_ops structures
The vmci_transport_notify_ops structures are never modified, so declare
them as const.
Done with the help of Coccinelle.
Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3b22dae38db1cea9ead3229f08cfb0b69aca5706)
---
net/vmw_vsock/vmci_transport.h | 2 +-
net/vmw_vsock/vmci_transport_notify.c | 2 +-
net/vmw_vsock/vmci_transport_notify.h | 5 +++--
net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +-
4 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 2ad46f3..1820e74 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -121,7 +121,7 @@ struct vmci_transport {
u64 queue_pair_max_size;
u32 detach_sub_id;
union vmci_transport_notify notify;
- struct vmci_transport_notify_ops *notify_ops;
+ const struct vmci_transport_notify_ops *notify_ops;
struct list_head elem;
struct sock *sk;
spinlock_t lock; /* protects sk. */
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 9b7f207..fd8cf02 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
}
/* Socket control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
index 7df7932..3c464d3 100644
--- a/net/vmw_vsock/vmci_transport_notify.h
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops {
void (*process_negotiate) (struct sock *sk);
};
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
+extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
+extern const
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index dc9c792..21e591d 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue(
}
/* Socket always on control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
--
2.8.0.rc3

View File

@@ -0,0 +1,164 @@
From e4d5bbced996499e080e790508113985963a55d5 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Thu, 4 Feb 2016 10:50:45 -0800
Subject: [PATCH 3/9] vsock: Fix blocking ops call in prepare_to_wait
We received a bug report from someone using vmware:
WARNING: CPU: 3 PID: 660 at kernel/sched/core.c:7389
__might_sleep+0x7d/0x90()
do not call blocking ops when !TASK_RUNNING; state=1 set at
[<ffffffff810fa68d>] prepare_to_wait+0x2d/0x90
Modules linked in: vmw_vsock_vmci_transport vsock snd_seq_midi
snd_seq_midi_event snd_ens1371 iosf_mbi gameport snd_rawmidi
snd_ac97_codec ac97_bus snd_seq coretemp snd_seq_device snd_pcm
snd_timer snd soundcore ppdev crct10dif_pclmul crc32_pclmul
ghash_clmulni_intel vmw_vmci vmw_balloon i2c_piix4 shpchp parport_pc
parport acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc btrfs
xor raid6_pq 8021q garp stp llc mrp crc32c_intel serio_raw mptspi vmwgfx
drm_kms_helper ttm drm scsi_transport_spi mptscsih e1000 ata_generic
mptbase pata_acpi
CPU: 3 PID: 660 Comm: vmtoolsd Not tainted
4.2.0-0.rc1.git3.1.fc23.x86_64 #1
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop
Reference Platform, BIOS 6.00 05/20/2014
0000000000000000 0000000049e617f3 ffff88006ac37ac8 ffffffff818641f5
0000000000000000 ffff88006ac37b20 ffff88006ac37b08 ffffffff810ab446
ffff880068009f40 ffffffff81c63bc0 0000000000000061 0000000000000000
Call Trace:
[<ffffffff818641f5>] dump_stack+0x4c/0x65
[<ffffffff810ab446>] warn_slowpath_common+0x86/0xc0
[<ffffffff810ab4d5>] warn_slowpath_fmt+0x55/0x70
[<ffffffff8112551d>] ? debug_lockdep_rcu_enabled+0x1d/0x20
[<ffffffff810fa68d>] ? prepare_to_wait+0x2d/0x90
[<ffffffff810fa68d>] ? prepare_to_wait+0x2d/0x90
[<ffffffff810da2bd>] __might_sleep+0x7d/0x90
[<ffffffff812163b3>] __might_fault+0x43/0xa0
[<ffffffff81430477>] copy_from_iter+0x87/0x2a0
[<ffffffffa039460a>] __qp_memcpy_to_queue+0x9a/0x1b0 [vmw_vmci]
[<ffffffffa0394740>] ? qp_memcpy_to_queue+0x20/0x20 [vmw_vmci]
[<ffffffffa0394757>] qp_memcpy_to_queue_iov+0x17/0x20 [vmw_vmci]
[<ffffffffa0394d50>] qp_enqueue_locked+0xa0/0x140 [vmw_vmci]
[<ffffffffa039593f>] vmci_qpair_enquev+0x4f/0xd0 [vmw_vmci]
[<ffffffffa04847bb>] vmci_transport_stream_enqueue+0x1b/0x20
[vmw_vsock_vmci_transport]
[<ffffffffa047ae05>] vsock_stream_sendmsg+0x2c5/0x320 [vsock]
[<ffffffff810fabd0>] ? wake_atomic_t_function+0x70/0x70
[<ffffffff81702af8>] sock_sendmsg+0x38/0x50
[<ffffffff81702ff4>] SYSC_sendto+0x104/0x190
[<ffffffff8126e25a>] ? vfs_read+0x8a/0x140
[<ffffffff817042ee>] SyS_sendto+0xe/0x10
[<ffffffff8186d9ae>] entry_SYSCALL_64_fastpath+0x12/0x76
transport->stream_enqueue may call copy_to_user so it should
not be called inside a prepare_to_wait. Narrow the scope of
the prepare_to_wait to avoid the bad call. This also applies
to vsock_stream_recvmsg as well.
Reported-by: Vinson Lee <vlee@freedesktop.org>
Tested-by: Vinson Lee <vlee@freedesktop.org>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 5988818008257ca42010d6b43a3e0e48afec9898)
---
net/vmw_vsock/af_vsock.c | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220..bbe65dc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1557,8 +1557,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
while (total_written < len) {
ssize_t written;
@@ -1578,7 +1576,9 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
@@ -1588,8 +1588,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
}
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
/* These checks occur both as part of and after the loop
@@ -1635,7 +1633,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
out_wait:
if (total_written > 0)
err = total_written;
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
@@ -1716,7 +1713,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (1) {
s64 ready = vsock_stream_has_data(vsk);
@@ -1727,7 +1723,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
*/
err = -ENOMEM;
- goto out_wait;
+ goto out;
} else if (ready > 0) {
ssize_t read;
@@ -1750,7 +1746,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
vsk, target, read,
!(flags & MSG_PEEK), &recv_data);
if (err < 0)
- goto out_wait;
+ goto out;
if (read >= target || flags & MSG_PEEK)
break;
@@ -1773,7 +1769,9 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
break;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
@@ -1783,9 +1781,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = -EAGAIN;
break;
}
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
}
@@ -1816,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = copied;
}
-out_wait:
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
--
2.8.0.rc3
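
The shape of the fix, condensed from the hunks above into a schematic kernel-context fragment (illustrative only; do_blocking_work() is a placeholder for transport->stream_enqueue()/stream_dequeue(), which may sleep while copying to or from userspace): blocking work now runs while the task is TASK_RUNNING, and prepare_to_wait()/finish_wait() bracket only the schedule_timeout() call.

DEFINE_WAIT(wait);

while (!done) {
    do_blocking_work(sk);        /* allowed: task is TASK_RUNNING here */
    if (done)
        break;

    release_sock(sk);
    prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
    timeout = schedule_timeout(timeout);
    finish_wait(sk_sleep(sk), &wait);
    lock_sock(sk);

    if (signal_pending(current) || timeout == 0)
        break;                   /* error/timeout handling omitted */
}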

View File

@@ -0,0 +1,58 @@
From 99f109b560a95f17f7d034a48c8479c37a685c5a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 17 Dec 2015 11:10:21 +0800
Subject: [PATCH 4/9] VSOCK: transport-specific vsock_transport functions
struct vsock_transport contains function pointers called by AF_VSOCK
core code. The transport may want its own transport-specific function
pointers and they can be added after struct vsock_transport.
Allow the transport to fetch vsock_transport. It can downcast it to
access transport-specific function pointers.
The virtio transport will use this.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7740f7aafc9e6f415e8b6d5e8421deae63033b8d)
---
include/net/af_vsock.h | 3 +++
net/vmw_vsock/af_vsock.c | 9 +++++++++
2 files changed, 12 insertions(+)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index e9eb2d6..23f5525 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -165,6 +165,9 @@ static inline int vsock_core_init(const struct vsock_transport *t)
}
void vsock_core_exit(void);
+/* The transport may downcast this to access transport-specific functions */
+const struct vsock_transport *vsock_core_get_transport(void);
+
/**** UTILS ****/
void vsock_release_pending(struct sock *pending);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bbe65dc..1e5f5ed 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1987,6 +1987,15 @@ void vsock_core_exit(void)
}
EXPORT_SYMBOL_GPL(vsock_core_exit);
+const struct vsock_transport *vsock_core_get_transport(void)
+{
+ /* vsock_register_mutex not taken since only the transport uses this
+ * function and only while registered.
+ */
+ return transport;
+}
+EXPORT_SYMBOL_GPL(vsock_core_get_transport);
+
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.1.0-k");
--
2.8.0.rc3
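
What the new accessor enables, sketched with simplified names (kernel-side sketch; the layout mirrors struct virtio_transport introduced later in this series): a transport embeds struct vsock_transport as its first member and uses container_of() to recover its private function pointers from the pointer the core hands out.

#include <linux/kernel.h>
#include <net/af_vsock.h>

struct my_transport {
    struct vsock_transport transport;       /* core-visible ops, embedded first */
    void (*transport_private_hook)(void);   /* hypothetical transport-only ops  */
};

static const struct my_transport *my_transport_get_ops(void)
{
    const struct vsock_transport *t = vsock_core_get_transport();

    return container_of(t, struct my_transport, transport);
}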

File diff suppressed because it is too large.

View File

@@ -0,0 +1,637 @@
From 9e5562c5785843a8247d172112302fb8d1cdfcec Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 13 Jun 2013 18:28:48 +0800
Subject: [PATCH 6/9] VSOCK: Introduce virtio_transport.ko
VM sockets virtio transport implementation. This driver runs in the
guest.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v5:
* Add transport reset event handling
* Drop ctrl virtqueue
v4:
* Add MAINTAINERS file entry
* Drop short/long rx packets
* checkpatch.pl cleanups
* Clarify locking in struct virtio_vsock
* Narrow local variable scopes as suggested by Alex Bennee
* Call wake_up() after decrementing total_tx_buf to avoid deadlock
v2:
* Fix total_tx_buf accounting
* Add virtio_transport global mutex to prevent races
(cherry picked from commit 1b5c3d05a4c0c1dc87d29d3bee701cf1c84cd5d3)
---
MAINTAINERS | 1 +
net/vmw_vsock/virtio_transport.c | 584 +++++++++++++++++++++++++++++++++++++++
2 files changed, 585 insertions(+)
create mode 100644 net/vmw_vsock/virtio_transport.c
diff --git a/MAINTAINERS b/MAINTAINERS
index fab150d..403c4cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11404,6 +11404,7 @@ S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
+F: net/vmw_vsock/virtio_transport.c
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
new file mode 100644
index 0000000..45472e0
--- /dev/null
+++ b/net/vmw_vsock/virtio_transport.c
@@ -0,0 +1,584 @@
+/*
+ * virtio transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s
+ * early virtio-vsock proof-of-concept bits.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_vsock.h>
+#include <net/sock.h>
+#include <linux/mutex.h>
+#include <net/af_vsock.h>
+
+static struct workqueue_struct *virtio_vsock_workqueue;
+static struct virtio_vsock *the_virtio_vsock;
+static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock);
+
+struct virtio_vsock {
+ struct virtio_device *vdev;
+ struct virtqueue *vqs[VSOCK_VQ_MAX];
+
+ /* Virtqueue processing is deferred to a workqueue */
+ struct work_struct tx_work;
+ struct work_struct rx_work;
+ struct work_struct event_work;
+
+ wait_queue_head_t tx_wait; /* for waiting for tx resources */
+
+ /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX]
+ * must be accessed with tx_lock held.
+ */
+ struct mutex tx_lock;
+ u32 total_tx_buf;
+
+ /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
+ * must be accessed with rx_lock held.
+ */
+ struct mutex rx_lock;
+ int rx_buf_nr;
+ int rx_buf_max_nr;
+
+ /* The following fields are protected by event_lock.
+ * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
+ */
+ struct mutex event_lock;
+ struct virtio_vsock_event event_list[8];
+
+ u32 guest_cid;
+};
+
+static struct virtio_vsock *virtio_vsock_get(void)
+{
+ return the_virtio_vsock;
+}
+
+static u32 virtio_transport_get_local_cid(void)
+{
+ struct virtio_vsock *vsock = virtio_vsock_get();
+
+ return vsock->guest_cid;
+}
+
+static int
+virtio_transport_send_one_pkt(struct virtio_vsock *vsock,
+ struct virtio_vsock_pkt *pkt)
+{
+ struct scatterlist hdr, buf, *sgs[2];
+ int ret, in_sg = 0, out_sg = 0;
+ struct virtqueue *vq;
+ DEFINE_WAIT(wait);
+
+ vq = vsock->vqs[VSOCK_VQ_TX];
+
+ /* Put pkt in the virtqueue */
+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sgs[out_sg++] = &hdr;
+ if (pkt->buf) {
+ sg_init_one(&buf, pkt->buf, pkt->len);
+ sgs[out_sg++] = &buf;
+ }
+
+ mutex_lock(&vsock->tx_lock);
+ while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt,
+ GFP_KERNEL)) < 0) {
+ prepare_to_wait_exclusive(&vsock->tx_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&vsock->tx_lock);
+ schedule();
+ mutex_lock(&vsock->tx_lock);
+ finish_wait(&vsock->tx_wait, &wait);
+ }
+ virtqueue_kick(vq);
+ mutex_unlock(&vsock->tx_lock);
+
+ return pkt->len;
+}
+
+static int
+virtio_transport_send_pkt_no_sock(struct virtio_vsock_pkt *pkt)
+{
+ struct virtio_vsock *vsock;
+
+ vsock = virtio_vsock_get();
+ if (!vsock) {
+ virtio_transport_free_pkt(pkt);
+ return -ENODEV;
+ }
+
+ return virtio_transport_send_one_pkt(vsock, pkt);
+}
+
+static int
+virtio_transport_send_pkt(struct vsock_sock *vsk,
+ struct virtio_vsock_pkt_info *info)
+{
+ u32 src_cid, src_port, dst_cid, dst_port;
+ struct virtio_vsock_sock *vvs;
+ struct virtio_vsock_pkt *pkt;
+ struct virtio_vsock *vsock;
+ u32 pkt_len = info->pkt_len;
+ DEFINE_WAIT(wait);
+
+ vsock = virtio_vsock_get();
+ if (!vsock)
+ return -ENODEV;
+
+ src_cid = virtio_transport_get_local_cid();
+ src_port = vsk->local_addr.svm_port;
+ if (!info->remote_cid) {
+ dst_cid = vsk->remote_addr.svm_cid;
+ dst_port = vsk->remote_addr.svm_port;
+ } else {
+ dst_cid = info->remote_cid;
+ dst_port = info->remote_port;
+ }
+
+ vvs = vsk->trans;
+
+ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
+ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+ pkt_len = virtio_transport_get_credit(vvs, pkt_len);
+ /* Do not send zero length OP_RW pkt*/
+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+ return pkt_len;
+
+ /* Respect global tx buf limitation */
+ mutex_lock(&vsock->tx_lock);
+ while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) {
+ prepare_to_wait_exclusive(&vsock->tx_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&vsock->tx_lock);
+ schedule();
+ mutex_lock(&vsock->tx_lock);
+ finish_wait(&vsock->tx_wait, &wait);
+ }
+ vsock->total_tx_buf += pkt_len;
+ mutex_unlock(&vsock->tx_lock);
+
+ pkt = virtio_transport_alloc_pkt(info, pkt_len,
+ src_cid, src_port,
+ dst_cid, dst_port);
+ if (!pkt) {
+ mutex_lock(&vsock->tx_lock);
+ vsock->total_tx_buf -= pkt_len;
+ mutex_unlock(&vsock->tx_lock);
+ virtio_transport_put_credit(vvs, pkt_len);
+ wake_up(&vsock->tx_wait);
+ return -ENOMEM;
+ }
+
+ virtio_transport_inc_tx_pkt(vvs, pkt);
+
+ return virtio_transport_send_one_pkt(vsock, pkt);
+}
+
+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
+{
+ int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+ struct virtio_vsock_pkt *pkt;
+ struct scatterlist hdr, buf, *sgs[2];
+ struct virtqueue *vq;
+ int ret;
+
+ vq = vsock->vqs[VSOCK_VQ_RX];
+
+ do {
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ break;
+
+ pkt->buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!pkt->buf) {
+ virtio_transport_free_pkt(pkt);
+ break;
+ }
+
+ pkt->len = buf_len;
+
+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sgs[0] = &hdr;
+
+ sg_init_one(&buf, pkt->buf, buf_len);
+ sgs[1] = &buf;
+ ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
+ if (ret) {
+ virtio_transport_free_pkt(pkt);
+ break;
+ }
+ vsock->rx_buf_nr++;
+ } while (vq->num_free);
+ if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
+ vsock->rx_buf_max_nr = vsock->rx_buf_nr;
+ virtqueue_kick(vq);
+}
+
+static void virtio_transport_send_pkt_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, tx_work);
+ struct virtqueue *vq;
+ bool added = false;
+
+ vq = vsock->vqs[VSOCK_VQ_TX];
+ mutex_lock(&vsock->tx_lock);
+ do {
+ struct virtio_vsock_pkt *pkt;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
+ vsock->total_tx_buf -= pkt->len;
+ virtio_transport_free_pkt(pkt);
+ added = true;
+ }
+ } while (!virtqueue_enable_cb(vq));
+ mutex_unlock(&vsock->tx_lock);
+
+ if (added)
+ wake_up(&vsock->tx_wait);
+}
+
+static void virtio_transport_recv_pkt_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, rx_work);
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_RX];
+ mutex_lock(&vsock->rx_lock);
+ do {
+ struct virtio_vsock_pkt *pkt;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
+ vsock->rx_buf_nr--;
+
+ /* Drop short/long packets */
+ if (unlikely(len < sizeof(pkt->hdr) ||
+ len > sizeof(pkt->hdr) + pkt->len)) {
+ virtio_transport_free_pkt(pkt);
+ continue;
+ }
+
+ pkt->len = len - sizeof(pkt->hdr);
+ virtio_transport_recv_pkt(pkt);
+ }
+ } while (!virtqueue_enable_cb(vq));
+
+ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
+ virtio_vsock_rx_fill(vsock);
+ mutex_unlock(&vsock->rx_lock);
+}
+
+/* event_lock must be held */
+static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
+ struct virtio_vsock_event *event)
+{
+ struct scatterlist sg;
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_EVENT];
+
+ sg_init_one(&sg, event, sizeof(*event));
+
+ return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL);
+}
+
+/* event_lock must be held */
+static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) {
+ struct virtio_vsock_event *event = &vsock->event_list[i];
+
+ virtio_vsock_event_fill_one(vsock, event);
+ }
+
+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
+}
+
+static void virtio_vsock_reset_sock(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_state = SS_UNCONNECTED;
+ sk->sk_err = ECONNRESET;
+ sk->sk_error_report(sk);
+ release_sock(sk);
+}
+
+static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
+{
+ struct virtio_device *vdev = vsock->vdev;
+ u32 guest_cid;
+
+ vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
+ &guest_cid, sizeof(guest_cid));
+ vsock->guest_cid = le32_to_cpu(guest_cid);
+}
+
+/* event_lock must be held */
+static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
+ struct virtio_vsock_event *event)
+{
+ switch (le32_to_cpu(event->id)) {
+ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
+ virtio_vsock_update_guest_cid(vsock);
+ vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ break;
+ }
+}
+
+static void virtio_transport_event_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, event_work);
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_EVENT];
+
+ mutex_lock(&vsock->event_lock);
+
+ do {
+ struct virtio_vsock_event *event;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
+ if (len == sizeof(*event))
+ virtio_vsock_event_handle(vsock, event);
+
+ virtio_vsock_event_fill_one(vsock, event);
+ }
+ } while (!virtqueue_enable_cb(vq));
+
+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
+
+ mutex_unlock(&vsock->event_lock);
+}
+
+static void virtio_vsock_event_done(struct virtqueue *vq)
+{
+ struct virtio_vsock *vsock = vq->vdev->priv;
+
+ if (!vsock)
+ return;
+ queue_work(virtio_vsock_workqueue, &vsock->event_work);
+}
+
+static void virtio_vsock_tx_done(struct virtqueue *vq)
+{
+ struct virtio_vsock *vsock = vq->vdev->priv;
+
+ if (!vsock)
+ return;
+ queue_work(virtio_vsock_workqueue, &vsock->tx_work);
+}
+
+static void virtio_vsock_rx_done(struct virtqueue *vq)
+{
+ struct virtio_vsock *vsock = vq->vdev->priv;
+
+ if (!vsock)
+ return;
+ queue_work(virtio_vsock_workqueue, &vsock->rx_work);
+}
+
+static struct virtio_transport virtio_transport = {
+ .transport = {
+ .get_local_cid = virtio_transport_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ .send_pkt = virtio_transport_send_pkt,
+ .send_pkt_no_sock = virtio_transport_send_pkt_no_sock,
+};
+
+static int virtio_vsock_probe(struct virtio_device *vdev)
+{
+ vq_callback_t *callbacks[] = {
+ virtio_vsock_rx_done,
+ virtio_vsock_tx_done,
+ virtio_vsock_event_done,
+ };
+ static const char * const names[] = {
+ "rx",
+ "tx",
+ "event",
+ };
+ struct virtio_vsock *vsock = NULL;
+ int ret;
+
+ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
+ if (ret)
+ return ret;
+
+ /* Only one virtio-vsock device per guest is supported */
+ if (the_virtio_vsock) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+ if (!vsock) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vsock->vdev = vdev;
+
+ ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+ vsock->vqs, callbacks, names);
+ if (ret < 0)
+ goto out;
+
+ virtio_vsock_update_guest_cid(vsock);
+
+ ret = vsock_core_init(&virtio_transport.transport);
+ if (ret < 0)
+ goto out_vqs;
+
+ vsock->rx_buf_nr = 0;
+ vsock->rx_buf_max_nr = 0;
+
+ vdev->priv = vsock;
+ the_virtio_vsock = vsock;
+ init_waitqueue_head(&vsock->tx_wait);
+ mutex_init(&vsock->tx_lock);
+ mutex_init(&vsock->rx_lock);
+ mutex_init(&vsock->event_lock);
+ INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work);
+ INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work);
+ INIT_WORK(&vsock->event_work, virtio_transport_event_work);
+
+ mutex_lock(&vsock->rx_lock);
+ virtio_vsock_rx_fill(vsock);
+ mutex_unlock(&vsock->rx_lock);
+
+ mutex_lock(&vsock->event_lock);
+ virtio_vsock_event_fill(vsock);
+ mutex_unlock(&vsock->event_lock);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return 0;
+
+out_vqs:
+ vsock->vdev->config->del_vqs(vsock->vdev);
+out:
+ kfree(vsock);
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return ret;
+}
+
+static void virtio_vsock_remove(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+
+ flush_work(&vsock->rx_work);
+ flush_work(&vsock->tx_work);
+ flush_work(&vsock->event_work);
+
+ vdev->config->reset(vdev);
+
+ mutex_lock(&the_virtio_vsock_mutex);
+ the_virtio_vsock = NULL;
+ vsock_core_exit();
+ mutex_unlock(&the_virtio_vsock_mutex);
+
+ vdev->config->del_vqs(vdev);
+
+ kfree(vsock);
+}
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static unsigned int features[] = {
+};
+
+static struct virtio_driver virtio_vsock_driver = {
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtio_vsock_probe,
+ .remove = virtio_vsock_remove,
+};
+
+static int __init virtio_vsock_init(void)
+{
+ int ret;
+
+ virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
+ if (!virtio_vsock_workqueue)
+ return -ENOMEM;
+ ret = register_virtio_driver(&virtio_vsock_driver);
+ if (ret)
+ destroy_workqueue(virtio_vsock_workqueue);
+ return ret;
+}
+
+static void __exit virtio_vsock_exit(void)
+{
+ unregister_virtio_driver(&virtio_vsock_driver);
+ destroy_workqueue(virtio_vsock_workqueue);
+}
+
+module_init(virtio_vsock_init);
+module_exit(virtio_vsock_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Asias He");
+MODULE_DESCRIPTION("virtio transport for vsock");
+MODULE_DEVICE_TABLE(virtio, id_table);
--
2.8.0.rc3
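
With the guest driver above loaded, a guest-side service (something along the lines of the vsudd daemon added elsewhere in this PR) can accept connections initiated from the host. An illustrative listener sketch, not taken from this PR, with port 5000 again an arbitrary example:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
    struct sockaddr_vm addr = {
        .svm_family = AF_VSOCK,
        .svm_cid    = VMADDR_CID_ANY,   /* bind to whatever CID the host assigned */
        .svm_port   = 5000,             /* example port */
    };
    char buf[64];
    int conn, fd = socket(AF_VSOCK, SOCK_STREAM, 0);

    if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
        listen(fd, 1) < 0 || (conn = accept(fd, NULL, NULL)) < 0) {
        perror("vsock listen");
        return 1;
    }
    read(conn, buf, sizeof(buf));
    close(conn);
    close(fd);
    return 0;
}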

View File

@@ -0,0 +1,772 @@
From 2da9f4eef909efa13574326bf29efab439bcc77c Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 13 Jun 2013 18:29:21 +0800
Subject: [PATCH 7/9] VSOCK: Introduce vhost_vsock.ko
VM sockets vhost transport implementation. This driver runs on the
host.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v5:
* Only take rx/tx virtqueues, userspace handles the other virtqueues
* Explicitly skip instances without a CID when transferring packets
* Add VHOST_VSOCK_START ioctl to begin vhost virtqueue processing
* Reset established connections when device is closed
v4:
* Add MAINTAINERS file entry
* virtqueue used len is now sizeof(pkt->hdr) + pkt->len instead of just
pkt->len
* checkpatch.pl cleanups
* Clarify struct vhost_vsock locking
* Add comments about optimization that disables virtqueue notify
* Drop unused vhost_vsock_handle_ctl_kick()
* Call wake_up() after decrementing total_tx_buf to prevent deadlock
v3:
* Remove unneeded variable used to store return value
(Fengguang Wu <fengguang.wu@intel.com> and Julia Lawall
<julia.lawall@lip6.fr>)
v2:
* Add missing total_tx_buf decrement
* Support flexible rx/tx descriptor layout
* Refuse to assign reserved CIDs
* Refuse guest CID if already in use
* Only accept correctly addressed packets
(cherry picked from commit 40d1901b66f0250e91094df07955edf57cf2f41e)
---
MAINTAINERS | 2 +
drivers/vhost/vsock.c | 694 ++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/vhost/vsock.h | 5 +
3 files changed, 701 insertions(+)
create mode 100644 drivers/vhost/vsock.c
create mode 100644 drivers/vhost/vsock.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 403c4cc..530bce8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11405,6 +11405,8 @@ F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
+F: drivers/vhost/vsock.c
+F: drivers/vhost/vsock.h
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
new file mode 100644
index 0000000..8488d01
--- /dev/null
+++ b/drivers/vhost/vsock.c
@@ -0,0 +1,694 @@
+/*
+ * vhost transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/sock.h>
+#include <linux/virtio_vsock.h>
+#include <linux/vhost.h>
+
+#include <net/af_vsock.h>
+#include "vhost.h"
+#include "vsock.h"
+
+#define VHOST_VSOCK_DEFAULT_HOST_CID 2
+
+enum {
+ VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+};
+
+/* Used to track all the vhost_vsock instances on the system. */
+static LIST_HEAD(vhost_vsock_list);
+static DEFINE_MUTEX(vhost_vsock_mutex);
+
+struct vhost_vsock {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[2];
+
+ /* Link to global vhost_vsock_list, protected by vhost_vsock_mutex */
+ struct list_head list;
+
+ struct vhost_work send_pkt_work;
+ wait_queue_head_t send_wait;
+
+ /* Fields protected by vqs[VSOCK_VQ_RX].mutex */
+ struct list_head send_pkt_list; /* host->guest pending packets */
+ u32 total_tx_buf;
+
+ u32 guest_cid;
+};
+
+static u32 vhost_transport_get_local_cid(void)
+{
+ return VHOST_VSOCK_DEFAULT_HOST_CID;
+}
+
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+{
+ struct vhost_vsock *vsock;
+
+ mutex_lock(&vhost_vsock_mutex);
+ list_for_each_entry(vsock, &vhost_vsock_list, list) {
+ u32 other_cid = vsock->guest_cid;
+
+ /* Skip instances that have no CID yet */
+ if (other_cid == 0)
+ continue;
+
+ if (other_cid == guest_cid) {
+ mutex_unlock(&vhost_vsock_mutex);
+ return vsock;
+ }
+ }
+ mutex_unlock(&vhost_vsock_mutex);
+
+ return NULL;
+}
+
+static void
+vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
+ struct vhost_virtqueue *vq)
+{
+ bool added = false;
+
+ mutex_lock(&vq->mutex);
+
+ /* Avoid further vmexits, we're already processing the virtqueue */
+ vhost_disable_notify(&vsock->dev, vq);
+
+ for (;;) {
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ unsigned out, in;
+ size_t nbytes;
+ size_t len;
+ int head;
+
+ if (list_empty(&vsock->send_pkt_list)) {
+ vhost_enable_notify(&vsock->dev, vq);
+ break;
+ }
+
+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head < 0)
+ break;
+
+ if (head == vq->num) {
+ /* We cannot finish yet if more buffers snuck in while
+ * re-enabling notify.
+ */
+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+ vhost_disable_notify(&vsock->dev, vq);
+ continue;
+ }
+ break;
+ }
+
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+
+ if (out) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Expected 0 output buffers, got %u\n", out);
+ break;
+ }
+
+ len = iov_length(&vq->iov[out], in);
+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+
+ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt hdr\n");
+ break;
+ }
+
+ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt buf\n");
+ break;
+ }
+
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ added = true;
+
+ vsock->total_tx_buf -= pkt->len;
+
+ virtio_transport_free_pkt(pkt);
+ }
+ if (added)
+ vhost_signal(&vsock->dev, vq);
+ mutex_unlock(&vq->mutex);
+
+ if (added)
+ wake_up(&vsock->send_wait);
+}
+
+static void vhost_transport_send_pkt_work(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_vsock *vsock;
+
+ vsock = container_of(work, struct vhost_vsock, send_pkt_work);
+ vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ vhost_transport_do_send_pkt(vsock, vq);
+}
+
+static int
+vhost_transport_send_one_pkt(struct vhost_vsock *vsock,
+ struct virtio_vsock_pkt *pkt)
+{
+ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ /* Queue it up in vhost work */
+ mutex_lock(&vq->mutex);
+ list_add_tail(&pkt->list, &vsock->send_pkt_list);
+ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+ mutex_unlock(&vq->mutex);
+
+ return pkt->len;
+}
+
+static int
+vhost_transport_send_pkt_no_sock(struct virtio_vsock_pkt *pkt)
+{
+ struct vhost_vsock *vsock;
+
+ /* Find the vhost_vsock according to guest context id */
+ vsock = vhost_vsock_get(le32_to_cpu(pkt->hdr.dst_cid));
+ if (!vsock) {
+ virtio_transport_free_pkt(pkt);
+ return -ENODEV;
+ }
+
+ return vhost_transport_send_one_pkt(vsock, pkt);
+}
+
+static int
+vhost_transport_send_pkt(struct vsock_sock *vsk,
+ struct virtio_vsock_pkt_info *info)
+{
+ u32 src_cid, src_port, dst_cid, dst_port;
+ struct virtio_vsock_sock *vvs;
+ struct virtio_vsock_pkt *pkt;
+ struct vhost_virtqueue *vq;
+ struct vhost_vsock *vsock;
+ u32 pkt_len = info->pkt_len;
+ DEFINE_WAIT(wait);
+
+ src_cid = vhost_transport_get_local_cid();
+ src_port = vsk->local_addr.svm_port;
+ if (!info->remote_cid) {
+ dst_cid = vsk->remote_addr.svm_cid;
+ dst_port = vsk->remote_addr.svm_port;
+ } else {
+ dst_cid = info->remote_cid;
+ dst_port = info->remote_port;
+ }
+
+ /* Find the vhost_vsock according to guest context id */
+ vsock = vhost_vsock_get(dst_cid);
+ if (!vsock)
+ return -ENODEV;
+
+ vvs = vsk->trans;
+ vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ /* we can send less than pkt_len bytes */
+ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
+ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+
+ /* virtio_transport_get_credit might return less than pkt_len credit */
+ pkt_len = virtio_transport_get_credit(vvs, pkt_len);
+
+ /* Do not send zero length OP_RW pkt*/
+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+ return pkt_len;
+
+ /* Respect global tx buf limitation */
+ mutex_lock(&vq->mutex);
+ while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) {
+ prepare_to_wait_exclusive(&vsock->send_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&vq->mutex);
+ schedule();
+ mutex_lock(&vq->mutex);
+ finish_wait(&vsock->send_wait, &wait);
+ }
+ vsock->total_tx_buf += pkt_len;
+ mutex_unlock(&vq->mutex);
+
+ pkt = virtio_transport_alloc_pkt(info, pkt_len,
+ src_cid, src_port,
+ dst_cid, dst_port);
+ if (!pkt) {
+ mutex_lock(&vq->mutex);
+ vsock->total_tx_buf -= pkt_len;
+ mutex_unlock(&vq->mutex);
+ virtio_transport_put_credit(vvs, pkt_len);
+ wake_up(&vsock->send_wait);
+ return -ENOMEM;
+ }
+
+ virtio_transport_inc_tx_pkt(vvs, pkt);
+
+ return vhost_transport_send_one_pkt(vsock, pkt);
+}
+
+static struct virtio_vsock_pkt *
+vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
+ unsigned int out, unsigned int in)
+{
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ size_t nbytes;
+ size_t len;
+
+ if (in != 0) {
+ vq_err(vq, "Expected 0 input buffers, got %u\n", in);
+ return NULL;
+ }
+
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ return NULL;
+
+ len = iov_length(vq->iov, out);
+ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
+
+ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
+ sizeof(pkt->hdr), nbytes);
+ kfree(pkt);
+ return NULL;
+ }
+
+ if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
+ pkt->len = le32_to_cpu(pkt->hdr.len);
+
+ /* No payload */
+ if (!pkt->len)
+ return pkt;
+
+ /* The pkt is too big */
+ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
+ if (!pkt->buf) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
+ pkt->len, nbytes);
+ virtio_transport_free_pkt(pkt);
+ return NULL;
+ }
+
+ return pkt;
+}
+
+static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
+ dev);
+ struct virtio_vsock_pkt *pkt;
+ int head;
+ unsigned int out, in;
+ bool added = false;
+
+ mutex_lock(&vq->mutex);
+ vhost_disable_notify(&vsock->dev, vq);
+ for (;;) {
+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head < 0)
+ break;
+
+ if (head == vq->num) {
+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+ vhost_disable_notify(&vsock->dev, vq);
+ continue;
+ }
+ break;
+ }
+
+ pkt = vhost_vsock_alloc_pkt(vq, out, in);
+ if (!pkt) {
+ vq_err(vq, "Faulted on pkt\n");
+ continue;
+ }
+
+ /* Only accept correctly addressed packets */
+ if (le32_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
+ virtio_transport_recv_pkt(pkt);
+ else
+ virtio_transport_free_pkt(pkt);
+
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ added = true;
+ }
+ if (added)
+ vhost_signal(&vsock->dev, vq);
+ mutex_unlock(&vq->mutex);
+}
+
+static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
+ dev);
+
+ vhost_transport_do_send_pkt(vsock, vq);
+}
+
+static int vhost_vsock_start(struct vhost_vsock *vsock)
+{
+ size_t i;
+ int ret;
+
+ mutex_lock(&vsock->dev.mutex);
+
+ ret = vhost_dev_check_owner(&vsock->dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+
+ if (!vhost_vq_access_ok(vq)) {
+ ret = -EFAULT;
+ mutex_unlock(&vq->mutex);
+ goto err_vq;
+ }
+
+ if (!vq->private_data) {
+ vq->private_data = vsock;
+ vhost_vq_init_access(vq);
+ }
+
+ mutex_unlock(&vq->mutex);
+ }
+
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+
+err_vq:
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+ }
+err:
+ mutex_unlock(&vsock->dev.mutex);
+ return ret;
+}
+
+static void vhost_vsock_stop(struct vhost_vsock *vsock)
+{
+ size_t i;
+
+ mutex_lock(&vsock->dev.mutex);
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->private_data = vsock;
+ mutex_unlock(&vq->mutex);
+ }
+
+ mutex_unlock(&vsock->dev.mutex);
+}
+
+static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
+{
+ struct vhost_virtqueue **vqs;
+ struct vhost_vsock *vsock;
+ int ret;
+
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+ if (!vsock)
+ return -ENOMEM;
+
+ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
+ if (!vqs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
+ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
+
+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
+
+ file->private_data = vsock;
+ init_waitqueue_head(&vsock->send_wait);
+ INIT_LIST_HEAD(&vsock->send_pkt_list);
+ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
+
+ mutex_lock(&vhost_vsock_mutex);
+ list_add_tail(&vsock->list, &vhost_vsock_list);
+ mutex_unlock(&vhost_vsock_mutex);
+ return 0;
+
+out:
+ kfree(vsock);
+ return ret;
+}
+
+static void vhost_vsock_flush(struct vhost_vsock *vsock)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
+ if (vsock->vqs[i].handle_kick)
+ vhost_poll_flush(&vsock->vqs[i].poll);
+ vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
+}
+
+static void vhost_vsock_reset_orphans(struct sock *sk)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ lock_sock(sk);
+ if (!vhost_vsock_get(vsk->local_addr.svm_cid)) {
+ sk->sk_state = SS_UNCONNECTED;
+ sk->sk_err = ECONNRESET;
+ sk->sk_error_report(sk);
+ }
+ release_sock(sk);
+}
+
+static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
+{
+ struct vhost_vsock *vsock = file->private_data;
+
+ mutex_lock(&vhost_vsock_mutex);
+ list_del(&vsock->list);
+ mutex_unlock(&vhost_vsock_mutex);
+
+ /* Iterating over all connections for all CIDs to find orphans is
+ * inefficient. Room for improvement here. */
+ vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+
+ vhost_vsock_stop(vsock);
+ vhost_vsock_flush(vsock);
+ vhost_dev_stop(&vsock->dev);
+ vhost_dev_cleanup(&vsock->dev, false);
+ kfree(vsock->dev.vqs);
+ kfree(vsock);
+ return 0;
+}
+
+static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u32 guest_cid)
+{
+ struct vhost_vsock *other;
+
+ /* Refuse reserved CIDs */
+ if (guest_cid <= VMADDR_CID_HOST)
+ return -EINVAL;
+
+ /* Refuse if CID is already in use */
+ other = vhost_vsock_get(guest_cid);
+ if (other && other != vsock)
+ return -EADDRINUSE;
+
+ mutex_lock(&vhost_vsock_mutex);
+ vsock->guest_cid = guest_cid;
+ mutex_unlock(&vhost_vsock_mutex);
+
+ return 0;
+}
+
+static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
+{
+ struct vhost_virtqueue *vq;
+ int i;
+
+ if (features & ~VHOST_VSOCK_FEATURES)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&vsock->dev.mutex);
+ if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ !vhost_log_access_ok(&vsock->dev)) {
+ mutex_unlock(&vsock->dev.mutex);
+ return -EFAULT;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ vq = &vsock->vqs[i];
+ mutex_lock(&vq->mutex);
+ vq->acked_features = features;
+ mutex_unlock(&vq->mutex);
+ }
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+}
+
+static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_vsock *vsock = f->private_data;
+ void __user *argp = (void __user *)arg;
+ u64 __user *featurep = argp;
+ u32 __user *cidp = argp;
+ u32 guest_cid;
+ u64 features;
+ int r;
+
+ switch (ioctl) {
+ case VHOST_VSOCK_SET_GUEST_CID:
+ if (get_user(guest_cid, cidp))
+ return -EFAULT;
+ return vhost_vsock_set_cid(vsock, guest_cid);
+ case VHOST_VSOCK_START:
+ return vhost_vsock_start(vsock);
+ case VHOST_GET_FEATURES:
+ features = VHOST_VSOCK_FEATURES;
+ if (copy_to_user(featurep, &features, sizeof(features)))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES:
+ if (copy_from_user(&features, featurep, sizeof(features)))
+ return -EFAULT;
+ return vhost_vsock_set_features(vsock, features);
+ default:
+ mutex_lock(&vsock->dev.mutex);
+ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
+ if (r == -ENOIOCTLCMD)
+ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
+ else
+ vhost_vsock_flush(vsock);
+ mutex_unlock(&vsock->dev.mutex);
+ return r;
+ }
+}
+
+static const struct file_operations vhost_vsock_fops = {
+ .owner = THIS_MODULE,
+ .open = vhost_vsock_dev_open,
+ .release = vhost_vsock_dev_release,
+ .llseek = noop_llseek,
+ .unlocked_ioctl = vhost_vsock_dev_ioctl,
+};
+
+static struct miscdevice vhost_vsock_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "vhost-vsock",
+ .fops = &vhost_vsock_fops,
+};
+
+static struct virtio_transport vhost_transport = {
+ .transport = {
+ .get_local_cid = vhost_transport_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ .send_pkt = vhost_transport_send_pkt,
+ .send_pkt_no_sock = vhost_transport_send_pkt_no_sock,
+};
+
+static int __init vhost_vsock_init(void)
+{
+ int ret;
+
+ ret = vsock_core_init(&vhost_transport.transport);
+ if (ret < 0)
+ return ret;
+ return misc_register(&vhost_vsock_misc);
+};
+
+static void __exit vhost_vsock_exit(void)
+{
+ misc_deregister(&vhost_vsock_misc);
+ vsock_core_exit();
+};
+
+module_init(vhost_vsock_init);
+module_exit(vhost_vsock_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Asias He");
+MODULE_DESCRIPTION("vhost transport for vsock ");
diff --git a/drivers/vhost/vsock.h b/drivers/vhost/vsock.h
new file mode 100644
index 0000000..173f9fc
--- /dev/null
+++ b/drivers/vhost/vsock.h
@@ -0,0 +1,5 @@
+#ifndef VHOST_VSOCK_H
+#define VHOST_VSOCK_H
+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u32)
+#define VHOST_VSOCK_START _IO(VHOST_VIRTIO, 0x61)
+#endif
--
2.8.0.rc3
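
For context, a rough sketch of how a host-side VMM might exercise the new /dev/vhost-vsock misc device and the two ioctls defined in drivers/vhost/vsock.h above. The ioctl numbers are redefined locally because this version keeps them in a non-UAPI header; the VHOST_SET_OWNER and vring setup steps are omitted, so VHOST_VSOCK_START would not actually succeed as written — this only illustrates the ioctl interface, not a working VMM.

/* Hedged sketch: driving the vhost-vsock misc device from user space.
 * Guest CID 3 is an arbitrary example; anything > VMADDR_CID_HOST (2)
 * is accepted by vhost_vsock_set_cid(). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/vhost.h>        /* VHOST_VIRTIO */

#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u32)
#define VHOST_VSOCK_START         _IO(VHOST_VIRTIO, 0x61)

int main(void)
{
        __u32 guest_cid = 3;
        int fd = open("/dev/vhost-vsock", O_RDWR);

        if (fd < 0) {
                perror("open /dev/vhost-vsock");
                return 1;
        }
        if (ioctl(fd, VHOST_VSOCK_SET_GUEST_CID, &guest_cid) < 0)
                perror("VHOST_VSOCK_SET_GUEST_CID");
        /* A real VMM would issue VHOST_SET_OWNER, VHOST_SET_FEATURES and
         * VHOST_SET_VRING_* calls here before starting the device. */
        if (ioctl(fd, VHOST_VSOCK_START) < 0)
                perror("VHOST_VSOCK_START");
        close(fd);
        return 0;
}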

View File

@ -0,0 +1,108 @@
From 4293a00ac211e20980a0739498a73aae4d8546bf Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 13 Jun 2013 18:30:19 +0800
Subject: [PATCH 8/9] VSOCK: Add Makefile and Kconfig
Enable virtio-vsock and vhost-vsock.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v4:
* Make checkpatch.pl happy with longer option description
* Clarify dependency on virtio rather than QEMU as suggested by Alex
Bennee
v3:
* Don't put vhost vsock driver into staging
* Add missing Kconfig dependencies (Arnd Bergmann <arnd@arndb.de>)
(cherry picked from commit 4c9d2a6be1c69ec22f8ee90bb4a5cc21d3848077)
---
drivers/vhost/Kconfig | 15 +++++++++++++++
drivers/vhost/Makefile | 4 ++++
net/vmw_vsock/Kconfig | 19 +++++++++++++++++++
net/vmw_vsock/Makefile | 2 ++
4 files changed, 40 insertions(+)
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 533eaf0..d7aae9e 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -21,6 +21,21 @@ config VHOST_SCSI
Say M here to enable the vhost_scsi TCM fabric module
for use with virtio-scsi guests
+config VHOST_VSOCK
+ tristate "vhost virtio-vsock driver"
+ depends on VSOCKETS && EVENTFD
+ select VIRTIO_VSOCKETS_COMMON
+ select VHOST
+ select VHOST_RING
+ default n
+ ---help---
+ This kernel module can be loaded in the host kernel to provide AF_VSOCK
+ sockets for communicating with guests. The guests must have the
+ virtio_transport.ko driver loaded to use the virtio-vsock device.
+
+ To compile this driver as a module, choose M here: the module will be called
+ vhost_vsock.
+
config VHOST_RING
tristate
---help---
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index e0441c3..6b012b9 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -4,5 +4,9 @@ vhost_net-y := net.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
vhost_scsi-y := scsi.o
+obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
+vhost_vsock-y := vsock.o
+
obj-$(CONFIG_VHOST_RING) += vringh.o
+
obj-$(CONFIG_VHOST) += vhost.o
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 14810ab..f27e74b 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -26,3 +26,22 @@ config VMWARE_VMCI_VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vmw_vsock_vmci_transport. If unsure, say N.
+
+config VIRTIO_VSOCKETS
+ tristate "virtio transport for Virtual Sockets"
+ depends on VSOCKETS && VIRTIO
+ select VIRTIO_VSOCKETS_COMMON
+ help
+ This module implements a virtio transport for Virtual Sockets.
+
+ Enable this transport if your Virtual Machine host supports Virtual
+ Sockets over virtio.
+
+ To compile this driver as a module, choose M here: the module
+ will be called virtio_vsock_transport. If unsure, say N.
+
+config VIRTIO_VSOCKETS_COMMON
+ tristate
+ ---help---
+ This option is selected by any driver which needs to access
+ the virtio_vsock.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 2ce52d7..cf4c294 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,5 +1,7 @@
obj-$(CONFIG_VSOCKETS) += vsock.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
+obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o
+obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o
vsock-y += af_vsock.o vsock_addr.o
--
2.8.0.rc3
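
For reference, a minimal guest-side sketch of what these Kconfig options enable: with CONFIG_VIRTIO_VSOCKETS built in (or the virtio_transport module loaded), a guest process can open an AF_VSOCK stream to the host. The port number here is an arbitrary example and assumes something on the host side is listening; on libcs that do not ship vsock definitions, AF_VSOCK and the sockaddr_vm layout may need to come from a patched copy of vm_sockets.h, as the nc-vsock package below does.

/* Hedged sketch: a guest-side AF_VSOCK stream client talking to the host. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>   /* struct sockaddr_vm, VMADDR_CID_HOST */

#ifndef AF_VSOCK
#define AF_VSOCK 40             /* not all libc headers define it */
#endif

int main(void)
{
        struct sockaddr_vm sa = {
                .svm_family = AF_VSOCK,
                .svm_cid    = VMADDR_CID_HOST,  /* 2: the host */
                .svm_port   = 12345,            /* arbitrary example port */
        };
        int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

        if (fd < 0 || connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
                perror("vsock connect");
                return 1;
        }
        write(fd, "ping\n", 5);
        close(fd);
        return 0;
}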

View File

@ -0,0 +1,30 @@
From 366c9c42afb9bd54f92f72518470c09e46f12e88 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@docker.com>
Date: Mon, 4 Apr 2016 14:50:10 +0100
Subject: [PATCH 9/9] VSOCK: Only allow host network namespace to use AF_VSOCK.
The VSOCK addressing schema does not really lend itself to simply creating an
alternative end point address within a namespace.
Signed-off-by: Ian Campbell <ian.campbell@docker.com>
---
net/vmw_vsock/af_vsock.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1e5f5ed..cdb3dd3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1840,6 +1840,9 @@ static const struct proto_ops vsock_stream_ops = {
static int vsock_create(struct net *net, struct socket *sock,
int protocol, int kern)
{
+ if (!net_eq(net, &init_net))
+ return -EAFNOSUPPORT;
+
if (!sock)
return -EINVAL;
--
2.8.0.rc3
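
A hedged sketch of the behaviour this patch introduces: AF_VSOCK socket creation now fails with EAFNOSUPPORT from any network namespace other than the host's. The demo below assumes the vsock core is loaded and requires CAP_SYS_ADMIN for unshare(CLONE_NEWNET).

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40
#endif

int main(void)
{
        int fd = socket(AF_VSOCK, SOCK_STREAM, 0); /* in init_net: expected to succeed */
        if (fd >= 0)
                close(fd);

        if (unshare(CLONE_NEWNET) < 0) {           /* needs CAP_SYS_ADMIN */
                perror("unshare");
                return 1;
        }
        if (socket(AF_VSOCK, SOCK_STREAM, 0) < 0 && errno == EAFNOSUPPORT)
                printf("non-host netns refused as expected: EAFNOSUPPORT\n");
        return 0;
}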

View File

@ -5,6 +5,8 @@ all:
$(MAKE) -C hupper OS=linux
$(MAKE) -C hvtools OS=linux
$(MAKE) -C docker OS=Linux
$(MAKE) -C nc-vsock OS=linux
$(MAKE) -C vsudd OS=linux
arm:
$(MAKE) -C proxy OS=linux ARCH=arm
@ -12,6 +14,8 @@ arm:
$(MAKE) -C mdnstool OS=linux ARCH=arm
$(MAKE) -C hupper OS=linux ARCH=arm
$(MAKE) -C docker OS=Linux ARCH=arm
$(MAKE) -C nc-vsock OS=linux ARCH=arm
$(MAKE) -C vsudd OS=linux ARCH=arm
clean:
$(MAKE) -C proxy clean
@ -20,3 +24,5 @@ clean:
$(MAKE) -C docker clean
$(MAKE) -C hupper clean
$(MAKE) -C hvtools clean
$(MAKE) -C nc-vsock clean
$(MAKE) -C vsudd clean

alpine/packages/nc-vsock/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
nc-vsock

View File

@ -0,0 +1,20 @@
nc-vsock.c is taken from the vsock-extras branch of https://github.com/stefanha/linux
(commit f442d29db40a1999d139dccf110f3b590a0ed5b8, file nc-vsock.c),
together with include/uapi/linux/vm_sockets.h, patched with:
--- /home/ijc/x 2016-03-17 09:31:23.288944691 +0000
+++ alpine/packages/nc-vsock/include/uapi/linux/vm_sockets.h 2016-03-17 09:30:07.916069307 +0000
@@ -16,7 +16,12 @@
#ifndef _UAPI_VM_SOCKETS_H
#define _UAPI_VM_SOCKETS_H
+#ifdef __KERNEL__
#include <linux/socket.h>
+#else
+#define __kernel_sa_family_t sa_family_t
+#include <sys/socket.h>
+#endif
/* Option name for STREAM socket buffer size. Use as the option name in
* setsockopt(3) or getsockopt(3) to set or get an unsigned long long that

View File

@ -0,0 +1,10 @@
FROM alpine:3.3
RUN apk update && apk upgrade && apk add alpine-sdk
RUN mkdir -p /nc-vsock
WORKDIR /nc-vsock
COPY . /nc-vsock
RUN make nc-vsock

View File

@ -0,0 +1,15 @@
.PHONY: all
DEPS=nc-vsock.c
all: Dockerfile $(DEPS)
docker build -t nc-vsock:build .
docker run --rm nc-vsock:build cat nc-vsock > nc-vsock
chmod 755 nc-vsock
nc-vsock: $(DEPS)
gcc -Wall -Werror -o nc-vsock nc-vsock.c
clean:
rm -f nc-vsock
docker images -q nc-vsock:build | xargs docker rmi -f

View File

@ -0,0 +1,161 @@
/*
* VMware vSockets Driver
*
* Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation version 2 and no later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _UAPI_VM_SOCKETS_H
#define _UAPI_VM_SOCKETS_H
#ifdef __KERNEL__
#include <linux/socket.h>
#else
#define __kernel_sa_family_t sa_family_t
#include <sys/socket.h>
#endif
/* Option name for STREAM socket buffer size. Use as the option name in
* setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the size of the buffer underlying a vSockets STREAM socket.
* Value is clamped to the MIN and MAX.
*/
#define SO_VM_SOCKETS_BUFFER_SIZE 0
/* Option name for STREAM socket minimum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the minimum size allowed for the buffer underlying a vSockets
* STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1
/* Option name for STREAM socket maximum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long
* that specifies the maximum size allowed for the buffer underlying a
* vSockets STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2
/* Option name for socket peer's host-specific VM ID. Use as the option name
* in getsockopt(3) to get a host-specific identifier for the peer endpoint's
* VM. The identifier is a signed integer.
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
/* Option name for determining if a socket is trusted. Use as the option name
* in getsockopt(3) to determine if a socket is trusted. The value is a
* signed integer.
*/
#define SO_VM_SOCKETS_TRUSTED 5
/* Option name for STREAM socket connection timeout. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get the connection
* timeout for a STREAM socket.
*/
#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
/* Option name for using non-blocking send/receive. Use as the option name
* for setsockopt(3) or getsockopt(3) to set or get the non-blocking
* transmit/receive flag for a STREAM socket. This flag determines whether
* send() and recv() can be called in non-blocking contexts for the given
* socket. The value is a signed integer.
*
* This option is only relevant to kernel endpoints, where descheduling the
* thread of execution is not allowed, for example, while holding a spinlock.
* It is not to be confused with conventional non-blocking socket operations.
*
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_NONBLOCK_TXRX 7
/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of
* sockaddr_vm and indicates the context ID of the current endpoint.
*/
#define VMADDR_CID_ANY -1U
/* Bind to any available port. Works for the svm_port field of
* sockaddr_vm.
*/
#define VMADDR_PORT_ANY -1U
/* Use this as the destination CID in an address when referring to the
* hypervisor. VMCI relies on it being 0, but this would be useful for other
* transports too.
*/
#define VMADDR_CID_HYPERVISOR 0
/* This CID is specific to VMCI and can be considered reserved (even VMCI
* doesn't use it anymore, it's a legacy value from an older release).
*/
#define VMADDR_CID_RESERVED 1
/* Use this as the destination CID in an address when referring to the host
* (any process other than the hypervisor). VMCI relies on it being 2, but
* this would be useful for other transports too.
*/
#define VMADDR_CID_HOST 2
/* Invalid vSockets version. */
#define VM_SOCKETS_INVALID_VERSION -1U
/* The epoch (first) component of the vSockets version. A single byte
* representing the epoch component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24)
/* The major (second) component of the vSockets version. A single byte
* representing the major component of the vSockets version. Typically
* changes for every major release of a product.
*/
#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16)
/* The minor (third) component of the vSockets version. Two bytes representing
* the minor component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
/* Address structure for vSockets. The address family should be set to
* AF_VSOCK. The structure members should all align on their natural
* boundaries without resorting to compiler packing directives. The total size
* of this structure should be exactly the same as that of struct sockaddr.
*/
struct sockaddr_vm {
__kernel_sa_family_t svm_family;
unsigned short svm_reserved1;
unsigned int svm_port;
unsigned int svm_cid;
unsigned char svm_zero[sizeof(struct sockaddr) -
sizeof(sa_family_t) -
sizeof(unsigned short) -
sizeof(unsigned int) - sizeof(unsigned int)];
};
#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9)
#endif /* _UAPI_VM_SOCKETS_H */
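
As a hedged illustration of this header, the snippet below queries the local context ID through the vsock control device using IOCTL_VM_SOCKETS_GET_LOCAL_CID; it assumes the vsock core is loaded so that /dev/vsock exists, and uses the same relative include that nc-vsock.c uses.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include "include/uapi/linux/vm_sockets.h"

int main(void)
{
        unsigned int cid;
        int fd = open("/dev/vsock", O_RDONLY);

        if (fd < 0) {
                perror("open /dev/vsock");
                return 1;
        }
        if (ioctl(fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid) < 0) {
                perror("IOCTL_VM_SOCKETS_GET_LOCAL_CID");
                close(fd);
                return 1;
        }
        printf("local CID: %u\n", cid);   /* e.g. 3 for this guest, 2 on the host */
        close(fd);
        return 0;
}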

View File

@ -0,0 +1,336 @@
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <netdb.h>
#include "include/uapi/linux/vm_sockets.h"
#define MODE_READ 1 /* From the vsock */
#define MODE_WRITE 2 /* To the vsock */
#define MODE_RDWR (MODE_READ|MODE_WRITE)
static int parse_cid(const char *cid_str)
{
char *end = NULL;
long cid = strtol(cid_str, &end, 10);
if (cid_str != end && *end == '\0') {
return cid;
} else {
fprintf(stderr, "invalid cid: %s\n", cid_str);
return -1;
}
}
static int parse_port(const char *port_str)
{
char *end = NULL;
long port = strtol(port_str, &end, 10);
if (port_str != end && *end == '\0') {
return port;
} else {
fprintf(stderr, "invalid port number: %s\n", port_str);
return -1;
}
}
static int vsock_listen(const char *port_str)
{
int listen_fd;
int client_fd;
struct sockaddr_vm sa_listen = {
.svm_family = AF_VSOCK,
.svm_cid = VMADDR_CID_ANY,
};
struct sockaddr_vm sa_client;
socklen_t socklen_client = sizeof(sa_client);
int port = parse_port(port_str);
if (port < 0) {
return -1;
}
sa_listen.svm_port = port;
listen_fd = socket(AF_VSOCK, SOCK_STREAM, 0);
if (listen_fd < 0) {
perror("socket");
return -1;
}
if (bind(listen_fd, (struct sockaddr*)&sa_listen, sizeof(sa_listen)) != 0) {
perror("bind");
close(listen_fd);
return -1;
}
if (listen(listen_fd, 1) != 0) {
perror("listen");
close(listen_fd);
return -1;
}
client_fd = accept(listen_fd, (struct sockaddr*)&sa_client, &socklen_client);
if (client_fd < 0) {
perror("accept");
close(listen_fd);
return -1;
}
fprintf(stderr, "Connection from cid %u port %u...\n", sa_client.svm_cid, sa_client.svm_port);
close(listen_fd);
return client_fd;
}
static int tcp_connect(const char *node, const char *service)
{
int fd;
int ret;
const struct addrinfo hints = {
.ai_family = AF_INET,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *res = NULL;
struct addrinfo *addrinfo;
ret = getaddrinfo(node, service, &hints, &res);
if (ret != 0) {
fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(ret));
return -1;
}
for (addrinfo = res; addrinfo; addrinfo = addrinfo->ai_next) {
fd = socket(addrinfo->ai_family, addrinfo->ai_socktype, addrinfo->ai_protocol);
if (fd < 0) {
perror("socket");
continue;
}
if (connect(fd, addrinfo->ai_addr, addrinfo->ai_addrlen) != 0) {
perror("connect");
close(fd);
continue;
}
break;
}
freeaddrinfo(res);
return fd;
}
static int vsock_connect(const char *cid_str, const char *port_str)
{
int fd;
int cid;
int port;
struct sockaddr_vm sa = {
.svm_family = AF_VSOCK,
};
cid = parse_cid(cid_str);
if (cid < 0) {
return -1;
}
sa.svm_cid = cid;
port = parse_port(port_str);
if (port < 0) {
return -1;
}
sa.svm_port = port;
fd = socket(AF_VSOCK, SOCK_STREAM, 0);
if (fd < 0) {
perror("socket");
return -1;
}
if (connect(fd, (struct sockaddr*)&sa, sizeof(sa)) != 0) {
perror("connect");
close(fd);
return -1;
}
return fd;
}
static int get_fds(int argc, char **argv, int fds[2])
{
fds[0] = STDIN_FILENO;
fds[1] = -1;
if (argc >= 3 && strcmp(argv[1], "-l") == 0) {
fds[1] = vsock_listen(argv[2]);
if (fds[1] < 0) {
return -1;
}
if (argc == 6 && strcmp(argv[3], "-t") == 0) {
fds[0] = tcp_connect(argv[4], argv[5]);
if (fds[0] < 0) {
return -1;
}
}
return 0;
} else if (argc == 3) {
fds[1] = vsock_connect(argv[1], argv[2]);
if (fds[1] < 0) {
return -1;
}
return 0;
} else {
fprintf(stderr, "usage: %s [-r|-w] [-l <port> [-t <dst> <dstport>] | <cid> <port>]\n", argv[0]);
return -1;
}
}
static void set_nonblock(int fd, bool enable)
{
int ret;
int flags;
ret = fcntl(fd, F_GETFL);
if (ret < 0) {
perror("fcntl");
return;
}
flags = ret & ~O_NONBLOCK;
if (enable) {
flags |= O_NONBLOCK;
}
fcntl(fd, F_SETFL, flags);
}
static int xfer_data(int in_fd, int out_fd)
{
char buf[256*1024];
char *send_ptr = buf;
ssize_t nbytes;
ssize_t remaining;
if (out_fd == STDIN_FILENO) out_fd = STDOUT_FILENO;
nbytes = read(in_fd, buf, sizeof(buf));
if (nbytes < 0) {
return -1;
}
if (nbytes == 0) {
int rc;
if (out_fd == STDOUT_FILENO) return 0;
rc = shutdown(out_fd, SHUT_WR);
if (rc == 0) return 0;
perror("shutdown");
return -1;
}
remaining = nbytes;
while (remaining > 0) {
nbytes = write(out_fd, send_ptr, remaining);
if (nbytes < 0 && errno == EAGAIN) {
nbytes = 0;
} else if (nbytes <= 0) {
return -1;
}
if (remaining > nbytes) {
/* Wait for fd to become writeable again */
for (;;) {
fd_set wfds;
FD_ZERO(&wfds);
FD_SET(out_fd, &wfds);
if (select(out_fd + 1, NULL, &wfds, NULL, NULL) < 0) {
if (errno == EINTR) {
continue;
} else {
perror("select");
return -1;
}
}
if (FD_ISSET(out_fd, &wfds)) {
break;
}
}
}
send_ptr += nbytes;
remaining -= nbytes;
}
return 1;
}
static void main_loop(int fds[2], int mode)
{
fd_set rfds;
int nfds = fds[fds[0] > fds[1] ? 0 : 1] + 1;
/* Which fd's are readable */
bool rfd0 = !!(mode&MODE_WRITE), rfd1 = !!(mode&MODE_READ);
set_nonblock(fds[0], true);
set_nonblock(fds[1], true);
for (;;) {
if (!rfd0 && !rfd1)
return;
FD_ZERO(&rfds);
if (rfd0) FD_SET(fds[0], &rfds);
if (rfd1) FD_SET(fds[1], &rfds);
if (select(nfds, &rfds, NULL, NULL, NULL) < 0) {
if (errno == EINTR) {
continue;
} else {
perror("select");
return;
}
}
if (rfd0 && FD_ISSET(fds[0], &rfds)) {
switch (xfer_data(fds[0], fds[1])) {
case -1: return;
case 0: rfd0 = false; break;
case 1: break;
}
}
if (rfd1 && FD_ISSET(fds[1], &rfds)) {
switch (xfer_data(fds[1], fds[0])) {
case -1: return;
case 0: rfd1 = false; break;
case 1: break;
}
}
}
}
int main(int argc, char **argv)
{
int mode = MODE_RDWR;
int fds[2];
if (argc >= 2) {
if (!strcmp(argv[1], "-r")) {
mode = MODE_READ;
argv++; argc--;
} else if (!strcmp(argv[1], "-w")) {
mode = MODE_WRITE;
argv++; argc--;
}
}
if (get_fds(argc, argv, fds) < 0) {
return EXIT_FAILURE;
}
main_loop(fds, mode);
return EXIT_SUCCESS;
}

alpine/packages/vsudd/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/vsudd

BIN
alpine/packages/vsudd/9pudc Executable file

Binary file not shown.

View File

@ -0,0 +1,15 @@
FROM golang:alpine
RUN apk update && apk add alpine-sdk
RUN mkdir -p /go/src/vsudd
WORKDIR /go/src/vsudd
COPY . /go/src/vsudd/
ARG GOARCH
ARG GOOS
RUN go install --ldflags '-extldflags "-fno-PIC"'
RUN [ -f /go/bin/*/vsudd ] && mv /go/bin/*/vsudd /go/bin/ || true

View File

@ -0,0 +1,10 @@
all: vsudd
vsudd: Dockerfile main.go
docker build --build-arg GOOS=$(OS) --build-arg GOARCH=$(ARCH) -t vsudd:build .
docker run --rm vsudd:build cat /go/bin/vsudd > vsudd
chmod 755 vsudd
clean:
rm -f vsudd
docker images -q vsudd:build | xargs docker rmi -f

View File

@ -0,0 +1,31 @@
#!/sbin/openrc-run
description="vsock socket proxy client"
start()
{
ebegin "Starting docker socket vsock passthrough"
[ -n "${PIDFILE}" ] || PIDFILE=/var/run/vsudd.pid
[ -n "${LOGFILE}" ] || LOGFILE=/var/log/vsudd.log
start-stop-daemon --start --quiet \
--background \
--exec /sbin/vsudd \
--make-pidfile --pidfile ${PIDFILE} \
--stderr "${LOGFILE}" --stdout "${LOGFILE}" \
-- -port 2376 -sock /var/run/docker.sock
eend $? "Failed to start vsudd"
}
stop()
{
ebegin "Stopping docker socket passthrough"
[ -n "${PIDFILE}" ] || PIDFILE=/var/run/vsudd.pid
start-stop-daemon --stop --quiet --pidfile ${PIDFILE}
eend $? "Failed to stop vsudd"
}

View File

@ -0,0 +1,161 @@
/*
* VMware vSockets Driver
*
* Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation version 2 and no later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _UAPI_VM_SOCKETS_H
#define _UAPI_VM_SOCKETS_H
#ifdef __KERNEL__
#include <linux/socket.h>
#else
#define __kernel_sa_family_t sa_family_t
#include <sys/socket.h>
#endif
/* Option name for STREAM socket buffer size. Use as the option name in
* setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the size of the buffer underlying a vSockets STREAM socket.
* Value is clamped to the MIN and MAX.
*/
#define SO_VM_SOCKETS_BUFFER_SIZE 0
/* Option name for STREAM socket minimum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
* specifies the minimum size allowed for the buffer underlying a vSockets
* STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1
/* Option name for STREAM socket maximum buffer size. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get an unsigned long long
* that specifies the maximum size allowed for the buffer underlying a
* vSockets STREAM socket.
*/
#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2
/* Option name for socket peer's host-specific VM ID. Use as the option name
* in getsockopt(3) to get a host-specific identifier for the peer endpoint's
* VM. The identifier is a signed integer.
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
/* Option name for determining if a socket is trusted. Use as the option name
* in getsockopt(3) to determine if a socket is trusted. The value is a
* signed integer.
*/
#define SO_VM_SOCKETS_TRUSTED 5
/* Option name for STREAM socket connection timeout. Use as the option name
* in setsockopt(3) or getsockopt(3) to set or get the connection
* timeout for a STREAM socket.
*/
#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
/* Option name for using non-blocking send/receive. Use as the option name
* for setsockopt(3) or getsockopt(3) to set or get the non-blocking
* transmit/receive flag for a STREAM socket. This flag determines whether
* send() and recv() can be called in non-blocking contexts for the given
* socket. The value is a signed integer.
*
* This option is only relevant to kernel endpoints, where descheduling the
* thread of execution is not allowed, for example, while holding a spinlock.
* It is not to be confused with conventional non-blocking socket operations.
*
* Only available for hypervisor endpoints.
*/
#define SO_VM_SOCKETS_NONBLOCK_TXRX 7
/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of
* sockaddr_vm and indicates the context ID of the current endpoint.
*/
#define VMADDR_CID_ANY -1U
/* Bind to any available port. Works for the svm_port field of
* sockaddr_vm.
*/
#define VMADDR_PORT_ANY -1U
/* Use this as the destination CID in an address when referring to the
* hypervisor. VMCI relies on it being 0, but this would be useful for other
* transports too.
*/
#define VMADDR_CID_HYPERVISOR 0
/* This CID is specific to VMCI and can be considered reserved (even VMCI
* doesn't use it anymore, it's a legacy value from an older release).
*/
#define VMADDR_CID_RESERVED 1
/* Use this as the destination CID in an address when referring to the host
* (any process other than the hypervisor). VMCI relies on it being 2, but
* this would be useful for other transports too.
*/
#define VMADDR_CID_HOST 2
/* Invalid vSockets version. */
#define VM_SOCKETS_INVALID_VERSION -1U
/* The epoch (first) component of the vSockets version. A single byte
* representing the epoch component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24)
/* The major (second) component of the vSockets version. A single byte
* representing the major component of the vSockets version. Typically
* changes for every major release of a product.
*/
#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16)
/* The minor (third) component of the vSockets version. Two bytes representing
* the minor component of the vSockets version.
*/
#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
/* Address structure for vSockets. The address family should be set to
* AF_VSOCK. The structure members should all align on their natural
* boundaries without resorting to compiler packing directives. The total size
* of this structure should be exactly the same as that of struct sockaddr.
*/
struct sockaddr_vm {
__kernel_sa_family_t svm_family;
unsigned short svm_reserved1;
unsigned int svm_port;
unsigned int svm_cid;
unsigned char svm_zero[sizeof(struct sockaddr) -
sizeof(sa_family_t) -
sizeof(unsigned short) -
sizeof(unsigned int) - sizeof(unsigned int)];
};
#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9)
#endif /* _UAPI_VM_SOCKETS_H */

View File

@ -0,0 +1,163 @@
package main
import (
"flag"
"fmt"
"io"
"log"
"net"
"os"
"syscall"
"time"
)
/* No way to teach net or syscall about vsock sockaddr, so go right to C */
/*
#include <sys/socket.h>
#include "include/uapi/linux/vm_sockets.h"
int bind_sockaddr_vm(int fd, const struct sockaddr_vm *sa_vm) {
return bind(fd, (const struct sockaddr*)sa_vm, sizeof(*sa_vm));
}
int connect_sockaddr_vm(int fd, const struct sockaddr_vm *sa_vm) {
return connect(fd, (const struct sockaddr*)sa_vm, sizeof(*sa_vm));
}
int accept_vm(int fd) {
return accept(fd, 0, 0);
}
*/
import "C"
const (
AF_VSOCK = 40
VSOCK_CID_ANY = 4294967295 /* 2^32-1 */
)
var (
port uint
sock string
detach bool
)
func init() {
flag.UintVar(&port, "port", 2376, "vsock port to forward")
flag.StringVar(&sock, "sock", "/var/run/docker.sock", "path of the local Unix domain socket to forward to")
flag.BoolVar(&detach, "detach", false, "detach from terminal")
}
func main() {
log.SetFlags(log.LstdFlags)
flag.Parse()
if detach {
logFile, err := os.Create("/var/log/vsudd.log")
if err != nil {
log.Fatalln("Failed to open log file", err)
}
log.SetOutput(logFile)
null, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
if err != nil {
log.Fatalln("Failed to open /dev/null", err)
}
fd := null.Fd()
syscall.Dup2(int(fd), int(os.Stdin.Fd()))
syscall.Dup2(int(fd), int(os.Stdout.Fd()))
syscall.Dup2(int(fd), int(os.Stderr.Fd()))
}
accept_fd, err := syscall.Socket(AF_VSOCK, syscall.SOCK_STREAM, 0)
if err != nil {
log.Fatal(err)
}
sa := C.struct_sockaddr_vm{}
sa.svm_family = AF_VSOCK
sa.svm_port = C.uint(port)
sa.svm_cid = 3
if ret := C.bind_sockaddr_vm(C.int(accept_fd), &sa); ret != 0 {
log.Fatal(fmt.Sprintf("failed bind vsock connection to %08x.%08x, returned %d", sa.svm_cid, sa.svm_port, ret))
}
err = syscall.Listen(accept_fd, syscall.SOMAXCONN)
if err != nil {
log.Fatalln("Failed to listen to VSOCK", err)
}
log.Printf("Listening on fd %d", accept_fd)
connid := 0
for {
connid++
fd, err := C.accept_vm(C.int(accept_fd))
if err != nil {
log.Fatalln("Error accepting connection", err)
}
go handleOne(connid, int(fd))
}
}
func handleOne(connid int, fd int) {
vsock := os.NewFile(uintptr(fd), "vsock connection")
log.Println(connid, "Accepted connection on fd", fd)
defer syscall.Close(fd)
var docker *net.UnixConn
var err error
// Cope with the server socket appearing up to 10s later
for i := 0; i < 200; i++ {
docker, err = net.DialUnix("unix", nil, &net.UnixAddr{sock, "unix"})
if err == nil {
break
}
time.Sleep(50 * time.Millisecond)
}
defer docker.Close()
if err != nil {
// If the forwarding program has broken then close and continue
log.Println(connid, "Failed to connect to Unix domain socket after 10s", sock, err)
return
}
w := make(chan int64)
go func() {
n, err := io.Copy(vsock, docker)
if err != nil {
log.Println(connid, "error copying from docker to vsock:", err)
}
log.Println(connid, "copying from docker to vsock: ", n, "bytes done")
err = docker.CloseRead()
if err != nil {
log.Println(connid, "error CloseRead on docker socket:", err)
}
err = syscall.Shutdown(fd, syscall.SHUT_WR)
if err != nil {
log.Println(connid, "error SHUT_WR on vsock:", err)
}
w <- n
}()
n, err := io.Copy(docker, vsock)
if err != nil {
log.Println(connid, "error copying from vsock to docker:", err)
}
log.Println(connid, "copying from vsock to docker: ", n, "bytes done")
totalRead := n
err = docker.CloseWrite()
if err != nil {
log.Println(connid, "error CloseWrite on docker socket:", err)
}
err = syscall.Shutdown(fd, syscall.SHUT_RD)
if err != nil {
log.Println(connid, "error SHUT_RD on vsock:", err)
}
totalWritten := <-w
log.Println(connid, "Done. read:", totalRead, "written:", totalWritten)
}

View File

@ -69,6 +69,7 @@ mkdir -p /output/kernel
cd /output/kernel
cp /proc/config.gz .
wget ${KERNEL_SOURCE=} || ( printf "Failed to download kernel source\n" && exit 1 )
cp -r /hostetc/kernel-patches /output/kernel/patches
git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /output/kernel/aufs
cd /output/kernel/aufs