Add VSOCK patches

These are WIP taken from git@github.com:stefanha/linux.git#vsock
(==4c9d2a6be1c6, using "cherry-pick -x") and correspond to RFC v5 of the
frontend patches posted in
http://thread.gmane.org/gmane.linux.kernel.virtualization/27455

There is no corresponding spec proposal update yet, but this set of patches
corresponds (roughly) to the feedback given on v4 of the spec proposal
http://thread.gmane.org/gmane.comp.emulators.virtio.devel/1062.

kernel_config.arm modifications copied from x86, not tested.

Added /etc/kernel-patches/ directory to the image to be consumed by the
licensing.

Signed-off-by: Ian Campbell <ian.campbell@docker.com>
This commit is contained in:
Ian Campbell 2016-03-22 09:36:37 +00:00
parent 4651a150f8
commit eb221b4d4f
16 changed files with 3449 additions and 3 deletions

View File

@ -22,6 +22,7 @@ COPY etc /etc/
ADD kernel/aufs-utils.tar / ADD kernel/aufs-utils.tar /
COPY mkinitrd.sh /bin/ COPY mkinitrd.sh /bin/
COPY kernel/kernel-source-info /etc/ COPY kernel/kernel-source-info /etc/
ADD kernel/kernel-patches.tar /etc/kernel-patches
COPY packages/proxy/proxy /sbin/ COPY packages/proxy/proxy /sbin/
COPY packages/transfused/transfused /sbin/ COPY packages/transfused/transfused /sbin/

View File

@ -1,4 +1,5 @@
vmlinuz64 vmlinuz64
zImage zImage
aufs-utils.tar aufs-utils.tar
kernel-patches.tar
kernel-source-info kernel-source-info

View File

@ -57,6 +57,17 @@ RUN git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /aufs && \
COPY kernel_config /linux/arch/x86/configs/x86_64_defconfig COPY kernel_config /linux/arch/x86/configs/x86_64_defconfig
COPY kernel_config.arm /linux/arch/arm/configs/versatile_defconfig COPY kernel_config.arm /linux/arch/arm/configs/versatile_defconfig
# Apply local patches
COPY patches /patches
RUN cd /linux && \
mkdir /etc/kernel-patches && \
set -e && for patch in /patches/*.patch; do \
echo "Applying $patch"; \
cp "$patch" /etc/kernel-patches; \
patch -p1 < "$patch"; \
done && \
cd /etc/kernel-patches && tar cf /kernel-patches.tar .
RUN jobs=$(nproc); \ RUN jobs=$(nproc); \
cd /linux && \ cd /linux && \
make ARCH=$ARCH defconfig && \ make ARCH=$ARCH defconfig && \

View File

@ -5,6 +5,7 @@ vmlinuz64: kernel_config Dockerfile
docker run --rm mobykernel:build cat /linux/arch/x86_64/boot/bzImage > $@ docker run --rm mobykernel:build cat /linux/arch/x86_64/boot/bzImage > $@
docker run --rm mobykernel:build cat /aufs-utils.tar > aufs-utils.tar docker run --rm mobykernel:build cat /aufs-utils.tar > aufs-utils.tar
docker run --rm mobykernel:build cat /kernel-source-info > kernel-source-info docker run --rm mobykernel:build cat /kernel-source-info > kernel-source-info
docker run --rm mobykernel:build cat /kernel-patches.tar > kernel-patches.tar
arm: zImage arm: zImage
@ -13,8 +14,9 @@ zImage: kernel_config.arm Dockerfile
docker run --rm mobyarmkernel:build cat /linux/arch/arm/boot/zImage > $@ docker run --rm mobyarmkernel:build cat /linux/arch/arm/boot/zImage > $@
docker run --rm mobyarmkernel:build cat /aufs-utils.tar > aufs-utils.tar docker run --rm mobyarmkernel:build cat /aufs-utils.tar > aufs-utils.tar
docker run --rm mobykernel:build cat /kernel-source-info > kernel-source-info docker run --rm mobykernel:build cat /kernel-source-info > kernel-source-info
docker run --rm mobykernel:build cat /kernel-patches.tar > kernel-patches.tar
clean: clean:
rm -f zImage vmlinuz64 aufs-utils.tar kernel-source-info rm -f zImage vmlinuz64 aufs-utils.tar kernel-source-info kernel-patches.tar
docker images -q mobykernel:build | xargs docker rmi -f docker images -q mobykernel:build | xargs docker rmi -f
docker images -q mobyarmkernel:build | xargs docker rmi -f docker images -q mobyarmkernel:build | xargs docker rmi -f

View File

@ -1194,7 +1194,9 @@ CONFIG_DNS_RESOLVER=y
# CONFIG_BATMAN_ADV is not set # CONFIG_BATMAN_ADV is not set
CONFIG_OPENVSWITCH=y CONFIG_OPENVSWITCH=y
CONFIG_OPENVSWITCH_VXLAN=y CONFIG_OPENVSWITCH_VXLAN=y
# CONFIG_VSOCKETS is not set CONFIG_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_NETLINK_MMAP=y CONFIG_NETLINK_MMAP=y
CONFIG_NETLINK_DIAG=y CONFIG_NETLINK_DIAG=y
CONFIG_MPLS=y CONFIG_MPLS=y

View File

@ -1183,7 +1183,9 @@ CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=y CONFIG_OPENVSWITCH=y
CONFIG_OPENVSWITCH_GRE=y CONFIG_OPENVSWITCH_GRE=y
CONFIG_OPENVSWITCH_VXLAN=y CONFIG_OPENVSWITCH_VXLAN=y
# CONFIG_VSOCKETS is not set CONFIG_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS_COMMON=y
# CONFIG_NETLINK_MMAP is not set # CONFIG_NETLINK_MMAP is not set
CONFIG_NETLINK_DIAG=y CONFIG_NETLINK_DIAG=y
CONFIG_MPLS=y CONFIG_MPLS=y

View File

@ -0,0 +1,219 @@
From d8f7730e3211cdb16cd9d26143121aeb05f22509 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 17 Dec 2015 16:53:43 +0800
Subject: [PATCH 1/9] virtio: make find_vqs() checkpatch.pl-friendly
checkpatch.pl wants arrays of strings declared as follows:
static const char * const names[] = { "vq-1", "vq-2", "vq-3" };
Currently the find_vqs() function takes a const char *names[] argument
so passing checkpatch.pl's const char * const names[] results in a
compiler error due to losing the second const.
This patch adjusts the find_vqs() prototype and updates all virtio
transports. This makes it possible for virtio_balloon.c, virtio_input.c,
virtgpu_kms.c, and virtio_rpmsg_bus.c to use the checkpatch.pl-friendly
type.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
(cherry picked from commit f7ad26ff952b3ca2702d7da03aad0ab1f6c01d7c)
---
drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +-
drivers/misc/mic/card/mic_virtio.c | 2 +-
drivers/remoteproc/remoteproc_virtio.c | 2 +-
drivers/rpmsg/virtio_rpmsg_bus.c | 2 +-
drivers/s390/virtio/kvm_virtio.c | 2 +-
drivers/s390/virtio/virtio_ccw.c | 2 +-
drivers/virtio/virtio_balloon.c | 2 +-
drivers/virtio/virtio_input.c | 2 +-
drivers/virtio/virtio_mmio.c | 2 +-
drivers/virtio/virtio_pci_common.c | 4 ++--
drivers/virtio/virtio_pci_common.h | 2 +-
drivers/virtio/virtio_pci_modern.c | 2 +-
include/linux/virtio_config.h | 2 +-
13 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 06496a1..4150873 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
static vq_callback_t *callbacks[] = {
virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack
};
- static const char *names[] = { "control", "cursor" };
+ static const char * const names[] = { "control", "cursor" };
struct virtio_gpu_device *vgdev;
/* this will expand later */
diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c
index e486a0c..f6ed57d 100644
--- a/drivers/misc/mic/card/mic_virtio.c
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -311,7 +311,7 @@ unmap:
static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct mic_vdev *mvdev = to_micvdev(vdev);
struct mic_device_ctrl __iomem *dc = mvdev->dc;
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index e1a1023..e44872f 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev)
static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct rproc *rproc = vdev_to_rproc(vdev);
int i, ret;
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 73354ee..1fcd27c 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len,
static int rpmsg_probe(struct virtio_device *vdev)
{
vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done };
- const char *names[] = { "input", "output" };
+ static const char * const names[] = { "input", "output" };
struct virtqueue *vqs[2];
struct virtproc_info *vrp;
void *bufs_va;
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 53fb975..1d060fd 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev)
static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct kvm_device *kdev = to_kvmdev(vdev);
int i;
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 1b83159..bf2d130 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -635,7 +635,7 @@ out:
static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
unsigned long *indicatorp = NULL;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 7d3e5d0..0c3691f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -388,7 +388,7 @@ static int init_vqs(struct virtio_balloon *vb)
{
struct virtqueue *vqs[3];
vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- const char *names[] = { "inflate", "deflate", "stats" };
+ static const char * const names[] = { "inflate", "deflate", "stats" };
int err, nvqs;
/*
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index c96944b..350a2a5 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
struct virtqueue *vqs[2];
vq_callback_t *cbs[] = { virtinput_recv_events,
virtinput_recv_status };
- static const char *names[] = { "events", "status" };
+ static const char * const names[] = { "events", "status" };
int err;
err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index f499d9d..745c6ee 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -482,7 +482,7 @@ error_available:
static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 2046a68..f6bed86 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev)
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[],
+ const char * const names[],
bool use_msix,
bool per_vq_vectors)
{
@@ -376,7 +376,7 @@ error_find:
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
int err;
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index b976d96..2cc2522 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev);
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[]);
+ const char * const names[]);
const char *vp_bus_name(struct virtio_device *vdev);
/* Setup the affinity for a virtqueue:
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 8e5cf19..c0c11fa 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -418,7 +418,7 @@ err_new_queue:
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index e5ce8ab..6e6cb0c 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -70,7 +70,7 @@ struct virtio_config_ops {
int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[]);
+ const char * const names[]);
void (*del_vqs)(struct virtio_device *);
u64 (*get_features)(struct virtio_device *vdev);
int (*finalize_features)(struct virtio_device *vdev);
--
2.8.0.rc3

View File

@ -0,0 +1,77 @@
From ba1dc9346ba14b24f22ea04db21011f2874c3a67 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Sat, 21 Nov 2015 18:39:17 +0100
Subject: [PATCH 2/9] VSOCK: constify vmci_transport_notify_ops structures
The vmci_transport_notify_ops structures are never modified, so declare
them as const.
Done with the help of Coccinelle.
Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3b22dae38db1cea9ead3229f08cfb0b69aca5706)
---
net/vmw_vsock/vmci_transport.h | 2 +-
net/vmw_vsock/vmci_transport_notify.c | 2 +-
net/vmw_vsock/vmci_transport_notify.h | 5 +++--
net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +-
4 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 2ad46f3..1820e74 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -121,7 +121,7 @@ struct vmci_transport {
u64 queue_pair_max_size;
u32 detach_sub_id;
union vmci_transport_notify notify;
- struct vmci_transport_notify_ops *notify_ops;
+ const struct vmci_transport_notify_ops *notify_ops;
struct list_head elem;
struct sock *sk;
spinlock_t lock; /* protects sk. */
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 9b7f207..fd8cf02 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
}
/* Socket control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
index 7df7932..3c464d3 100644
--- a/net/vmw_vsock/vmci_transport_notify.h
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops {
void (*process_negotiate) (struct sock *sk);
};
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
+extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
+extern const
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index dc9c792..21e591d 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue(
}
/* Socket always on control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
--
2.8.0.rc3

View File

@ -0,0 +1,164 @@
From e4d5bbced996499e080e790508113985963a55d5 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Thu, 4 Feb 2016 10:50:45 -0800
Subject: [PATCH 3/9] vsock: Fix blocking ops call in prepare_to_wait
We received a bug report from someone using vmware:
WARNING: CPU: 3 PID: 660 at kernel/sched/core.c:7389
__might_sleep+0x7d/0x90()
do not call blocking ops when !TASK_RUNNING; state=1 set at
[<ffffffff810fa68d>] prepare_to_wait+0x2d/0x90
Modules linked in: vmw_vsock_vmci_transport vsock snd_seq_midi
snd_seq_midi_event snd_ens1371 iosf_mbi gameport snd_rawmidi
snd_ac97_codec ac97_bus snd_seq coretemp snd_seq_device snd_pcm
snd_timer snd soundcore ppdev crct10dif_pclmul crc32_pclmul
ghash_clmulni_intel vmw_vmci vmw_balloon i2c_piix4 shpchp parport_pc
parport acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc btrfs
xor raid6_pq 8021q garp stp llc mrp crc32c_intel serio_raw mptspi vmwgfx
drm_kms_helper ttm drm scsi_transport_spi mptscsih e1000 ata_generic
mptbase pata_acpi
CPU: 3 PID: 660 Comm: vmtoolsd Not tainted
4.2.0-0.rc1.git3.1.fc23.x86_64 #1
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop
Reference Platform, BIOS 6.00 05/20/2014
0000000000000000 0000000049e617f3 ffff88006ac37ac8 ffffffff818641f5
0000000000000000 ffff88006ac37b20 ffff88006ac37b08 ffffffff810ab446
ffff880068009f40 ffffffff81c63bc0 0000000000000061 0000000000000000
Call Trace:
[<ffffffff818641f5>] dump_stack+0x4c/0x65
[<ffffffff810ab446>] warn_slowpath_common+0x86/0xc0
[<ffffffff810ab4d5>] warn_slowpath_fmt+0x55/0x70
[<ffffffff8112551d>] ? debug_lockdep_rcu_enabled+0x1d/0x20
[<ffffffff810fa68d>] ? prepare_to_wait+0x2d/0x90
[<ffffffff810fa68d>] ? prepare_to_wait+0x2d/0x90
[<ffffffff810da2bd>] __might_sleep+0x7d/0x90
[<ffffffff812163b3>] __might_fault+0x43/0xa0
[<ffffffff81430477>] copy_from_iter+0x87/0x2a0
[<ffffffffa039460a>] __qp_memcpy_to_queue+0x9a/0x1b0 [vmw_vmci]
[<ffffffffa0394740>] ? qp_memcpy_to_queue+0x20/0x20 [vmw_vmci]
[<ffffffffa0394757>] qp_memcpy_to_queue_iov+0x17/0x20 [vmw_vmci]
[<ffffffffa0394d50>] qp_enqueue_locked+0xa0/0x140 [vmw_vmci]
[<ffffffffa039593f>] vmci_qpair_enquev+0x4f/0xd0 [vmw_vmci]
[<ffffffffa04847bb>] vmci_transport_stream_enqueue+0x1b/0x20
[vmw_vsock_vmci_transport]
[<ffffffffa047ae05>] vsock_stream_sendmsg+0x2c5/0x320 [vsock]
[<ffffffff810fabd0>] ? wake_atomic_t_function+0x70/0x70
[<ffffffff81702af8>] sock_sendmsg+0x38/0x50
[<ffffffff81702ff4>] SYSC_sendto+0x104/0x190
[<ffffffff8126e25a>] ? vfs_read+0x8a/0x140
[<ffffffff817042ee>] SyS_sendto+0xe/0x10
[<ffffffff8186d9ae>] entry_SYSCALL_64_fastpath+0x12/0x76
transport->stream_enqueue may call copy_to_user so it should
not be called inside a prepare_to_wait. Narrow the scope of
the prepare_to_wait to avoid the bad call. The same applies
to vsock_stream_recvmsg.
Reported-by: Vinson Lee <vlee@freedesktop.org>
Tested-by: Vinson Lee <vlee@freedesktop.org>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 5988818008257ca42010d6b43a3e0e48afec9898)
---
net/vmw_vsock/af_vsock.c | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220..bbe65dc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1557,8 +1557,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
while (total_written < len) {
ssize_t written;
@@ -1578,7 +1576,9 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
@@ -1588,8 +1588,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_wait;
}
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
/* These checks occur both as part of and after the loop
@@ -1635,7 +1633,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
out_wait:
if (total_written > 0)
err = total_written;
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
@@ -1716,7 +1713,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (1) {
s64 ready = vsock_stream_has_data(vsk);
@@ -1727,7 +1723,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
*/
err = -ENOMEM;
- goto out_wait;
+ goto out;
} else if (ready > 0) {
ssize_t read;
@@ -1750,7 +1746,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
vsk, target, read,
!(flags & MSG_PEEK), &recv_data);
if (err < 0)
- goto out_wait;
+ goto out;
if (read >= target || flags & MSG_PEEK)
break;
@@ -1773,7 +1769,9 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
break;
release_sock(sk);
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(sk), &wait);
lock_sock(sk);
if (signal_pending(current)) {
@@ -1783,9 +1781,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = -EAGAIN;
break;
}
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
}
@@ -1816,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
err = copied;
}
-out_wait:
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
--
2.8.0.rc3

View File

@ -0,0 +1,58 @@
From 99f109b560a95f17f7d034a48c8479c37a685c5a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 17 Dec 2015 11:10:21 +0800
Subject: [PATCH 4/9] VSOCK: transport-specific vsock_transport functions
struct vsock_transport contains function pointers called by AF_VSOCK
core code. The transport may want its own transport-specific function
pointers and they can be added after struct vsock_transport.
Allow the transport to fetch vsock_transport. It can downcast it to
access transport-specific function pointers.
The virtio transport will use this.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7740f7aafc9e6f415e8b6d5e8421deae63033b8d)
---
include/net/af_vsock.h | 3 +++
net/vmw_vsock/af_vsock.c | 9 +++++++++
2 files changed, 12 insertions(+)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index e9eb2d6..23f5525 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -165,6 +165,9 @@ static inline int vsock_core_init(const struct vsock_transport *t)
}
void vsock_core_exit(void);
+/* The transport may downcast this to access transport-specific functions */
+const struct vsock_transport *vsock_core_get_transport(void);
+
/**** UTILS ****/
void vsock_release_pending(struct sock *pending);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bbe65dc..1e5f5ed 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1987,6 +1987,15 @@ void vsock_core_exit(void)
}
EXPORT_SYMBOL_GPL(vsock_core_exit);
+const struct vsock_transport *vsock_core_get_transport(void)
+{
+ /* vsock_register_mutex not taken since only the transport uses this
+ * function and only while registered.
+ */
+ return transport;
+}
+EXPORT_SYMBOL_GPL(vsock_core_get_transport);
+
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.1.0-k");
--
2.8.0.rc3

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,637 @@
From 9e5562c5785843a8247d172112302fb8d1cdfcec Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 13 Jun 2013 18:28:48 +0800
Subject: [PATCH 6/9] VSOCK: Introduce virtio_transport.ko
VM sockets virtio transport implementation. This driver runs in the
guest.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v5:
* Add transport reset event handling
* Drop ctrl virtqueue
v4:
* Add MAINTAINERS file entry
* Drop short/long rx packets
* checkpatch.pl cleanups
* Clarify locking in struct virtio_vsock
* Narrow local variable scopes as suggested by Alex Bennee
* Call wake_up() after decrementing total_tx_buf to avoid deadlock
v2:
* Fix total_tx_buf accounting
* Add virtio_transport global mutex to prevent races
(cherry picked from commit 1b5c3d05a4c0c1dc87d29d3bee701cf1c84cd5d3)
---
MAINTAINERS | 1 +
net/vmw_vsock/virtio_transport.c | 584 +++++++++++++++++++++++++++++++++++++++
2 files changed, 585 insertions(+)
create mode 100644 net/vmw_vsock/virtio_transport.c
diff --git a/MAINTAINERS b/MAINTAINERS
index fab150d..403c4cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11404,6 +11404,7 @@ S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
+F: net/vmw_vsock/virtio_transport.c
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
new file mode 100644
index 0000000..45472e0
--- /dev/null
+++ b/net/vmw_vsock/virtio_transport.c
@@ -0,0 +1,584 @@
+/*
+ * virtio transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s
+ * early virtio-vsock proof-of-concept bits.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_vsock.h>
+#include <net/sock.h>
+#include <linux/mutex.h>
+#include <net/af_vsock.h>
+
+static struct workqueue_struct *virtio_vsock_workqueue;
+static struct virtio_vsock *the_virtio_vsock;
+static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock);
+
+struct virtio_vsock {
+ struct virtio_device *vdev;
+ struct virtqueue *vqs[VSOCK_VQ_MAX];
+
+ /* Virtqueue processing is deferred to a workqueue */
+ struct work_struct tx_work;
+ struct work_struct rx_work;
+ struct work_struct event_work;
+
+ wait_queue_head_t tx_wait; /* for waiting for tx resources */
+
+ /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX]
+ * must be accessed with tx_lock held.
+ */
+ struct mutex tx_lock;
+ u32 total_tx_buf;
+
+ /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
+ * must be accessed with rx_lock held.
+ */
+ struct mutex rx_lock;
+ int rx_buf_nr;
+ int rx_buf_max_nr;
+
+ /* The following fields are protected by event_lock.
+ * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
+ */
+ struct mutex event_lock;
+ struct virtio_vsock_event event_list[8];
+
+ u32 guest_cid;
+};
+
+static struct virtio_vsock *virtio_vsock_get(void)
+{
+ return the_virtio_vsock;
+}
+
+static u32 virtio_transport_get_local_cid(void)
+{
+ struct virtio_vsock *vsock = virtio_vsock_get();
+
+ return vsock->guest_cid;
+}
+
+static int
+virtio_transport_send_one_pkt(struct virtio_vsock *vsock,
+ struct virtio_vsock_pkt *pkt)
+{
+ struct scatterlist hdr, buf, *sgs[2];
+ int ret, in_sg = 0, out_sg = 0;
+ struct virtqueue *vq;
+ DEFINE_WAIT(wait);
+
+ vq = vsock->vqs[VSOCK_VQ_TX];
+
+ /* Put pkt in the virtqueue */
+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sgs[out_sg++] = &hdr;
+ if (pkt->buf) {
+ sg_init_one(&buf, pkt->buf, pkt->len);
+ sgs[out_sg++] = &buf;
+ }
+
+ mutex_lock(&vsock->tx_lock);
+ while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt,
+ GFP_KERNEL)) < 0) {
+ prepare_to_wait_exclusive(&vsock->tx_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&vsock->tx_lock);
+ schedule();
+ mutex_lock(&vsock->tx_lock);
+ finish_wait(&vsock->tx_wait, &wait);
+ }
+ virtqueue_kick(vq);
+ mutex_unlock(&vsock->tx_lock);
+
+ return pkt->len;
+}
+
+static int
+virtio_transport_send_pkt_no_sock(struct virtio_vsock_pkt *pkt)
+{
+ struct virtio_vsock *vsock;
+
+ vsock = virtio_vsock_get();
+ if (!vsock) {
+ virtio_transport_free_pkt(pkt);
+ return -ENODEV;
+ }
+
+ return virtio_transport_send_one_pkt(vsock, pkt);
+}
+
+static int
+virtio_transport_send_pkt(struct vsock_sock *vsk,
+ struct virtio_vsock_pkt_info *info)
+{
+ u32 src_cid, src_port, dst_cid, dst_port;
+ struct virtio_vsock_sock *vvs;
+ struct virtio_vsock_pkt *pkt;
+ struct virtio_vsock *vsock;
+ u32 pkt_len = info->pkt_len;
+ DEFINE_WAIT(wait);
+
+ vsock = virtio_vsock_get();
+ if (!vsock)
+ return -ENODEV;
+
+ src_cid = virtio_transport_get_local_cid();
+ src_port = vsk->local_addr.svm_port;
+ if (!info->remote_cid) {
+ dst_cid = vsk->remote_addr.svm_cid;
+ dst_port = vsk->remote_addr.svm_port;
+ } else {
+ dst_cid = info->remote_cid;
+ dst_port = info->remote_port;
+ }
+
+ vvs = vsk->trans;
+
+ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
+ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+ pkt_len = virtio_transport_get_credit(vvs, pkt_len);
+ /* Do not send zero length OP_RW pkt*/
+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+ return pkt_len;
+
+ /* Respect global tx buf limitation */
+ mutex_lock(&vsock->tx_lock);
+ while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) {
+ prepare_to_wait_exclusive(&vsock->tx_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&vsock->tx_lock);
+ schedule();
+ mutex_lock(&vsock->tx_lock);
+ finish_wait(&vsock->tx_wait, &wait);
+ }
+ vsock->total_tx_buf += pkt_len;
+ mutex_unlock(&vsock->tx_lock);
+
+ pkt = virtio_transport_alloc_pkt(info, pkt_len,
+ src_cid, src_port,
+ dst_cid, dst_port);
+ if (!pkt) {
+ mutex_lock(&vsock->tx_lock);
+ vsock->total_tx_buf -= pkt_len;
+ mutex_unlock(&vsock->tx_lock);
+ virtio_transport_put_credit(vvs, pkt_len);
+ wake_up(&vsock->tx_wait);
+ return -ENOMEM;
+ }
+
+ virtio_transport_inc_tx_pkt(vvs, pkt);
+
+ return virtio_transport_send_one_pkt(vsock, pkt);
+}
+
+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
+{
+ int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+ struct virtio_vsock_pkt *pkt;
+ struct scatterlist hdr, buf, *sgs[2];
+ struct virtqueue *vq;
+ int ret;
+
+ vq = vsock->vqs[VSOCK_VQ_RX];
+
+ do {
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ break;
+
+ pkt->buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!pkt->buf) {
+ virtio_transport_free_pkt(pkt);
+ break;
+ }
+
+ pkt->len = buf_len;
+
+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sgs[0] = &hdr;
+
+ sg_init_one(&buf, pkt->buf, buf_len);
+ sgs[1] = &buf;
+ ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
+ if (ret) {
+ virtio_transport_free_pkt(pkt);
+ break;
+ }
+ vsock->rx_buf_nr++;
+ } while (vq->num_free);
+ if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
+ vsock->rx_buf_max_nr = vsock->rx_buf_nr;
+ virtqueue_kick(vq);
+}
+
+static void virtio_transport_send_pkt_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, tx_work);
+ struct virtqueue *vq;
+ bool added = false;
+
+ vq = vsock->vqs[VSOCK_VQ_TX];
+ mutex_lock(&vsock->tx_lock);
+ do {
+ struct virtio_vsock_pkt *pkt;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
+ vsock->total_tx_buf -= pkt->len;
+ virtio_transport_free_pkt(pkt);
+ added = true;
+ }
+ } while (!virtqueue_enable_cb(vq));
+ mutex_unlock(&vsock->tx_lock);
+
+ if (added)
+ wake_up(&vsock->tx_wait);
+}
+
+/*
+ * RX worker: hand packets the device has filled to the common transport code.
+ *
+ * Runs with vsock->rx_lock held for the whole pass.  Buffers whose used
+ * length is shorter than the header, or longer than the header plus the
+ * posted payload size, are freed without being delivered.  When fewer than
+ * half of the high-water number of buffers remain posted, the queue is
+ * refilled.
+ */
+static void virtio_transport_recv_pkt_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, rx_work);
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_RX];
+ mutex_lock(&vsock->rx_lock);
+ do {
+ struct virtio_vsock_pkt *pkt;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
+ vsock->rx_buf_nr--;
+
+ /* Drop short/long packets */
+ if (unlikely(len < sizeof(pkt->hdr) ||
+ len > sizeof(pkt->hdr) + pkt->len)) {
+ virtio_transport_free_pkt(pkt);
+ continue;
+ }
+
+ /* len was the buffer capacity; make it the received payload length */
+ pkt->len = len - sizeof(pkt->hdr);
+ virtio_transport_recv_pkt(pkt);
+ }
+ } while (!virtqueue_enable_cb(vq));
+
+ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
+ virtio_vsock_rx_fill(vsock);
+ mutex_unlock(&vsock->rx_lock);
+}
+
+/*
+ * Queue one event buffer on the event virtqueue as device-writable.
+ * Returns the result of virtqueue_add_inbuf().  event_lock must be held.
+ */
+static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
+				       struct virtio_vsock_event *event)
+{
+	struct virtqueue *event_vq = vsock->vqs[VSOCK_VQ_EVENT];
+	struct scatterlist sg;
+
+	sg_init_one(&sg, event, sizeof(*event));
+	return virtqueue_add_inbuf(event_vq, &sg, 1, event, GFP_KERNEL);
+}
+
+/*
+ * Queue every entry of vsock->event_list on the event virtqueue, then kick
+ * the device once.  Failures to queue an entry are not reported.
+ * event_lock must be held.
+ */
+static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++)
+		virtio_vsock_event_fill_one(vsock, &vsock->event_list[i]);
+
+	virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
+}
+
+/*
+ * Mark one socket as reset: under the socket lock, set its state to
+ * SS_UNCONNECTED, record ECONNRESET and invoke its error-report callback.
+ * Passed to vsock_for_each_connected_socket() on a transport reset event.
+ */
+static void virtio_vsock_reset_sock(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_state = SS_UNCONNECTED;
+ sk->sk_err = ECONNRESET;
+ sk->sk_error_report(sk);
+ release_sock(sk);
+}
+
+/*
+ * Read the guest_cid field from the device config space and cache it in
+ * vsock->guest_cid, converted from little-endian to CPU byte order.
+ */
+static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
+{
+	struct virtio_device *vdev = vsock->vdev;
+	const unsigned int cid_offset =
+		offsetof(struct virtio_vsock_config, guest_cid);
+	u32 raw_cid;
+
+	vdev->config->get(vdev, cid_offset, &raw_cid, sizeof(raw_cid));
+	vsock->guest_cid = le32_to_cpu(raw_cid);
+}
+
+/*
+ * Act on one event received from the device.  Only
+ * VIRTIO_VSOCK_EVENT_TRANSPORT_RESET is handled: the guest CID is re-read
+ * from config space and every connected socket is reset.  Any other event
+ * id is silently ignored.  event_lock must be held.
+ */
+static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
+ struct virtio_vsock_event *event)
+{
+ switch (le32_to_cpu(event->id)) {
+ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
+ virtio_vsock_update_guest_cid(vsock);
+ vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ break;
+ }
+}
+
+/*
+ * Event worker: drain the event virtqueue under event_lock.
+ *
+ * Each returned buffer is handled only if the device wrote exactly
+ * sizeof(struct virtio_vsock_event) bytes; either way the same buffer is
+ * queued again so the device can reuse it.  The device is kicked once at
+ * the end of the pass.
+ */
+static void virtio_transport_event_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, event_work);
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_EVENT];
+
+ mutex_lock(&vsock->event_lock);
+
+ do {
+ struct virtio_vsock_event *event;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
+ /* Events of an unexpected size are recycled without handling */
+ if (len == sizeof(*event))
+ virtio_vsock_event_handle(vsock, event);
+
+ virtio_vsock_event_fill_one(vsock, event);
+ }
+ } while (!virtqueue_enable_cb(vq));
+
+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
+
+ mutex_unlock(&vsock->event_lock);
+}
+
+/* Event virtqueue callback: defer processing to the event worker */
+static void virtio_vsock_event_done(struct virtqueue *vq)
+{
+	struct virtio_vsock *vsock = vq->vdev->priv;
+
+	/* Nothing to do while no driver state is attached to the device */
+	if (vsock)
+		queue_work(virtio_vsock_workqueue, &vsock->event_work);
+}
+
+/* TX virtqueue callback: defer completion handling to the TX worker */
+static void virtio_vsock_tx_done(struct virtqueue *vq)
+{
+	struct virtio_vsock *vsock = vq->vdev->priv;
+
+	/* Nothing to do while no driver state is attached to the device */
+	if (vsock)
+		queue_work(virtio_vsock_workqueue, &vsock->tx_work);
+}
+
+/* RX virtqueue callback: defer packet delivery to the RX worker */
+static void virtio_vsock_rx_done(struct virtqueue *vq)
+{
+	struct virtio_vsock *vsock = vq->vdev->priv;
+
+	/* Nothing to do while no driver state is attached to the device */
+	if (vsock)
+		queue_work(virtio_vsock_workqueue, &vsock->rx_work);
+}
+
+/*
+ * Transport operations registered with the vsock core by the guest driver.
+ * Apart from get_local_cid and the two send_pkt hooks, every callback is a
+ * virtio_transport_* helper that is not defined in this part of the file.
+ */
+static struct virtio_transport virtio_transport = {
+ .transport = {
+ .get_local_cid = virtio_transport_get_local_cid,
+
+ /* Socket lifecycle */
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+
+ /* Datagram sockets */
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ /* Stream sockets */
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ /* Poll/send/receive notification hooks */
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ /* Buffer size accessors */
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ /* Packet transmit hooks implemented by this driver */
+ .send_pkt = virtio_transport_send_pkt,
+ .send_pkt_no_sock = virtio_transport_send_pkt_no_sock,
+};
+
+/*
+ * Bind the driver to a virtio-vsock device.
+ *
+ * Allocates the per-device state, finds the rx/tx/event virtqueues, reads
+ * the guest CID, registers the transport with the vsock core and posts the
+ * initial receive and event buffers.  Only one device per guest is
+ * supported; a second probe fails with -EBUSY.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int virtio_vsock_probe(struct virtio_device *vdev)
+{
+	vq_callback_t *callbacks[] = {
+		virtio_vsock_rx_done,
+		virtio_vsock_tx_done,
+		virtio_vsock_event_done,
+	};
+	static const char * const names[] = {
+		"rx",
+		"tx",
+		"event",
+	};
+	struct virtio_vsock *vsock = NULL;
+	int ret;
+
+	ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
+	if (ret)
+		return ret;
+
+	/* Only one virtio-vsock device per guest is supported */
+	if (the_virtio_vsock) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+	if (!vsock) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	vsock->vdev = vdev;
+
+	ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+					    vsock->vqs, callbacks, names);
+	if (ret < 0)
+		goto out;
+
+	virtio_vsock_update_guest_cid(vsock);
+
+	ret = vsock_core_init(&virtio_transport.transport);
+	if (ret < 0)
+		goto out_vqs;
+
+	vsock->rx_buf_nr = 0;
+	vsock->rx_buf_max_nr = 0;
+
+	init_waitqueue_head(&vsock->tx_wait);
+	mutex_init(&vsock->tx_lock);
+	mutex_init(&vsock->rx_lock);
+	mutex_init(&vsock->event_lock);
+	INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work);
+	INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work);
+	INIT_WORK(&vsock->event_work, virtio_transport_event_work);
+
+	/*
+	 * Publish the device state only once its locks, wait queue and work
+	 * items are initialised: the virtqueue callbacks queue work as soon
+	 * as vdev->priv is non-NULL, and the_virtio_vsock makes the state
+	 * reachable from the rest of the driver.  This must still happen
+	 * before buffers are posted so that completions are not dropped.
+	 */
+	vdev->priv = vsock;
+	the_virtio_vsock = vsock;
+
+	mutex_lock(&vsock->rx_lock);
+	virtio_vsock_rx_fill(vsock);
+	mutex_unlock(&vsock->rx_lock);
+
+	mutex_lock(&vsock->event_lock);
+	virtio_vsock_event_fill(vsock);
+	mutex_unlock(&vsock->event_lock);
+
+	mutex_unlock(&the_virtio_vsock_mutex);
+	return 0;
+
+out_vqs:
+	vsock->vdev->config->del_vqs(vsock->vdev);
+out:
+	/* kfree(NULL) is a no-op, so this also covers the early exits */
+	kfree(vsock);
+	mutex_unlock(&the_virtio_vsock_mutex);
+	return ret;
+}
+
+/*
+ * Unbind the driver from the device.
+ *
+ * Flushes the three workers, resets the device, frees every packet that
+ * is still posted on the RX and TX virtqueues, unregisters the transport
+ * and finally deletes the virtqueues and the per-device state.
+ */
+static void virtio_vsock_remove(struct virtio_device *vdev)
+{
+	struct virtio_vsock *vsock = vdev->priv;
+	struct virtio_vsock_pkt *pkt;
+
+	flush_work(&vsock->rx_work);
+	flush_work(&vsock->tx_work);
+	flush_work(&vsock->event_work);
+
+	vdev->config->reset(vdev);
+
+	/*
+	 * Buffers still sitting in the rings are owned by the driver again
+	 * after the reset.  Both queues use the packet pointer as the buffer
+	 * token, so detach and free them here; otherwise they would leak
+	 * when the virtqueues are deleted.  Event buffers live inside *vsock
+	 * and are released by the kfree() below.
+	 */
+	mutex_lock(&vsock->rx_lock);
+	while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
+		virtio_transport_free_pkt(pkt);
+	mutex_unlock(&vsock->rx_lock);
+
+	mutex_lock(&vsock->tx_lock);
+	while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
+		virtio_transport_free_pkt(pkt);
+	mutex_unlock(&vsock->tx_lock);
+
+	mutex_lock(&the_virtio_vsock_mutex);
+	the_virtio_vsock = NULL;
+	vsock_core_exit();
+	mutex_unlock(&the_virtio_vsock_mutex);
+
+	vdev->config->del_vqs(vdev);
+
+	kfree(vsock);
+}
+
+/* Devices this driver binds to: the vsock device ID, with VIRTIO_DEV_ANY_ID */
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+/* Empty: this driver does not list any feature bits */
+static unsigned int features[] = {
+};
+
+/* virtio driver registration record; see virtio_vsock_init() */
+static struct virtio_driver virtio_vsock_driver = {
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtio_vsock_probe,
+ .remove = virtio_vsock_remove,
+};
+
+/*
+ * Module entry point: create the driver workqueue, then register the
+ * virtio driver.  The workqueue is destroyed again if registration fails.
+ * Returns 0 on success or a negative errno.
+ */
+static int __init virtio_vsock_init(void)
+{
+	int err;
+
+	virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
+	if (!virtio_vsock_workqueue)
+		return -ENOMEM;
+
+	err = register_virtio_driver(&virtio_vsock_driver);
+	if (!err)
+		return 0;
+
+	destroy_workqueue(virtio_vsock_workqueue);
+	return err;
+}
+
+/* Module exit: unregister the driver first, then destroy its workqueue */
+static void __exit virtio_vsock_exit(void)
+{
+ unregister_virtio_driver(&virtio_vsock_driver);
+ destroy_workqueue(virtio_vsock_workqueue);
+}
+
+module_init(virtio_vsock_init);
+module_exit(virtio_vsock_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Asias He");
+MODULE_DESCRIPTION("virtio transport for vsock");
+MODULE_DEVICE_TABLE(virtio, id_table);
--
2.8.0.rc3

View File

@ -0,0 +1,772 @@
From 2da9f4eef909efa13574326bf29efab439bcc77c Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 13 Jun 2013 18:29:21 +0800
Subject: [PATCH 7/9] VSOCK: Introduce vhost_vsock.ko
VM sockets vhost transport implementation. This driver runs on the
host.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v5:
* Only take rx/tx virtqueues, userspace handles the other virtqueues
* Explicitly skip instances without a CID when transferring packets
* Add VHOST_VSOCK_START ioctl to begin vhost virtqueue processing
* Reset established connections when device is closed
v4:
* Add MAINTAINERS file entry
* virtqueue used len is now sizeof(pkt->hdr) + pkt->len instead of just
pkt->len
* checkpatch.pl cleanups
* Clarify struct vhost_vsock locking
* Add comments about optimization that disables virtqueue notify
* Drop unused vhost_vsock_handle_ctl_kick()
* Call wake_up() after decrementing total_tx_buf to prevent deadlock
v3:
* Remove unneeded variable used to store return value
(Fengguang Wu <fengguang.wu@intel.com> and Julia Lawall
<julia.lawall@lip6.fr>)
v2:
* Add missing total_tx_buf decrement
* Support flexible rx/tx descriptor layout
* Refuse to assign reserved CIDs
* Refuse guest CID if already in use
* Only accept correctly addressed packets
(cherry picked from commit 40d1901b66f0250e91094df07955edf57cf2f41e)
---
MAINTAINERS | 2 +
drivers/vhost/vsock.c | 694 ++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/vhost/vsock.h | 5 +
3 files changed, 701 insertions(+)
create mode 100644 drivers/vhost/vsock.c
create mode 100644 drivers/vhost/vsock.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 403c4cc..530bce8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11405,6 +11405,8 @@ F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
+F: drivers/vhost/vsock.c
+F: drivers/vhost/vsock.h
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
new file mode 100644
index 0000000..8488d01
--- /dev/null
+++ b/drivers/vhost/vsock.c
@@ -0,0 +1,694 @@
+/*
+ * vhost transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/sock.h>
+#include <linux/virtio_vsock.h>
+#include <linux/vhost.h>
+
+#include <net/af_vsock.h>
+#include "vhost.h"
+#include "vsock.h"
+
+#define VHOST_VSOCK_DEFAULT_HOST_CID 2
+
+enum {
+ VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+};
+
+/* Used to track all the vhost_vsock instances on the system. */
+static LIST_HEAD(vhost_vsock_list);
+static DEFINE_MUTEX(vhost_vsock_mutex);
+
+/* Per-open-file state of the vhost-vsock device (one per guest) */
+struct vhost_vsock {
+ struct vhost_dev dev;
+ /* Indexed by VSOCK_VQ_RX / VSOCK_VQ_TX */
+ struct vhost_virtqueue vqs[2];
+
+ /* Link to global vhost_vsock_list, protected by vhost_vsock_mutex */
+ struct list_head list;
+
+ /* Worker item that pushes send_pkt_list to the guest */
+ struct vhost_work send_pkt_work;
+ /* Senders sleep here while total_tx_buf is at its limit */
+ wait_queue_head_t send_wait;
+
+ /* Fields protected by vqs[VSOCK_VQ_RX].mutex */
+ struct list_head send_pkt_list; /* host->guest pending packets */
+ u32 total_tx_buf;
+
+ /* 0 until assigned through VHOST_VSOCK_SET_GUEST_CID */
+ u32 guest_cid;
+};
+
+/* The host side always reports the fixed CID VHOST_VSOCK_DEFAULT_HOST_CID */
+static u32 vhost_transport_get_local_cid(void)
+{
+ return VHOST_VSOCK_DEFAULT_HOST_CID;
+}
+
+/*
+ * Look up the vhost_vsock instance whose guest CID equals @guest_cid.
+ *
+ * The global list is walked under vhost_vsock_mutex.  Instances that have
+ * no CID yet (guest_cid == 0) never match.  Returns the instance or NULL;
+ * no reference is taken on the result.
+ */
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+{
+	struct vhost_vsock *match = NULL;
+	struct vhost_vsock *cur;
+
+	mutex_lock(&vhost_vsock_mutex);
+	list_for_each_entry(cur, &vhost_vsock_list, list) {
+		u32 cid = cur->guest_cid;
+
+		if (cid != 0 && cid == guest_cid) {
+			match = cur;
+			break;
+		}
+	}
+	mutex_unlock(&vhost_vsock_mutex);
+
+	return match;
+}
+
+/*
+ * Copy pending host->guest packets into buffers the guest has posted on @vq.
+ *
+ * Runs under vq->mutex.  For each packet on vsock->send_pkt_list a
+ * descriptor chain is fetched, the header and then the payload are copied
+ * into it, and the chain is returned as used.  The loop ends when the list
+ * is empty, when no descriptor is available, or on the first error.  If at
+ * least one packet was delivered the guest is signalled and sleepers on
+ * vsock->send_wait are woken.
+ */
+static void
+vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
+ struct vhost_virtqueue *vq)
+{
+ bool added = false;
+
+ mutex_lock(&vq->mutex);
+
+ /* Avoid further vmexits, we're already processing the virtqueue */
+ vhost_disable_notify(&vsock->dev, vq);
+
+ for (;;) {
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ unsigned out, in;
+ size_t nbytes;
+ size_t len;
+ int head;
+
+ if (list_empty(&vsock->send_pkt_list)) {
+ vhost_enable_notify(&vsock->dev, vq);
+ break;
+ }
+
+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head < 0)
+ break;
+
+ if (head == vq->num) {
+ /* We cannot finish yet if more buffers snuck in while
+ * re-enabling notify.
+ */
+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+ vhost_disable_notify(&vsock->dev, vq);
+ continue;
+ }
+ break;
+ }
+
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+
+ /*
+ * NOTE(review): on the three error paths below the packet is
+ * freed without subtracting pkt->len from total_tx_buf and
+ * without returning the descriptor chain - confirm intended.
+ */
+ if (out) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Expected 0 output buffers, got %u\n", out);
+ break;
+ }
+
+ len = iov_length(&vq->iov[out], in);
+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+
+ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt hdr\n");
+ break;
+ }
+
+ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt buf\n");
+ break;
+ }
+
+ /* Used length covers the header plus the payload */
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ added = true;
+
+ /*
+ * NOTE(review): packets queued through
+ * vhost_transport_send_pkt_no_sock() were never added to
+ * total_tx_buf; presumably they carry no payload - confirm.
+ */
+ vsock->total_tx_buf -= pkt->len;
+
+ virtio_transport_free_pkt(pkt);
+ }
+ if (added)
+ vhost_signal(&vsock->dev, vq);
+ mutex_unlock(&vq->mutex);
+
+ if (added)
+ wake_up(&vsock->send_wait);
+}
+
+/*
+ * vhost work item: push pending packets to the guest.  The guest receives
+ * on the virtqueue indexed by VSOCK_VQ_RX.
+ */
+static void vhost_transport_send_pkt_work(struct vhost_work *work)
+{
+	struct vhost_vsock *vsock =
+		container_of(work, struct vhost_vsock, send_pkt_work);
+
+	vhost_transport_do_send_pkt(vsock, &vsock->vqs[VSOCK_VQ_RX]);
+}
+
+/*
+ * Append @pkt to the host->guest pending list and schedule the send worker.
+ *
+ * Ownership of @pkt passes to the pending list.  Returns the payload
+ * length of the packet that was queued.
+ */
+static int
+vhost_transport_send_one_pkt(struct vhost_vsock *vsock,
+			     struct virtio_vsock_pkt *pkt)
+{
+	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_RX];
+	/*
+	 * Read the length before the packet is queued: once vq->mutex is
+	 * released the send worker may deliver and free the packet, so
+	 * touching pkt afterwards would be a use-after-free.
+	 */
+	int len = pkt->len;
+
+	/* Queue it up in vhost work */
+	mutex_lock(&vq->mutex);
+	list_add_tail(&pkt->list, &vsock->send_pkt_list);
+	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+	mutex_unlock(&vq->mutex);
+
+	return len;
+}
+
+/*
+ * Send an already-built packet that is not tied to a socket.
+ *
+ * The destination device is looked up by the dst_cid in the packet header.
+ * If no such device exists the packet is freed and -ENODEV is returned;
+ * otherwise the result of vhost_transport_send_one_pkt() is returned.
+ */
+static int
+vhost_transport_send_pkt_no_sock(struct virtio_vsock_pkt *pkt)
+{
+	u32 dst_cid = le32_to_cpu(pkt->hdr.dst_cid);
+	struct vhost_vsock *vsock = vhost_vsock_get(dst_cid);
+
+	if (vsock)
+		return vhost_transport_send_one_pkt(vsock, pkt);
+
+	virtio_transport_free_pkt(pkt);
+	return -ENODEV;
+}
+
+/*
+ * Build a packet for socket @vsk as described by @info and queue it for
+ * the destination guest.
+ *
+ * The destination is info->remote_cid/remote_port when remote_cid is
+ * non-zero, otherwise the socket's remote address.  The payload is capped
+ * at VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE and at the credit granted by
+ * virtio_transport_get_credit(), so fewer bytes than info->pkt_len may be
+ * sent.  The caller sleeps uninterruptibly while the device's
+ * total_tx_buf budget (VIRTIO_VSOCK_MAX_TX_BUF_SIZE) is exhausted.
+ *
+ * Returns the result of vhost_transport_send_one_pkt(), 0 when an OP_RW
+ * packet would be empty, -ENODEV if no device has the destination CID, or
+ * -ENOMEM if the packet cannot be allocated.
+ */
+static int
+vhost_transport_send_pkt(struct vsock_sock *vsk,
+ struct virtio_vsock_pkt_info *info)
+{
+ u32 src_cid, src_port, dst_cid, dst_port;
+ struct virtio_vsock_sock *vvs;
+ struct virtio_vsock_pkt *pkt;
+ struct vhost_virtqueue *vq;
+ struct vhost_vsock *vsock;
+ u32 pkt_len = info->pkt_len;
+ DEFINE_WAIT(wait);
+
+ src_cid = vhost_transport_get_local_cid();
+ src_port = vsk->local_addr.svm_port;
+ if (!info->remote_cid) {
+ dst_cid = vsk->remote_addr.svm_cid;
+ dst_port = vsk->remote_addr.svm_port;
+ } else {
+ dst_cid = info->remote_cid;
+ dst_port = info->remote_port;
+ }
+
+ /* Find the vhost_vsock according to guest context id */
+ vsock = vhost_vsock_get(dst_cid);
+ if (!vsock)
+ return -ENODEV;
+
+ vvs = vsk->trans;
+ vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ /* we can send less than pkt_len bytes */
+ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
+ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+
+ /* virtio_transport_get_credit might return less than pkt_len credit */
+ pkt_len = virtio_transport_get_credit(vvs, pkt_len);
+
+ /* Do not send zero length OP_RW pkt*/
+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+ return pkt_len;
+
+ /* Respect global tx buf limitation */
+ mutex_lock(&vq->mutex);
+ while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) {
+ prepare_to_wait_exclusive(&vsock->send_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ /* Drop the lock while asleep so the send worker can free budget */
+ mutex_unlock(&vq->mutex);
+ schedule();
+ mutex_lock(&vq->mutex);
+ finish_wait(&vsock->send_wait, &wait);
+ }
+ vsock->total_tx_buf += pkt_len;
+ mutex_unlock(&vq->mutex);
+
+ pkt = virtio_transport_alloc_pkt(info, pkt_len,
+ src_cid, src_port,
+ dst_cid, dst_port);
+ if (!pkt) {
+ /* Undo the budget reservation and the credit taken above */
+ mutex_lock(&vq->mutex);
+ vsock->total_tx_buf -= pkt_len;
+ mutex_unlock(&vq->mutex);
+ virtio_transport_put_credit(vvs, pkt_len);
+ wake_up(&vsock->send_wait);
+ return -ENOMEM;
+ }
+
+ virtio_transport_inc_tx_pkt(vvs, pkt);
+
+ return vhost_transport_send_one_pkt(vsock, pkt);
+}
+
+/*
+ * Build a packet from a descriptor chain the guest placed on its TX queue.
+ *
+ * The chain must consist of @out guest-readable buffers only (@in must be
+ * 0).  The header is copied first; for stream packets the payload length
+ * is taken from hdr.len, otherwise it stays 0 and a header-only packet is
+ * returned.  Payloads larger than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE are
+ * rejected.
+ *
+ * Returns the new packet, or NULL on a malformed chain, a short copy or
+ * an allocation failure.
+ */
+static struct virtio_vsock_pkt *
+vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
+ unsigned int out, unsigned int in)
+{
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ size_t nbytes;
+ size_t len;
+
+ if (in != 0) {
+ vq_err(vq, "Expected 0 input buffers, got %u\n", in);
+ return NULL;
+ }
+
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ return NULL;
+
+ len = iov_length(vq->iov, out);
+ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
+
+ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
+ sizeof(pkt->hdr), nbytes);
+ kfree(pkt);
+ return NULL;
+ }
+
+ /* The guest-supplied length is only trusted for stream packets */
+ if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
+ pkt->len = le32_to_cpu(pkt->hdr.len);
+
+ /* No payload */
+ if (!pkt->len)
+ return pkt;
+
+ /* The pkt is too big */
+ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
+ if (!pkt->buf) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
+ pkt->len, nbytes);
+ virtio_transport_free_pkt(pkt);
+ return NULL;
+ }
+
+ return pkt;
+}
+
+/*
+ * Kick handler for the guest's TX virtqueue: receive guest->host packets.
+ *
+ * Runs under vq->mutex.  Each available descriptor chain is turned into a
+ * packet; packets whose src_cid matches this device's guest CID are
+ * delivered to the common transport code, all others are freed.  The chain
+ * is then returned as used and the guest is signalled once at the end.
+ */
+static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
+{
+	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+						  poll.work);
+	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
+						 dev);
+	struct virtio_vsock_pkt *pkt;
+	int head;
+	unsigned int out, in;
+	bool added = false;
+
+	mutex_lock(&vq->mutex);
+	vhost_disable_notify(&vsock->dev, vq);
+	for (;;) {
+		u32 len;
+
+		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+					 &out, &in, NULL, NULL);
+		if (head < 0)
+			break;
+
+		if (head == vq->num) {
+			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+				vhost_disable_notify(&vsock->dev, vq);
+				continue;
+			}
+			break;
+		}
+
+		pkt = vhost_vsock_alloc_pkt(vq, out, in);
+		if (!pkt) {
+			vq_err(vq, "Faulted on pkt\n");
+			continue;
+		}
+
+		/*
+		 * Both branches below give up ownership of pkt (it is either
+		 * consumed by the transport or freed), so remember its length
+		 * now instead of dereferencing it afterwards.
+		 */
+		len = pkt->len;
+
+		/* Only accept correctly addressed packets */
+		if (le32_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
+			virtio_transport_recv_pkt(pkt);
+		else
+			virtio_transport_free_pkt(pkt);
+
+		/* sizeof() does not evaluate its operand, so this is safe */
+		vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
+		added = true;
+	}
+	if (added)
+		vhost_signal(&vsock->dev, vq);
+	mutex_unlock(&vq->mutex);
+}
+
+/*
+ * Kick handler for the guest's RX virtqueue: try to deliver pending
+ * host->guest packets into the kicked queue.
+ */
+static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
+{
+	struct vhost_virtqueue *rx_vq =
+		container_of(work, struct vhost_virtqueue, poll.work);
+	struct vhost_vsock *vsock =
+		container_of(rx_vq->dev, struct vhost_vsock, dev);
+
+	vhost_transport_do_send_pkt(vsock, rx_vq);
+}
+
+/*
+ * Handle VHOST_VSOCK_START: attach this device as the backend of both
+ * virtqueues.
+ *
+ * Runs under dev.mutex.  Fails if the caller does not own the device, or
+ * with -EFAULT if a virtqueue's rings are not accessible.  A virtqueue
+ * that already has private_data set is left untouched.  If any virtqueue
+ * fails the check, private_data is cleared on all of them.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int vhost_vsock_start(struct vhost_vsock *vsock)
+{
+ size_t i;
+ int ret;
+
+ mutex_lock(&vsock->dev.mutex);
+
+ ret = vhost_dev_check_owner(&vsock->dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+
+ if (!vhost_vq_access_ok(vq)) {
+ ret = -EFAULT;
+ mutex_unlock(&vq->mutex);
+ goto err_vq;
+ }
+
+ if (!vq->private_data) {
+ vq->private_data = vsock;
+ vhost_vq_init_access(vq);
+ }
+
+ mutex_unlock(&vq->mutex);
+ }
+
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+
+err_vq:
+ /* Roll back: detach every virtqueue, including ones attached earlier */
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+ }
+err:
+ mutex_unlock(&vsock->dev.mutex);
+ return ret;
+}
+
+/*
+ * Detach this device from both virtqueues.
+ *
+ * Mirrors vhost_vsock_start(): start marks a virtqueue as active by
+ * storing the device in private_data, so stopping must clear it again.
+ * Runs under dev.mutex and takes each virtqueue's mutex in turn.
+ */
+static void vhost_vsock_stop(struct vhost_vsock *vsock)
+{
+	size_t i;
+
+	mutex_lock(&vsock->dev.mutex);
+
+	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+		struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+		mutex_lock(&vq->mutex);
+		vq->private_data = NULL;
+		mutex_unlock(&vq->mutex);
+	}
+
+	mutex_unlock(&vsock->dev.mutex);
+}
+
+/*
+ * open() handler for the vhost-vsock misc device.
+ *
+ * Allocates a vhost_vsock instance and the virtqueue pointer array handed
+ * to vhost_dev_init(), installs the kick handlers, stores the instance in
+ * file->private_data and adds it to the global instance list.  The new
+ * instance has no guest CID yet.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
+{
+ struct vhost_virtqueue **vqs;
+ struct vhost_vsock *vsock;
+ int ret;
+
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+ if (!vsock)
+ return -ENOMEM;
+
+ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
+ if (!vqs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
+ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
+
+ /* vqs is freed through vsock->dev.vqs in the release handler */
+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
+
+ file->private_data = vsock;
+ init_waitqueue_head(&vsock->send_wait);
+ INIT_LIST_HEAD(&vsock->send_pkt_list);
+ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
+
+ mutex_lock(&vhost_vsock_mutex);
+ list_add_tail(&vsock->list, &vhost_vsock_list);
+ mutex_unlock(&vhost_vsock_mutex);
+ return 0;
+
+out:
+ kfree(vsock);
+ return ret;
+}
+
+/*
+ * Flush the poll work of every virtqueue that has a kick handler, then
+ * flush the send-packet work item.
+ */
+static void vhost_vsock_flush(struct vhost_vsock *vsock)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+		struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+		if (!vq->handle_kick)
+			continue;
+		vhost_poll_flush(&vq->poll);
+	}
+
+	vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
+}
+
+/*
+ * Per-socket callback used when a vhost-vsock device is closed: reset the
+ * connection if the socket's peer guest no longer has a device.
+ *
+ * On the host the guest is the remote end of the connection, so the lookup
+ * must use the remote CID.  The local CID is the host's own CID, which
+ * vhost_vsock_set_cid() never assigns to a device, so looking that up
+ * would report every connected socket as orphaned.
+ */
+static void vhost_vsock_reset_orphans(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+	/* Leave the connection alone while its peer's device still exists */
+	if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) {
+		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_err = ECONNRESET;
+		sk->sk_error_report(sk);
+	}
+	release_sock(sk);
+}
+
+/*
+ * release() handler for the vhost-vsock misc device.
+ *
+ * The instance is removed from the global list first, so CID lookups no
+ * longer find it while connected sockets are scanned for orphans.  The
+ * virtqueues are then stopped and flushed, the vhost device is torn down
+ * and the instance and its virtqueue pointer array are freed.
+ */
+static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
+{
+ struct vhost_vsock *vsock = file->private_data;
+
+ mutex_lock(&vhost_vsock_mutex);
+ list_del(&vsock->list);
+ mutex_unlock(&vhost_vsock_mutex);
+
+ /* Iterating over all connections for all CIDs to find orphans is
+ * inefficient. Room for improvement here. */
+ vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+
+ vhost_vsock_stop(vsock);
+ vhost_vsock_flush(vsock);
+ vhost_dev_stop(&vsock->dev);
+ vhost_dev_cleanup(&vsock->dev, false);
+ /* NOTE(review): packets left on send_pkt_list are not freed here - confirm */
+ kfree(vsock->dev.vqs);
+ kfree(vsock);
+ return 0;
+}
+
+/*
+ * Handle VHOST_VSOCK_SET_GUEST_CID: assign @guest_cid to this device.
+ *
+ * Reserved CIDs (anything up to and including VMADDR_CID_HOST) are
+ * refused with -EINVAL, and a CID already held by another device is
+ * refused with -EADDRINUSE.  Re-assigning a device its current CID
+ * succeeds.
+ *
+ * The in-use check and the assignment are done under a single hold of
+ * vhost_vsock_mutex, so two devices racing for the same CID cannot both
+ * pass the check.
+ */
+static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u32 guest_cid)
+{
+	struct vhost_vsock *other;
+	int ret = 0;
+
+	/* Refuse reserved CIDs */
+	if (guest_cid <= VMADDR_CID_HOST)
+		return -EINVAL;
+
+	mutex_lock(&vhost_vsock_mutex);
+
+	/*
+	 * Refuse if CID is already in use.  guest_cid is non-zero here, so
+	 * instances that have no CID yet can never match.
+	 */
+	list_for_each_entry(other, &vhost_vsock_list, list) {
+		if (other != vsock && other->guest_cid == guest_cid) {
+			ret = -EADDRINUSE;
+			goto out;
+		}
+	}
+
+	vsock->guest_cid = guest_cid;
+out:
+	mutex_unlock(&vhost_vsock_mutex);
+	return ret;
+}
+
+/*
+ * Handle VHOST_SET_FEATURES: record the acknowledged feature bits on both
+ * virtqueues.
+ *
+ * Returns -EOPNOTSUPP if @features contains a bit outside
+ * VHOST_VSOCK_FEATURES, -EFAULT if VHOST_F_LOG_ALL is requested but the
+ * log is not accessible, and 0 otherwise.
+ */
+static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
+{
+ struct vhost_virtqueue *vq;
+ int i;
+
+ if (features & ~VHOST_VSOCK_FEATURES)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&vsock->dev.mutex);
+ if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ !vhost_log_access_ok(&vsock->dev)) {
+ mutex_unlock(&vsock->dev.mutex);
+ return -EFAULT;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ vq = &vsock->vqs[i];
+ mutex_lock(&vq->mutex);
+ vq->acked_features = features;
+ mutex_unlock(&vq->mutex);
+ }
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+}
+
+/*
+ * ioctl() handler for the vhost-vsock misc device.
+ *
+ * The vsock-specific requests (set guest CID, start) and the feature
+ * get/set requests are handled here.  Everything else is passed to
+ * vhost_dev_ioctl() under dev.mutex and, if that reports -ENOIOCTLCMD, to
+ * vhost_vring_ioctl().
+ */
+static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_vsock *vsock = f->private_data;
+ void __user *argp = (void __user *)arg;
+ u64 __user *featurep = argp;
+ u32 __user *cidp = argp;
+ u32 guest_cid;
+ u64 features;
+ int r;
+
+ switch (ioctl) {
+ case VHOST_VSOCK_SET_GUEST_CID:
+ if (get_user(guest_cid, cidp))
+ return -EFAULT;
+ return vhost_vsock_set_cid(vsock, guest_cid);
+ case VHOST_VSOCK_START:
+ return vhost_vsock_start(vsock);
+ case VHOST_GET_FEATURES:
+ features = VHOST_VSOCK_FEATURES;
+ if (copy_to_user(featurep, &features, sizeof(features)))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES:
+ if (copy_from_user(&features, featurep, sizeof(features)))
+ return -EFAULT;
+ return vhost_vsock_set_features(vsock, features);
+ default:
+ mutex_lock(&vsock->dev.mutex);
+ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
+ if (r == -ENOIOCTLCMD)
+ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
+ else
+ /* Request was recognised by the device layer: flush pending work */
+ vhost_vsock_flush(vsock);
+ mutex_unlock(&vsock->dev.mutex);
+ return r;
+ }
+}
+
+/* File operations of the vhost-vsock character device */
+static const struct file_operations vhost_vsock_fops = {
+ .owner = THIS_MODULE,
+ .open = vhost_vsock_dev_open,
+ .release = vhost_vsock_dev_release,
+ .llseek = noop_llseek,
+ .unlocked_ioctl = vhost_vsock_dev_ioctl,
+};
+
+/* Misc device named "vhost-vsock" with a dynamically assigned minor */
+static struct miscdevice vhost_vsock_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "vhost-vsock",
+ .fops = &vhost_vsock_fops,
+};
+
+/*
+ * Transport operations registered with the vsock core by the host driver.
+ * Apart from get_local_cid and the two send_pkt hooks, every callback is a
+ * virtio_transport_* helper that is not defined in this file.
+ */
+static struct virtio_transport vhost_transport = {
+ .transport = {
+ .get_local_cid = vhost_transport_get_local_cid,
+
+ /* Socket lifecycle */
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+
+ /* Datagram sockets */
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ /* Stream sockets */
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ /* Poll/send/receive notification hooks */
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ /* Buffer size accessors */
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ /* Packet transmit hooks implemented by this driver */
+ .send_pkt = vhost_transport_send_pkt,
+ .send_pkt_no_sock = vhost_transport_send_pkt_no_sock,
+};
+
+/*
+ * Module entry point: register the vhost transport with the vsock core,
+ * then register the misc device.
+ *
+ * If the misc device cannot be registered the transport is unregistered
+ * again, so a failed load does not leave the vsock core pointing at a
+ * module that is about to go away.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init vhost_vsock_init(void)
+{
+	int ret;
+
+	ret = vsock_core_init(&vhost_transport.transport);
+	if (ret < 0)
+		return ret;
+
+	ret = misc_register(&vhost_vsock_misc);
+	if (ret)
+		vsock_core_exit();
+
+	return ret;
+}
+
+/* Module exit: remove the misc device, then unregister the transport */
+static void __exit vhost_vsock_exit(void)
+{
+	misc_deregister(&vhost_vsock_misc);
+	vsock_core_exit();
+}
+
+module_init(vhost_vsock_init);
+module_exit(vhost_vsock_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Asias He");
+MODULE_DESCRIPTION("vhost transport for vsock ");
diff --git a/drivers/vhost/vsock.h b/drivers/vhost/vsock.h
new file mode 100644
index 0000000..173f9fc
--- /dev/null
+++ b/drivers/vhost/vsock.h
@@ -0,0 +1,5 @@
+#ifndef VHOST_VSOCK_H
+#define VHOST_VSOCK_H
+/* Assign the guest CID; the argument points to a __u32 */
+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u32)
+/* Start virtqueue processing; takes no argument */
+#define VHOST_VSOCK_START _IO(VHOST_VIRTIO, 0x61)
+#endif
--
2.8.0.rc3

View File

@ -0,0 +1,108 @@
From 4293a00ac211e20980a0739498a73aae4d8546bf Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 13 Jun 2013 18:30:19 +0800
Subject: [PATCH 8/9] VSOCK: Add Makefile and Kconfig
Enable virtio-vsock and vhost-vsock.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v4:
* Make checkpatch.pl happy with longer option description
* Clarify dependency on virtio rather than QEMU as suggested by Alex
Bennee
v3:
* Don't put vhost vsock driver into staging
* Add missing Kconfig dependencies (Arnd Bergmann <arnd@arndb.de>)
(cherry picked from commit 4c9d2a6be1c69ec22f8ee90bb4a5cc21d3848077)
---
drivers/vhost/Kconfig | 15 +++++++++++++++
drivers/vhost/Makefile | 4 ++++
net/vmw_vsock/Kconfig | 19 +++++++++++++++++++
net/vmw_vsock/Makefile | 2 ++
4 files changed, 40 insertions(+)
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 533eaf0..d7aae9e 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -21,6 +21,21 @@ config VHOST_SCSI
Say M here to enable the vhost_scsi TCM fabric module
for use with virtio-scsi guests
+config VHOST_VSOCK
+ tristate "vhost virtio-vsock driver"
+ depends on VSOCKETS && EVENTFD
+ select VIRTIO_VSOCKETS_COMMON
+ select VHOST
+ select VHOST_RING
+ default n
+ ---help---
+ This kernel module can be loaded in the host kernel to provide AF_VSOCK
+ sockets for communicating with guests. The guests must have the
+ virtio_transport.ko driver loaded to use the virtio-vsock device.
+
+ To compile this driver as a module, choose M here: the module will be called
+ vhost_vsock.
+
config VHOST_RING
tristate
---help---
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index e0441c3..6b012b9 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -4,5 +4,9 @@ vhost_net-y := net.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
vhost_scsi-y := scsi.o
+obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
+vhost_vsock-y := vsock.o
+
obj-$(CONFIG_VHOST_RING) += vringh.o
+
obj-$(CONFIG_VHOST) += vhost.o
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 14810ab..f27e74b 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -26,3 +26,22 @@ config VMWARE_VMCI_VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vmw_vsock_vmci_transport. If unsure, say N.
+
+config VIRTIO_VSOCKETS
+ tristate "virtio transport for Virtual Sockets"
+ depends on VSOCKETS && VIRTIO
+ select VIRTIO_VSOCKETS_COMMON
+ help
+ This module implements a virtio transport for Virtual Sockets.
+
+ Enable this transport if your Virtual Machine host supports Virtual
+ Sockets over virtio.
+
+ To compile this driver as a module, choose M here: the module
+ will be called virtio_transport. If unsure, say N.
+
+config VIRTIO_VSOCKETS_COMMON
+ tristate
+ ---help---
+ This option is selected by any driver which needs to access
+ the virtio_vsock.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 2ce52d7..cf4c294 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,5 +1,7 @@
obj-$(CONFIG_VSOCKETS) += vsock.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
+obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o
+obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o
vsock-y += af_vsock.o vsock_addr.o
--
2.8.0.rc3

View File

@ -0,0 +1,30 @@
From 366c9c42afb9bd54f92f72518470c09e46f12e88 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@docker.com>
Date: Mon, 4 Apr 2016 14:50:10 +0100
Subject: [PATCH 9/9] VSOCK: Only allow host network namespace to use AF_VSOCK.
The VSOCK addressing schema does not really lend itself to simply creating an
alternative end point address within a namespace.
Signed-off-by: Ian Campbell <ian.campbell@docker.com>
---
net/vmw_vsock/af_vsock.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1e5f5ed..cdb3dd3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1840,6 +1840,9 @@ static const struct proto_ops vsock_stream_ops = {
static int vsock_create(struct net *net, struct socket *sock,
int protocol, int kern)
{
+ if (!net_eq(net, &init_net))
+ return -EAFNOSUPPORT;
+
if (!sock)
return -EINVAL;
--
2.8.0.rc3

View File

@ -59,6 +59,7 @@ mkdir -p /output/kernel
cd /output/kernel cd /output/kernel
cp /proc/config.gz . cp /proc/config.gz .
wget ${KERNEL_SOURCE=} || ( printf "Failed to download kernel source\n" && exit 1 ) wget ${KERNEL_SOURCE=} || ( printf "Failed to download kernel source\n" && exit 1 )
cp -r /hostetc/kernel-patches /output/kernel/patches
git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /output/kernel/aufs git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /output/kernel/aufs
cd /output/kernel/aufs cd /output/kernel/aufs