Move kernel directory to top level

Signed-off-by: Justin Cormack <justin.cormack@docker.com>
This commit is contained in:
Justin Cormack
2017-02-02 16:08:46 +00:00
parent 5964e9640c
commit 2fdf0918b3
69 changed files with 32 additions and 33 deletions

View File

@@ -0,0 +1,219 @@
From 190d2525e7d454f9fb5a20bb6a49277877143e6b Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 17 Dec 2015 16:53:43 +0800
Subject: [PATCH 01/44] virtio: make find_vqs() checkpatch.pl-friendly
checkpatch.pl wants arrays of strings declared as follows:
static const char * const names[] = { "vq-1", "vq-2", "vq-3" };
Currently the find_vqs() function takes a const char *names[] argument
so passing checkpatch.pl's const char * const names[] results in a
compiler error due to losing the second const.
This patch adjusts the find_vqs() prototype and updates all virtio
transports. This makes it possible for virtio_balloon.c, virtio_input.c,
virtgpu_kms.c, and virtio_rpmsg_bus.c to use the checkpatch.pl-friendly
type.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
(cherry picked from commit f7ad26ff952b3ca2702d7da03aad0ab1f6c01d7c)
---
drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +-
drivers/misc/mic/card/mic_virtio.c | 2 +-
drivers/remoteproc/remoteproc_virtio.c | 2 +-
drivers/rpmsg/virtio_rpmsg_bus.c | 2 +-
drivers/s390/virtio/kvm_virtio.c | 2 +-
drivers/s390/virtio/virtio_ccw.c | 2 +-
drivers/virtio/virtio_balloon.c | 2 +-
drivers/virtio/virtio_input.c | 2 +-
drivers/virtio/virtio_mmio.c | 2 +-
drivers/virtio/virtio_pci_common.c | 4 ++--
drivers/virtio/virtio_pci_common.h | 2 +-
drivers/virtio/virtio_pci_modern.c | 2 +-
include/linux/virtio_config.h | 2 +-
13 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index 06496a128162..4150873d432e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
static vq_callback_t *callbacks[] = {
virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack
};
- static const char *names[] = { "control", "cursor" };
+ static const char * const names[] = { "control", "cursor" };
struct virtio_gpu_device *vgdev;
/* this will expand later */
diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c
index e486a0c26267..f6ed57d3125c 100644
--- a/drivers/misc/mic/card/mic_virtio.c
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -311,7 +311,7 @@ unmap:
static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct mic_vdev *mvdev = to_micvdev(vdev);
struct mic_device_ctrl __iomem *dc = mvdev->dc;
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index e1a10232a943..e44872fb9e5e 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev)
static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct rproc *rproc = vdev_to_rproc(vdev);
int i, ret;
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 73354ee27877..1fcd27c1f183 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len,
static int rpmsg_probe(struct virtio_device *vdev)
{
vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done };
- const char *names[] = { "input", "output" };
+ static const char * const names[] = { "input", "output" };
struct virtqueue *vqs[2];
struct virtproc_info *vrp;
void *bufs_va;
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 53fb975c404b..1d060fd293a3 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev)
static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct kvm_device *kdev = to_kvmdev(vdev);
int i;
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 1b831598df7c..bf2d1300a957 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -635,7 +635,7 @@ out:
static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
unsigned long *indicatorp = NULL;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 56f7e2521202..66082c99f6ea 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -394,7 +394,7 @@ static int init_vqs(struct virtio_balloon *vb)
{
struct virtqueue *vqs[3];
vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- const char *names[] = { "inflate", "deflate", "stats" };
+ static const char * const names[] = { "inflate", "deflate", "stats" };
int err, nvqs;
/*
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
index c96944b59856..350a2a5a49db 100644
--- a/drivers/virtio/virtio_input.c
+++ b/drivers/virtio/virtio_input.c
@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi)
struct virtqueue *vqs[2];
vq_callback_t *cbs[] = { virtinput_recv_events,
virtinput_recv_status };
- static const char *names[] = { "events", "status" };
+ static const char * const names[] = { "events", "status" };
int err;
err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index f499d9da7237..745c6ee1bb3e 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -482,7 +482,7 @@ error_available:
static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 2046a68ad0ba..f6bed86c17f9 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev)
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[],
+ const char * const names[],
bool use_msix,
bool per_vq_vectors)
{
@@ -376,7 +376,7 @@ error_find:
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
int err;
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index b976d968e793..2cc252270b2d 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev);
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[]);
+ const char * const names[]);
const char *vp_bus_name(struct virtio_device *vdev);
/* Setup the affinity for a virtqueue:
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 4469202eaa8e..631021cfc740 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -423,7 +423,7 @@ err_new_queue:
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[])
+ const char * const names[])
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index e5ce8ab0b8b0..6e6cb0c9d7cb 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -70,7 +70,7 @@ struct virtio_config_ops {
int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
- const char *names[]);
+ const char * const names[]);
void (*del_vqs)(struct virtio_device *);
u64 (*get_features)(struct virtio_device *vdev);
int (*finalize_features)(struct virtio_device *vdev);
--
2.11.0

View File

@@ -0,0 +1,77 @@
From 2b608e3d358b8eac9c32840aaaf4ab212d532493 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia.lawall@lip6.fr>
Date: Sat, 21 Nov 2015 18:39:17 +0100
Subject: [PATCH 02/44] VSOCK: constify vmci_transport_notify_ops structures
The vmci_transport_notify_ops structures are never modified, so declare
them as const.
Done with the help of Coccinelle.
Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3b22dae38db1cea9ead3229f08cfb0b69aca5706)
---
net/vmw_vsock/vmci_transport.h | 2 +-
net/vmw_vsock/vmci_transport_notify.c | 2 +-
net/vmw_vsock/vmci_transport_notify.h | 5 +++--
net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +-
4 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 2ad46f39649f..1820e74a5752 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -121,7 +121,7 @@ struct vmci_transport {
u64 queue_pair_max_size;
u32 detach_sub_id;
union vmci_transport_notify notify;
- struct vmci_transport_notify_ops *notify_ops;
+ const struct vmci_transport_notify_ops *notify_ops;
struct list_head elem;
struct sock *sk;
spinlock_t lock; /* protects sk. */
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 9b7f207f2bee..fd8cf0214d51 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
}
/* Socket control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
index 7df793249b6c..3c464d394a8f 100644
--- a/net/vmw_vsock/vmci_transport_notify.h
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops {
void (*process_negotiate) (struct sock *sk);
};
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
+extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
+extern const
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index dc9c7929a2f9..21e591dafb03 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue(
}
/* Socket always on control packet based operations. */
-struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
vmci_transport_notify_pkt_socket_init,
vmci_transport_notify_pkt_socket_destruct,
vmci_transport_notify_pkt_poll_in,
--
2.11.0

View File

@@ -0,0 +1,336 @@
From 4644873be14865b14b9dd11a78fb20881a116ea0 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Date: Tue, 22 Mar 2016 17:05:52 +0100
Subject: [PATCH 03/44] AF_VSOCK: Shrink the area influenced by prepare_to_wait
When a thread is prepared for waiting by calling prepare_to_wait, sleeping
is not allowed until either the wait has taken place or finish_wait has
been called. The existing code in af_vsock imposed unnecessary no-sleep
assumptions to a broad list of backend functions.
This patch shrinks the influence of prepare_to_wait to the area where it
is strictly needed, therefore relaxing the no-sleep restriction there.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit f7f9b5e7f8eccfd68ffa7b8d74b07c478bb9e7f0)
---
net/vmw_vsock/af_vsock.c | 158 +++++++++++++++++++++++++----------------------
1 file changed, 85 insertions(+), 73 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9b5bd6d142dc..b5f1221f48d4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- goto out_wait_error;
+ sk->sk_state = SS_UNCONNECTED;
+ sock->state = SS_UNCONNECTED;
+ goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
- goto out_wait_error;
+ sk->sk_state = SS_UNCONNECTED;
+ sock->state = SS_UNCONNECTED;
+ goto out_wait;
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (sk->sk_err) {
err = -sk->sk_err;
- goto out_wait_error;
- } else
+ sk->sk_state = SS_UNCONNECTED;
+ sock->state = SS_UNCONNECTED;
+ } else {
err = 0;
+ }
out_wait:
finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
-
-out_wait_error:
- sk->sk_state = SS_UNCONNECTED;
- sock->state = SS_UNCONNECTED;
- goto out_wait;
}
static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
listener->sk_err == 0) {
release_sock(listener);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(listener), &wait);
lock_sock(listener);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- goto out_wait;
+ goto out;
} else if (timeout == 0) {
err = -EAGAIN;
- goto out_wait;
+ goto out;
}
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
}
+ finish_wait(sk_sleep(listener), &wait);
if (listener->sk_err)
err = -listener->sk_err;
@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
*/
if (err) {
vconnected->rejected = true;
- release_sock(connected);
- sock_put(connected);
- goto out_wait;
+ } else {
+ newsock->state = SS_CONNECTED;
+ sock_graft(connected, newsock);
}
- newsock->state = SS_CONNECTED;
- sock_graft(connected, newsock);
release_sock(connected);
sock_put(connected);
}
-out_wait:
- finish_wait(sk_sleep(listener), &wait);
out:
release_sock(listener);
return err;
@@ -1557,11 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (total_written < len) {
ssize_t written;
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (vsock_stream_has_space(vsk) == 0 &&
sk->sk_err == 0 &&
!(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1570,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Don't wait for non-blocking sockets. */
if (timeout == 0) {
err = -EAGAIN;
- goto out_wait;
+ finish_wait(sk_sleep(sk), &wait);
+ goto out_err;
}
err = transport->notify_send_pre_block(vsk, &send_data);
- if (err < 0)
- goto out_wait;
+ if (err < 0) {
+ finish_wait(sk_sleep(sk), &wait);
+ goto out_err;
+ }
release_sock(sk);
timeout = schedule_timeout(timeout);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- goto out_wait;
+ finish_wait(sk_sleep(sk), &wait);
+ goto out_err;
} else if (timeout == 0) {
err = -EAGAIN;
- goto out_wait;
+ finish_wait(sk_sleep(sk), &wait);
+ goto out_err;
}
prepare_to_wait(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
}
+ finish_wait(sk_sleep(sk), &wait);
/* These checks occur both as part of and after the loop
* conditional since we need to check before and after
@@ -1598,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
*/
if (sk->sk_err) {
err = -sk->sk_err;
- goto out_wait;
+ goto out_err;
} else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
(vsk->peer_shutdown & RCV_SHUTDOWN)) {
err = -EPIPE;
- goto out_wait;
+ goto out_err;
}
err = transport->notify_send_pre_enqueue(vsk, &send_data);
if (err < 0)
- goto out_wait;
+ goto out_err;
/* Note that enqueue will only write as many bytes as are free
* in the produce queue, so we don't need to ensure len is
@@ -1620,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
len - total_written);
if (written < 0) {
err = -ENOMEM;
- goto out_wait;
+ goto out_err;
}
total_written += written;
@@ -1628,14 +1633,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
err = transport->notify_send_post_enqueue(
vsk, written, &send_data);
if (err < 0)
- goto out_wait;
+ goto out_err;
}
-out_wait:
+out_err:
if (total_written > 0)
err = total_written;
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
@@ -1716,21 +1720,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (1) {
- s64 ready = vsock_stream_has_data(vsk);
+ s64 ready;
- if (ready < 0) {
- /* Invalid queue pair content. XXX This should be
- * changed to a connection reset in a later change.
- */
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ ready = vsock_stream_has_data(vsk);
- err = -ENOMEM;
- goto out_wait;
- } else if (ready > 0) {
+ if (ready == 0) {
+ if (sk->sk_err != 0 ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+ /* Don't wait for non-blocking sockets. */
+ if (timeout == 0) {
+ err = -EAGAIN;
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+
+ err = transport->notify_recv_pre_block(
+ vsk, target, &recv_data);
+ if (err < 0) {
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+ release_sock(sk);
+ timeout = schedule_timeout(timeout);
+ lock_sock(sk);
+
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeout);
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ } else if (timeout == 0) {
+ err = -EAGAIN;
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+ } else {
ssize_t read;
+ finish_wait(sk_sleep(sk), &wait);
+
+ if (ready < 0) {
+ /* Invalid queue pair content. XXX This should
+ * be changed to a connection reset in a later
+ * change.
+ */
+
+ err = -ENOMEM;
+ goto out;
+ }
+
err = transport->notify_recv_pre_dequeue(
vsk, target, &recv_data);
if (err < 0)
@@ -1750,42 +1794,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
vsk, target, read,
!(flags & MSG_PEEK), &recv_data);
if (err < 0)
- goto out_wait;
+ goto out;
if (read >= target || flags & MSG_PEEK)
break;
target -= read;
- } else {
- if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
- || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
- break;
- }
- /* Don't wait for non-blocking sockets. */
- if (timeout == 0) {
- err = -EAGAIN;
- break;
- }
-
- err = transport->notify_recv_pre_block(
- vsk, target, &recv_data);
- if (err < 0)
- break;
-
- release_sock(sk);
- timeout = schedule_timeout(timeout);
- lock_sock(sk);
-
- if (signal_pending(current)) {
- err = sock_intr_errno(timeout);
- break;
- } else if (timeout == 0) {
- err = -EAGAIN;
- break;
- }
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
}
@@ -1797,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (copied > 0)
err = copied;
-out_wait:
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
--
2.11.0

View File

@@ -0,0 +1,63 @@
From 58af9ca05d4337a956f9b8303a2d7b0b968788bb Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 23 Jun 2016 16:28:58 +0100
Subject: [PATCH 04/44] vsock: make listener child lock ordering explicit
There are several places where the listener and pending or accept queue
child sockets are accessed at the same time. Lockdep is unhappy that
two locks from the same class are held.
Tell lockdep that it is safe and document the lock ordering.
Originally Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> sent a similar
patch asking whether this is safe. I have audited the code and also
covered the vsock_pending_work() function.
Suggested-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 4192f672fae559f32d82de72a677701853cc98a7)
---
net/vmw_vsock/af_vsock.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b5f1221f48d4..b96ac918e0ba 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -61,6 +61,14 @@
* function will also cleanup rejected sockets, those that reach the connected
* state but leave it before they have been accepted.
*
+ * - Lock ordering for pending or accept queue sockets is:
+ *
+ * lock_sock(listener);
+ * lock_sock_nested(pending, SINGLE_DEPTH_NESTING);
+ *
+ * Using explicit nested locking keeps lockdep happy since normally only one
+ * lock of a given class may be taken at a time.
+ *
* - Sockets created by user action will be cleaned up when the user process
* calls close(2), causing our release implementation to be called. Our release
* implementation will perform some cleanup then drop the last reference so our
@@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work)
cleanup = true;
lock_sock(listener);
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (vsock_is_pending(sk)) {
vsock_remove_pending(listener, sk);
@@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
if (connected) {
listener->sk_ack_backlog--;
- lock_sock(connected);
+ lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
vconnected = vsock_sk(connected);
/* If the listener socket has received an error, then we should
--
2.11.0

View File

@@ -0,0 +1,59 @@
From c122f28889868dc16add4712af3603a467bb7fd3 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 28 Jul 2016 15:36:30 +0100
Subject: [PATCH 05/44] VSOCK: transport-specific vsock_transport functions
struct vsock_transport contains function pointers called by AF_VSOCK
core code. The transport may want its own transport-specific function
pointers and they can be added after struct vsock_transport.
Allow the transport to fetch vsock_transport. It can downcast it to
access transport-specific function pointers.
The virtio transport will use this.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 0b01aeb3d2fbf16787f0c9629f4ca52ae792f732)
---
include/net/af_vsock.h | 3 +++
net/vmw_vsock/af_vsock.c | 9 +++++++++
2 files changed, 12 insertions(+)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index e9eb2d6791b3..23f55259b60d 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -165,6 +165,9 @@ static inline int vsock_core_init(const struct vsock_transport *t)
}
void vsock_core_exit(void);
+/* The transport may downcast this to access transport-specific functions */
+const struct vsock_transport *vsock_core_get_transport(void);
+
/**** UTILS ****/
void vsock_release_pending(struct sock *pending);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b96ac918e0ba..e34d96f8bde2 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1995,6 +1995,15 @@ void vsock_core_exit(void)
}
EXPORT_SYMBOL_GPL(vsock_core_exit);
+const struct vsock_transport *vsock_core_get_transport(void)
+{
+ /* vsock_register_mutex not taken since only the transport uses this
+ * function and only while registered.
+ */
+ return transport;
+}
+EXPORT_SYMBOL_GPL(vsock_core_get_transport);
+
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.1.0-k");
--
2.11.0

View File

@@ -0,0 +1,83 @@
From c388401f49ad3f0ec37aab115f883e180427aae0 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 28 Jul 2016 15:36:31 +0100
Subject: [PATCH 06/44] VSOCK: defer sock removal to transports
The virtio transport will implement graceful shutdown and the related
SO_LINGER socket option. This requires orphaning the sock but keeping
it in the table of connections after .release().
This patch adds the vsock_remove_sock() function and leaves it up to the
transport when to remove the sock.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 6773b7dc39f165bd9d824b50ac52cbb3f87d53c8)
---
include/net/af_vsock.h | 1 +
net/vmw_vsock/af_vsock.c | 16 ++++++++++------
net/vmw_vsock/vmci_transport.c | 2 ++
3 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 23f55259b60d..3af0b224f754 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -180,6 +180,7 @@ void vsock_remove_connected(struct vsock_sock *vsk);
struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
struct sockaddr_vm *dst);
+void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
#endif /* __AF_VSOCK_H__ */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index e34d96f8bde2..17dbbe64cd73 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -344,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk)
return ret;
}
+void vsock_remove_sock(struct vsock_sock *vsk)
+{
+ if (vsock_in_bound_table(vsk))
+ vsock_remove_bound(vsk);
+
+ if (vsock_in_connected_table(vsk))
+ vsock_remove_connected(vsk);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_sock);
+
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
{
int i;
@@ -660,12 +670,6 @@ static void __vsock_release(struct sock *sk)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
- if (vsock_in_bound_table(vsk))
- vsock_remove_bound(vsk);
-
- if (vsock_in_connected_table(vsk))
- vsock_remove_connected(vsk);
-
transport->release(vsk);
lock_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 0a369bb440e7..706991e00e93 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1644,6 +1644,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk)
static void vmci_transport_release(struct vsock_sock *vsk)
{
+ vsock_remove_sock(vsk);
+
if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
--
2.11.0

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,663 @@
From fb158aa4227b765ec8605ef39d23846389a7ffb2 Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 28 Jul 2016 15:36:33 +0100
Subject: [PATCH 08/44] VSOCK: Introduce virtio_transport.ko
VM sockets virtio transport implementation. This driver runs in the
guest.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 0ea9e1d3a9e3ef7d2a1462d3de6b95131dc7d872)
---
MAINTAINERS | 1 +
net/vmw_vsock/virtio_transport.c | 624 +++++++++++++++++++++++++++++++++++++++
2 files changed, 625 insertions(+)
create mode 100644 net/vmw_vsock/virtio_transport.c
diff --git a/MAINTAINERS b/MAINTAINERS
index b93ba8b21be7..82d11235cacb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11391,6 +11391,7 @@ S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
+F: net/vmw_vsock/virtio_transport.c
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
new file mode 100644
index 000000000000..699dfabdbccd
--- /dev/null
+++ b/net/vmw_vsock/virtio_transport.c
@@ -0,0 +1,624 @@
+/*
+ * virtio transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * Some of the code is take from Gerd Hoffmann <kraxel@redhat.com>'s
+ * early virtio-vsock proof-of-concept bits.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_vsock.h>
+#include <net/sock.h>
+#include <linux/mutex.h>
+#include <net/af_vsock.h>
+
+static struct workqueue_struct *virtio_vsock_workqueue;
+static struct virtio_vsock *the_virtio_vsock;
+static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+
+struct virtio_vsock {
+ struct virtio_device *vdev;
+ struct virtqueue *vqs[VSOCK_VQ_MAX];
+
+ /* Virtqueue processing is deferred to a workqueue */
+ struct work_struct tx_work;
+ struct work_struct rx_work;
+ struct work_struct event_work;
+
+ /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX]
+ * must be accessed with tx_lock held.
+ */
+ struct mutex tx_lock;
+
+ struct work_struct send_pkt_work;
+ spinlock_t send_pkt_list_lock;
+ struct list_head send_pkt_list;
+
+ atomic_t queued_replies;
+
+ /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
+ * must be accessed with rx_lock held.
+ */
+ struct mutex rx_lock;
+ int rx_buf_nr;
+ int rx_buf_max_nr;
+
+ /* The following fields are protected by event_lock.
+ * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
+ */
+ struct mutex event_lock;
+ struct virtio_vsock_event event_list[8];
+
+ u32 guest_cid;
+};
+
+static struct virtio_vsock *virtio_vsock_get(void)
+{
+ return the_virtio_vsock;
+}
+
+static u32 virtio_transport_get_local_cid(void)
+{
+ struct virtio_vsock *vsock = virtio_vsock_get();
+
+ return vsock->guest_cid;
+}
+
+static void
+virtio_transport_send_pkt_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, send_pkt_work);
+ struct virtqueue *vq;
+ bool added = false;
+ bool restart_rx = false;
+
+ mutex_lock(&vsock->tx_lock);
+
+ vq = vsock->vqs[VSOCK_VQ_TX];
+
+ /* Avoid unnecessary interrupts while we're processing the ring */
+ virtqueue_disable_cb(vq);
+
+ for (;;) {
+ struct virtio_vsock_pkt *pkt;
+ struct scatterlist hdr, buf, *sgs[2];
+ int ret, in_sg = 0, out_sg = 0;
+ bool reply;
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ if (list_empty(&vsock->send_pkt_list)) {
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+ virtqueue_enable_cb(vq);
+ break;
+ }
+
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ reply = pkt->reply;
+
+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sgs[out_sg++] = &hdr;
+ if (pkt->buf) {
+ sg_init_one(&buf, pkt->buf, pkt->len);
+ sgs[out_sg++] = &buf;
+ }
+
+ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+ if (ret < 0) {
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ if (!virtqueue_enable_cb(vq) && ret == -ENOSPC)
+ continue; /* retry now that we have more space */
+ break;
+ }
+
+ if (reply) {
+ struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
+ int val;
+
+ val = atomic_dec_return(&vsock->queued_replies);
+
+ /* Do we now have resources to resume rx processing? */
+ if (val + 1 == virtqueue_get_vring_size(rx_vq))
+ restart_rx = true;
+ }
+
+ added = true;
+ }
+
+ if (added)
+ virtqueue_kick(vq);
+
+ mutex_unlock(&vsock->tx_lock);
+
+ if (restart_rx)
+ queue_work(virtio_vsock_workqueue, &vsock->rx_work);
+}
+
+static int
+virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
+{
+ struct virtio_vsock *vsock;
+ int len = pkt->len;
+
+ vsock = virtio_vsock_get();
+ if (!vsock) {
+ virtio_transport_free_pkt(pkt);
+ return -ENODEV;
+ }
+
+ if (pkt->reply)
+ atomic_inc(&vsock->queued_replies);
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add_tail(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+ return len;
+}
+
+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
+{
+ int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
+ struct virtio_vsock_pkt *pkt;
+ struct scatterlist hdr, buf, *sgs[2];
+ struct virtqueue *vq;
+ int ret;
+
+ vq = vsock->vqs[VSOCK_VQ_RX];
+
+ do {
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ break;
+
+ pkt->buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!pkt->buf) {
+ virtio_transport_free_pkt(pkt);
+ break;
+ }
+
+ pkt->len = buf_len;
+
+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sgs[0] = &hdr;
+
+ sg_init_one(&buf, pkt->buf, buf_len);
+ sgs[1] = &buf;
+ ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
+ if (ret) {
+ virtio_transport_free_pkt(pkt);
+ break;
+ }
+ vsock->rx_buf_nr++;
+ } while (vq->num_free);
+ if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
+ vsock->rx_buf_max_nr = vsock->rx_buf_nr;
+ virtqueue_kick(vq);
+}
+
+static void virtio_transport_tx_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, tx_work);
+ struct virtqueue *vq;
+ bool added = false;
+
+ vq = vsock->vqs[VSOCK_VQ_TX];
+ mutex_lock(&vsock->tx_lock);
+ do {
+ struct virtio_vsock_pkt *pkt;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
+ virtio_transport_free_pkt(pkt);
+ added = true;
+ }
+ } while (!virtqueue_enable_cb(vq));
+ mutex_unlock(&vsock->tx_lock);
+
+ if (added)
+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+}
+
+/* Is there space left for replies to rx packets? */
+static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
+{
+ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX];
+ int val;
+
+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
+ val = atomic_read(&vsock->queued_replies);
+
+ return val < virtqueue_get_vring_size(vq);
+}
+
+static void virtio_transport_rx_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, rx_work);
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_RX];
+
+ mutex_lock(&vsock->rx_lock);
+
+ do {
+ virtqueue_disable_cb(vq);
+ for (;;) {
+ struct virtio_vsock_pkt *pkt;
+ unsigned int len;
+
+ if (!virtio_transport_more_replies(vsock)) {
+ /* Stop rx until the device processes already
+ * pending replies. Leave rx virtqueue
+ * callbacks disabled.
+ */
+ goto out;
+ }
+
+ pkt = virtqueue_get_buf(vq, &len);
+ if (!pkt) {
+ break;
+ }
+
+ vsock->rx_buf_nr--;
+
+ /* Drop short/long packets */
+ if (unlikely(len < sizeof(pkt->hdr) ||
+ len > sizeof(pkt->hdr) + pkt->len)) {
+ virtio_transport_free_pkt(pkt);
+ continue;
+ }
+
+ pkt->len = len - sizeof(pkt->hdr);
+ virtio_transport_recv_pkt(pkt);
+ }
+ } while (!virtqueue_enable_cb(vq));
+
+out:
+ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
+ virtio_vsock_rx_fill(vsock);
+ mutex_unlock(&vsock->rx_lock);
+}
+
+/* event_lock must be held */
+static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
+ struct virtio_vsock_event *event)
+{
+ struct scatterlist sg;
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_EVENT];
+
+ sg_init_one(&sg, event, sizeof(*event));
+
+ return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL);
+}
+
+/* event_lock must be held */
+static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) {
+ struct virtio_vsock_event *event = &vsock->event_list[i];
+
+ virtio_vsock_event_fill_one(vsock, event);
+ }
+
+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
+}
+
+static void virtio_vsock_reset_sock(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_state = SS_UNCONNECTED;
+ sk->sk_err = ECONNRESET;
+ sk->sk_error_report(sk);
+ release_sock(sk);
+}
+
+static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
+{
+ struct virtio_device *vdev = vsock->vdev;
+ u64 guest_cid;
+
+ vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
+ &guest_cid, sizeof(guest_cid));
+ vsock->guest_cid = le64_to_cpu(guest_cid);
+}
+
+/* event_lock must be held */
+static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
+ struct virtio_vsock_event *event)
+{
+ switch (le32_to_cpu(event->id)) {
+ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
+ virtio_vsock_update_guest_cid(vsock);
+ vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+ break;
+ }
+}
+
+static void virtio_transport_event_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, event_work);
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_EVENT];
+
+ mutex_lock(&vsock->event_lock);
+
+ do {
+ struct virtio_vsock_event *event;
+ unsigned int len;
+
+ virtqueue_disable_cb(vq);
+ while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
+ if (len == sizeof(*event))
+ virtio_vsock_event_handle(vsock, event);
+
+ virtio_vsock_event_fill_one(vsock, event);
+ }
+ } while (!virtqueue_enable_cb(vq));
+
+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
+
+ mutex_unlock(&vsock->event_lock);
+}
+
+static void virtio_vsock_event_done(struct virtqueue *vq)
+{
+ struct virtio_vsock *vsock = vq->vdev->priv;
+
+ if (!vsock)
+ return;
+ queue_work(virtio_vsock_workqueue, &vsock->event_work);
+}
+
+static void virtio_vsock_tx_done(struct virtqueue *vq)
+{
+ struct virtio_vsock *vsock = vq->vdev->priv;
+
+ if (!vsock)
+ return;
+ queue_work(virtio_vsock_workqueue, &vsock->tx_work);
+}
+
+static void virtio_vsock_rx_done(struct virtqueue *vq)
+{
+ struct virtio_vsock *vsock = vq->vdev->priv;
+
+ if (!vsock)
+ return;
+ queue_work(virtio_vsock_workqueue, &vsock->rx_work);
+}
+
+static struct virtio_transport virtio_transport = {
+ .transport = {
+ .get_local_cid = virtio_transport_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ .send_pkt = virtio_transport_send_pkt,
+};
+
+static int virtio_vsock_probe(struct virtio_device *vdev)
+{
+ vq_callback_t *callbacks[] = {
+ virtio_vsock_rx_done,
+ virtio_vsock_tx_done,
+ virtio_vsock_event_done,
+ };
+ static const char * const names[] = {
+ "rx",
+ "tx",
+ "event",
+ };
+ struct virtio_vsock *vsock = NULL;
+ int ret;
+
+ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
+ if (ret)
+ return ret;
+
+ /* Only one virtio-vsock device per guest is supported */
+ if (the_virtio_vsock) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+ if (!vsock) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vsock->vdev = vdev;
+
+ ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
+ vsock->vqs, callbacks, names);
+ if (ret < 0)
+ goto out;
+
+ virtio_vsock_update_guest_cid(vsock);
+
+ ret = vsock_core_init(&virtio_transport.transport);
+ if (ret < 0)
+ goto out_vqs;
+
+ vsock->rx_buf_nr = 0;
+ vsock->rx_buf_max_nr = 0;
+ atomic_set(&vsock->queued_replies, 0);
+
+ vdev->priv = vsock;
+ the_virtio_vsock = vsock;
+ mutex_init(&vsock->tx_lock);
+ mutex_init(&vsock->rx_lock);
+ mutex_init(&vsock->event_lock);
+ spin_lock_init(&vsock->send_pkt_list_lock);
+ INIT_LIST_HEAD(&vsock->send_pkt_list);
+ INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
+ INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
+ INIT_WORK(&vsock->event_work, virtio_transport_event_work);
+ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+
+ mutex_lock(&vsock->rx_lock);
+ virtio_vsock_rx_fill(vsock);
+ mutex_unlock(&vsock->rx_lock);
+
+ mutex_lock(&vsock->event_lock);
+ virtio_vsock_event_fill(vsock);
+ mutex_unlock(&vsock->event_lock);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return 0;
+
+out_vqs:
+ vsock->vdev->config->del_vqs(vsock->vdev);
+out:
+ kfree(vsock);
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return ret;
+}
+
+static void virtio_vsock_remove(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+ struct virtio_vsock_pkt *pkt;
+
+ flush_work(&vsock->rx_work);
+ flush_work(&vsock->tx_work);
+ flush_work(&vsock->event_work);
+ flush_work(&vsock->send_pkt_work);
+
+ vdev->config->reset(vdev);
+
+ mutex_lock(&vsock->rx_lock);
+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
+ virtio_transport_free_pkt(pkt);
+ mutex_unlock(&vsock->rx_lock);
+
+ mutex_lock(&vsock->tx_lock);
+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
+ virtio_transport_free_pkt(pkt);
+ mutex_unlock(&vsock->tx_lock);
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ while (!list_empty(&vsock->send_pkt_list)) {
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ mutex_lock(&the_virtio_vsock_mutex);
+ the_virtio_vsock = NULL;
+ vsock_core_exit();
+ mutex_unlock(&the_virtio_vsock_mutex);
+
+ vdev->config->del_vqs(vdev);
+
+ kfree(vsock);
+}
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static unsigned int features[] = {
+};
+
+static struct virtio_driver virtio_vsock_driver = {
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtio_vsock_probe,
+ .remove = virtio_vsock_remove,
+};
+
+static int __init virtio_vsock_init(void)
+{
+ int ret;
+
+ virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
+ if (!virtio_vsock_workqueue)
+ return -ENOMEM;
+ ret = register_virtio_driver(&virtio_vsock_driver);
+ if (ret)
+ destroy_workqueue(virtio_vsock_workqueue);
+ return ret;
+}
+
+static void __exit virtio_vsock_exit(void)
+{
+ unregister_virtio_driver(&virtio_vsock_driver);
+ destroy_workqueue(virtio_vsock_workqueue);
+}
+
+module_init(virtio_vsock_init);
+module_exit(virtio_vsock_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Asias He");
+MODULE_DESCRIPTION("virtio transport for vsock");
+MODULE_DEVICE_TABLE(virtio, id_table);
--
2.11.0

View File

@@ -0,0 +1,777 @@
From 429b9374590476305988dcf27c291f3b25d4fabe Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 28 Jul 2016 15:36:34 +0100
Subject: [PATCH 09/44] VSOCK: Introduce vhost_vsock.ko
VM sockets vhost transport implementation. This driver runs on the
host.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 433fc58e6bf2c8bd97e57153ed28e64fd78207b8)
---
MAINTAINERS | 2 +
drivers/vhost/vsock.c | 722 +++++++++++++++++++++++++++++++++++++++++++++
include/uapi/linux/vhost.h | 5 +
3 files changed, 729 insertions(+)
create mode 100644 drivers/vhost/vsock.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 82d11235cacb..12d49f58c4e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11392,6 +11392,8 @@ F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
+F: drivers/vhost/vsock.c
+F: drivers/vhost/vsock.h
VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com>
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
new file mode 100644
index 000000000000..028ca16c2d36
--- /dev/null
+++ b/drivers/vhost/vsock.c
@@ -0,0 +1,722 @@
+/*
+ * vhost transport for vsock
+ *
+ * Copyright (C) 2013-2015 Red Hat, Inc.
+ * Author: Asias He <asias@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/miscdevice.h>
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/vmalloc.h>
+#include <net/sock.h>
+#include <linux/virtio_vsock.h>
+#include <linux/vhost.h>
+
+#include <net/af_vsock.h>
+#include "vhost.h"
+
+#define VHOST_VSOCK_DEFAULT_HOST_CID 2
+
+enum {
+ VHOST_VSOCK_FEATURES = VHOST_FEATURES,
+};
+
+/* Used to track all the vhost_vsock instances on the system. */
+static DEFINE_SPINLOCK(vhost_vsock_lock);
+static LIST_HEAD(vhost_vsock_list);
+
+struct vhost_vsock {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[2];
+
+ /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */
+ struct list_head list;
+
+ struct vhost_work send_pkt_work;
+ spinlock_t send_pkt_list_lock;
+ struct list_head send_pkt_list; /* host->guest pending packets */
+
+ atomic_t queued_replies;
+
+ u32 guest_cid;
+};
+
+static u32 vhost_transport_get_local_cid(void)
+{
+ return VHOST_VSOCK_DEFAULT_HOST_CID;
+}
+
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
+{
+ struct vhost_vsock *vsock;
+
+ spin_lock_bh(&vhost_vsock_lock);
+ list_for_each_entry(vsock, &vhost_vsock_list, list) {
+ u32 other_cid = vsock->guest_cid;
+
+ /* Skip instances that have no CID yet */
+ if (other_cid == 0)
+ continue;
+
+ if (other_cid == guest_cid) {
+ spin_unlock_bh(&vhost_vsock_lock);
+ return vsock;
+ }
+ }
+ spin_unlock_bh(&vhost_vsock_lock);
+
+ return NULL;
+}
+
+static void
+vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
+ struct vhost_virtqueue *vq)
+{
+ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+ bool added = false;
+ bool restart_tx = false;
+
+ mutex_lock(&vq->mutex);
+
+ if (!vq->private_data)
+ goto out;
+
+ /* Avoid further vmexits, we're already processing the virtqueue */
+ vhost_disable_notify(&vsock->dev, vq);
+
+ for (;;) {
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ unsigned out, in;
+ size_t nbytes;
+ size_t len;
+ int head;
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ if (list_empty(&vsock->send_pkt_list)) {
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+ vhost_enable_notify(&vsock->dev, vq);
+ break;
+ }
+
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head < 0) {
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+ break;
+ }
+
+ if (head == vq->num) {
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ /* We cannot finish yet if more buffers snuck in while
+ * re-enabling notify.
+ */
+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+ vhost_disable_notify(&vsock->dev, vq);
+ continue;
+ }
+ break;
+ }
+
+ if (out) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Expected 0 output buffers, got %u\n", out);
+ break;
+ }
+
+ len = iov_length(&vq->iov[out], in);
+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
+
+ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt hdr\n");
+ break;
+ }
+
+ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ virtio_transport_free_pkt(pkt);
+ vq_err(vq, "Faulted on copying pkt buf\n");
+ break;
+ }
+
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ added = true;
+
+ if (pkt->reply) {
+ int val;
+
+ val = atomic_dec_return(&vsock->queued_replies);
+
+ /* Do we have resources to resume tx processing? */
+ if (val + 1 == tx_vq->num)
+ restart_tx = true;
+ }
+
+ virtio_transport_free_pkt(pkt);
+ }
+ if (added)
+ vhost_signal(&vsock->dev, vq);
+
+out:
+ mutex_unlock(&vq->mutex);
+
+ if (restart_tx)
+ vhost_poll_queue(&tx_vq->poll);
+}
+
+static void vhost_transport_send_pkt_work(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_vsock *vsock;
+
+ vsock = container_of(work, struct vhost_vsock, send_pkt_work);
+ vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ vhost_transport_do_send_pkt(vsock, vq);
+}
+
+static int
+vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
+{
+ struct vhost_vsock *vsock;
+ struct vhost_virtqueue *vq;
+ int len = pkt->len;
+
+ /* Find the vhost_vsock according to guest context id */
+ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
+ if (!vsock) {
+ virtio_transport_free_pkt(pkt);
+ return -ENODEV;
+ }
+
+ vq = &vsock->vqs[VSOCK_VQ_RX];
+
+ if (pkt->reply)
+ atomic_inc(&vsock->queued_replies);
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_add_tail(&pkt->list, &vsock->send_pkt_list);
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+ return len;
+}
+
+static struct virtio_vsock_pkt *
+vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
+ unsigned int out, unsigned int in)
+{
+ struct virtio_vsock_pkt *pkt;
+ struct iov_iter iov_iter;
+ size_t nbytes;
+ size_t len;
+
+ if (in != 0) {
+ vq_err(vq, "Expected 0 input buffers, got %u\n", in);
+ return NULL;
+ }
+
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt)
+ return NULL;
+
+ len = iov_length(vq->iov, out);
+ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
+
+ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
+ if (nbytes != sizeof(pkt->hdr)) {
+ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
+ sizeof(pkt->hdr), nbytes);
+ kfree(pkt);
+ return NULL;
+ }
+
+ if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
+ pkt->len = le32_to_cpu(pkt->hdr.len);
+
+ /* No payload */
+ if (!pkt->len)
+ return pkt;
+
+ /* The pkt is too big */
+ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
+ if (!pkt->buf) {
+ kfree(pkt);
+ return NULL;
+ }
+
+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+ if (nbytes != pkt->len) {
+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
+ pkt->len, nbytes);
+ virtio_transport_free_pkt(pkt);
+ return NULL;
+ }
+
+ return pkt;
+}
+
+/* Is there space left for replies to rx packets? */
+static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
+{
+ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
+ int val;
+
+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
+ val = atomic_read(&vsock->queued_replies);
+
+ return val < vq->num;
+}
+
+static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
+ dev);
+ struct virtio_vsock_pkt *pkt;
+ int head;
+ unsigned int out, in;
+ bool added = false;
+
+ mutex_lock(&vq->mutex);
+
+ if (!vq->private_data)
+ goto out;
+
+ vhost_disable_notify(&vsock->dev, vq);
+ for (;;) {
+ if (!vhost_vsock_more_replies(vsock)) {
+ /* Stop tx until the device processes already
+ * pending replies. Leave tx virtqueue
+ * callbacks disabled.
+ */
+ goto no_more_replies;
+ }
+
+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head < 0)
+ break;
+
+ if (head == vq->num) {
+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
+ vhost_disable_notify(&vsock->dev, vq);
+ continue;
+ }
+ break;
+ }
+
+ pkt = vhost_vsock_alloc_pkt(vq, out, in);
+ if (!pkt) {
+ vq_err(vq, "Faulted on pkt\n");
+ continue;
+ }
+
+ /* Only accept correctly addressed packets */
+ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
+ virtio_transport_recv_pkt(pkt);
+ else
+ virtio_transport_free_pkt(pkt);
+
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ added = true;
+ }
+
+no_more_replies:
+ if (added)
+ vhost_signal(&vsock->dev, vq);
+
+out:
+ mutex_unlock(&vq->mutex);
+}
+
+static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
+ dev);
+
+ vhost_transport_do_send_pkt(vsock, vq);
+}
+
+static int vhost_vsock_start(struct vhost_vsock *vsock)
+{
+ size_t i;
+ int ret;
+
+ mutex_lock(&vsock->dev.mutex);
+
+ ret = vhost_dev_check_owner(&vsock->dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+
+ if (!vhost_vq_access_ok(vq)) {
+ ret = -EFAULT;
+ mutex_unlock(&vq->mutex);
+ goto err_vq;
+ }
+
+ if (!vq->private_data) {
+ vq->private_data = vsock;
+ vhost_vq_init_access(vq);
+ }
+
+ mutex_unlock(&vq->mutex);
+ }
+
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+
+err_vq:
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+ }
+err:
+ mutex_unlock(&vsock->dev.mutex);
+ return ret;
+}
+
+static int vhost_vsock_stop(struct vhost_vsock *vsock)
+{
+ size_t i;
+ int ret;
+
+ mutex_lock(&vsock->dev.mutex);
+
+ ret = vhost_dev_check_owner(&vsock->dev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ struct vhost_virtqueue *vq = &vsock->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->private_data = NULL;
+ mutex_unlock(&vq->mutex);
+ }
+
+err:
+ mutex_unlock(&vsock->dev.mutex);
+ return ret;
+}
+
+static void vhost_vsock_free(struct vhost_vsock *vsock)
+{
+ if (is_vmalloc_addr(vsock))
+ vfree(vsock);
+ else
+ kfree(vsock);
+}
+
+static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
+{
+ struct vhost_virtqueue **vqs;
+ struct vhost_vsock *vsock;
+ int ret;
+
+ /* This struct is large and allocation could fail, fall back to vmalloc
+ * if there is no other way.
+ */
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!vsock) {
+ vsock = vmalloc(sizeof(*vsock));
+ if (!vsock)
+ return -ENOMEM;
+ }
+
+ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
+ if (!vqs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ atomic_set(&vsock->queued_replies, 0);
+
+ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
+ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
+
+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
+
+ file->private_data = vsock;
+ spin_lock_init(&vsock->send_pkt_list_lock);
+ INIT_LIST_HEAD(&vsock->send_pkt_list);
+ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
+
+ spin_lock_bh(&vhost_vsock_lock);
+ list_add_tail(&vsock->list, &vhost_vsock_list);
+ spin_unlock_bh(&vhost_vsock_lock);
+ return 0;
+
+out:
+ vhost_vsock_free(vsock);
+ return ret;
+}
+
+static void vhost_vsock_flush(struct vhost_vsock *vsock)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
+ if (vsock->vqs[i].handle_kick)
+ vhost_poll_flush(&vsock->vqs[i].poll);
+ vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
+}
+
+static void vhost_vsock_reset_orphans(struct sock *sk)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ /* vmci_transport.c doesn't take sk_lock here either. At least we're
+ * under vsock_table_lock so the sock cannot disappear while we're
+ * executing.
+ */
+
+ if (!vhost_vsock_get(vsk->local_addr.svm_cid)) {
+ sock_set_flag(sk, SOCK_DONE);
+ vsk->peer_shutdown = SHUTDOWN_MASK;
+ sk->sk_state = SS_UNCONNECTED;
+ sk->sk_err = ECONNRESET;
+ sk->sk_error_report(sk);
+ }
+}
+
+static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
+{
+ struct vhost_vsock *vsock = file->private_data;
+
+ spin_lock_bh(&vhost_vsock_lock);
+ list_del(&vsock->list);
+ spin_unlock_bh(&vhost_vsock_lock);
+
+ /* Iterating over all connections for all CIDs to find orphans is
+ * inefficient. Room for improvement here. */
+ vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+
+ vhost_vsock_stop(vsock);
+ vhost_vsock_flush(vsock);
+ vhost_dev_stop(&vsock->dev);
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ while (!list_empty(&vsock->send_pkt_list)) {
+ struct virtio_vsock_pkt *pkt;
+
+ pkt = list_first_entry(&vsock->send_pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ vhost_dev_cleanup(&vsock->dev, false);
+ kfree(vsock->dev.vqs);
+ vhost_vsock_free(vsock);
+ return 0;
+}
+
+static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
+{
+ struct vhost_vsock *other;
+
+ /* Refuse reserved CIDs */
+ if (guest_cid <= VMADDR_CID_HOST ||
+ guest_cid == U32_MAX)
+ return -EINVAL;
+
+ /* 64-bit CIDs are not yet supported */
+ if (guest_cid > U32_MAX)
+ return -EINVAL;
+
+ /* Refuse if CID is already in use */
+ other = vhost_vsock_get(guest_cid);
+ if (other && other != vsock)
+ return -EADDRINUSE;
+
+ spin_lock_bh(&vhost_vsock_lock);
+ vsock->guest_cid = guest_cid;
+ spin_unlock_bh(&vhost_vsock_lock);
+
+ return 0;
+}
+
+static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
+{
+ struct vhost_virtqueue *vq;
+ int i;
+
+ if (features & ~VHOST_VSOCK_FEATURES)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&vsock->dev.mutex);
+ if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ !vhost_log_access_ok(&vsock->dev)) {
+ mutex_unlock(&vsock->dev.mutex);
+ return -EFAULT;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
+ vq = &vsock->vqs[i];
+ mutex_lock(&vq->mutex);
+ vq->acked_features = features;
+ mutex_unlock(&vq->mutex);
+ }
+ mutex_unlock(&vsock->dev.mutex);
+ return 0;
+}
+
+static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_vsock *vsock = f->private_data;
+ void __user *argp = (void __user *)arg;
+ u64 guest_cid;
+ u64 features;
+ int start;
+ int r;
+
+ switch (ioctl) {
+ case VHOST_VSOCK_SET_GUEST_CID:
+ if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
+ return -EFAULT;
+ return vhost_vsock_set_cid(vsock, guest_cid);
+ case VHOST_VSOCK_SET_RUNNING:
+ if (copy_from_user(&start, argp, sizeof(start)))
+ return -EFAULT;
+ if (start)
+ return vhost_vsock_start(vsock);
+ else
+ return vhost_vsock_stop(vsock);
+ case VHOST_GET_FEATURES:
+ features = VHOST_VSOCK_FEATURES;
+ if (copy_to_user(argp, &features, sizeof(features)))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES:
+ if (copy_from_user(&features, argp, sizeof(features)))
+ return -EFAULT;
+ return vhost_vsock_set_features(vsock, features);
+ default:
+ mutex_lock(&vsock->dev.mutex);
+ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
+ if (r == -ENOIOCTLCMD)
+ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
+ else
+ vhost_vsock_flush(vsock);
+ mutex_unlock(&vsock->dev.mutex);
+ return r;
+ }
+}
+
+static const struct file_operations vhost_vsock_fops = {
+ .owner = THIS_MODULE,
+ .open = vhost_vsock_dev_open,
+ .release = vhost_vsock_dev_release,
+ .llseek = noop_llseek,
+ .unlocked_ioctl = vhost_vsock_dev_ioctl,
+};
+
+static struct miscdevice vhost_vsock_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "vhost-vsock",
+ .fops = &vhost_vsock_fops,
+};
+
+static struct virtio_transport vhost_transport = {
+ .transport = {
+ .get_local_cid = vhost_transport_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ .send_pkt = vhost_transport_send_pkt,
+};
+
+static int __init vhost_vsock_init(void)
+{
+ int ret;
+
+ ret = vsock_core_init(&vhost_transport.transport);
+ if (ret < 0)
+ return ret;
+ return misc_register(&vhost_vsock_misc);
+};
+
+static void __exit vhost_vsock_exit(void)
+{
+ misc_deregister(&vhost_vsock_misc);
+ vsock_core_exit();
+};
+
+module_init(vhost_vsock_init);
+module_exit(vhost_vsock_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Asias He");
+MODULE_DESCRIPTION("vhost transport for vsock ");
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index ab3731917bac..b30647697774 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -169,4 +169,9 @@ struct vhost_scsi_target {
#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
+/* VHOST_VSOCK specific defines */
+
+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
+#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
+
#endif
--
2.11.0

View File

@@ -0,0 +1,106 @@
From 17b7a6b41700d4a075c228526761d8c1d2d555d2 Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 28 Jul 2016 15:36:35 +0100
Subject: [PATCH 10/44] VSOCK: Add Makefile and Kconfig
Enable virtio-vsock and vhost-vsock.
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 304ba62fd4e670c1a5784585da0fac9f7309ef6c)
---
drivers/vhost/Kconfig | 14 ++++++++++++++
drivers/vhost/Makefile | 4 ++++
net/vmw_vsock/Kconfig | 20 ++++++++++++++++++++
net/vmw_vsock/Makefile | 6 ++++++
4 files changed, 44 insertions(+)
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 533eaf04f12f..2b5f588f5b1e 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -21,6 +21,20 @@ config VHOST_SCSI
Say M here to enable the vhost_scsi TCM fabric module
for use with virtio-scsi guests
+config VHOST_VSOCK
+ tristate "vhost virtio-vsock driver"
+ depends on VSOCKETS && EVENTFD
+ select VIRTIO_VSOCKETS_COMMON
+ select VHOST
+ default n
+ ---help---
+ This kernel module can be loaded in the host kernel to provide AF_VSOCK
+ sockets for communicating with guests. The guests must have the
+ virtio_transport.ko driver loaded to use the virtio-vsock device.
+
+ To compile this driver as a module, choose M here: the module will be called
+ vhost_vsock.
+
config VHOST_RING
tristate
---help---
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index e0441c34db1c..6b012b986b57 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -4,5 +4,9 @@ vhost_net-y := net.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
vhost_scsi-y := scsi.o
+obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
+vhost_vsock-y := vsock.o
+
obj-$(CONFIG_VHOST_RING) += vringh.o
+
obj-$(CONFIG_VHOST) += vhost.o
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 14810abedc2e..8831e7c42167 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vmw_vsock_vmci_transport. If unsure, say N.
+
+config VIRTIO_VSOCKETS
+ tristate "virtio transport for Virtual Sockets"
+ depends on VSOCKETS && VIRTIO
+ select VIRTIO_VSOCKETS_COMMON
+ help
+ This module implements a virtio transport for Virtual Sockets.
+
+ Enable this transport if your Virtual Machine host supports Virtual
+ Sockets over virtio.
+
+ To compile this driver as a module, choose M here: the module will be
+ called vmw_vsock_virtio_transport. If unsure, say N.
+
+config VIRTIO_VSOCKETS_COMMON
+ tristate
+ help
+ This option is selected by any driver which needs to access
+ the virtio_vsock. The module will be called
+ vmw_vsock_virtio_transport_common.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 2ce52d70f224..bc27c70e0e59 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,7 +1,13 @@
obj-$(CONFIG_VSOCKETS) += vsock.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
+obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
+obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
vsock-y += af_vsock.o vsock_addr.o
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
+
+vmw_vsock_virtio_transport-y += virtio_transport.o
+
+vmw_vsock_virtio_transport_common-y += virtio_transport_common.o
--
2.11.0

View File

@@ -0,0 +1,33 @@
From 2eb16145fb76667bcc47b02bf3708ff2fc6389fb Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyj.lk@gmail.com>
Date: Tue, 2 Aug 2016 13:50:42 +0000
Subject: [PATCH 11/44] VSOCK: Use kvfree()
Use kvfree() instead of open-coding it.
Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit b226acab2f6aaa45c2af27279b63f622b23a44bd)
---
drivers/vhost/vsock.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 028ca16c2d36..0ddf3a2dbfc4 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -434,10 +434,7 @@ err:
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
- if (is_vmalloc_addr(vsock))
- vfree(vsock);
- else
- kfree(vsock);
+ kvfree(vsock);
}
static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
--
2.11.0

View File

@@ -0,0 +1,53 @@
From de224dfc853e82a22203479455d8fd3192e9b535 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 4 Aug 2016 14:52:53 +0100
Subject: [PATCH 12/44] vhost/vsock: fix vhost virtio_vsock_pkt use-after-free
Stash the packet length in a local variable before handing over
ownership of the packet to virtio_transport_recv_pkt() or
virtio_transport_free_pkt().
This patch solves the use-after-free since pkt is no longer guaranteed
to be alive.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 3fda5d6e580193fa005014355b3a61498f1b3ae0)
---
drivers/vhost/vsock.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0ddf3a2dbfc4..e3b30ea9ece5 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
vhost_disable_notify(&vsock->dev, vq);
for (;;) {
+ u32 len;
+
if (!vhost_vsock_more_replies(vsock)) {
/* Stop tx until the device processes already
* pending replies. Leave tx virtqueue
@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
continue;
}
+ len = pkt->len;
+
/* Only accept correctly addressed packets */
if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
virtio_transport_recv_pkt(pkt);
else
virtio_transport_free_pkt(pkt);
- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
added = true;
}
--
2.11.0

View File

@@ -0,0 +1,28 @@
From cad999979a54a6b0a301544645ca2ea99a2cb695 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri, 5 Aug 2016 13:52:09 +0100
Subject: [PATCH 13/44] virtio-vsock: fix include guard typo
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 28ad55578b8a76390d966b09da8c7fa3644f5140)
---
include/uapi/linux/virtio_vsock.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 6b011c19b50f..1d57ed3d84d2 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -32,7 +32,7 @@
*/
#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
-#define _UAPI_LINUX_VIRTIO_VOSCK_H
+#define _UAPI_LINUX_VIRTIO_VSOCK_H
#include <linux/types.h>
#include <linux/virtio_ids.h>
--
2.11.0

View File

@@ -0,0 +1,61 @@
From 20fcf536994ef51a2786d1c41c143d0278fc4afc Mon Sep 17 00:00:00 2001
From: Gerard Garcia <ggarcia@deic.uab.cat>
Date: Wed, 10 Aug 2016 17:24:34 +0200
Subject: [PATCH 14/44] vhost/vsock: drop space available check for TX vq
Remove unnecessary use of enable/disable callback notifications
and the incorrect more space available check.
The virtio_transport_tx_work handles when the TX virtqueue
has more buffers available.
Signed-off-by: Gerard Garcia <ggarcia@deic.uab.cat>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 21bc54fc0cdc31de72b57d2b3c79cf9c2b83cf39)
---
net/vmw_vsock/virtio_transport.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 699dfabdbccd..936d7eee62d0 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
vq = vsock->vqs[VSOCK_VQ_TX];
- /* Avoid unnecessary interrupts while we're processing the ring */
- virtqueue_disable_cb(vq);
-
for (;;) {
struct virtio_vsock_pkt *pkt;
struct scatterlist hdr, buf, *sgs[2];
@@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
spin_unlock_bh(&vsock->send_pkt_list_lock);
- virtqueue_enable_cb(vq);
break;
}
@@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work)
}
ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+ /* Usually this means that there is no more space available in
+ * the vq
+ */
if (ret < 0) {
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
-
- if (!virtqueue_enable_cb(vq) && ret == -ENOSPC)
- continue; /* retry now that we have more space */
break;
}
--
2.11.0

View File

@@ -0,0 +1,31 @@
From e2d417429738c3a8da5086a4f9bd815fca3ba0a6 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@docker.com>
Date: Mon, 4 Apr 2016 14:50:10 +0100
Subject: [PATCH 15/44] VSOCK: Only allow host network namespace to use
AF_VSOCK.
The VSOCK addressing schema does not really lend itself to simply creating an
alternative end point address within a namespace.
Signed-off-by: Ian Campbell <ian.campbell@docker.com>
---
net/vmw_vsock/af_vsock.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 17dbbe64cd73..1bb1b016e945 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1852,6 +1852,9 @@ static const struct proto_ops vsock_stream_ops = {
static int vsock_create(struct net *net, struct socket *sock,
int protocol, int kern)
{
+ if (!net_eq(net, &init_net))
+ return -EAFNOSUPPORT;
+
if (!sock)
return -EINVAL;
--
2.11.0

View File

@@ -0,0 +1,63 @@
From 4331f8b43f608d81e14b5caee4b0c24cd896eb4d Mon Sep 17 00:00:00 2001
From: Jake Oshins <jakeo@microsoft.com>
Date: Mon, 14 Dec 2015 16:01:41 -0800
Subject: [PATCH 16/44] drivers:hv: Define the channel type for Hyper-V PCI
Express pass-through
This defines the channel type for PCI front-ends in Hyper-V VMs.
Signed-off-by: Jake Oshins <jakeo@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 3053c762444a83ec6a8777f9476668b23b8ab180)
---
drivers/hv/channel_mgmt.c | 3 +++
include/linux/hyperv.h | 11 +++++++++++
2 files changed, 14 insertions(+)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 37238dffd947..a562318b856b 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -359,6 +359,7 @@ enum {
SCSI,
NIC,
ND_NIC,
+ PCIE,
MAX_PERF_CHN,
};
@@ -376,6 +377,8 @@ static const struct hv_vmbus_device_id hp_devs[] = {
{ HV_NIC_GUID, },
/* NetworkDirect Guest RDMA */
{ HV_ND_GUID, },
+ /* PCI Express Pass Through */
+ { HV_PCIE_GUID, },
};
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ae6a711dcd1d..10dda1e3b560 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1156,6 +1156,17 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
}
/*
+ * PCI Express Pass Through
+ * {44C4F61D-4444-4400-9D52-802E27EDE19F}
+ */
+
+#define HV_PCIE_GUID \
+ .guid = { \
+ 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \
+ 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \
+ }
+
+/*
* Common header for Hyper-V ICs
*/
--
2.11.0

View File

@@ -0,0 +1,297 @@
From ac16740f1c65a5b960e288a96965053171f654d5 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Mon, 14 Dec 2015 16:01:43 -0800
Subject: [PATCH 17/44] Drivers: hv: vmbus: Use uuid_le type consistently
Consistently use uuid_le type in the Hyper-V driver code.
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit af3ff643ea91ba64dd8d0b1cbed54d44512f96cd)
---
drivers/hv/channel_mgmt.c | 2 +-
drivers/hv/vmbus_drv.c | 10 ++---
include/linux/hyperv.h | 92 ++++++++++++++---------------------------
include/linux/mod_devicetable.h | 2 +-
scripts/mod/file2alias.c | 2 +-
5 files changed, 40 insertions(+), 68 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index a562318b856b..339277b76468 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -409,7 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui
struct cpumask *alloced_mask;
for (i = IDE; i < MAX_PERF_CHN; i++) {
- if (!memcmp(type_guid->b, hp_devs[i].guid,
+ if (!memcmp(type_guid->b, &hp_devs[i].guid,
sizeof(uuid_le))) {
perf_chn = true;
break;
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 509ed9731630..6ce2bf81dae3 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -533,7 +533,7 @@ static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
static const uuid_le null_guid;
-static inline bool is_null_guid(const __u8 *guid)
+static inline bool is_null_guid(const uuid_le *guid)
{
if (memcmp(guid, &null_guid, sizeof(uuid_le)))
return false;
@@ -546,9 +546,9 @@ static inline bool is_null_guid(const __u8 *guid)
*/
static const struct hv_vmbus_device_id *hv_vmbus_get_id(
const struct hv_vmbus_device_id *id,
- const __u8 *guid)
+ const uuid_le *guid)
{
- for (; !is_null_guid(id->guid); id++)
+ for (; !is_null_guid(&id->guid); id++)
if (!memcmp(&id->guid, guid, sizeof(uuid_le)))
return id;
@@ -565,7 +565,7 @@ static int vmbus_match(struct device *device, struct device_driver *driver)
struct hv_driver *drv = drv_to_hv_drv(driver);
struct hv_device *hv_dev = device_to_hv_device(device);
- if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b))
+ if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type))
return 1;
return 0;
@@ -582,7 +582,7 @@ static int vmbus_probe(struct device *child_device)
struct hv_device *dev = device_to_hv_device(child_device);
const struct hv_vmbus_device_id *dev_id;
- dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b);
+ dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type);
if (drv->probe) {
ret = drv->probe(dev, dev_id);
if (ret != 0)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 10dda1e3b560..4712d7d07b8c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1012,6 +1012,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
.guid = { g0, g1, g2, g3, g4, g5, g6, g7, \
g8, g9, ga, gb, gc, gd, ge, gf },
+
+
/*
* GUID definitions of various offer types - services offered to the guest.
*/
@@ -1021,118 +1023,94 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
* {f8615163-df3e-46c5-913f-f2d2f965ed0e}
*/
#define HV_NIC_GUID \
- .guid = { \
- 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, \
- 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e \
- }
+ .guid = UUID_LE(0xf8615163, 0xdf3e, 0x46c5, 0x91, 0x3f, \
+ 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e)
/*
* IDE GUID
* {32412632-86cb-44a2-9b5c-50d1417354f5}
*/
#define HV_IDE_GUID \
- .guid = { \
- 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \
- 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 \
- }
+ .guid = UUID_LE(0x32412632, 0x86cb, 0x44a2, 0x9b, 0x5c, \
+ 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5)
/*
* SCSI GUID
* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}
*/
#define HV_SCSI_GUID \
- .guid = { \
- 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \
- 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f \
- }
+ .guid = UUID_LE(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \
+ 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f)
/*
* Shutdown GUID
* {0e0b6031-5213-4934-818b-38d90ced39db}
*/
#define HV_SHUTDOWN_GUID \
- .guid = { \
- 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49, \
- 0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb \
- }
+ .guid = UUID_LE(0x0e0b6031, 0x5213, 0x4934, 0x81, 0x8b, \
+ 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb)
/*
* Time Synch GUID
* {9527E630-D0AE-497b-ADCE-E80AB0175CAF}
*/
#define HV_TS_GUID \
- .guid = { \
- 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, \
- 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf \
- }
+ .guid = UUID_LE(0x9527e630, 0xd0ae, 0x497b, 0xad, 0xce, \
+ 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf)
/*
* Heartbeat GUID
* {57164f39-9115-4e78-ab55-382f3bd5422d}
*/
#define HV_HEART_BEAT_GUID \
- .guid = { \
- 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, \
- 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d \
- }
+ .guid = UUID_LE(0x57164f39, 0x9115, 0x4e78, 0xab, 0x55, \
+ 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d)
/*
* KVP GUID
* {a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}
*/
#define HV_KVP_GUID \
- .guid = { \
- 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, \
- 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6 \
- }
+ .guid = UUID_LE(0xa9a0f4e7, 0x5a45, 0x4d96, 0xb8, 0x27, \
+ 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6)
/*
* Dynamic memory GUID
* {525074dc-8985-46e2-8057-a307dc18a502}
*/
#define HV_DM_GUID \
- .guid = { \
- 0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46, \
- 0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 \
- }
+ .guid = UUID_LE(0x525074dc, 0x8985, 0x46e2, 0x80, 0x57, \
+ 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02)
/*
* Mouse GUID
* {cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}
*/
#define HV_MOUSE_GUID \
- .guid = { \
- 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c, \
- 0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \
- }
+ .guid = UUID_LE(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \
+ 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a)
/*
* VSS (Backup/Restore) GUID
*/
#define HV_VSS_GUID \
- .guid = { \
- 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, \
- 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \
- }
+ .guid = UUID_LE(0x35fa2e29, 0xea23, 0x4236, 0x96, 0xae, \
+ 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40)
/*
* Synthetic Video GUID
* {DA0A7802-E377-4aac-8E77-0558EB1073F8}
*/
#define HV_SYNTHVID_GUID \
- .guid = { \
- 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a, \
- 0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 \
- }
+ .guid = UUID_LE(0xda0a7802, 0xe377, 0x4aac, 0x8e, 0x77, \
+ 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8)
/*
* Synthetic FC GUID
* {2f9bcc4a-0069-4af3-b76b-6fd0be528cda}
*/
#define HV_SYNTHFC_GUID \
- .guid = { \
- 0x4A, 0xCC, 0x9B, 0x2F, 0x69, 0x00, 0xF3, 0x4A, \
- 0xB7, 0x6B, 0x6F, 0xD0, 0xBE, 0x52, 0x8C, 0xDA \
- }
+ .guid = UUID_LE(0x2f9bcc4a, 0x0069, 0x4af3, 0xb7, 0x6b, \
+ 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda)
/*
* Guest File Copy Service
@@ -1140,20 +1118,16 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
*/
#define HV_FCOPY_GUID \
- .guid = { \
- 0xE3, 0x4B, 0xD1, 0x34, 0xE4, 0xDE, 0xC8, 0x41, \
- 0x9A, 0xE7, 0x6B, 0x17, 0x49, 0x77, 0xC1, 0x92 \
- }
+ .guid = UUID_LE(0x34d14be3, 0xdee4, 0x41c8, 0x9a, 0xe7, \
+ 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92)
/*
* NetworkDirect. This is the guest RDMA service.
* {8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}
*/
#define HV_ND_GUID \
- .guid = { \
- 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b, \
- 0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 \
- }
+ .guid = UUID_LE(0x8c2eaf3d, 0x32a7, 0x4b09, 0xab, 0x99, \
+ 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01)
/*
* PCI Express Pass Through
@@ -1161,10 +1135,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
*/
#define HV_PCIE_GUID \
- .guid = { \
- 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \
- 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \
- }
+ .guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \
+ 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f)
/*
* Common header for Hyper-V ICs
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 64f36e09a790..6e4c645e1c0d 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -404,7 +404,7 @@ struct virtio_device_id {
* For Hyper-V devices we use the device guid as the id.
*/
struct hv_vmbus_device_id {
- __u8 guid[16];
+ uuid_le guid;
kernel_ulong_t driver_data; /* Data private to the driver */
};
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 9f5cdd49ff0b..8e8c69bee78f 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -917,7 +917,7 @@ static int do_vmbus_entry(const char *filename, void *symval,
char guid_name[(sizeof(*guid) + 1) * 2];
for (i = 0; i < (sizeof(*guid) * 2); i += 2)
- sprintf(&guid_name[i], "%02x", TO_NATIVE((*guid)[i/2]));
+ sprintf(&guid_name[i], "%02x", TO_NATIVE((guid->b)[i/2]));
strcpy(alias, "vmbus:");
strcat(alias, guid_name);
--
2.11.0

View File

@@ -0,0 +1,55 @@
From bd9affada850f2723a678ec65910d5547184843e Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Mon, 14 Dec 2015 16:01:44 -0800
Subject: [PATCH 18/44] Drivers: hv: vmbus: Use uuid_le_cmp() for comparing
GUIDs
Use uuid_le_cmp() for comparing GUIDs.
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 4ae9250893485f380275e7d5cb291df87c4d9710)
---
drivers/hv/channel_mgmt.c | 3 +--
drivers/hv/vmbus_drv.c | 4 ++--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 339277b76468..9b4525c56376 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -409,8 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui
struct cpumask *alloced_mask;
for (i = IDE; i < MAX_PERF_CHN; i++) {
- if (!memcmp(type_guid->b, &hp_devs[i].guid,
- sizeof(uuid_le))) {
+ if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) {
perf_chn = true;
break;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6ce2bf81dae3..7973aa55fec8 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -535,7 +535,7 @@ static const uuid_le null_guid;
static inline bool is_null_guid(const uuid_le *guid)
{
- if (memcmp(guid, &null_guid, sizeof(uuid_le)))
+ if (uuid_le_cmp(*guid, null_guid))
return false;
return true;
}
@@ -549,7 +549,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(
const uuid_le *guid)
{
for (; !is_null_guid(&id->guid); id++)
- if (!memcmp(&id->guid, guid, sizeof(uuid_le)))
+ if (!uuid_le_cmp(id->guid, *guid))
return id;
return NULL;
--
2.11.0

View File

@@ -0,0 +1,42 @@
From ed712a86d76146f4e5279ac473ce346f6acaaae1 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 14 Dec 2015 16:01:48 -0800
Subject: [PATCH 19/44] Drivers: hv: vmbus: do sanity check of channel state in
vmbus_close_internal()
This fixes an incorrect assumption of channel state in the function.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 64b7faf903dae2df94d89edf2c688b16751800e4)
---
drivers/hv/channel.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 1ef37c727572..2889d97c03b1 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -512,6 +512,18 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
tasklet = hv_context.event_dpc[channel->target_cpu];
tasklet_disable(tasklet);
+ /*
+ * In case a device driver's probe() fails (e.g.,
+ * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
+ * rescinded later (e.g., we dynamically disble an Integrated Service
+ * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
+ * here we should skip most of the below cleanup work.
+ */
+ if (channel->state != CHANNEL_OPENED_STATE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
channel->state = CHANNEL_OPEN_STATE;
channel->sc_creation_callback = NULL;
/* Stop callback and cancel the timer asap */
--
2.11.0

View File

@@ -0,0 +1,74 @@
From d6f12dc4d3efb1005519e87382c2800843dd3e0d Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 14 Dec 2015 16:01:50 -0800
Subject: [PATCH 20/44] Drivers: hv: vmbus: release relid on error in
vmbus_process_offer()
We want to simplify vmbus_onoffer_rescind() by not invoking
hv_process_channel_removal(NULL, ...).
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit f52078cf5711ce47c113a58702b35c8ff5f212f5)
---
drivers/hv/channel_mgmt.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 9b4525c56376..8529dd2ebc3d 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -178,19 +178,22 @@ static void percpu_channel_deq(void *arg)
}
-void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
+static void vmbus_release_relid(u32 relid)
{
struct vmbus_channel_relid_released msg;
- unsigned long flags;
- struct vmbus_channel *primary_channel;
memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
msg.child_relid = relid;
msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
+}
- if (channel == NULL)
- return;
+void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
+{
+ unsigned long flags;
+ struct vmbus_channel *primary_channel;
+
+ vmbus_release_relid(relid);
BUG_ON(!channel->rescind);
@@ -337,6 +340,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
return;
err_deq_chan:
+ vmbus_release_relid(newchannel->offermsg.child_relid);
+
spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
list_del(&newchannel->listentry);
spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
@@ -640,7 +645,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
channel = relid2channel(rescind->child_relid);
if (channel == NULL) {
- hv_process_channel_removal(NULL, rescind->child_relid);
+ /*
+ * This is very impossible, because in
+ * vmbus_process_offer(), we have already invoked
+ * vmbus_release_relid() on error.
+ */
return;
}
--
2.11.0

View File

@@ -0,0 +1,116 @@
From 0d14411db1498d805f8b4b060295ca77e0c872cd Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 14 Dec 2015 16:01:51 -0800
Subject: [PATCH 21/44] Drivers: hv: vmbus: channge
vmbus_connection.channel_lock to mutex
spinlock is unnecessary here.
mutex is enough.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit d6f591e339d23f434efda11917da511870891472)
---
drivers/hv/channel_mgmt.c | 12 ++++++------
drivers/hv/connection.c | 7 +++----
drivers/hv/hyperv_vmbus.h | 2 +-
3 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 8529dd2ebc3d..306c7dff6c77 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -207,9 +207,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
}
if (channel->primary_channel == NULL) {
- spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
+ mutex_lock(&vmbus_connection.channel_mutex);
list_del(&channel->listentry);
- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+ mutex_unlock(&vmbus_connection.channel_mutex);
primary_channel = channel;
} else {
@@ -254,7 +254,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
unsigned long flags;
/* Make sure this is a new offer */
- spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
+ mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (!uuid_le_cmp(channel->offermsg.offer.if_type,
@@ -270,7 +270,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
list_add_tail(&newchannel->listentry,
&vmbus_connection.chn_list);
- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+ mutex_unlock(&vmbus_connection.channel_mutex);
if (!fnew) {
/*
@@ -342,9 +342,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
err_deq_chan:
vmbus_release_relid(newchannel->offermsg.child_relid);
- spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
+ mutex_lock(&vmbus_connection.channel_mutex);
list_del(&newchannel->listentry);
- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+ mutex_unlock(&vmbus_connection.channel_mutex);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 4fc2e8836e60..521f48ed188e 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -146,7 +146,7 @@ int vmbus_connect(void)
spin_lock_init(&vmbus_connection.channelmsg_lock);
INIT_LIST_HEAD(&vmbus_connection.chn_list);
- spin_lock_init(&vmbus_connection.channel_lock);
+ mutex_init(&vmbus_connection.channel_mutex);
/*
* Setup the vmbus event connection for channel interrupt
@@ -282,11 +282,10 @@ struct vmbus_channel *relid2channel(u32 relid)
{
struct vmbus_channel *channel;
struct vmbus_channel *found_channel = NULL;
- unsigned long flags;
struct list_head *cur, *tmp;
struct vmbus_channel *cur_sc;
- spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
+ mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (channel->offermsg.child_relid == relid) {
found_channel = channel;
@@ -305,7 +304,7 @@ struct vmbus_channel *relid2channel(u32 relid)
}
}
}
- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
+ mutex_unlock(&vmbus_connection.channel_mutex);
return found_channel;
}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 12156db2e88e..50b1de7439cf 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -683,7 +683,7 @@ struct vmbus_connection {
/* List of channels */
struct list_head chn_list;
- spinlock_t channel_lock;
+ struct mutex channel_mutex;
struct workqueue_struct *work_queue;
};
--
2.11.0

View File

@@ -0,0 +1,126 @@
From 76f7fdc166bb0b747b666c1e6de0184cd00835b8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 14 Dec 2015 19:02:00 -0800
Subject: [PATCH 22/44] Drivers: hv: remove code duplication between
vmbus_recvpacket()/vmbus_recvpacket_raw()
vmbus_recvpacket() and vmbus_recvpacket_raw() are almost identical but
there are two discrepancies:
1) vmbus_recvpacket() doesn't propagate errors from hv_ringbuffer_read()
which looks like it is not desired.
2) There is an error message printed in packetlen > bufferlen case in
vmbus_recvpacket(). I'm removing it as it is usless for users to see
such messages and /vmbus_recvpacket_raw() doesn't have it.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 667d374064b0cc48b6122101b287908d1b392bdb)
---
drivers/hv/channel.c | 65 ++++++++++++++++++----------------------------------
1 file changed, 22 insertions(+), 43 deletions(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 2889d97c03b1..dd6de7fc442f 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -922,8 +922,10 @@ EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);
*
* Mainly used by Hyper-V drivers.
*/
-int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
- u32 bufferlen, u32 *buffer_actual_len, u64 *requestid)
+static inline int
+__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
+ u32 bufferlen, u32 *buffer_actual_len, u64 *requestid,
+ bool raw)
{
struct vmpacket_descriptor desc;
u32 packetlen;
@@ -941,27 +943,34 @@ int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
return 0;
packetlen = desc.len8 << 3;
- userlen = packetlen - (desc.offset8 << 3);
+ if (!raw)
+ userlen = packetlen - (desc.offset8 << 3);
+ else
+ userlen = packetlen;
*buffer_actual_len = userlen;
- if (userlen > bufferlen) {
-
- pr_err("Buffer too small - got %d needs %d\n",
- bufferlen, userlen);
- return -ETOOSMALL;
- }
+ if (userlen > bufferlen)
+ return -ENOBUFS;
*requestid = desc.trans_id;
/* Copy over the packet to the user buffer */
ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen,
- (desc.offset8 << 3), &signal);
+ raw ? 0 : desc.offset8 << 3, &signal);
if (signal)
vmbus_setevent(channel);
- return 0;
+ return ret;
+}
+
+int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
+ u32 bufferlen, u32 *buffer_actual_len,
+ u64 *requestid)
+{
+ return __vmbus_recvpacket(channel, buffer, bufferlen,
+ buffer_actual_len, requestid, false);
}
EXPORT_SYMBOL(vmbus_recvpacket);
@@ -972,37 +981,7 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
u32 bufferlen, u32 *buffer_actual_len,
u64 *requestid)
{
- struct vmpacket_descriptor desc;
- u32 packetlen;
- int ret;
- bool signal = false;
-
- *buffer_actual_len = 0;
- *requestid = 0;
-
-
- ret = hv_ringbuffer_peek(&channel->inbound, &desc,
- sizeof(struct vmpacket_descriptor));
- if (ret != 0)
- return 0;
-
-
- packetlen = desc.len8 << 3;
-
- *buffer_actual_len = packetlen;
-
- if (packetlen > bufferlen)
- return -ENOBUFS;
-
- *requestid = desc.trans_id;
-
- /* Copy over the entire packet to the user buffer */
- ret = hv_ringbuffer_read(&channel->inbound, buffer, packetlen, 0,
- &signal);
-
- if (signal)
- vmbus_setevent(channel);
-
- return ret;
+ return __vmbus_recvpacket(channel, buffer, bufferlen,
+ buffer_actual_len, requestid, true);
}
EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
--
2.11.0

View File

@@ -0,0 +1,72 @@
From acdf851a460410451073ae3eea087e48c9244770 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 21 Dec 2015 12:21:22 -0800
Subject: [PATCH 23/44] Drivers: hv: vmbus: fix the building warning with
hyperv-keyboard
With the recent change af3ff643ea91ba64dd8d0b1cbed54d44512f96cd
(Drivers: hv: vmbus: Use uuid_le type consistently), we always get this
warning:
CC [M] drivers/input/serio/hyperv-keyboard.o
drivers/input/serio/hyperv-keyboard.c:427:2: warning: missing braces around
initializer [-Wmissing-braces]
{ HV_KBD_GUID, },
^
drivers/input/serio/hyperv-keyboard.c:427:2: warning: (near initialization
for .id_table[0].guid.b.) [-Wmissing-braces]
The patch fixes the warning.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 2048157ad02e65f6327118dd4a7b9c9f1fd12f77)
---
drivers/input/serio/hyperv-keyboard.c | 10 ----------
include/linux/hyperv.h | 8 ++++++++
2 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c
index e74e5d6e5f9f..c948866edf87 100644
--- a/drivers/input/serio/hyperv-keyboard.c
+++ b/drivers/input/serio/hyperv-keyboard.c
@@ -412,16 +412,6 @@ static int hv_kbd_remove(struct hv_device *hv_dev)
return 0;
}
-/*
- * Keyboard GUID
- * {f912ad6d-2b17-48ea-bd65-f927a61c7684}
- */
-#define HV_KBD_GUID \
- .guid = { \
- 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48, \
- 0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 \
- }
-
static const struct hv_vmbus_device_id id_table[] = {
/* Keyboard guid */
{ HV_KBD_GUID, },
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 4712d7d07b8c..9e2de6a7cc96 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1091,6 +1091,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a)
/*
+ * Keyboard GUID
+ * {f912ad6d-2b17-48ea-bd65-f927a61c7684}
+ */
+#define HV_KBD_GUID \
+ .guid = UUID_LE(0xf912ad6d, 0x2b17, 0x48ea, 0xbd, 0x65, \
+ 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84)
+
+/*
* VSS (Backup/Restore) GUID
*/
#define HV_VSS_GUID \
--
2.11.0

View File

@@ -0,0 +1,42 @@
From c952417ede883823ddf02ca6747c19f03728559a Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Tue, 15 Dec 2015 16:27:27 -0800
Subject: [PATCH 24/44] Drivers: hv: vmbus: Treat Fibre Channel devices as
performance critical
For performance critical devices, we distribute the incoming
channel interrupt load across available CPUs in the guest.
Include Fibre channel devices in the set of devices for which
we would distribute the interrupt load.
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 879a650a273bc3efb9d472886b8ced12630ea8ed)
---
drivers/hv/channel_mgmt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 306c7dff6c77..763d0c19c16f 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -362,6 +362,7 @@ err_free_chan:
enum {
IDE = 0,
SCSI,
+ FC,
NIC,
ND_NIC,
PCIE,
@@ -378,6 +379,8 @@ static const struct hv_vmbus_device_id hp_devs[] = {
{ HV_IDE_GUID, },
/* Storage - SCSI */
{ HV_SCSI_GUID, },
+ /* Storage - FC */
+ { HV_SYNTHFC_GUID, },
/* Network */
{ HV_NIC_GUID, },
/* NetworkDirect Guest RDMA */
--
2.11.0

View File

@@ -0,0 +1,355 @@
From b8f2da0e5160cd380ee25b81a1e5f8c922a713fe Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Fri, 25 Dec 2015 20:00:30 -0800
Subject: [PATCH 25/44] Drivers: hv: vmbus: Add vendor and device atttributes
Add vendor and device attributes to VMBUS devices. These will be used
by Hyper-V tools as well user-level RDMA libraries that will use the
vendor/device tuple to discover the RDMA device.
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 7047f17d70fc0599563d30d0791692cb5fe42ae6)
---
Documentation/ABI/stable/sysfs-bus-vmbus | 14 +++
drivers/hv/channel_mgmt.c | 166 +++++++++++++++++++++++--------
drivers/hv/vmbus_drv.c | 21 ++++
include/linux/hyperv.h | 28 ++++++
4 files changed, 186 insertions(+), 43 deletions(-)
diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus
index 636e938d5e33..5d0125f7bcaf 100644
--- a/Documentation/ABI/stable/sysfs-bus-vmbus
+++ b/Documentation/ABI/stable/sysfs-bus-vmbus
@@ -27,3 +27,17 @@ Description: The mapping of which primary/sub channels are bound to which
Virtual Processors.
Format: <channel's child_relid:the bound cpu's number>
Users: tools/hv/lsvmbus
+
+What: /sys/bus/vmbus/devices/vmbus_*/device
+Date: Dec. 2015
+KernelVersion: 4.5
+Contact: K. Y. Srinivasan <kys@microsoft.com>
+Description: The 16 bit device ID of the device
+Users: tools/hv/lsvmbus and user level RDMA libraries
+
+What: /sys/bus/vmbus/devices/vmbus_*/vendor
+Date: Dec. 2015
+KernelVersion: 4.5
+Contact: K. Y. Srinivasan <kys@microsoft.com>
+Description: The 16 bit vendor ID of the device
+Users: tools/hv/lsvmbus and user level RDMA libraries
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 763d0c19c16f..d6c611457601 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -33,8 +33,122 @@
#include "hyperv_vmbus.h"
-static void init_vp_index(struct vmbus_channel *channel,
- const uuid_le *type_guid);
+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
+
+static const struct vmbus_device vmbus_devs[] = {
+ /* IDE */
+ { .dev_type = HV_IDE,
+ HV_IDE_GUID,
+ .perf_device = true,
+ },
+
+ /* SCSI */
+ { .dev_type = HV_SCSI,
+ HV_SCSI_GUID,
+ .perf_device = true,
+ },
+
+ /* Fibre Channel */
+ { .dev_type = HV_FC,
+ HV_SYNTHFC_GUID,
+ .perf_device = true,
+ },
+
+ /* Synthetic NIC */
+ { .dev_type = HV_NIC,
+ HV_NIC_GUID,
+ .perf_device = true,
+ },
+
+ /* Network Direct */
+ { .dev_type = HV_ND,
+ HV_ND_GUID,
+ .perf_device = true,
+ },
+
+ /* PCIE */
+ { .dev_type = HV_PCIE,
+ HV_PCIE_GUID,
+ .perf_device = true,
+ },
+
+ /* Synthetic Frame Buffer */
+ { .dev_type = HV_FB,
+ HV_SYNTHVID_GUID,
+ .perf_device = false,
+ },
+
+ /* Synthetic Keyboard */
+ { .dev_type = HV_KBD,
+ HV_KBD_GUID,
+ .perf_device = false,
+ },
+
+ /* Synthetic MOUSE */
+ { .dev_type = HV_MOUSE,
+ HV_MOUSE_GUID,
+ .perf_device = false,
+ },
+
+ /* KVP */
+ { .dev_type = HV_KVP,
+ HV_KVP_GUID,
+ .perf_device = false,
+ },
+
+ /* Time Synch */
+ { .dev_type = HV_TS,
+ HV_TS_GUID,
+ .perf_device = false,
+ },
+
+ /* Heartbeat */
+ { .dev_type = HV_HB,
+ HV_HEART_BEAT_GUID,
+ .perf_device = false,
+ },
+
+ /* Shutdown */
+ { .dev_type = HV_SHUTDOWN,
+ HV_SHUTDOWN_GUID,
+ .perf_device = false,
+ },
+
+ /* File copy */
+ { .dev_type = HV_FCOPY,
+ HV_FCOPY_GUID,
+ .perf_device = false,
+ },
+
+ /* Backup */
+ { .dev_type = HV_BACKUP,
+ HV_VSS_GUID,
+ .perf_device = false,
+ },
+
+ /* Dynamic Memory */
+ { .dev_type = HV_DM,
+ HV_DM_GUID,
+ .perf_device = false,
+ },
+
+ /* Unknown GUID */
+ { .dev_type = HV_UNKOWN,
+ .perf_device = false,
+ },
+};
+
+static u16 hv_get_dev_type(const uuid_le *guid)
+{
+ u16 i;
+
+ for (i = HV_IDE; i < HV_UNKOWN; i++) {
+ if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
+ return i;
+ }
+ pr_info("Unknown GUID: %pUl\n", guid);
+ return i;
+}
/**
* vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
@@ -252,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
struct vmbus_channel *channel;
bool fnew = true;
unsigned long flags;
+ u16 dev_type;
/* Make sure this is a new offer */
mutex_lock(&vmbus_connection.channel_mutex);
@@ -289,7 +404,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
goto err_free_chan;
}
- init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);
+ dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);
+
+ init_vp_index(newchannel, dev_type);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
@@ -326,6 +443,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
if (!newchannel->device_obj)
goto err_deq_chan;
+ newchannel->device_obj->device_id = dev_type;
/*
* Add the new device to the bus. This will kick off device-driver
* binding which eventually invokes the device driver's AddDevice()
@@ -359,37 +477,6 @@ err_free_chan:
free_channel(newchannel);
}
-enum {
- IDE = 0,
- SCSI,
- FC,
- NIC,
- ND_NIC,
- PCIE,
- MAX_PERF_CHN,
-};
-
-/*
- * This is an array of device_ids (device types) that are performance critical.
- * We attempt to distribute the interrupt load for these devices across
- * all available CPUs.
- */
-static const struct hv_vmbus_device_id hp_devs[] = {
- /* IDE */
- { HV_IDE_GUID, },
- /* Storage - SCSI */
- { HV_SCSI_GUID, },
- /* Storage - FC */
- { HV_SYNTHFC_GUID, },
- /* Network */
- { HV_NIC_GUID, },
- /* NetworkDirect Guest RDMA */
- { HV_ND_GUID, },
- /* PCI Express Pass Through */
- { HV_PCIE_GUID, },
-};
-
-
/*
* We use this state to statically distribute the channel interrupt load.
*/
@@ -406,22 +493,15 @@ static int next_numa_node_id;
* For pre-win8 hosts or non-performance critical channels we assign the
* first CPU in the first NUMA node.
*/
-static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
u32 cur_cpu;
- int i;
- bool perf_chn = false;
+ bool perf_chn = vmbus_devs[dev_type].perf_device;
struct vmbus_channel *primary = channel->primary_channel;
int next_node;
struct cpumask available_mask;
struct cpumask *alloced_mask;
- for (i = IDE; i < MAX_PERF_CHN; i++) {
- if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) {
- perf_chn = true;
- break;
- }
- }
if ((vmbus_proto_version == VERSION_WS2008) ||
(vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
/*
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 7973aa55fec8..de7130c58a62 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -480,6 +480,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
}
static DEVICE_ATTR_RO(channel_vp_mapping);
+static ssize_t vendor_show(struct device *dev,
+ struct device_attribute *dev_attr,
+ char *buf)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t device_show(struct device *dev,
+ struct device_attribute *dev_attr,
+ char *buf)
+{
+ struct hv_device *hv_dev = device_to_hv_device(dev);
+ return sprintf(buf, "0x%x\n", hv_dev->device_id);
+}
+static DEVICE_ATTR_RO(device);
+
/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
static struct attribute *vmbus_attrs[] = {
&dev_attr_id.attr,
@@ -505,6 +523,8 @@ static struct attribute *vmbus_attrs[] = {
&dev_attr_in_read_bytes_avail.attr,
&dev_attr_in_write_bytes_avail.attr,
&dev_attr_channel_vp_mapping.attr,
+ &dev_attr_vendor.attr,
+ &dev_attr_device.attr,
NULL,
};
ATTRIBUTE_GROUPS(vmbus);
@@ -963,6 +983,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type,
memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
memcpy(&child_device_obj->dev_instance, instance,
sizeof(uuid_le));
+ child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */
return child_device_obj;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9e2de6a7cc96..51c98fd6044d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -635,6 +635,32 @@ enum hv_signal_policy {
HV_SIGNAL_POLICY_EXPLICIT,
};
+enum vmbus_device_type {
+ HV_IDE = 0,
+ HV_SCSI,
+ HV_FC,
+ HV_NIC,
+ HV_ND,
+ HV_PCIE,
+ HV_FB,
+ HV_KBD,
+ HV_MOUSE,
+ HV_KVP,
+ HV_TS,
+ HV_HB,
+ HV_SHUTDOWN,
+ HV_FCOPY,
+ HV_BACKUP,
+ HV_DM,
+ HV_UNKOWN,
+};
+
+struct vmbus_device {
+ u16 dev_type;
+ uuid_le guid;
+ bool perf_device;
+};
+
struct vmbus_channel {
/* Unique channel id */
int id;
@@ -961,6 +987,8 @@ struct hv_device {
/* the device instance id of this device */
uuid_le dev_instance;
+ u16 vendor_id;
+ u16 device_id;
struct device device;
--
2.11.0

View File

@@ -0,0 +1,36 @@
From 26e5ac0096b7b4f80dbda30ab21101077ce75286 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:37 -0800
Subject: [PATCH 26/44] Drivers: hv: vmbus: add a helper function to set a
channel's pending send size
This will be used by the coming net/hvsock driver.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 3c75354d043ad546148d6992e40033ecaefc5ea5)
---
include/linux/hyperv.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 51c98fd6044d..934542ac1394 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -818,6 +818,12 @@ static inline void *get_per_channel_state(struct vmbus_channel *c)
return c->per_channel_state;
}
+static inline void set_channel_pending_send_size(struct vmbus_channel *c,
+ u32 size)
+{
+ c->outbound.ring_buffer->pending_send_sz = size;
+}
+
void vmbus_onmessage(void *context);
int vmbus_request_offers(void);
--
2.11.0

View File

@@ -0,0 +1,44 @@
From 47c82abe6f2ddc2de58719c0dc3fb8588b2d6765 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:38 -0800
Subject: [PATCH 27/44] Drivers: hv: vmbus: define the new offer type for
Hyper-V socket (hvsock)
A helper function is also added.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit e8d6ca023efce3bd80050dcd9e708ee3cf8babd4)
---
include/linux/hyperv.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 934542ac1394..a4f105d55881 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -237,6 +237,7 @@ struct vmbus_channel_offer {
#define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100
#define VMBUS_CHANNEL_PARENT_OFFER 0x200
#define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400
+#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000
struct vmpacket_descriptor {
u16 type;
@@ -797,6 +798,12 @@ struct vmbus_channel {
enum hv_signal_policy signal_policy;
};
+static inline bool is_hvsock_channel(const struct vmbus_channel *c)
+{
+ return !!(c->offermsg.offer.chn_flags &
+ VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
+}
+
static inline void set_channel_signal_state(struct vmbus_channel *c,
enum hv_signal_policy policy)
{
--
2.11.0

View File

@@ -0,0 +1,45 @@
From 3f42252209bb88705c6c00609b258f902871bc2a Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:39 -0800
Subject: [PATCH 28/44] Drivers: hv: vmbus: vmbus_sendpacket_ctl: hvsock: avoid
unnecessary signaling
When the hvsock channel's outbound ringbuffer is full (i.e.,
hv_ringbuffer_write() returns -EAGAIN), we should avoid the unnecessary
signaling the host.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 5f363bc38f810d238d1e8b19998625ddec3b8138)
---
drivers/hv/channel.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index dd6de7fc442f..128dcf22e459 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -659,6 +659,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
* If we cannot write to the ring-buffer; signal the host
* even if we may not have written anything. This is a rare
* enough condition that it should not matter.
+ * NOTE: in this case, the hvsock channel is an exception, because
+ * it looks the host side's hvsock implementation has a throttling
+ * mechanism which can hurt the performance otherwise.
*/
if (channel->signal_policy)
@@ -666,7 +669,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
else
kick_q = true;
- if (((ret == 0) && kick_q && signal) || (ret))
+ if (((ret == 0) && kick_q && signal) ||
+ (ret && !is_hvsock_channel(channel)))
vmbus_setevent(channel);
return ret;
--
2.11.0

View File

@@ -0,0 +1,101 @@
From 0dccd1ae4a259fed2c1301d5cc1476eba75919bb Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:40 -0800
Subject: [PATCH 29/44] Drivers: hv: vmbus: define a new VMBus message type for
hvsock
A function to send the type of message is also added.
The coming net/hvsock driver will use this function to proactively request
the host to offer a VMBus channel for a new hvsock connection.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 5c23a1a5c60b0f472cfa61cd7d8279f8aaeb5b64)
---
drivers/hv/channel.c | 15 +++++++++++++++
drivers/hv/channel_mgmt.c | 4 ++++
include/linux/hyperv.h | 13 +++++++++++++
3 files changed, 32 insertions(+)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 128dcf22e459..415f6c7d92a6 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -219,6 +219,21 @@ error0:
}
EXPORT_SYMBOL_GPL(vmbus_open);
+/* Used for Hyper-V Socket: a guest client's connect() to the host */
+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
+ const uuid_le *shv_host_servie_id)
+{
+ struct vmbus_channel_tl_connect_request conn_msg;
+
+ memset(&conn_msg, 0, sizeof(conn_msg));
+ conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST;
+ conn_msg.guest_endpoint_id = *shv_guest_servie_id;
+ conn_msg.host_service_id = *shv_host_servie_id;
+
+ return vmbus_post_msg(&conn_msg, sizeof(conn_msg));
+}
+EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
+
/*
* create_gpadl_header - Creates a gpadl for the specified buffer
*/
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index d6c611457601..60ca25b93b4c 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -958,6 +958,10 @@ struct vmbus_channel_message_table_entry
{CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response},
{CHANNELMSG_UNLOAD, 0, NULL},
{CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response},
+ {CHANNELMSG_18, 0, NULL},
+ {CHANNELMSG_19, 0, NULL},
+ {CHANNELMSG_20, 0, NULL},
+ {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL},
};
/*
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index a4f105d55881..191bc5d0ffbf 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -394,6 +394,10 @@ enum vmbus_channel_message_type {
CHANNELMSG_VERSION_RESPONSE = 15,
CHANNELMSG_UNLOAD = 16,
CHANNELMSG_UNLOAD_RESPONSE = 17,
+ CHANNELMSG_18 = 18,
+ CHANNELMSG_19 = 19,
+ CHANNELMSG_20 = 20,
+ CHANNELMSG_TL_CONNECT_REQUEST = 21,
CHANNELMSG_COUNT
};
@@ -564,6 +568,13 @@ struct vmbus_channel_initiate_contact {
u64 monitor_page2;
} __packed;
+/* Hyper-V socket: guest's connect()-ing to host */
+struct vmbus_channel_tl_connect_request {
+ struct vmbus_channel_message_header header;
+ uuid_le guest_endpoint_id;
+ uuid_le host_service_id;
+} __packed;
+
struct vmbus_channel_version_response {
struct vmbus_channel_message_header header;
u8 version_supported;
@@ -1295,4 +1306,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
extern __u32 vmbus_proto_version;
+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
+ const uuid_le *shv_host_servie_id);
#endif /* _HYPERV_H */
--
2.11.0

View File

@@ -0,0 +1,64 @@
From 373b96e951d271e553d9854267ce65baab0241da Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:41 -0800
Subject: [PATCH 30/44] Drivers: hv: vmbus: add a hvsock flag in struct
hv_driver
Only the coming hv_sock driver has a "true" value for this flag.
We treat the hvsock offers/channels as special VMBus devices.
Since the hv_sock driver handles all the hvsock offers/channels, we need to
tweak vmbus_match() for hv_sock driver, so we introduce this flag.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 8981da320a11217589aa3c50f9e891bcdef07ece)
---
drivers/hv/vmbus_drv.c | 4 ++++
include/linux/hyperv.h | 14 ++++++++++++++
2 files changed, 18 insertions(+)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index de7130c58a62..03fc5d317b22 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -585,6 +585,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver)
struct hv_driver *drv = drv_to_hv_drv(driver);
struct hv_device *hv_dev = device_to_hv_device(device);
+ /* The hv_sock driver handles all hv_sock offers. */
+ if (is_hvsock_channel(hv_dev->channel))
+ return drv->hvsock;
+
if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type))
return 1;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 191bc5d0ffbf..05966e279ec8 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -992,6 +992,20 @@ extern void vmbus_ontimer(unsigned long data);
struct hv_driver {
const char *name;
+ /*
+ * A hvsock offer, which has a VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER
+ * channel flag, actually doesn't mean a synthetic device because the
+ * offer's if_type/if_instance can change for every new hvsock
+ * connection.
+ *
+ * However, to facilitate the notification of new-offer/rescind-offer
+ * from vmbus driver to hvsock driver, we can handle hvsock offer as
+ * a special vmbus device, and hence we need the below flag to
+ * indicate if the driver is the hvsock driver or not: we need to
+ * specially treat the hvosck offer & driver in vmbus_match().
+ */
+ bool hvsock;
+
/* the device type supported by this driver */
uuid_le dev_type;
const struct hv_vmbus_device_id *id_table;
--
2.11.0

View File

@@ -0,0 +1,72 @@
From af38922b309452853cb5cf1e67a07aab4f0da8dd Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:42 -0800
Subject: [PATCH 31/44] Drivers: hv: vmbus: add a per-channel rescind callback
This will be used by the coming hv_sock driver.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 499e8401a515d04daa986b995da710d2b9737764)
---
drivers/hv/channel_mgmt.c | 11 +++++++++++
include/linux/hyperv.h | 9 +++++++++
2 files changed, 20 insertions(+)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 60ca25b93b4c..76864c98a110 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -741,6 +741,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
spin_unlock_irqrestore(&channel->lock, flags);
if (channel->device_obj) {
+ if (channel->chn_rescind_callback) {
+ channel->chn_rescind_callback(channel);
+ return;
+ }
/*
* We will have to unregister this device from the
* driver core.
@@ -1110,3 +1114,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
+
+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
+ void (*chn_rescind_cb)(struct vmbus_channel *))
+{
+ channel->chn_rescind_callback = chn_rescind_cb;
+}
+EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 05966e279ec8..ad04017ba06f 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -768,6 +768,12 @@ struct vmbus_channel {
void (*sc_creation_callback)(struct vmbus_channel *new_sc);
/*
+ * Channel rescind callback. Some channels (the hvsock ones), need to
+ * register a callback which is invoked in vmbus_onoffer_rescind().
+ */
+ void (*chn_rescind_callback)(struct vmbus_channel *channel);
+
+ /*
* The spinlock to protect the structure. It is being used to protect
* test-and-set access to various attributes of the structure as well
* as all sc_list operations.
@@ -853,6 +859,9 @@ int vmbus_request_offers(void);
void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
void (*sc_cr_cb)(struct vmbus_channel *new_sc));
+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
+ void (*chn_rescind_cb)(struct vmbus_channel *));
+
/*
* Retrieve the (sub) channel on which to send an outgoing request.
* When a primary channel has multiple sub-channels, we choose a
--
2.11.0

View File

@@ -0,0 +1,153 @@
From 9b2cf4ec84e167a677b1deda17e8cef862d795b6 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:43 -0800
Subject: [PATCH 32/44] Drivers: hv: vmbus: add an API
vmbus_hvsock_device_unregister()
The hvsock driver needs this API to release all the resources related
to the channel.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 85d9aa705184a4504d0330017e3956fcdae8a9d6)
---
drivers/hv/channel_mgmt.c | 33 ++++++++++++++++++++++++++++-----
drivers/hv/connection.c | 4 ++--
include/linux/hyperv.h | 2 ++
3 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 76864c98a110..cf311be88cb4 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -310,6 +310,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
vmbus_release_relid(relid);
BUG_ON(!channel->rescind);
+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
if (channel->target_cpu != get_cpu()) {
put_cpu();
@@ -321,9 +322,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
}
if (channel->primary_channel == NULL) {
- mutex_lock(&vmbus_connection.channel_mutex);
list_del(&channel->listentry);
- mutex_unlock(&vmbus_connection.channel_mutex);
primary_channel = channel;
} else {
@@ -367,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
bool fnew = true;
unsigned long flags;
u16 dev_type;
+ int ret;
/* Make sure this is a new offer */
mutex_lock(&vmbus_connection.channel_mutex);
@@ -449,7 +449,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
* binding which eventually invokes the device driver's AddDevice()
* method.
*/
- if (vmbus_device_register(newchannel->device_obj) != 0) {
+ mutex_lock(&vmbus_connection.channel_mutex);
+ ret = vmbus_device_register(newchannel->device_obj);
+ mutex_unlock(&vmbus_connection.channel_mutex);
+
+ if (ret != 0) {
pr_err("unable to add child device object (relid %d)\n",
newchannel->offermsg.child_relid);
kfree(newchannel->device_obj);
@@ -725,6 +729,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
struct device *dev;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
+
+ mutex_lock(&vmbus_connection.channel_mutex);
channel = relid2channel(rescind->child_relid);
if (channel == NULL) {
@@ -733,7 +739,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
* vmbus_process_offer(), we have already invoked
* vmbus_release_relid() on error.
*/
- return;
+ goto out;
}
spin_lock_irqsave(&channel->lock, flags);
@@ -743,7 +749,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
- return;
+ goto out;
}
/*
* We will have to unregister this device from the
@@ -758,8 +764,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
hv_process_channel_removal(channel,
channel->offermsg.child_relid);
}
+
+out:
+ mutex_unlock(&vmbus_connection.channel_mutex);
}
+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
+{
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ BUG_ON(!is_hvsock_channel(channel));
+
+ channel->rescind = true;
+ vmbus_device_unregister(channel->device_obj);
+
+ mutex_unlock(&vmbus_connection.channel_mutex);
+}
+EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
+
+
/*
* vmbus_onoffers_delivered -
* This is invoked when all offers have been delivered.
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 521f48ed188e..09c08b56e3dc 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -285,7 +285,8 @@ struct vmbus_channel *relid2channel(u32 relid)
struct list_head *cur, *tmp;
struct vmbus_channel *cur_sc;
- mutex_lock(&vmbus_connection.channel_mutex);
+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (channel->offermsg.child_relid == relid) {
found_channel = channel;
@@ -304,7 +305,6 @@ struct vmbus_channel *relid2channel(u32 relid)
}
}
}
- mutex_unlock(&vmbus_connection.channel_mutex);
return found_channel;
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ad04017ba06f..993318a6d147 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1071,6 +1071,8 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver,
const char *mod_name);
void vmbus_driver_unregister(struct hv_driver *hv_driver);
+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel);
+
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
resource_size_t min, resource_size_t max,
resource_size_t size, resource_size_t align,
--
2.11.0

View File

@@ -0,0 +1,208 @@
From a62a66fbd2a18f72dfae5f6e3a50e1abb0cf8322 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 27 Jan 2016 22:29:45 -0800
Subject: [PATCH 33/44] Drivers: hv: vmbus: Give control over how the ring
access is serialized
On the channel send side, many of the VMBUS
device drivers explicity serialize access to the
outgoing ring buffer. Give more control to the
VMBUS device drivers in terms how to serialize
accesss to the outgoing ring buffer.
The default behavior will be to aquire the
ring lock to preserve the current behavior.
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit fe760e4d64fe5c17c39e86c410d41f6587ee88bc)
---
drivers/hv/channel.c | 15 +++++++++++----
drivers/hv/channel_mgmt.c | 1 +
drivers/hv/hyperv_vmbus.h | 2 +-
drivers/hv/ring_buffer.c | 13 ++++++++-----
include/linux/hyperv.h | 16 ++++++++++++++++
5 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 415f6c7d92a6..57a1b659d05f 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -639,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
u64 aligned_data = 0;
int ret;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
int num_vecs = ((bufferlen != 0) ? 3 : 1);
@@ -658,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
- &signal);
+ &signal, lock);
/*
* Signalling the host is conditional on many factors:
@@ -738,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
struct kvec bufferlist[3];
u64 aligned_data = 0;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
if (pagecount > MAX_PAGE_BUFFER_COUNT)
return -EINVAL;
@@ -774,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
+ &signal, lock);
/*
* Signalling the host is conditional on many factors:
@@ -837,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct kvec bufferlist[3];
u64 aligned_data = 0;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
packetlen = desc_size + bufferlen;
packetlen_aligned = ALIGN(packetlen, sizeof(u64));
@@ -856,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
+ &signal, lock);
if (ret == 0 && signal)
vmbus_setevent(channel);
@@ -881,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
struct kvec bufferlist[3];
u64 aligned_data = 0;
bool signal = false;
+ bool lock = channel->acquire_ring_lock;
u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
multi_pagebuffer->len);
@@ -919,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
bufferlist[2].iov_base = &aligned_data;
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal);
+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
+ &signal, lock);
if (ret == 0 && signal)
vmbus_setevent(channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index cf311be88cb4..b40f429aaa13 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -259,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void)
return NULL;
channel->id = atomic_inc_return(&chan_num);
+ channel->acquire_ring_lock = true;
spin_lock_init(&channel->inbound_lock);
spin_lock_init(&channel->lock);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 50b1de7439cf..89bb5591498e 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -617,7 +617,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info,
struct kvec *kv_list,
- u32 kv_count, bool *signal);
+ u32 kv_count, bool *signal, bool lock);
int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer,
u32 buflen);
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 70a1a9a22f87..89a428f7dc46 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -388,7 +388,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
*
*/
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
- struct kvec *kv_list, u32 kv_count, bool *signal)
+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
{
int i = 0;
u32 bytes_avail_towrite;
@@ -398,14 +398,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
u32 next_write_location;
u32 old_write;
u64 prev_indices = 0;
- unsigned long flags;
+ unsigned long flags = 0;
for (i = 0; i < kv_count; i++)
totalbytes_towrite += kv_list[i].iov_len;
totalbytes_towrite += sizeof(u64);
- spin_lock_irqsave(&outring_info->ring_lock, flags);
+ if (lock)
+ spin_lock_irqsave(&outring_info->ring_lock, flags);
hv_get_ringbuffer_availbytes(outring_info,
&bytes_avail_toread,
@@ -416,7 +417,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
/* Otherwise, the next time around, we think the ring buffer */
/* is empty since the read index == write index */
if (bytes_avail_towrite <= totalbytes_towrite) {
- spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+ if (lock)
+ spin_unlock_irqrestore(&outring_info->ring_lock, flags);
return -EAGAIN;
}
@@ -447,7 +449,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
hv_set_next_write_location(outring_info, next_write_location);
- spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+ if (lock)
+ spin_unlock_irqrestore(&outring_info->ring_lock, flags);
*signal = hv_need_to_signal(old_write, outring_info);
return 0;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 993318a6d147..6c9695ef757e 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -813,8 +813,24 @@ struct vmbus_channel {
* signaling control.
*/
enum hv_signal_policy signal_policy;
+ /*
+ * On the channel send side, many of the VMBUS
+ * device drivers explicity serialize access to the
+ * outgoing ring buffer. Give more control to the
+ * VMBUS device drivers in terms how to serialize
+ * accesss to the outgoing ring buffer.
+ * The default behavior will be to aquire the
+ * ring lock to preserve the current behavior.
+ */
+ bool acquire_ring_lock;
+
};
+static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
+{
+ c->acquire_ring_lock = state;
+}
+
static inline bool is_hvsock_channel(const struct vmbus_channel *c)
{
return !!(c->offermsg.offer.chn_flags &
--
2.11.0

View File

@@ -0,0 +1,100 @@
From a71950ae5474cdb6dabb2aac9ef05bf51a6331d6 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 26 Feb 2016 15:13:16 -0800
Subject: [PATCH 34/44] Drivers: hv: vmbus: avoid wait_for_completion() on
crash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
wait_for_completion() may sleep, it enables interrupts and this
is something we really want to avoid on crashes because interrupt
handlers can cause other crashes. Switch to the recently introduced
vmbus_wait_for_unload() doing busy wait instead.
Reported-by: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Radim Kr.má<rkrcmar@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 75ff3a8a9168df750b5bd0589e897a6c0517a9f1)
---
drivers/hv/channel_mgmt.c | 4 ++--
drivers/hv/connection.c | 2 +-
drivers/hv/hyperv_vmbus.h | 2 +-
drivers/hv/vmbus_drv.c | 4 ++--
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index b40f429aaa13..f70e35278b94 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -641,7 +641,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
complete(&vmbus_connection.unload_event);
}
-void vmbus_initiate_unload(void)
+void vmbus_initiate_unload(bool crash)
{
struct vmbus_channel_message_header hdr;
@@ -658,7 +658,7 @@ void vmbus_initiate_unload(void)
* vmbus_initiate_unload() is also called on crash and the crash can be
* happening in an interrupt context, where scheduling is impossible.
*/
- if (!in_interrupt())
+ if (!crash)
wait_for_completion(&vmbus_connection.unload_event);
else
vmbus_wait_for_unload();
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09c08b56e3dc..78b8be87844c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -233,7 +233,7 @@ void vmbus_disconnect(void)
/*
* First send the unload request to the host.
*/
- vmbus_initiate_unload();
+ vmbus_initiate_unload(false);
if (vmbus_connection.work_queue) {
drain_workqueue(vmbus_connection.work_queue);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 89bb5591498e..f424c2df6c19 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -756,7 +756,7 @@ void hv_vss_onchannelcallback(void *);
int hv_fcopy_init(struct hv_util_service *);
void hv_fcopy_deinit(void);
void hv_fcopy_onchannelcallback(void *);
-void vmbus_initiate_unload(void);
+void vmbus_initiate_unload(bool crash);
static inline void hv_poll_channel(struct vmbus_channel *channel,
void (*cb)(void *))
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 03fc5d317b22..b0cc6fd74fd0 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1276,7 +1276,7 @@ static void hv_kexec_handler(void)
int cpu;
hv_synic_clockevents_cleanup();
- vmbus_initiate_unload();
+ vmbus_initiate_unload(false);
for_each_online_cpu(cpu)
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
hv_cleanup();
@@ -1284,7 +1284,7 @@ static void hv_kexec_handler(void)
static void hv_crash_handler(struct pt_regs *regs)
{
- vmbus_initiate_unload();
+ vmbus_initiate_unload(true);
/*
* In crash handler we can't schedule synic cleanup for all CPUs,
* doing the cleanup for current CPU only. This should be sufficient
--
2.11.0

View File

@@ -0,0 +1,39 @@
From b0bf62f85f29a28c998b288aa164f98b449776a9 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 26 Feb 2016 15:13:18 -0800
Subject: [PATCH 35/44] Drivers: hv: vmbus: avoid unneeded compiler
optimizations in vmbus_wait_for_unload()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Message header is modified by the hypervisor and we read it in a loop,
we need to prevent compilers from optimizing accesses. There are no such
optimizations at this moment, this is just a future proof.
Suggested-by: Radim Krcmar <rkrcmar@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Radim Kr.má<rkrcmar@redhat.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit d452ab7b4c65dfcaee88a0d6866eeeb98a3d1884)
---
drivers/hv/channel_mgmt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f70e35278b94..c892db5df665 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -605,7 +605,7 @@ static void vmbus_wait_for_unload(void)
bool unloaded = false;
while (1) {
- if (msg->header.message_type == HVMSG_NONE) {
+ if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) {
mdelay(10);
continue;
}
--
2.11.0

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,52 @@
From b6ca7ee0ff47bd5858de7dd01a1e4144f60b02fd Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 21 Mar 2016 02:51:09 -0700
Subject: [PATCH 37/44] net: add the AF_KCM entries to family name tables
This is for the recent kcm driver, which introduces AF_KCM(41) in
b7ac4eb(kcm: Kernel Connection Multiplexor module).
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Signed-off-by: Tom Herbert <tom@herbertland.com>
Origin: https://patchwork.ozlabs.org/patch/600006
---
net/core/sock.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index f4c0917e66b5..ba24aeb6999c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -263,7 +263,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
+ "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -279,7 +280,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
+ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
+ "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -295,7 +297,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
+ "clock-AF_MAX"
};
/*
--
2.11.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,49 @@
From 9f77b052096cbdc1fe56c00b19a95855a8c3849e Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 21 Mar 2016 02:53:08 -0700
Subject: [PATCH 40/44] net: add the AF_HYPERV entries to family name tables
This is for the hv_sock driver, which introduces AF_HYPERV(42).
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Origin: https://patchwork.ozlabs.org/patch/600009
---
net/core/sock.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index ba24aeb6999c..c75ebeec3290 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -264,7 +264,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
"sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
- "sk_lock-AF_MAX"
+ "sk_lock-AF_HYPERV", "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -281,7 +281,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
"slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
- "slock-AF_MAX"
+ "slock-AF_HYPERV", "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -298,7 +298,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
"clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
- "clock-AF_MAX"
+ "clock-AF_HYPERV", "clock-AF_MAX"
};
/*
--
2.11.0

View File

@@ -0,0 +1,133 @@
From 23b4d08adcd276a1fcd8949b09f75f722f5ecaa2 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Sat, 21 May 2016 16:55:50 +0800
Subject: [PATCH 41/44] Drivers: hv: vmbus: fix the race when querying &
updating the percpu list
There is a rare race when we remove an entry from the global list
hv_context.percpu_list[cpu] in hv_process_channel_removal() ->
percpu_channel_deq() -> list_del(): at this time, if vmbus_on_event() ->
process_chn_event() -> pcpu_relid2channel() is trying to query the list,
we can get the general protection fault:
general protection fault: 0000 [#1] SMP
...
RIP: 0010:[<ffffffff81461b6b>] [<ffffffff81461b6b>] vmbus_on_event+0xc4/0x149
Similarly, we also have the issue in the code path: vmbus_process_offer() ->
percpu_channel_enq().
We can resolve the issue by disabling the tasklet when updating the list.
Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Origin: https://github.com/dcui/linux/commit/fbcca73228b9b90911ab30fdf75f532b2b7c07e5
---
drivers/hv/channel.c | 1 +
drivers/hv/channel_mgmt.c | 18 ++++++++++++++++--
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 57a1b659d05f..da76a2e9199a 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -592,6 +592,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
out:
tasklet_enable(tasklet);
+ tasklet_schedule(tasklet);
return ret;
}
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index c892db5df665..0a543170eba0 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
+#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
@@ -307,12 +308,13 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
unsigned long flags;
struct vmbus_channel *primary_channel;
-
- vmbus_release_relid(relid);
+ struct tasklet_struct *tasklet;
BUG_ON(!channel->rescind);
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+ tasklet = hv_context.event_dpc[channel->target_cpu];
+ tasklet_disable(tasklet);
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -321,6 +323,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
percpu_channel_deq(channel);
put_cpu();
}
+ tasklet_enable(tasklet);
+ tasklet_schedule(tasklet);
if (channel->primary_channel == NULL) {
list_del(&channel->listentry);
@@ -342,6 +346,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
&primary_channel->alloced_cpus_in_node);
free_channel(channel);
+
+ vmbus_release_relid(relid);
}
void vmbus_free_channels(void)
@@ -363,6 +369,7 @@ void vmbus_free_channels(void)
*/
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
+ struct tasklet_struct *tasklet;
struct vmbus_channel *channel;
bool fnew = true;
unsigned long flags;
@@ -409,6 +416,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
init_vp_index(newchannel, dev_type);
+ tasklet = hv_context.event_dpc[newchannel->target_cpu];
+ tasklet_disable(tasklet);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -418,6 +427,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
percpu_channel_enq(newchannel);
put_cpu();
}
+ tasklet_enable(tasklet);
+ tasklet_schedule(tasklet);
/*
* This state is used to indicate a successful open
@@ -469,6 +480,7 @@ err_deq_chan:
list_del(&newchannel->listentry);
mutex_unlock(&vmbus_connection.channel_mutex);
+ tasklet_disable(tasklet);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -477,6 +489,8 @@ err_deq_chan:
percpu_channel_deq(newchannel);
put_cpu();
}
+ tasklet_enable(tasklet);
+ tasklet_schedule(tasklet);
err_free_chan:
free_channel(newchannel);
--
2.11.0

View File

@@ -0,0 +1,30 @@
From ffb0e64e9dbe037beb69dda9daf6567582adcac1 Mon Sep 17 00:00:00 2001
From: Rolf Neugebauer <rolf.neugebauer@gmail.com>
Date: Mon, 23 May 2016 18:55:45 +0100
Subject: [PATCH 42/44] vmbus: Don't spam the logs with unknown GUIDs
With Hyper-V sockets device types are introduced on the fly. The pr_info()
then prints a message on every connection, which is way too verbose. Since
there doesn't seem to be an easy way to check for registered services,
disable the pr_info() completely.
Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
---
drivers/hv/channel_mgmt.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 0a543170eba0..120ee22c945e 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -147,7 +147,6 @@ static u16 hv_get_dev_type(const uuid_le *guid)
if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
return i;
}
- pr_info("Unknown GUID: %pUl\n", guid);
return i;
}
--
2.11.0

View File

@@ -0,0 +1,64 @@
From 784c13b38b4413144b146699bb3ab090c291e6c2 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Wed, 17 Feb 2016 16:49:38 -0800
Subject: [PATCH 43/44] fs: add filp_clone_open API
I need an API that allows me to obtain a clone of the current file
pointer to pass in to an exec handler. I've labelled this as an
internal API because I can't see how it would be useful outside of the
fs subsystem. The use case will be a persistent binfmt_misc handler.
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.cz>
---
fs/internal.h | 1 +
fs/open.c | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/fs/internal.h b/fs/internal.h
index 71859c4d0b41..c0022708ff3a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -108,6 +108,7 @@ extern long do_handle_open(int mountdirfd,
struct file_handle __user *ufh, int open_flag);
extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
+extern struct file *filp_clone_open(struct file *);
/*
* inode.c
diff --git a/fs/open.c b/fs/open.c
index 157b9940dd73..9d993f928ea0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1001,6 +1001,26 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(file_open_root);
+struct file *filp_clone_open(struct file *oldfile)
+{
+ struct file *file;
+ int retval;
+
+ file = get_empty_filp();
+ if (IS_ERR(file))
+ return file;
+
+ file->f_flags = oldfile->f_flags;
+ retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred);
+ if (retval) {
+ put_filp(file);
+ return ERR_PTR(retval);
+ }
+
+ return file;
+}
+EXPORT_SYMBOL(filp_clone_open);
+
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_flags op;
--
2.11.0

View File

@@ -0,0 +1,132 @@
From 20127ca4e2ea3b4adfdf2161a915439454005579 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Wed, 17 Feb 2016 16:51:16 -0800
Subject: [PATCH 44/44] binfmt_misc: add persistent opened binary handler for
containers
This patch adds a new flag 'F' to the binfmt handlers. If you pass in
'F' the binary that runs the emulation will be opened immediately and
in future, will be cloned from the open file.
The net effect is that the handler survives both changeroots and mount
namespace changes, making it easy to work with foreign architecture
containers without contaminating the container image with the
emulator.
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
---
fs/binfmt_misc.c | 41 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 78f005f37847..4beb3d9e0001 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -26,6 +26,8 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
+#include "internal.h"
+
#ifdef DEBUG
# define USE_DEBUG 1
#else
@@ -43,6 +45,7 @@ enum {Enabled, Magic};
#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
#define MISC_FMT_OPEN_BINARY (1 << 30)
#define MISC_FMT_CREDENTIALS (1 << 29)
+#define MISC_FMT_OPEN_FILE (1 << 28)
typedef struct {
struct list_head list;
@@ -54,6 +57,7 @@ typedef struct {
char *interpreter; /* filename of interpreter */
char *name;
struct dentry *dentry;
+ struct file *interp_file;
} Node;
static DEFINE_RWLOCK(entries_lock);
@@ -201,7 +205,13 @@ static int load_misc_binary(struct linux_binprm *bprm)
if (retval < 0)
goto error;
- interp_file = open_exec(iname);
+ if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) {
+ interp_file = filp_clone_open(fmt->interp_file);
+ if (!IS_ERR(interp_file))
+ deny_write_access(interp_file);
+ } else {
+ interp_file = open_exec(iname);
+ }
retval = PTR_ERR(interp_file);
if (IS_ERR(interp_file))
goto error;
@@ -285,6 +295,11 @@ static char *check_special_flags(char *sfs, Node *e)
e->flags |= (MISC_FMT_CREDENTIALS |
MISC_FMT_OPEN_BINARY);
break;
+ case 'F':
+ pr_debug("register: flag: F: open interpreter file now\n");
+ p++;
+ e->flags |= MISC_FMT_OPEN_FILE;
+ break;
default:
cont = 0;
}
@@ -543,6 +558,8 @@ static void entry_status(Node *e, char *page)
*dp++ = 'O';
if (e->flags & MISC_FMT_CREDENTIALS)
*dp++ = 'C';
+ if (e->flags & MISC_FMT_OPEN_FILE)
+ *dp++ = 'F';
*dp++ = '\n';
if (!test_bit(Magic, &e->flags)) {
@@ -590,6 +607,11 @@ static void kill_node(Node *e)
}
write_unlock(&entries_lock);
+ if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) {
+ filp_close(e->interp_file, NULL);
+ e->interp_file = NULL;
+ }
+
if (dentry) {
drop_nlink(d_inode(dentry));
d_drop(dentry);
@@ -698,6 +720,21 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
goto out2;
}
+ if (e->flags & MISC_FMT_OPEN_FILE) {
+ struct file *f;
+
+ f = open_exec(e->interpreter);
+ if (IS_ERR(f)) {
+ err = PTR_ERR(f);
+ pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
+ simple_release_fs(&bm_mnt, &entry_count);
+ iput(inode);
+ inode = NULL;
+ goto out2;
+ }
+ e->interp_file = f;
+ }
+
e->dentry = dget(dentry);
inode->i_private = e;
inode->i_fop = &bm_entry_operations;
@@ -716,7 +753,7 @@ out:
if (err) {
kfree(e);
- return -EINVAL;
+ return err;
}
return count;
}
--
2.11.0