diff --git a/alpine/kernel/Dockerfile b/alpine/kernel/Dockerfile index fa8ebc04f..ee0daba10 100644 --- a/alpine/kernel/Dockerfile +++ b/alpine/kernel/Dockerfile @@ -1,6 +1,6 @@ FROM mobylinux/alpine-build-c:f97e13b3a7339af5da5d620fc053c6a6c81b00a6 -ARG KERNEL_VERSION=4.4.25 +ARG KERNEL_VERSION=4.8.2 ENV KERNEL_SOURCE=https://www.kernel.org/pub/linux/kernel/v4.x/linux-${KERNEL_VERSION}.tar.xz @@ -8,10 +8,10 @@ RUN curl -sSL -o linux-${KERNEL_VERSION}.tar.xz ${KERNEL_SOURCE} RUN cat linux-${KERNEL_VERSION}.tar.xz | tar --absolute-names -xJ && mv /linux-${KERNEL_VERSION} /linux -# this is aufs4.4 20160912 +# this is aufs4.8 20161010 ENV AUFS_REPO https://github.com/sfjro/aufs4-standalone -ENV AUFS_BRANCH aufs4.4 -ENV AUFS_COMMIT 7d174ae40b4c9c876ee51aa50fa4ee1f3747de23 +ENV AUFS_BRANCH aufs4.8 +ENV AUFS_COMMIT e9fd128dcb16167417683e199a5feb14f3c9eca8 # Download AUFS RUN git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /aufs && \ diff --git a/alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch b/alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch similarity index 72% rename from alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch rename to alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch index cc76287a8..ef3345f85 100644 --- a/alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch +++ b/alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch @@ -1,8 +1,7 @@ -From 8719b508f509c06a7821d6f8e2fc1fcad84d6fbb Mon Sep 17 00:00:00 2001 +From 98286199f2ba568d31ec78535c12e1818efd7daf Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 4 Apr 2016 14:50:10 +0100 -Subject: [PATCH 15/42] VSOCK: Only allow host network namespace to use - AF_VSOCK. +Subject: [PATCH 1/4] VSOCK: Only allow host network namespace to use AF_VSOCK. The VSOCK addressing schema does not really lend itself to simply creating an alternative end point address within a namespace. @@ -13,19 +12,18 @@ Signed-off-by: Ian Campbell 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index 17dbbe6..1bb1b01 100644 +index 8a398b3..0edc54c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1852,6 +1852,9 @@ static const struct proto_ops vsock_stream_ops = { static int vsock_create(struct net *net, struct socket *sock, - int protocol, int kern) + int protocol, int kern) { + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; + - if (!sock) - return -EINVAL; - --- -2.10.0 + if (!sock) + return -EINVAL; +-- +2.10.1 diff --git a/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch b/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch deleted file mode 100644 index 0fd255114..000000000 --- a/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch +++ /dev/null @@ -1,219 +0,0 @@ -From 622883ec571c468f756195c13726740bdd33a0ee Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 17 Dec 2015 16:53:43 +0800 -Subject: [PATCH 01/42] virtio: make find_vqs() checkpatch.pl-friendly - -checkpatch.pl wants arrays of strings declared as follows: - - static const char * const names[] = { "vq-1", "vq-2", "vq-3" }; - -Currently the find_vqs() function takes a const char *names[] argument -so passing checkpatch.pl's const char * const names[] results in a -compiler error due to losing the second const. - -This patch adjusts the find_vqs() prototype and updates all virtio -transports. This makes it possible for virtio_balloon.c, virtio_input.c, -virtgpu_kms.c, and virtio_rpmsg_bus.c to use the checkpatch.pl-friendly -type. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -Acked-by: Bjorn Andersson -(cherry picked from commit f7ad26ff952b3ca2702d7da03aad0ab1f6c01d7c) ---- - drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- - drivers/misc/mic/card/mic_virtio.c | 2 +- - drivers/remoteproc/remoteproc_virtio.c | 2 +- - drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- - drivers/s390/virtio/kvm_virtio.c | 2 +- - drivers/s390/virtio/virtio_ccw.c | 2 +- - drivers/virtio/virtio_balloon.c | 2 +- - drivers/virtio/virtio_input.c | 2 +- - drivers/virtio/virtio_mmio.c | 2 +- - drivers/virtio/virtio_pci_common.c | 4 ++-- - drivers/virtio/virtio_pci_common.h | 2 +- - drivers/virtio/virtio_pci_modern.c | 2 +- - include/linux/virtio_config.h | 2 +- - 13 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c -index 06496a1..4150873 100644 ---- a/drivers/gpu/drm/virtio/virtgpu_kms.c -+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c -@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) - static vq_callback_t *callbacks[] = { - virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack - }; -- static const char *names[] = { "control", "cursor" }; -+ static const char * const names[] = { "control", "cursor" }; - - struct virtio_gpu_device *vgdev; - /* this will expand later */ -diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c -index e486a0c..f6ed57d 100644 ---- a/drivers/misc/mic/card/mic_virtio.c -+++ b/drivers/misc/mic/card/mic_virtio.c -@@ -311,7 +311,7 @@ unmap: - static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct mic_vdev *mvdev = to_micvdev(vdev); - struct mic_device_ctrl __iomem *dc = mvdev->dc; -diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c -index e1a1023..e44872f 100644 ---- a/drivers/remoteproc/remoteproc_virtio.c -+++ b/drivers/remoteproc/remoteproc_virtio.c -@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) - static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct rproc *rproc = vdev_to_rproc(vdev); - int i, ret; -diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c -index 73354ee..1fcd27c 100644 ---- a/drivers/rpmsg/virtio_rpmsg_bus.c -+++ b/drivers/rpmsg/virtio_rpmsg_bus.c -@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len, - static int rpmsg_probe(struct virtio_device *vdev) - { - vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done }; -- const char *names[] = { "input", "output" }; -+ static const char * const names[] = { "input", "output" }; - struct virtqueue *vqs[2]; - struct virtproc_info *vrp; - void *bufs_va; -diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c -index 53fb975..1d060fd 100644 ---- a/drivers/s390/virtio/kvm_virtio.c -+++ b/drivers/s390/virtio/kvm_virtio.c -@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev) - static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct kvm_device *kdev = to_kvmdev(vdev); - int i; -diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c -index 1b83159..bf2d130 100644 ---- a/drivers/s390/virtio/virtio_ccw.c -+++ b/drivers/s390/virtio/virtio_ccw.c -@@ -635,7 +635,7 @@ out: - static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_ccw_device *vcdev = to_vc_device(vdev); - unsigned long *indicatorp = NULL; -diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c -index 56f7e25..66082c9 100644 ---- a/drivers/virtio/virtio_balloon.c -+++ b/drivers/virtio/virtio_balloon.c -@@ -394,7 +394,7 @@ static int init_vqs(struct virtio_balloon *vb) - { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; -- const char *names[] = { "inflate", "deflate", "stats" }; -+ static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; - - /* -diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c -index c96944b..350a2a5 100644 ---- a/drivers/virtio/virtio_input.c -+++ b/drivers/virtio/virtio_input.c -@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) - struct virtqueue *vqs[2]; - vq_callback_t *cbs[] = { virtinput_recv_events, - virtinput_recv_status }; -- static const char *names[] = { "events", "status" }; -+ static const char * const names[] = { "events", "status" }; - int err; - - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); -diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c -index f499d9d..745c6ee 100644 ---- a/drivers/virtio/virtio_mmio.c -+++ b/drivers/virtio/virtio_mmio.c -@@ -482,7 +482,7 @@ error_available: - static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - unsigned int irq = platform_get_irq(vm_dev->pdev, 0); -diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c -index 2046a68..f6bed86 100644 ---- a/drivers/virtio/virtio_pci_common.c -+++ b/drivers/virtio/virtio_pci_common.c -@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev) - static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[], -+ const char * const names[], - bool use_msix, - bool per_vq_vectors) - { -@@ -376,7 +376,7 @@ error_find: - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - int err; - -diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h -index b976d96..2cc2522 100644 ---- a/drivers/virtio/virtio_pci_common.h -+++ b/drivers/virtio/virtio_pci_common.h -@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev); - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]); -+ const char * const names[]); - const char *vp_bus_name(struct virtio_device *vdev); - - /* Setup the affinity for a virtqueue: -diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c -index 4469202..631021c 100644 ---- a/drivers/virtio/virtio_pci_modern.c -+++ b/drivers/virtio/virtio_pci_modern.c -@@ -423,7 +423,7 @@ err_new_queue: - static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtqueue *vq; -diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h -index e5ce8ab..6e6cb0c 100644 ---- a/include/linux/virtio_config.h -+++ b/include/linux/virtio_config.h -@@ -70,7 +70,7 @@ struct virtio_config_ops { - int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]); -+ const char * const names[]); - void (*del_vqs)(struct virtio_device *); - u64 (*get_features)(struct virtio_device *vdev); - int (*finalize_features)(struct virtio_device *vdev); --- -2.10.0 - diff --git a/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch b/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch similarity index 67% rename from alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch rename to alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch index 8c3619340..aa2a432bb 100644 --- a/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch +++ b/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch @@ -1,8 +1,8 @@ -From cd11346c60451032d97062e25ed025bf692dff91 Mon Sep 17 00:00:00 2001 +From b7c88e0b81e971a99a4213515ea3bce1c136a724 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 21 May 2016 16:55:50 +0800 -Subject: [PATCH 41/42] Drivers: hv: vmbus: fix the race when querying & - updating the percpu list +Subject: [PATCH 2/4] Drivers: hv: vmbus: fix the race when querying & updating + the percpu list There is a rare race when we remove an entry from the global list hv_context.percpu_list[cpu] in hv_process_channel_removal() -> @@ -28,24 +28,24 @@ Origin: https://github.com/dcui/linux/commit/fbcca73228b9b90911ab30fdf75f532b2b7 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 57a1b65..da76a2e 100644 +index 56dd261..75343e0 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -592,6 +592,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - + out: - tasklet_enable(tasklet); + tasklet_enable(tasklet); + tasklet_schedule(tasklet); - - return ret; + + return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index c892db5..0a54317 100644 +index b6c1211..8f4e6070 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - + #include +#include #include @@ -53,81 +53,80 @@ index c892db5..0a54317 100644 #include @@ -307,12 +308,13 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) { - unsigned long flags; - struct vmbus_channel *primary_channel; + unsigned long flags; + struct vmbus_channel *primary_channel; - - vmbus_release_relid(relid); + struct tasklet_struct *tasklet; - - BUG_ON(!channel->rescind); - BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - + + BUG_ON(!channel->rescind); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + + tasklet = hv_context.event_dpc[channel->target_cpu]; + tasklet_disable(tasklet); - if (channel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(channel->target_cpu, + if (channel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(channel->target_cpu, @@ -321,6 +323,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - percpu_channel_deq(channel); - put_cpu(); - } + percpu_channel_deq(channel); + put_cpu(); + } + tasklet_enable(tasklet); + tasklet_schedule(tasklet); - - if (channel->primary_channel == NULL) { - list_del(&channel->listentry); + + if (channel->primary_channel == NULL) { + list_del(&channel->listentry); @@ -342,6 +346,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - &primary_channel->alloced_cpus_in_node); - - free_channel(channel); + &primary_channel->alloced_cpus_in_node); + + free_channel(channel); + + vmbus_release_relid(relid); } - + void vmbus_free_channels(void) @@ -363,6 +369,7 @@ void vmbus_free_channels(void) */ static void vmbus_process_offer(struct vmbus_channel *newchannel) { + struct tasklet_struct *tasklet; - struct vmbus_channel *channel; - bool fnew = true; - unsigned long flags; + struct vmbus_channel *channel; + bool fnew = true; + unsigned long flags; @@ -409,6 +416,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - - init_vp_index(newchannel, dev_type); - + + init_vp_index(newchannel, dev_type); + + tasklet = hv_context.event_dpc[newchannel->target_cpu]; + tasklet_disable(tasklet); - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(newchannel->target_cpu, @@ -418,6 +427,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - percpu_channel_enq(newchannel); - put_cpu(); - } + percpu_channel_enq(newchannel); + put_cpu(); + } + tasklet_enable(tasklet); + tasklet_schedule(tasklet); - - /* - * This state is used to indicate a successful open -@@ -469,6 +480,7 @@ err_deq_chan: - list_del(&newchannel->listentry); - mutex_unlock(&vmbus_connection.channel_mutex); - -+ tasklet_disable(tasklet); - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, -@@ -477,6 +489,8 @@ err_deq_chan: - percpu_channel_deq(newchannel); - put_cpu(); - } -+ tasklet_enable(tasklet); -+ tasklet_schedule(tasklet); - - err_free_chan: - free_channel(newchannel); --- -2.10.0 + /* + * This state is used to indicate a successful open +@@ -469,6 +480,7 @@ err_deq_chan: + list_del(&newchannel->listentry); + mutex_unlock(&vmbus_connection.channel_mutex); + ++ tasklet_disable(tasklet); + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(newchannel->target_cpu, +@@ -477,6 +489,8 @@ err_deq_chan: + percpu_channel_deq(newchannel); + put_cpu(); + } ++ tasklet_enable(tasklet); ++ tasklet_schedule(tasklet); + + err_free_chan: + free_channel(newchannel); +-- +2.10.1 diff --git a/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch b/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch deleted file mode 100644 index ff2c9634d..000000000 --- a/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 8dc15fd8fab55e076a640d1a5d6f34b77e196632 Mon Sep 17 00:00:00 2001 -From: Julia Lawall -Date: Sat, 21 Nov 2015 18:39:17 +0100 -Subject: [PATCH 02/42] VSOCK: constify vmci_transport_notify_ops structures - -The vmci_transport_notify_ops structures are never modified, so declare -them as const. - -Done with the help of Coccinelle. - -Signed-off-by: Julia Lawall -Signed-off-by: David S. Miller -(cherry picked from commit 3b22dae38db1cea9ead3229f08cfb0b69aca5706) ---- - net/vmw_vsock/vmci_transport.h | 2 +- - net/vmw_vsock/vmci_transport_notify.c | 2 +- - net/vmw_vsock/vmci_transport_notify.h | 5 +++-- - net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +- - 4 files changed, 6 insertions(+), 5 deletions(-) - -diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h -index 2ad46f3..1820e74 100644 ---- a/net/vmw_vsock/vmci_transport.h -+++ b/net/vmw_vsock/vmci_transport.h -@@ -121,7 +121,7 @@ struct vmci_transport { - u64 queue_pair_max_size; - u32 detach_sub_id; - union vmci_transport_notify notify; -- struct vmci_transport_notify_ops *notify_ops; -+ const struct vmci_transport_notify_ops *notify_ops; - struct list_head elem; - struct sock *sk; - spinlock_t lock; /* protects sk. */ -diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c -index 9b7f207..fd8cf02 100644 ---- a/net/vmw_vsock/vmci_transport_notify.c -+++ b/net/vmw_vsock/vmci_transport_notify.c -@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) - } - - /* Socket control packet based operations. */ --struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { -+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { - vmci_transport_notify_pkt_socket_init, - vmci_transport_notify_pkt_socket_destruct, - vmci_transport_notify_pkt_poll_in, -diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h -index 7df7932..3c464d3 100644 ---- a/net/vmw_vsock/vmci_transport_notify.h -+++ b/net/vmw_vsock/vmci_transport_notify.h -@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { - void (*process_negotiate) (struct sock *sk); - }; - --extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; --extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; -+extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; -+extern const -+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; - - #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ -diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c -index dc9c792..21e591d 100644 ---- a/net/vmw_vsock/vmci_transport_notify_qstate.c -+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c -@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( - } - - /* Socket always on control packet based operations. */ --struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { -+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { - vmci_transport_notify_pkt_socket_init, - vmci_transport_notify_pkt_socket_destruct, - vmci_transport_notify_pkt_poll_in, --- -2.10.0 - diff --git a/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch b/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch deleted file mode 100644 index 8c0d62952..000000000 --- a/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch +++ /dev/null @@ -1,336 +0,0 @@ -From 761aa629641afa804127aea0e3ce5c95dddfcb17 Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Tue, 22 Mar 2016 17:05:52 +0100 -Subject: [PATCH 03/42] AF_VSOCK: Shrink the area influenced by prepare_to_wait - -When a thread is prepared for waiting by calling prepare_to_wait, sleeping -is not allowed until either the wait has taken place or finish_wait has -been called. The existing code in af_vsock imposed unnecessary no-sleep -assumptions to a broad list of backend functions. -This patch shrinks the influence of prepare_to_wait to the area where it -is strictly needed, therefore relaxing the no-sleep restriction there. - -Signed-off-by: Claudio Imbrenda -Signed-off-by: David S. Miller -(cherry picked from commit f7f9b5e7f8eccfd68ffa7b8d74b07c478bb9e7f0) ---- - net/vmw_vsock/af_vsock.c | 158 +++++++++++++++++++++++++---------------------- - 1 file changed, 85 insertions(+), 73 deletions(-) - -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index 9b5bd6d..b5f1221 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait_error; -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ goto out_wait; - } else if (timeout == 0) { - err = -ETIMEDOUT; -- goto out_wait_error; -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ goto out_wait; - } - - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, - - if (sk->sk_err) { - err = -sk->sk_err; -- goto out_wait_error; -- } else -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ } else { - err = 0; -+ } - - out_wait: - finish_wait(sk_sleep(sk), &wait); - out: - release_sock(sk); - return err; -- --out_wait_error: -- sk->sk_state = SS_UNCONNECTED; -- sock->state = SS_UNCONNECTED; -- goto out_wait; - } - - static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) -@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - listener->sk_err == 0) { - release_sock(listener); - timeout = schedule_timeout(timeout); -+ finish_wait(sk_sleep(listener), &wait); - lock_sock(listener); - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait; -+ goto out; - } else if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ goto out; - } - - prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); - } -+ finish_wait(sk_sleep(listener), &wait); - - if (listener->sk_err) - err = -listener->sk_err; -@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - */ - if (err) { - vconnected->rejected = true; -- release_sock(connected); -- sock_put(connected); -- goto out_wait; -+ } else { -+ newsock->state = SS_CONNECTED; -+ sock_graft(connected, newsock); - } - -- newsock->state = SS_CONNECTED; -- sock_graft(connected, newsock); - release_sock(connected); - sock_put(connected); - } - --out_wait: -- finish_wait(sk_sleep(listener), &wait); - out: - release_sock(listener); - return err; -@@ -1557,11 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - if (err < 0) - goto out; - -- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - while (total_written < len) { - ssize_t written; - -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (vsock_stream_has_space(vsk) == 0 && - sk->sk_err == 0 && - !(sk->sk_shutdown & SEND_SHUTDOWN) && -@@ -1570,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - /* Don't wait for non-blocking sockets. */ - if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } - - err = transport->notify_send_pre_block(vsk, &send_data); -- if (err < 0) -- goto out_wait; -+ if (err < 0) { -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; -+ } - - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } else if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - } -+ finish_wait(sk_sleep(sk), &wait); - - /* These checks occur both as part of and after the loop - * conditional since we need to check before and after -@@ -1598,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - */ - if (sk->sk_err) { - err = -sk->sk_err; -- goto out_wait; -+ goto out_err; - } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || - (vsk->peer_shutdown & RCV_SHUTDOWN)) { - err = -EPIPE; -- goto out_wait; -+ goto out_err; - } - - err = transport->notify_send_pre_enqueue(vsk, &send_data); - if (err < 0) -- goto out_wait; -+ goto out_err; - - /* Note that enqueue will only write as many bytes as are free - * in the produce queue, so we don't need to ensure len is -@@ -1620,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - len - total_written); - if (written < 0) { - err = -ENOMEM; -- goto out_wait; -+ goto out_err; - } - - total_written += written; -@@ -1628,14 +1633,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - err = transport->notify_send_post_enqueue( - vsk, written, &send_data); - if (err < 0) -- goto out_wait; -+ goto out_err; - - } - --out_wait: -+out_err: - if (total_written > 0) - err = total_written; -- finish_wait(sk_sleep(sk), &wait); - out: - release_sock(sk); - return err; -@@ -1716,21 +1720,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - if (err < 0) - goto out; - -- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - while (1) { -- s64 ready = vsock_stream_has_data(vsk); -+ s64 ready; - -- if (ready < 0) { -- /* Invalid queue pair content. XXX This should be -- * changed to a connection reset in a later change. -- */ -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -+ ready = vsock_stream_has_data(vsk); - -- err = -ENOMEM; -- goto out_wait; -- } else if (ready > 0) { -+ if (ready == 0) { -+ if (sk->sk_err != 0 || -+ (sk->sk_shutdown & RCV_SHUTDOWN) || -+ (vsk->peer_shutdown & SEND_SHUTDOWN)) { -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ /* Don't wait for non-blocking sockets. */ -+ if (timeout == 0) { -+ err = -EAGAIN; -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ -+ err = transport->notify_recv_pre_block( -+ vsk, target, &recv_data); -+ if (err < 0) { -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ release_sock(sk); -+ timeout = schedule_timeout(timeout); -+ lock_sock(sk); -+ -+ if (signal_pending(current)) { -+ err = sock_intr_errno(timeout); -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } else if (timeout == 0) { -+ err = -EAGAIN; -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ } else { - ssize_t read; - -+ finish_wait(sk_sleep(sk), &wait); -+ -+ if (ready < 0) { -+ /* Invalid queue pair content. XXX This should -+ * be changed to a connection reset in a later -+ * change. -+ */ -+ -+ err = -ENOMEM; -+ goto out; -+ } -+ - err = transport->notify_recv_pre_dequeue( - vsk, target, &recv_data); - if (err < 0) -@@ -1750,42 +1794,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - vsk, target, read, - !(flags & MSG_PEEK), &recv_data); - if (err < 0) -- goto out_wait; -+ goto out; - - if (read >= target || flags & MSG_PEEK) - break; - - target -= read; -- } else { -- if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) -- || (vsk->peer_shutdown & SEND_SHUTDOWN)) { -- break; -- } -- /* Don't wait for non-blocking sockets. */ -- if (timeout == 0) { -- err = -EAGAIN; -- break; -- } -- -- err = transport->notify_recv_pre_block( -- vsk, target, &recv_data); -- if (err < 0) -- break; -- -- release_sock(sk); -- timeout = schedule_timeout(timeout); -- lock_sock(sk); -- -- if (signal_pending(current)) { -- err = sock_intr_errno(timeout); -- break; -- } else if (timeout == 0) { -- err = -EAGAIN; -- break; -- } -- -- prepare_to_wait(sk_sleep(sk), &wait, -- TASK_INTERRUPTIBLE); - } - } - -@@ -1797,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - if (copied > 0) - err = copied; - --out_wait: -- finish_wait(sk_sleep(sk), &wait); - out: - release_sock(sk); - return err; --- -2.10.0 - diff --git a/alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch b/alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch similarity index 81% rename from alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch rename to alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch index b1c07b141..e554e2ae6 100644 --- a/alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch +++ b/alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch @@ -1,7 +1,7 @@ -From 51293adacd73d7bc6baee18e87b0d17ad52a61d4 Mon Sep 17 00:00:00 2001 +From e5597ae98118b800f9930606af65503f64944af2 Mon Sep 17 00:00:00 2001 From: Dexuan Cui -Date: Sun, 15 May 2016 09:53:11 -0700 -Subject: [PATCH 39/42] hv_sock: introduce Hyper-V Sockets +Date: Sat, 23 Jul 2016 01:35:51 +0000 +Subject: [PATCH 3/4] hv_sock: introduce Hyper-V Sockets Hyper-V Sockets (hv_sock) supplies a byte-stream based communication mechanism between the host and the guest. It's somewhat like TCP over @@ -22,29 +22,30 @@ Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Vitaly Kuznetsov Cc: Cathy Avery -Origin: https://patchwork.ozlabs.org/patch/622404/ +Cc: Olaf Hering +Origin: https://patchwork.kernel.org/patch/9244467/ --- MAINTAINERS | 2 + - include/linux/hyperv.h | 14 + + include/linux/hyperv.h | 13 + include/linux/socket.h | 4 +- include/net/af_hvsock.h | 78 +++ - include/uapi/linux/hyperv.h | 25 + + include/uapi/linux/hyperv.h | 23 + net/Kconfig | 1 + net/Makefile | 1 + net/hv_sock/Kconfig | 10 + net/hv_sock/Makefile | 3 + - net/hv_sock/af_hvsock.c | 1520 +++++++++++++++++++++++++++++++++++++++++++ - 10 files changed, 1657 insertions(+), 1 deletion(-) + net/hv_sock/af_hvsock.c | 1507 +++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 1641 insertions(+), 1 deletion(-) create mode 100644 include/net/af_hvsock.h create mode 100644 net/hv_sock/Kconfig create mode 100644 net/hv_sock/Makefile create mode 100644 net/hv_sock/af_hvsock.c diff --git a/MAINTAINERS b/MAINTAINERS -index 12d49f5..fa87bdd 100644 +index f593300..7432d79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -5123,7 +5123,9 @@ F: drivers/input/serio/hyperv-keyboard.c +@@ -5667,7 +5667,9 @@ F: drivers/pci/host/pci-hyperv.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/video/fbdev/hyperv_fb.c @@ -53,15 +54,15 @@ index 12d49f5..fa87bdd 100644 +F: include/net/af_hvsock.h F: tools/hv/ F: Documentation/ABI/stable/sysfs-bus-vmbus - + diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 6c9695e..187d4bd 100644 +index b10954a..50f8976 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h -@@ -1349,4 +1349,18 @@ extern __u32 vmbus_proto_version; - - int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, - const uuid_le *shv_host_servie_id); +@@ -1505,5 +1505,18 @@ static inline void commit_rd_index(struct vmbus_channel *channel) + vmbus_set_event(channel); + } + +struct vmpipe_proto_header { + u32 pkt_type; + u32 data_size; @@ -72,13 +73,13 @@ index 6c9695e..187d4bd 100644 + +/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */ +#define PREV_INDICES_LEN (sizeof(u64)) -+ + +#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ + ALIGN((payload_len), 8) + \ + PREV_INDICES_LEN) #endif /* _HYPERV_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h -index dbd81e7..6634c47 100644 +index b5cc5a6..0b68b58 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -202,8 +202,9 @@ struct ucred { @@ -86,10 +87,10 @@ index dbd81e7..6634c47 100644 #define AF_KCM 41 /* Kernel Connection Multiplexor*/ #define AF_QIPCRTR 42 /* Qualcomm IPC Router */ +#define AF_HYPERV 43 /* Hyper-V Sockets */ - + -#define AF_MAX 43 /* For now.. */ +#define AF_MAX 44 /* For now.. */ - + /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -251,6 +252,7 @@ struct ucred { @@ -98,11 +99,11 @@ index dbd81e7..6634c47 100644 #define PF_QIPCRTR AF_QIPCRTR +#define PF_HYPERV AF_HYPERV #define PF_MAX AF_MAX - + /* Maximum queue length specifiable by listen. */ diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h new file mode 100644 -index 0000000..7c8c41e +index 0000000..e7a8a3a --- /dev/null +++ b/include/net/af_hvsock.h @@ -0,0 +1,78 @@ @@ -113,49 +114,39 @@ index 0000000..7c8c41e +#include +#include + -+/* Note: 3-page is the minimal recv ringbuffer size by default: -+ * -+ * the 1st page is used as the shared read/write index etc, rather than data: -+ * see hv_ringbuffer_init(); -+ * -+ * the payload length in the vmbus pipe message received from the host can -+ * be 4096 bytes, and considing the header of HVSOCK_HEADER_LEN bytes, we -+ * need at least 2 extra pages for ringbuffer data. ++/* The host side's design of the feature requires 5 exact 4KB pages for ++ * recv/send rings respectively -- this is suboptimal considering memory ++ * consumption, however unluckily we have to live with it, before the ++ * host comes up with a better design in the future. + */ -+#define HVSOCK_RCV_BUF_SZ PAGE_SIZE -+#define DEF_RINGBUFFER_PAGES_HVSOCK_RCV 3 ++#define PAGE_SIZE_4K 4096 ++#define RINGBUFFER_HVSOCK_RCV_SIZE (PAGE_SIZE_4K * 5) ++#define RINGBUFFER_HVSOCK_SND_SIZE (PAGE_SIZE_4K * 5) + -+/* As to send, here let's make sure the hvsock_send_buf struct can be held in 1 -+ * page, and since we want to use 2 pages for the send ringbuffer size (this is -+ * the minimal size by default, because the 1st page of the two is used as the -+ * shared read/write index etc, rather than data), we only have 1 page for -+ * ringbuffer data, this means: the max payload length for hvsock data is -+ * PAGE_SIZE - HVSOCK_PKT_LEN(0). And, let's reduce the length by 8-bytes -+ * because the ringbuffer can't be 100% full: see hv_ringbuffer_write(). ++/* The MTU is 16KB per the host side's design. ++ * In future, the buffer can be elimiated when we switch to use the coming ++ * new VMBus ringbuffer "in-place consumption" APIs, by which we can ++ * directly copy data from VMBus ringbuffer into the userspace buffer. + */ -+#define HVSOCK_SND_BUF_SZ (PAGE_SIZE - HVSOCK_PKT_LEN(0) - 8) -+#define DEF_RINGBUFFER_PAGES_HVSOCK_SND 2 -+ -+/* We only send data when the available space is "big enough". This artificial -+ * value must be less than HVSOCK_SND_BUF_SZ. -+ * -+ */ -+#define HVSOCK_SND_THRESHOLD (PAGE_SIZE / 2) -+ -+#define sk_to_hvsock(__sk) ((struct hvsock_sock *)(__sk)) -+#define hvsock_to_sk(__hvsk) ((struct sock *)(__hvsk)) -+ -+struct hvsock_send_buf { -+ struct vmpipe_proto_header hdr; -+ u8 buf[HVSOCK_SND_BUF_SZ]; -+}; -+ ++#define HVSOCK_MTU_SIZE (1024 * 16) +struct hvsock_recv_buf { -+ struct vmpipe_proto_header hdr; -+ u8 buf[HVSOCK_RCV_BUF_SZ]; -+ + unsigned int data_len; + unsigned int data_offset; ++ ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MTU_SIZE]; ++}; ++ ++/* In the VM, actually we can send up to HVSOCK_MTU_SIZE bytes of payload, ++ * but for now let's use a smaller size to minimize the dynamically-allocated ++ * buffer. Note: the buffer can be elimiated in future when we add new VMBus ++ * ringbuffer APIs that allow us to directly copy data from userspace buf to ++ * VMBus ringbuffer. ++ */ ++#define HVSOCK_MAX_SND_SIZE_BY_VM (1024 * 4) ++struct hvsock_send_buf { ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MAX_SND_SIZE_BY_VM]; +}; + +struct hvsock_sock { @@ -183,25 +174,34 @@ index 0000000..7c8c41e + struct hvsock_recv_buf *recv; +}; + ++static inline struct hvsock_sock *sk_to_hvsock(struct sock *sk) ++{ ++ return (struct hvsock_sock *)sk; ++} ++ ++static inline struct sock *hvsock_to_sk(struct hvsock_sock *hvsk) ++{ ++ return (struct sock *)hvsk; ++} ++ +#endif /* __AF_HVSOCK_H__ */ diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h -index e347b24..408b832 100644 +index e347b24..eb3e44b 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -26,6 +26,7 @@ #define _UAPI_HYPERV_H - + #include +#include - + /* * Framework version for util services. -@@ -396,4 +397,28 @@ struct hv_kvp_ip_msg { - struct hv_kvp_ipaddr_value kvp_ip_val; +@@ -396,4 +397,26 @@ struct hv_kvp_ip_msg { + struct hv_kvp_ipaddr_value kvp_ip_val; } __attribute__((packed)); - -+/* -+ * This is the address fromat of Hyper-V Sockets. + ++/* This is the address format of Hyper-V Sockets. + * Note: here we just borrow the kernel's built-in type uuid_le. When + * an application calls bind() or connect(), the 2 members of struct + * sockaddr_hv must be of GUID. @@ -209,12 +209,11 @@ index e347b24..408b832 100644 + * the first 3 fields. Refer to: + * https://en.wikipedia.org/wiki/Globally_unique_identifier + */ -+#define guid_t uuid_le +struct sockaddr_hv { + __kernel_sa_family_t shv_family; /* Address family */ -+ __le16 reserved; /* Must be Zero */ -+ guid_t shv_vm_id; /* VM ID */ -+ guid_t shv_service_id; /* Service ID */ ++ u16 reserved; /* Must be Zero */ ++ uuid_le shv_vm_guid; /* VM ID */ ++ uuid_le shv_service_guid; /* Service ID */ +}; + +#define SHV_VMID_GUEST NULL_UUID_LE @@ -226,10 +225,10 @@ index e347b24..408b832 100644 + #endif /* _UAPI_HYPERV_H */ diff --git a/net/Kconfig b/net/Kconfig -index 1c9fda1..9eeccb7 100644 +index c2cdbce..921e86f 100644 --- a/net/Kconfig +++ b/net/Kconfig -@@ -228,6 +228,7 @@ source "net/dns_resolver/Kconfig" +@@ -231,6 +231,7 @@ source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" @@ -238,7 +237,7 @@ index 1c9fda1..9eeccb7 100644 source "net/mpls/Kconfig" source "net/hsr/Kconfig" diff --git a/net/Makefile b/net/Makefile -index bdd1455..ec175dd 100644 +index 9bd20bb..b4d4e9a 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ @@ -251,7 +250,7 @@ index bdd1455..ec175dd 100644 ifneq ($(CONFIG_NET_SWITCHDEV),) diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig new file mode 100644 -index 0000000..1f41848 +index 0000000..ff84875 --- /dev/null +++ b/net/hv_sock/Kconfig @@ -0,0 +1,10 @@ @@ -260,8 +259,8 @@ index 0000000..1f41848 + depends on HYPERV + default m if HYPERV + help -+ Hyper-V Sockets is somewhat like TCP over VMBus, allowing -+ communication between Linux guest and Hyper-V host without TCP/IP. ++ Hyper-V Sockets is a socket interface for high speed ++ communication between Linux guest and Hyper-V host over VMBus. + + To compile this driver as a module, choose M here: the module + will be called hv_sock. @@ -276,15 +275,17 @@ index 0000000..716c012 +hv_sock-y += af_hvsock.o diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c new file mode 100644 -index 0000000..b91bd60 +index 0000000..331d375 --- /dev/null +++ b/net/hv_sock/af_hvsock.c -@@ -0,0 +1,1520 @@ +@@ -0,0 +1,1507 @@ +/* + * Hyper-V Sockets -- a socket-based communication channel between the + * Hyper-V host and the virtual machines running on it. + * -+ * Copyright(c) 2016, Microsoft Corporation. All rights reserved. ++ * Copyright (c) 2016 Microsoft Corporation. ++ * ++ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions @@ -299,6 +300,10 @@ index 0000000..b91bd60 + * products derived from this software without specific prior written + * permission. + * ++ * Alternatively, this software may be distributed under the terms of the ++ * GNU General Public License ("GPL") version 2 as published by the Free ++ * Software Foundation. ++ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -315,23 +320,9 @@ index 0000000..b91bd60 + +#include +#include ++#include +#include + -+static uint send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND; -+static uint recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV; -+static uint max_socket_number = 1024; -+ -+static atomic_t total_num_hvsock = ATOMIC_INIT(0); -+ -+module_param(send_ring_page, uint, 0444); -+MODULE_PARM_DESC(send_ring_page, "Send ring buffer size (# of pages)"); -+ -+module_param(recv_ring_page, uint, 0444); -+MODULE_PARM_DESC(recv_ring_page, "Receive ring buffer size (# of pages)"); -+ -+module_param(max_socket_number, uint, 0644); -+MODULE_PARM_DESC(max_socket_number, "The max number of created sockets"); -+ +static struct proto hvsock_proto = { + .name = "HV_SOCK", + .owner = THIS_MODULE, @@ -340,22 +331,22 @@ index 0000000..b91bd60 + +#define SS_LISTEN 255 + ++#define HVSOCK_CONNECT_TIMEOUT (30 * HZ) ++ ++/* This is an artificial limit */ ++#define HVSOCK_MAX_BACKLOG 128 ++ +static LIST_HEAD(hvsock_bound_list); +static LIST_HEAD(hvsock_connected_list); +static DEFINE_MUTEX(hvsock_mutex); + -+static bool uuid_equals(uuid_le u1, uuid_le u2) -+{ -+ return !uuid_le_cmp(u1, u2); -+} -+ +static struct sock *hvsock_find_bound_socket(const struct sockaddr_hv *addr) +{ + struct hvsock_sock *hvsk; + + list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) { -+ if (uuid_equals(addr->shv_service_id, -+ hvsk->local_addr.shv_service_id)) ++ if (!uuid_le_cmp(addr->shv_service_guid, ++ hvsk->local_addr.shv_service_guid)) + return hvsock_to_sk(hvsk); + } + return NULL; @@ -373,11 +364,11 @@ index 0000000..b91bd60 + return NULL; +} + -+static -+void hvsock_enqueue_accept(struct sock *listener, struct sock *connected) ++static void hvsock_enqueue_accept(struct sock *listener, ++ struct sock *connected) +{ -+ struct hvsock_sock *hvlistener; + struct hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; + + hvlistener = sk_to_hvsock(listener); + hvconnected = sk_to_hvsock(connected); @@ -393,8 +384,8 @@ index 0000000..b91bd60 + +static struct sock *hvsock_dequeue_accept(struct sock *listener) +{ -+ struct hvsock_sock *hvlistener; + struct hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; + + hvlistener = sk_to_hvsock(listener); + @@ -437,7 +428,7 @@ index 0000000..b91bd60 +{ + memset(addr, 0, sizeof(*addr)); + addr->shv_family = AF_HYPERV; -+ addr->shv_service_id = service_id; ++ addr->shv_service_guid = service_id; +} + +static int hvsock_addr_validate(const struct sockaddr_hv *addr) @@ -456,7 +447,7 @@ index 0000000..b91bd60 + +static bool hvsock_addr_bound(const struct sockaddr_hv *addr) +{ -+ return !uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY); ++ return !!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY); +} + +static int hvsock_addr_cast(const struct sockaddr *addr, size_t len, @@ -475,13 +466,13 @@ index 0000000..b91bd60 + struct sockaddr_hv hv_addr; + int ret = 0; + -+ hvsock_addr_init(&hv_addr, addr->shv_service_id); ++ hvsock_addr_init(&hv_addr, addr->shv_service_guid); + + mutex_lock(&hvsock_mutex); + -+ if (uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY)) { ++ if (!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY)) { + do { -+ uuid_le_gen(&hv_addr.shv_service_id); ++ uuid_le_gen(&hv_addr.shv_service_guid); + } while (hvsock_find_bound_socket(&hv_addr)); + } else { + if (hvsock_find_bound_socket(&hv_addr)) { @@ -490,7 +481,7 @@ index 0000000..b91bd60 + } + } + -+ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_id); ++ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_guid); + + sock_hold(&hvsk->sk); + list_add(&hvsk->bound_list, &hvsock_bound_list); @@ -535,13 +526,14 @@ index 0000000..b91bd60 + +static void hvsock_sk_destruct(struct sock *sk) +{ -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; + -+ kfree(hvsk->send); -+ kfree(hvsk->recv); -+ atomic_dec(&total_num_hvsock); ++ hvsk = sk_to_hvsock(sk); ++ vfree(hvsk->send); ++ vfree(hvsk->recv); + ++ channel = hvsk->channel; + if (!channel) + return; + @@ -597,51 +589,37 @@ index 0000000..b91bd60 + return 0; +} + -+static int hvsock_create(struct net *net, struct socket *sock, -+ gfp_t priority, unsigned short type, -+ struct sock **sk) ++static struct sock *hvsock_create(struct net *net, struct socket *sock, ++ gfp_t priority, unsigned short type) +{ -+ struct hvsock_send_buf *send = NULL; -+ struct hvsock_recv_buf *recv = NULL; + struct hvsock_sock *hvsk; -+ int ret = -EMFILE; -+ int num_hvsock; ++ struct sock *sk; + -+ num_hvsock = atomic_inc_return(&total_num_hvsock); -+ if (num_hvsock > max_socket_number) -+ goto err; ++ sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0); ++ if (!sk) ++ return NULL; + -+ ret = -ENOMEM; -+ send = kmalloc(sizeof(*send), GFP_KERNEL); -+ recv = kmalloc(sizeof(*recv), GFP_KERNEL); -+ if (!send || !recv) -+ goto err; ++ sock_init_data(sock, sk); + -+ *sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0); -+ if (!*sk) -+ goto err; -+ -+ sock_init_data(sock, *sk); -+ -+ /* (*sk)->sk_type is normally set in sock_init_data, but only if sock ++ /* sk->sk_type is normally set in sock_init_data, but only if sock + * is non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) -+ (*sk)->sk_type = type; ++ sk->sk_type = type; + -+ (*sk)->sk_destruct = hvsock_sk_destruct; ++ sk->sk_destruct = hvsock_sk_destruct; + + /* Looks stream-based socket doesn't need this. */ -+ (*sk)->sk_backlog_rcv = NULL; ++ sk->sk_backlog_rcv = NULL; + -+ (*sk)->sk_state = 0; -+ sock_reset_flag(*sk, SOCK_DONE); ++ sk->sk_state = 0; ++ sock_reset_flag(sk, SOCK_DONE); + -+ hvsk = sk_to_hvsock(*sk); ++ hvsk = sk_to_hvsock(sk); + -+ hvsk->send = send; -+ hvsk->recv = recv; ++ hvsk->send = NULL; ++ hvsk->recv = NULL; + + hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY); + hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY); @@ -654,16 +632,7 @@ index 0000000..b91bd60 + + hvsk->peer_shutdown = 0; + -+ hvsk->recv->data_len = 0; -+ hvsk->recv->data_offset = 0; -+ -+ return 0; -+err: -+ atomic_dec(&total_num_hvsock); -+ kfree(send); -+ kfree(recv); -+ *sk = NULL; -+ return ret; ++ return sk; +} + +static int hvsock_bind(struct socket *sock, struct sockaddr *addr, @@ -678,7 +647,7 @@ index 0000000..b91bd60 + if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0) + return -EINVAL; + -+ if (!uuid_equals(hv_addr->shv_vm_id, NULL_UUID_LE)) ++ if (uuid_le_cmp(hv_addr->shv_vm_guid, NULL_UUID_LE)) + return -EINVAL; + + lock_sock(sk); @@ -740,8 +709,8 @@ index 0000000..b91bd60 + &dummy, + &avl_write_bytes); + -+ *can_write = avl_write_bytes > -+ HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD); ++ /* We only write if there is enough space */ ++ *can_write = avl_write_bytes > HVSOCK_PKT_LEN(PAGE_SIZE_4K); + } +} + @@ -754,18 +723,29 @@ index 0000000..b91bd60 + &dummy, + &avl_write_bytes); + -+ if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) -+ return 0; -+ + /* The ringbuffer mustn't be 100% full, and we should reserve a + * zero-length-payload packet for the FIN: see hv_ringbuffer_write() + * and hvsock_shutdown(). + */ ++ if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) ++ return 0; + ret = avl_write_bytes - HVSOCK_PKT_LEN(1) - HVSOCK_PKT_LEN(0); + + return round_down(ret, 8); +} + ++static int hvsock_get_send_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->send = vmalloc(sizeof(*hvsk->send)); ++ return hvsk->send ? 0 : -ENOMEM; ++} ++ ++static void hvsock_put_send_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->send); ++ hvsk->send = NULL; ++} ++ +static int hvsock_send_data(struct vmbus_channel *channel, + struct hvsock_sock *hvsk, + size_t to_write) @@ -777,6 +757,18 @@ index 0000000..b91bd60 + 0, VM_PKT_DATA_INBAND, 0); +} + ++static int hvsock_get_recv_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->recv = vmalloc(sizeof(*hvsk->recv)); ++ return hvsk->recv ? 0 : -ENOMEM; ++} ++ ++static void hvsock_put_recv_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->recv); ++ hvsk->recv = NULL; ++} ++ +static int hvsock_recv_data(struct vmbus_channel *channel, + struct hvsock_sock *hvsk, + size_t *payload_len) @@ -801,6 +793,7 @@ index 0000000..b91bd60 +{ + struct hvsock_sock *hvsk; + struct sock *sk; ++ int ret = 0; + + if (mode < SHUT_RD || mode > SHUT_RDWR) + return -EINVAL; @@ -825,13 +818,21 @@ index 0000000..b91bd60 + + if (mode & SEND_SHUTDOWN) { + hvsk = sk_to_hvsock(sk); ++ ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) ++ goto out; ++ + /* It can't fail: see get_ringbuffer_writable_bytes(). */ + (void)hvsock_send_data(hvsk->channel, hvsk, 0); ++ ++ hvsock_put_send_buf(hvsk); + } + ++out: + release_sock(sk); + -+ return 0; ++ return ret; +} + +static unsigned int hvsock_poll(struct file *file, struct socket *sock, @@ -840,8 +841,8 @@ index 0000000..b91bd60 + struct vmbus_channel *channel; + bool can_read, can_write; + struct hvsock_sock *hvsk; -+ struct sock *sk; + unsigned int mask; ++ struct sock *sk; + + sk = sock->sk; + hvsk = sk_to_hvsock(sk); @@ -883,13 +884,12 @@ index 0000000..b91bd60 + /* If there is something in the queue then we can read */ + get_ringbuffer_rw_status(channel, &can_read, &can_write); + -+ if (!can_read && hvsk->recv->data_len > 0) ++ if (!can_read && hvsk->recv) + can_read = true; + + if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read) + mask |= POLLIN | POLLRDNORM; + } else { -+ can_read = false; + can_write = false; + } + @@ -928,14 +928,13 @@ index 0000000..b91bd60 +static void hvsock_on_channel_cb(void *ctx) +{ + struct sock *sk = (struct sock *)ctx; -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; + bool can_read, can_write; + -+ if (!channel) { -+ WARN_ONCE(1, "NULL channel! There is a programming bug.\n"); -+ return; -+ } ++ hvsk = sk_to_hvsock(sk); ++ channel = hvsk->channel; ++ BUG_ON(!channel); + + get_ringbuffer_rw_status(channel, &can_read, &can_write); + @@ -972,12 +971,11 @@ index 0000000..b91bd60 + +static int hvsock_open_connection(struct vmbus_channel *channel) +{ -+ struct hvsock_sock *hvsk, *new_hvsk; -+ struct sockaddr_hv hv_addr; -+ struct sock *sk, *new_sk; -+ unsigned char conn_from_host; -+ ++ struct hvsock_sock *hvsk = NULL, *new_hvsk = NULL; + uuid_le *instance, *service_id; ++ unsigned char conn_from_host; ++ struct sockaddr_hv hv_addr; ++ struct sock *sk, *new_sk = NULL; + int ret; + + instance = &channel->offermsg.offer.if_instance; @@ -999,14 +997,16 @@ index 0000000..b91bd60 + + if (conn_from_host) { + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { -+ ret = -EMFILE; ++ ret = -ECONNREFUSED; + goto out; + } + -+ ret = hvsock_create(sock_net(sk), NULL, GFP_KERNEL, -+ sk->sk_type, &new_sk); -+ if (ret != 0) ++ new_sk = hvsock_create(sock_net(sk), NULL, GFP_KERNEL, ++ sk->sk_type); ++ if (!new_sk) { ++ ret = -ENOMEM; + goto out; ++ } + + new_sk->sk_state = SS_CONNECTING; + new_hvsk = sk_to_hvsock(new_sk); @@ -1019,8 +1019,8 @@ index 0000000..b91bd60 + } + + set_channel_read_state(channel, false); -+ ret = vmbus_open(channel, send_ring_page * PAGE_SIZE, -+ recv_ring_page * PAGE_SIZE, NULL, 0, ++ ret = vmbus_open(channel, RINGBUFFER_HVSOCK_SND_SIZE, ++ RINGBUFFER_HVSOCK_RCV_SIZE, NULL, 0, + hvsock_on_channel_cb, conn_from_host ? new_sk : sk); + if (ret != 0) { + if (conn_from_host) { @@ -1033,8 +1033,10 @@ index 0000000..b91bd60 + } + + vmbus_set_chn_rescind_callback(channel, hvsock_close_connection); ++ ++ /* see get_ringbuffer_rw_status() */ + set_channel_pending_send_size(channel, -+ HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD)); ++ HVSOCK_PKT_LEN(PAGE_SIZE_4K) + 1); + + if (conn_from_host) { + new_sk->sk_state = SS_CONNECTED; @@ -1081,13 +1083,13 @@ index 0000000..b91bd60 + int flags, int current_ret) +{ + struct sock *sk = sock->sk; -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ ++ struct hvsock_sock *hvsk; + int ret = current_ret; -+ -+ long timeout = 30 * HZ; + DEFINE_WAIT(wait); ++ long timeout; + ++ hvsk = sk_to_hvsock(sk); ++ timeout = HVSOCK_CONNECT_TIMEOUT; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { @@ -1181,8 +1183,8 @@ index 0000000..b91bd60 + sk->sk_state = SS_CONNECTING; + + ret = vmbus_send_tl_connect_request( -+ &hvsk->local_addr.shv_service_id, -+ &hvsk->remote_addr.shv_service_id); ++ &hvsk->local_addr.shv_service_guid, ++ &hvsk->remote_addr.shv_service_guid); + if (ret < 0) + goto out; + @@ -1240,15 +1242,12 @@ index 0000000..b91bd60 + lock_sock(connected); + hvconnected = sk_to_hvsock(connected); + -+ if (ret) { -+ release_sock(connected); -+ sock_put(connected); -+ } else { ++ if (!ret) { + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); -+ release_sock(connected); -+ sock_put(connected); + } ++ release_sock(connected); ++ sock_put(connected); + } + +out_wait: @@ -1256,8 +1255,8 @@ index 0000000..b91bd60 + return ret; +} + -+static -+int hvsock_accept(struct socket *sock, struct socket *newsock, int flags) ++static int hvsock_accept(struct socket *sock, struct socket *newsock, ++ int flags) +{ + struct sock *listener; + int ret; @@ -1305,9 +1304,8 @@ index 0000000..b91bd60 + ret = -EINVAL; + goto out; + } -+ /* This is an artificial limit */ -+ if (backlog > 128) -+ backlog = 128; ++ if (backlog > HVSOCK_MAX_BACKLOG) ++ backlog = HVSOCK_MAX_BACKLOG; + + hvsk = sk_to_hvsock(sk); + if (!hvsock_addr_bound(&hvsk->local_addr)) { @@ -1323,23 +1321,21 @@ index 0000000..b91bd60 + return ret; +} + -+static -+int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, size_t len) ++static int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, ++ size_t len) +{ + struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ ++ struct vmbus_channel *channel; + size_t total_to_write = len; + size_t total_written = 0; -+ bool can_write; -+ -+ int ret = 0; -+ + DEFINE_WAIT(wait); ++ bool can_write; + long timeout; ++ int ret = -EIO; + + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; + + while (total_to_write > 0) { + size_t to_write, max_writable; @@ -1397,16 +1393,23 @@ index 0000000..b91bd60 + if (max_writable == 0) + goto out_wait; + -+ to_write = min_t(size_t, HVSOCK_SND_BUF_SZ, ++ to_write = min_t(size_t, sizeof(hvsk->send->buf), + total_to_write); + if (to_write > max_writable) + to_write = max_writable; + -+ ret = memcpy_from_msg(hvsk->send->buf, msg, to_write); -+ if (ret != 0) ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) + goto out_wait; + ++ ret = memcpy_from_msg(hvsk->send->buf, msg, to_write); ++ if (ret != 0) { ++ hvsock_put_send_buf(hvsk); ++ goto out_wait; ++ } ++ + ret = hvsock_send_data(channel, hvsk, to_write); ++ hvsock_put_send_buf(hvsk); + if (ret != 0) + goto out_wait; + @@ -1423,7 +1426,8 @@ index 0000000..b91bd60 + return ret; +} + -+static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ++static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, ++ size_t len) +{ + struct hvsock_sock *hvsk; + struct sock *sk; @@ -1432,11 +1436,8 @@ index 0000000..b91bd60 + if (len == 0) + return -EINVAL; + -+ if (msg->msg_flags & ~MSG_DONTWAIT) { -+ pr_err("%s: unsupported flags=0x%x\n", __func__, -+ msg->msg_flags); ++ if (msg->msg_flags & ~MSG_DONTWAIT) + return -EOPNOTSUPP; -+ } + + sk = sock->sk; + hvsk = sk_to_hvsock(sk); @@ -1471,11 +1472,10 @@ index 0000000..b91bd60 +out: + release_sock(sk); + -+ /* ret is a bigger-than-0 total_written or a negative err code. */ -+ if (ret == 0) { -+ WARN(1, "unexpected return value of 0\n"); -+ ret = -EIO; -+ } ++ /* ret should be a bigger-than-0 total_written or a negative err ++ * code. ++ */ ++ BUG_ON(ret == 0); + + return ret; +} @@ -1484,43 +1484,56 @@ index 0000000..b91bd60 + size_t len, int flags) +{ + struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ + size_t to_read, total_to_read = len; ++ struct vmbus_channel *channel; ++ DEFINE_WAIT(wait); + size_t copied = 0; + bool can_read; -+ -+ int ret = 0; -+ -+ DEFINE_WAIT(wait); + long timeout; ++ int ret = 0; + + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; + + while (1) { -+ bool need_refill = hvsk->recv->data_len == 0; ++ bool need_refill = !hvsk->recv; + -+ if (need_refill) -+ get_ringbuffer_rw_status(channel, &can_read, NULL); -+ else ++ if (need_refill) { ++ if (hvsk->peer_shutdown & SEND_SHUTDOWN) ++ can_read = false; ++ else ++ get_ringbuffer_rw_status(channel, &can_read, ++ NULL); ++ } else { + can_read = true; ++ } + + if (can_read) { + size_t payload_len; + + if (need_refill) { ++ ret = hvsock_get_recv_buf(hvsk); ++ if (ret < 0) { ++ if (copied > 0) ++ ret = copied; ++ goto out_wait; ++ } ++ + ret = hvsock_recv_data(channel, hvsk, + &payload_len); + if (ret != 0 || -+ payload_len > HVSOCK_RCV_BUF_SZ) { ++ payload_len > sizeof(hvsk->recv->buf)) { + ret = -EIO; ++ hvsock_put_recv_buf(hvsk); + goto out_wait; + } + + if (payload_len == 0) { + ret = copied; -+ goto out_wait; ++ hvsock_put_recv_buf(hvsk); ++ hvsk->peer_shutdown |= SEND_SHUTDOWN; ++ break; + } + + hvsk->recv->data_len = payload_len; @@ -1542,7 +1555,7 @@ index 0000000..b91bd60 + hvsk->recv->data_len -= to_read; + + if (hvsk->recv->data_len == 0) -+ hvsk->recv->data_offset = 0; ++ hvsock_put_recv_buf(hvsk); + else + hvsk->recv->data_offset += to_read; + @@ -1584,23 +1597,8 @@ index 0000000..b91bd60 + else if (sk->sk_shutdown & RCV_SHUTDOWN) + ret = 0; + -+ if (copied > 0) { ++ if (copied > 0) + ret = copied; -+ -+ /* If the other side has shutdown for sending and there -+ * is nothing more to read, then we modify the socket -+ * state. -+ */ -+ if ((hvsk->peer_shutdown & SEND_SHUTDOWN) && -+ hvsk->recv->data_len == 0) { -+ get_ringbuffer_rw_status(channel, &can_read, NULL); -+ if (!can_read) { -+ sk->sk_state = SS_UNCONNECTED; -+ sock_set_flag(sk, SOCK_DONE); -+ sk->sk_state_change(sk); -+ } -+ } -+ } +out_wait: + finish_wait(sk_sleep(sk), &wait); + return ret; @@ -1630,7 +1628,6 @@ index 0000000..b91bd60 + + /* We ignore msg->addr_name/len. */ + if (flags & ~MSG_DONTWAIT) { -+ pr_err("%s: unsupported flags=0x%x\n", __func__, flags); + ret = -EOPNOTSUPP; + goto out; + } @@ -1684,9 +1681,6 @@ index 0000000..b91bd60 +{ + struct sock *sk; + -+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ + if (protocol != 0 && protocol != SHV_PROTO_RAW) + return -EPROTONOSUPPORT; + @@ -1700,7 +1694,8 @@ index 0000000..b91bd60 + + sock->state = SS_UNCONNECTED; + -+ return hvsock_create(net, sock, GFP_KERNEL, 0, &sk); ++ sk = hvsock_create(net, sock, GFP_KERNEL, 0); ++ return sk ? 0 : -ENOMEM; +} + +static const struct net_proto_family hvsock_family_ops = { @@ -1749,17 +1744,8 @@ index 0000000..b91bd60 +{ + int ret; + -+ if (send_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_SND) -+ send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND; -+ -+ if (recv_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_RCV) -+ recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV; -+ -+ /* Hyper-V Sockets requires at least VMBus 4.0 */ -+ if ((vmbus_proto_version >> 16) < 4) { -+ pr_err("failed to load: VMBus 4 or later is required\n"); ++ if (vmbus_proto_version < VERSION_WIN10) + return -ENODEV; -+ } + + ret = vmbus_driver_register(&hvsock_drv); + if (ret) { @@ -1800,6 +1786,5 @@ index 0000000..b91bd60 + +MODULE_DESCRIPTION("Hyper-V Sockets"); +MODULE_LICENSE("Dual BSD/GPL"); --- -2.10.0 - +-- +2.10.1 diff --git a/alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch b/alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch similarity index 74% rename from alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch rename to alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch index d477b2713..36f180fe9 100644 --- a/alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch +++ b/alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch @@ -1,7 +1,7 @@ -From 7abd92fd5987e1ad79f2272cbe544be0cfe84165 Mon Sep 17 00:00:00 2001 +From 7b394fbb825d9367fa6433ff2382af2fc32fb1c6 Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Mon, 23 May 2016 18:55:45 +0100 -Subject: [PATCH 42/42] vmbus: Don't spam the logs with unknown GUIDs +Subject: [PATCH 4/4] vmbus: Don't spam the logs with unknown GUIDs With Hyper-V sockets device types are introduced on the fly. The pr_info() then prints a message on every connection, which is way too verbose. Since @@ -14,17 +14,16 @@ Signed-off-by: Rolf Neugebauer 1 file changed, 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 0a54317..120ee22 100644 +index 8f4e6070..ef4a512 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -147,7 +147,6 @@ static u16 hv_get_dev_type(const uuid_le *guid) - if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) - return i; - } + if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) + return i; + } - pr_info("Unknown GUID: %pUl\n", guid); - return i; + return i; } - --- -2.10.0 +-- +2.10.1 diff --git a/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch b/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch deleted file mode 100644 index ecaa8e3e0..000000000 --- a/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 8386f4e436f280cec08f95338ae5e44bc8aa5b5e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 23 Jun 2016 16:28:58 +0100 -Subject: [PATCH 04/42] vsock: make listener child lock ordering explicit - -There are several places where the listener and pending or accept queue -child sockets are accessed at the same time. Lockdep is unhappy that -two locks from the same class are held. - -Tell lockdep that it is safe and document the lock ordering. - -Originally Claudio Imbrenda sent a similar -patch asking whether this is safe. I have audited the code and also -covered the vsock_pending_work() function. - -Suggested-by: Claudio Imbrenda -Signed-off-by: Stefan Hajnoczi -Signed-off-by: David S. Miller -(cherry picked from commit 4192f672fae559f32d82de72a677701853cc98a7) ---- - net/vmw_vsock/af_vsock.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index b5f1221..b96ac91 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -61,6 +61,14 @@ - * function will also cleanup rejected sockets, those that reach the connected - * state but leave it before they have been accepted. - * -+ * - Lock ordering for pending or accept queue sockets is: -+ * -+ * lock_sock(listener); -+ * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); -+ * -+ * Using explicit nested locking keeps lockdep happy since normally only one -+ * lock of a given class may be taken at a time. -+ * - * - Sockets created by user action will be cleaned up when the user process - * calls close(2), causing our release implementation to be called. Our release - * implementation will perform some cleanup then drop the last reference so our -@@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) - cleanup = true; - - lock_sock(listener); -- lock_sock(sk); -+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (vsock_is_pending(sk)) { - vsock_remove_pending(listener, sk); -@@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - if (connected) { - listener->sk_ack_backlog--; - -- lock_sock(connected); -+ lock_sock_nested(connected, SINGLE_DEPTH_NESTING); - vconnected = vsock_sk(connected); - - /* If the listener socket has received an error, then we should --- -2.10.0 - diff --git a/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch b/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch deleted file mode 100644 index edf521c29..000000000 --- a/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch +++ /dev/null @@ -1,59 +0,0 @@ -From ae6d39c3a4cd08ce37606ab36b202702a48f5440 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 28 Jul 2016 15:36:30 +0100 -Subject: [PATCH 05/42] VSOCK: transport-specific vsock_transport functions - -struct vsock_transport contains function pointers called by AF_VSOCK -core code. The transport may want its own transport-specific function -pointers and they can be added after struct vsock_transport. - -Allow the transport to fetch vsock_transport. It can downcast it to -access transport-specific function pointers. - -The virtio transport will use this. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0b01aeb3d2fbf16787f0c9629f4ca52ae792f732) ---- - include/net/af_vsock.h | 3 +++ - net/vmw_vsock/af_vsock.c | 9 +++++++++ - 2 files changed, 12 insertions(+) - -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index e9eb2d6..23f5525 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -165,6 +165,9 @@ static inline int vsock_core_init(const struct vsock_transport *t) - } - void vsock_core_exit(void); - -+/* The transport may downcast this to access transport-specific functions */ -+const struct vsock_transport *vsock_core_get_transport(void); -+ - /**** UTILS ****/ - - void vsock_release_pending(struct sock *pending); -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index b96ac91..e34d96f 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -1995,6 +1995,15 @@ void vsock_core_exit(void) - } - EXPORT_SYMBOL_GPL(vsock_core_exit); - -+const struct vsock_transport *vsock_core_get_transport(void) -+{ -+ /* vsock_register_mutex not taken since only the transport uses this -+ * function and only while registered. -+ */ -+ return transport; -+} -+EXPORT_SYMBOL_GPL(vsock_core_get_transport); -+ - MODULE_AUTHOR("VMware, Inc."); - MODULE_DESCRIPTION("VMware Virtual Socket Family"); - MODULE_VERSION("1.0.1.0-k"); --- -2.10.0 - diff --git a/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch b/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch deleted file mode 100644 index 36f65a70d..000000000 --- a/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 816c87fe6ecfa46981c0ca332d21f1e0d8bfd8a0 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 28 Jul 2016 15:36:31 +0100 -Subject: [PATCH 06/42] VSOCK: defer sock removal to transports - -The virtio transport will implement graceful shutdown and the related -SO_LINGER socket option. This requires orphaning the sock but keeping -it in the table of connections after .release(). - -This patch adds the vsock_remove_sock() function and leaves it up to the -transport when to remove the sock. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 6773b7dc39f165bd9d824b50ac52cbb3f87d53c8) ---- - include/net/af_vsock.h | 1 + - net/vmw_vsock/af_vsock.c | 16 ++++++++++------ - net/vmw_vsock/vmci_transport.c | 2 ++ - 3 files changed, 13 insertions(+), 6 deletions(-) - -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index 23f5525..3af0b22 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -180,6 +180,7 @@ void vsock_remove_connected(struct vsock_sock *vsk); - struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, - struct sockaddr_vm *dst); -+void vsock_remove_sock(struct vsock_sock *vsk); - void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); - - #endif /* __AF_VSOCK_H__ */ -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index e34d96f..17dbbe6 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -344,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk) - return ret; - } - -+void vsock_remove_sock(struct vsock_sock *vsk) -+{ -+ if (vsock_in_bound_table(vsk)) -+ vsock_remove_bound(vsk); -+ -+ if (vsock_in_connected_table(vsk)) -+ vsock_remove_connected(vsk); -+} -+EXPORT_SYMBOL_GPL(vsock_remove_sock); -+ - void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) - { - int i; -@@ -660,12 +670,6 @@ static void __vsock_release(struct sock *sk) - vsk = vsock_sk(sk); - pending = NULL; /* Compiler warning. */ - -- if (vsock_in_bound_table(vsk)) -- vsock_remove_bound(vsk); -- -- if (vsock_in_connected_table(vsk)) -- vsock_remove_connected(vsk); -- - transport->release(vsk); - - lock_sock(sk); -diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c -index 0a369bb..706991e 100644 ---- a/net/vmw_vsock/vmci_transport.c -+++ b/net/vmw_vsock/vmci_transport.c -@@ -1644,6 +1644,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk) - - static void vmci_transport_release(struct vsock_sock *vsk) - { -+ vsock_remove_sock(vsk); -+ - if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { - vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); - vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; --- -2.10.0 - diff --git a/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch b/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch deleted file mode 100644 index aaa7a887a..000000000 --- a/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch +++ /dev/null @@ -1,1496 +0,0 @@ -From fe9f8cb30a5c819adabb5b9b598f7776cbbdc4f0 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:32 +0100 -Subject: [PATCH 07/42] VSOCK: Introduce virtio_vsock_common.ko - -This module contains the common code and header files for the following -virtio_transporto and vhost_vsock kernel modules. - -Signed-off-by: Asias He -Signed-off-by: Claudio Imbrenda -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 06a8fc78367d070720af960dcecec917d3ae5f3b) ---- - MAINTAINERS | 10 + - include/linux/virtio_vsock.h | 154 ++++ - include/net/af_vsock.h | 2 + - .../trace/events/vsock_virtio_transport_common.h | 144 +++ - include/uapi/linux/Kbuild | 1 + - include/uapi/linux/virtio_ids.h | 1 + - include/uapi/linux/virtio_vsock.h | 94 ++ - net/vmw_vsock/virtio_transport_common.c | 992 +++++++++++++++++++++ - 8 files changed, 1398 insertions(+) - create mode 100644 include/linux/virtio_vsock.h - create mode 100644 include/trace/events/vsock_virtio_transport_common.h - create mode 100644 include/uapi/linux/virtio_vsock.h - create mode 100644 net/vmw_vsock/virtio_transport_common.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index ab65bbe..b93ba8b 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11382,6 +11382,16 @@ S: Maintained - F: drivers/media/v4l2-core/videobuf2-* - F: include/media/videobuf2-* - -+VIRTIO AND VHOST VSOCK DRIVER -+M: Stefan Hajnoczi -+L: kvm@vger.kernel.org -+L: virtualization@lists.linux-foundation.org -+L: netdev@vger.kernel.org -+S: Maintained -+F: include/linux/virtio_vsock.h -+F: include/uapi/linux/virtio_vsock.h -+F: net/vmw_vsock/virtio_transport_common.c -+ - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul - S: Maintained -diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h -new file mode 100644 -index 0000000..9638bfe ---- /dev/null -+++ b/include/linux/virtio_vsock.h -@@ -0,0 +1,154 @@ -+#ifndef _LINUX_VIRTIO_VSOCK_H -+#define _LINUX_VIRTIO_VSOCK_H -+ -+#include -+#include -+#include -+#include -+ -+#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 -+#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) -+#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) -+#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) -+#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL -+#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) -+ -+enum { -+ VSOCK_VQ_RX = 0, /* for host to guest data */ -+ VSOCK_VQ_TX = 1, /* for guest to host data */ -+ VSOCK_VQ_EVENT = 2, -+ VSOCK_VQ_MAX = 3, -+}; -+ -+/* Per-socket state (accessed via vsk->trans) */ -+struct virtio_vsock_sock { -+ struct vsock_sock *vsk; -+ -+ /* Protected by lock_sock(sk_vsock(trans->vsk)) */ -+ u32 buf_size; -+ u32 buf_size_min; -+ u32 buf_size_max; -+ -+ spinlock_t tx_lock; -+ spinlock_t rx_lock; -+ -+ /* Protected by tx_lock */ -+ u32 tx_cnt; -+ u32 buf_alloc; -+ u32 peer_fwd_cnt; -+ u32 peer_buf_alloc; -+ -+ /* Protected by rx_lock */ -+ u32 fwd_cnt; -+ u32 rx_bytes; -+ struct list_head rx_queue; -+}; -+ -+struct virtio_vsock_pkt { -+ struct virtio_vsock_hdr hdr; -+ struct work_struct work; -+ struct list_head list; -+ void *buf; -+ u32 len; -+ u32 off; -+ bool reply; -+}; -+ -+struct virtio_vsock_pkt_info { -+ u32 remote_cid, remote_port; -+ struct msghdr *msg; -+ u32 pkt_len; -+ u16 type; -+ u16 op; -+ u32 flags; -+ bool reply; -+}; -+ -+struct virtio_transport { -+ /* This must be the first field */ -+ struct vsock_transport transport; -+ -+ /* Takes ownership of the packet */ -+ int (*send_pkt)(struct virtio_vsock_pkt *pkt); -+}; -+ -+ssize_t -+virtio_transport_stream_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, -+ int type); -+int -+virtio_transport_dgram_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags); -+ -+s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); -+s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); -+ -+int virtio_transport_do_socket_init(struct vsock_sock *vsk, -+ struct vsock_sock *psk); -+u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); -+u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); -+u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); -+void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); -+void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); -+void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); -+int -+virtio_transport_notify_poll_in(struct vsock_sock *vsk, -+ size_t target, -+ bool *data_ready_now); -+int -+virtio_transport_notify_poll_out(struct vsock_sock *vsk, -+ size_t target, -+ bool *space_available_now); -+ -+int virtio_transport_notify_recv_init(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, -+ size_t target, ssize_t copied, bool data_read, -+ struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_send_init(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, -+ ssize_t written, struct vsock_transport_send_notify_data *data); -+ -+u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); -+bool virtio_transport_stream_is_active(struct vsock_sock *vsk); -+bool virtio_transport_stream_allow(u32 cid, u32 port); -+int virtio_transport_dgram_bind(struct vsock_sock *vsk, -+ struct sockaddr_vm *addr); -+bool virtio_transport_dgram_allow(u32 cid, u32 port); -+ -+int virtio_transport_connect(struct vsock_sock *vsk); -+ -+int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); -+ -+void virtio_transport_release(struct vsock_sock *vsk); -+ -+ssize_t -+virtio_transport_stream_enqueue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len); -+int -+virtio_transport_dgram_enqueue(struct vsock_sock *vsk, -+ struct sockaddr_vm *remote_addr, -+ struct msghdr *msg, -+ size_t len); -+ -+void virtio_transport_destruct(struct vsock_sock *vsk); -+ -+void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); -+void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); -+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); -+u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); -+void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); -+ -+#endif /* _LINUX_VIRTIO_VSOCK_H */ -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index 3af0b22..f275896 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -63,6 +63,8 @@ struct vsock_sock { - struct list_head accept_queue; - bool rejected; - struct delayed_work dwork; -+ struct delayed_work close_work; -+ bool close_work_scheduled; - u32 peer_shutdown; - bool sent_request; - bool ignore_connecting_rst; -diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h -new file mode 100644 -index 0000000..b7f1d62 ---- /dev/null -+++ b/include/trace/events/vsock_virtio_transport_common.h -@@ -0,0 +1,144 @@ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM vsock -+ -+#if !defined(_TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H) || \ -+ defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H -+ -+#include -+ -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM); -+ -+#define show_type(val) \ -+ __print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }) -+ -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RESPONSE); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RST); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST); -+ -+#define show_op(val) \ -+ __print_symbolic(val, \ -+ { VIRTIO_VSOCK_OP_INVALID, "INVALID" }, \ -+ { VIRTIO_VSOCK_OP_REQUEST, "REQUEST" }, \ -+ { VIRTIO_VSOCK_OP_RESPONSE, "RESPONSE" }, \ -+ { VIRTIO_VSOCK_OP_RST, "RST" }, \ -+ { VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \ -+ { VIRTIO_VSOCK_OP_RW, "RW" }, \ -+ { VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \ -+ { VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" }) -+ -+TRACE_EVENT(virtio_transport_alloc_pkt, -+ TP_PROTO( -+ __u32 src_cid, __u32 src_port, -+ __u32 dst_cid, __u32 dst_port, -+ __u32 len, -+ __u16 type, -+ __u16 op, -+ __u32 flags -+ ), -+ TP_ARGS( -+ src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ type, -+ op, -+ flags -+ ), -+ TP_STRUCT__entry( -+ __field(__u32, src_cid) -+ __field(__u32, src_port) -+ __field(__u32, dst_cid) -+ __field(__u32, dst_port) -+ __field(__u32, len) -+ __field(__u16, type) -+ __field(__u16, op) -+ __field(__u32, flags) -+ ), -+ TP_fast_assign( -+ __entry->src_cid = src_cid; -+ __entry->src_port = src_port; -+ __entry->dst_cid = dst_cid; -+ __entry->dst_port = dst_port; -+ __entry->len = len; -+ __entry->type = type; -+ __entry->op = op; -+ __entry->flags = flags; -+ ), -+ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x", -+ __entry->src_cid, __entry->src_port, -+ __entry->dst_cid, __entry->dst_port, -+ __entry->len, -+ show_type(__entry->type), -+ show_op(__entry->op), -+ __entry->flags) -+); -+ -+TRACE_EVENT(virtio_transport_recv_pkt, -+ TP_PROTO( -+ __u32 src_cid, __u32 src_port, -+ __u32 dst_cid, __u32 dst_port, -+ __u32 len, -+ __u16 type, -+ __u16 op, -+ __u32 flags, -+ __u32 buf_alloc, -+ __u32 fwd_cnt -+ ), -+ TP_ARGS( -+ src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ type, -+ op, -+ flags, -+ buf_alloc, -+ fwd_cnt -+ ), -+ TP_STRUCT__entry( -+ __field(__u32, src_cid) -+ __field(__u32, src_port) -+ __field(__u32, dst_cid) -+ __field(__u32, dst_port) -+ __field(__u32, len) -+ __field(__u16, type) -+ __field(__u16, op) -+ __field(__u32, flags) -+ __field(__u32, buf_alloc) -+ __field(__u32, fwd_cnt) -+ ), -+ TP_fast_assign( -+ __entry->src_cid = src_cid; -+ __entry->src_port = src_port; -+ __entry->dst_cid = dst_cid; -+ __entry->dst_port = dst_port; -+ __entry->len = len; -+ __entry->type = type; -+ __entry->op = op; -+ __entry->flags = flags; -+ __entry->buf_alloc = buf_alloc; -+ __entry->fwd_cnt = fwd_cnt; -+ ), -+ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x " -+ "buf_alloc=%u fwd_cnt=%u", -+ __entry->src_cid, __entry->src_port, -+ __entry->dst_cid, __entry->dst_port, -+ __entry->len, -+ show_type(__entry->type), -+ show_op(__entry->op), -+ __entry->flags, -+ __entry->buf_alloc, -+ __entry->fwd_cnt) -+); -+ -+#endif /* _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H */ -+ -+#undef TRACE_INCLUDE_FILE -+#define TRACE_INCLUDE_FILE vsock_virtio_transport_common -+ -+/* This part must be outside protection */ -+#include -diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild -index ebd10e6..6c51a4d 100644 ---- a/include/uapi/linux/Kbuild -+++ b/include/uapi/linux/Kbuild -@@ -447,6 +447,7 @@ header-y += virtio_ring.h - header-y += virtio_rng.h - header-y += virtio_scsi.h - header-y += virtio_types.h -+header-y += virtio_vsock.h - header-y += vm_sockets.h - header-y += vt.h - header-y += wait.h -diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h -index 77925f5..3228d58 100644 ---- a/include/uapi/linux/virtio_ids.h -+++ b/include/uapi/linux/virtio_ids.h -@@ -41,5 +41,6 @@ - #define VIRTIO_ID_CAIF 12 /* Virtio caif */ - #define VIRTIO_ID_GPU 16 /* virtio GPU */ - #define VIRTIO_ID_INPUT 18 /* virtio input */ -+#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ - - #endif /* _LINUX_VIRTIO_IDS_H */ -diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h -new file mode 100644 -index 0000000..6b011c1 ---- /dev/null -+++ b/include/uapi/linux/virtio_vsock.h -@@ -0,0 +1,94 @@ -+/* -+ * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so -+ * anyone can use the definitions to implement compatible drivers/servers: -+ * -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of IBM nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * Copyright (C) Red Hat, Inc., 2013-2015 -+ * Copyright (C) Asias He , 2013 -+ * Copyright (C) Stefan Hajnoczi , 2015 -+ */ -+ -+#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H -+#define _UAPI_LINUX_VIRTIO_VOSCK_H -+ -+#include -+#include -+#include -+ -+struct virtio_vsock_config { -+ __le64 guest_cid; -+} __attribute__((packed)); -+ -+enum virtio_vsock_event_id { -+ VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, -+}; -+ -+struct virtio_vsock_event { -+ __le32 id; -+} __attribute__((packed)); -+ -+struct virtio_vsock_hdr { -+ __le64 src_cid; -+ __le64 dst_cid; -+ __le32 src_port; -+ __le32 dst_port; -+ __le32 len; -+ __le16 type; /* enum virtio_vsock_type */ -+ __le16 op; /* enum virtio_vsock_op */ -+ __le32 flags; -+ __le32 buf_alloc; -+ __le32 fwd_cnt; -+} __attribute__((packed)); -+ -+enum virtio_vsock_type { -+ VIRTIO_VSOCK_TYPE_STREAM = 1, -+}; -+ -+enum virtio_vsock_op { -+ VIRTIO_VSOCK_OP_INVALID = 0, -+ -+ /* Connect operations */ -+ VIRTIO_VSOCK_OP_REQUEST = 1, -+ VIRTIO_VSOCK_OP_RESPONSE = 2, -+ VIRTIO_VSOCK_OP_RST = 3, -+ VIRTIO_VSOCK_OP_SHUTDOWN = 4, -+ -+ /* To send payload */ -+ VIRTIO_VSOCK_OP_RW = 5, -+ -+ /* Tell the peer our credit info */ -+ VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, -+ /* Request the peer to send the credit info to us */ -+ VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, -+}; -+ -+/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ -+enum virtio_vsock_shutdown { -+ VIRTIO_VSOCK_SHUTDOWN_RCV = 1, -+ VIRTIO_VSOCK_SHUTDOWN_SEND = 2, -+}; -+ -+#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c -new file mode 100644 -index 0000000..a53b3a1 ---- /dev/null -+++ b/net/vmw_vsock/virtio_transport_common.c -@@ -0,0 +1,992 @@ -+/* -+ * common code for virtio vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+/* How long to wait for graceful shutdown of a connection */ -+#define VSOCK_CLOSE_TIMEOUT (8 * HZ) -+ -+static const struct virtio_transport *virtio_transport_get_ops(void) -+{ -+ const struct vsock_transport *t = vsock_core_get_transport(); -+ -+ return container_of(t, struct virtio_transport, transport); -+} -+ -+struct virtio_vsock_pkt * -+virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, -+ size_t len, -+ u32 src_cid, -+ u32 src_port, -+ u32 dst_cid, -+ u32 dst_port) -+{ -+ struct virtio_vsock_pkt *pkt; -+ int err; -+ -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ return NULL; -+ -+ pkt->hdr.type = cpu_to_le16(info->type); -+ pkt->hdr.op = cpu_to_le16(info->op); -+ pkt->hdr.src_cid = cpu_to_le64(src_cid); -+ pkt->hdr.dst_cid = cpu_to_le64(dst_cid); -+ pkt->hdr.src_port = cpu_to_le32(src_port); -+ pkt->hdr.dst_port = cpu_to_le32(dst_port); -+ pkt->hdr.flags = cpu_to_le32(info->flags); -+ pkt->len = len; -+ pkt->hdr.len = cpu_to_le32(len); -+ pkt->reply = info->reply; -+ -+ if (info->msg && len > 0) { -+ pkt->buf = kmalloc(len, GFP_KERNEL); -+ if (!pkt->buf) -+ goto out_pkt; -+ err = memcpy_from_msg(pkt->buf, info->msg, len); -+ if (err) -+ goto out; -+ } -+ -+ trace_virtio_transport_alloc_pkt(src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ info->type, -+ info->op, -+ info->flags); -+ -+ return pkt; -+ -+out: -+ kfree(pkt->buf); -+out_pkt: -+ kfree(pkt); -+ return NULL; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); -+ -+static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt_info *info) -+{ -+ u32 src_cid, src_port, dst_cid, dst_port; -+ struct virtio_vsock_sock *vvs; -+ struct virtio_vsock_pkt *pkt; -+ u32 pkt_len = info->pkt_len; -+ -+ src_cid = vm_sockets_get_local_cid(); -+ src_port = vsk->local_addr.svm_port; -+ if (!info->remote_cid) { -+ dst_cid = vsk->remote_addr.svm_cid; -+ dst_port = vsk->remote_addr.svm_port; -+ } else { -+ dst_cid = info->remote_cid; -+ dst_port = info->remote_port; -+ } -+ -+ vvs = vsk->trans; -+ -+ /* we can send less than pkt_len bytes */ -+ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) -+ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; -+ -+ /* virtio_transport_get_credit might return less than pkt_len credit */ -+ pkt_len = virtio_transport_get_credit(vvs, pkt_len); -+ -+ /* Do not send zero length OP_RW pkt */ -+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) -+ return pkt_len; -+ -+ pkt = virtio_transport_alloc_pkt(info, pkt_len, -+ src_cid, src_port, -+ dst_cid, dst_port); -+ if (!pkt) { -+ virtio_transport_put_credit(vvs, pkt_len); -+ return -ENOMEM; -+ } -+ -+ virtio_transport_inc_tx_pkt(vvs, pkt); -+ -+ return virtio_transport_get_ops()->send_pkt(pkt); -+} -+ -+static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, -+ struct virtio_vsock_pkt *pkt) -+{ -+ vvs->rx_bytes += pkt->len; -+} -+ -+static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, -+ struct virtio_vsock_pkt *pkt) -+{ -+ vvs->rx_bytes -= pkt->len; -+ vvs->fwd_cnt += pkt->len; -+} -+ -+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) -+{ -+ spin_lock_bh(&vvs->tx_lock); -+ pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); -+ pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); -+ spin_unlock_bh(&vvs->tx_lock); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); -+ -+u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) -+{ -+ u32 ret; -+ -+ spin_lock_bh(&vvs->tx_lock); -+ ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); -+ if (ret > credit) -+ ret = credit; -+ vvs->tx_cnt += ret; -+ spin_unlock_bh(&vvs->tx_lock); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_credit); -+ -+void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) -+{ -+ spin_lock_bh(&vvs->tx_lock); -+ vvs->tx_cnt -= credit; -+ spin_unlock_bh(&vvs->tx_lock); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_put_credit); -+ -+static int virtio_transport_send_credit_update(struct vsock_sock *vsk, -+ int type, -+ struct virtio_vsock_hdr *hdr) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, -+ .type = type, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+static ssize_t -+virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ struct virtio_vsock_pkt *pkt; -+ size_t bytes, total = 0; -+ int err = -EFAULT; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ while (total < len && !list_empty(&vvs->rx_queue)) { -+ pkt = list_first_entry(&vvs->rx_queue, -+ struct virtio_vsock_pkt, list); -+ -+ bytes = len - total; -+ if (bytes > pkt->len - pkt->off) -+ bytes = pkt->len - pkt->off; -+ -+ /* sk_lock is held by caller so no one else can dequeue. -+ * Unlock rx_lock since memcpy_to_msg() may sleep. -+ */ -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); -+ if (err) -+ goto out; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ -+ total += bytes; -+ pkt->off += bytes; -+ if (pkt->off == pkt->len) { -+ virtio_transport_dec_rx_pkt(vvs, pkt); -+ list_del(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ } -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ /* Send a credit pkt to peer */ -+ virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, -+ NULL); -+ -+ return total; -+ -+out: -+ if (total) -+ err = total; -+ return err; -+} -+ -+ssize_t -+virtio_transport_stream_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags) -+{ -+ if (flags & MSG_PEEK) -+ return -EOPNOTSUPP; -+ -+ return virtio_transport_stream_do_dequeue(vsk, msg, len); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); -+ -+int -+virtio_transport_dgram_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); -+ -+s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ bytes = vvs->rx_bytes; -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ return bytes; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); -+ -+static s64 virtio_transport_has_space(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); -+ if (bytes < 0) -+ bytes = 0; -+ -+ return bytes; -+} -+ -+s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ spin_lock_bh(&vvs->tx_lock); -+ bytes = virtio_transport_has_space(vsk); -+ spin_unlock_bh(&vvs->tx_lock); -+ -+ return bytes; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); -+ -+int virtio_transport_do_socket_init(struct vsock_sock *vsk, -+ struct vsock_sock *psk) -+{ -+ struct virtio_vsock_sock *vvs; -+ -+ vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); -+ if (!vvs) -+ return -ENOMEM; -+ -+ vsk->trans = vvs; -+ vvs->vsk = vsk; -+ if (psk) { -+ struct virtio_vsock_sock *ptrans = psk->trans; -+ -+ vvs->buf_size = ptrans->buf_size; -+ vvs->buf_size_min = ptrans->buf_size_min; -+ vvs->buf_size_max = ptrans->buf_size_max; -+ vvs->peer_buf_alloc = ptrans->peer_buf_alloc; -+ } else { -+ vvs->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; -+ vvs->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; -+ vvs->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; -+ } -+ -+ vvs->buf_alloc = vvs->buf_size; -+ -+ spin_lock_init(&vvs->rx_lock); -+ spin_lock_init(&vvs->tx_lock); -+ INIT_LIST_HEAD(&vvs->rx_queue); -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); -+ -+u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); -+ -+u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size_min; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); -+ -+u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size_max; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); -+ -+void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val < vvs->buf_size_min) -+ vvs->buf_size_min = val; -+ if (val > vvs->buf_size_max) -+ vvs->buf_size_max = val; -+ vvs->buf_size = val; -+ vvs->buf_alloc = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); -+ -+void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val > vvs->buf_size) -+ vvs->buf_size = val; -+ vvs->buf_size_min = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); -+ -+void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val < vvs->buf_size) -+ vvs->buf_size = val; -+ vvs->buf_size_max = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); -+ -+int -+virtio_transport_notify_poll_in(struct vsock_sock *vsk, -+ size_t target, -+ bool *data_ready_now) -+{ -+ if (vsock_stream_has_data(vsk)) -+ *data_ready_now = true; -+ else -+ *data_ready_now = false; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); -+ -+int -+virtio_transport_notify_poll_out(struct vsock_sock *vsk, -+ size_t target, -+ bool *space_avail_now) -+{ -+ s64 free_space; -+ -+ free_space = vsock_stream_has_space(vsk); -+ if (free_space > 0) -+ *space_avail_now = true; -+ else if (free_space == 0) -+ *space_avail_now = false; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); -+ -+int virtio_transport_notify_recv_init(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); -+ -+int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); -+ -+int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); -+ -+int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, -+ size_t target, ssize_t copied, bool data_read, -+ struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); -+ -+int virtio_transport_notify_send_init(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); -+ -+int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); -+ -+int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); -+ -+int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, -+ ssize_t written, struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); -+ -+u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); -+ -+bool virtio_transport_stream_is_active(struct vsock_sock *vsk) -+{ -+ return true; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); -+ -+bool virtio_transport_stream_allow(u32 cid, u32 port) -+{ -+ return true; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); -+ -+int virtio_transport_dgram_bind(struct vsock_sock *vsk, -+ struct sockaddr_vm *addr) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); -+ -+bool virtio_transport_dgram_allow(u32 cid, u32 port) -+{ -+ return false; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); -+ -+int virtio_transport_connect(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_REQUEST, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_connect); -+ -+int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_SHUTDOWN, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .flags = (mode & RCV_SHUTDOWN ? -+ VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | -+ (mode & SEND_SHUTDOWN ? -+ VIRTIO_VSOCK_SHUTDOWN_SEND : 0), -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_shutdown); -+ -+int -+virtio_transport_dgram_enqueue(struct vsock_sock *vsk, -+ struct sockaddr_vm *remote_addr, -+ struct msghdr *msg, -+ size_t dgram_len) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); -+ -+ssize_t -+virtio_transport_stream_enqueue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RW, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .msg = msg, -+ .pkt_len = len, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); -+ -+void virtio_transport_destruct(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ kfree(vvs); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_destruct); -+ -+static int virtio_transport_reset(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RST, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .reply = !!pkt, -+ }; -+ -+ /* Send RST only if the original pkt is not a RST pkt */ -+ if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ return 0; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+/* Normally packets are associated with a socket. There may be no socket if an -+ * attempt was made to connect to a socket that does not exist. -+ */ -+static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RST, -+ .type = le16_to_cpu(pkt->hdr.type), -+ .reply = true, -+ }; -+ -+ /* Send RST only if the original pkt is not a RST pkt */ -+ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ return 0; -+ -+ pkt = virtio_transport_alloc_pkt(&info, 0, -+ le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port), -+ le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ if (!pkt) -+ return -ENOMEM; -+ -+ return virtio_transport_get_ops()->send_pkt(pkt); -+} -+ -+static void virtio_transport_wait_close(struct sock *sk, long timeout) -+{ -+ if (timeout) { -+ DEFINE_WAIT(wait); -+ -+ do { -+ prepare_to_wait(sk_sleep(sk), &wait, -+ TASK_INTERRUPTIBLE); -+ if (sk_wait_event(sk, &timeout, -+ sock_flag(sk, SOCK_DONE))) -+ break; -+ } while (!signal_pending(current) && timeout); -+ -+ finish_wait(sk_sleep(sk), &wait); -+ } -+} -+ -+static void virtio_transport_do_close(struct vsock_sock *vsk, -+ bool cancel_timeout) -+{ -+ struct sock *sk = sk_vsock(vsk); -+ -+ sock_set_flag(sk, SOCK_DONE); -+ vsk->peer_shutdown = SHUTDOWN_MASK; -+ if (vsock_stream_has_data(vsk) <= 0) -+ sk->sk_state = SS_DISCONNECTING; -+ sk->sk_state_change(sk); -+ -+ if (vsk->close_work_scheduled && -+ (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { -+ vsk->close_work_scheduled = false; -+ -+ vsock_remove_sock(vsk); -+ -+ /* Release refcnt obtained when we scheduled the timeout */ -+ sock_put(sk); -+ } -+} -+ -+static void virtio_transport_close_timeout(struct work_struct *work) -+{ -+ struct vsock_sock *vsk = -+ container_of(work, struct vsock_sock, close_work.work); -+ struct sock *sk = sk_vsock(vsk); -+ -+ sock_hold(sk); -+ lock_sock(sk); -+ -+ if (!sock_flag(sk, SOCK_DONE)) { -+ (void)virtio_transport_reset(vsk, NULL); -+ -+ virtio_transport_do_close(vsk, false); -+ } -+ -+ vsk->close_work_scheduled = false; -+ -+ release_sock(sk); -+ sock_put(sk); -+} -+ -+/* User context, vsk->sk is locked */ -+static bool virtio_transport_close(struct vsock_sock *vsk) -+{ -+ struct sock *sk = &vsk->sk; -+ -+ if (!(sk->sk_state == SS_CONNECTED || -+ sk->sk_state == SS_DISCONNECTING)) -+ return true; -+ -+ /* Already received SHUTDOWN from peer, reply with RST */ -+ if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { -+ (void)virtio_transport_reset(vsk, NULL); -+ return true; -+ } -+ -+ if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) -+ (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); -+ -+ if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) -+ virtio_transport_wait_close(sk, sk->sk_lingertime); -+ -+ if (sock_flag(sk, SOCK_DONE)) { -+ return true; -+ } -+ -+ sock_hold(sk); -+ INIT_DELAYED_WORK(&vsk->close_work, -+ virtio_transport_close_timeout); -+ vsk->close_work_scheduled = true; -+ schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT); -+ return false; -+} -+ -+void virtio_transport_release(struct vsock_sock *vsk) -+{ -+ struct sock *sk = &vsk->sk; -+ bool remove_sock = true; -+ -+ lock_sock(sk); -+ if (sk->sk_type == SOCK_STREAM) -+ remove_sock = virtio_transport_close(vsk); -+ release_sock(sk); -+ -+ if (remove_sock) -+ vsock_remove_sock(vsk); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_release); -+ -+static int -+virtio_transport_recv_connecting(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ int err; -+ int skerr; -+ -+ switch (le16_to_cpu(pkt->hdr.op)) { -+ case VIRTIO_VSOCK_OP_RESPONSE: -+ sk->sk_state = SS_CONNECTED; -+ sk->sk_socket->state = SS_CONNECTED; -+ vsock_insert_connected(vsk); -+ sk->sk_state_change(sk); -+ break; -+ case VIRTIO_VSOCK_OP_INVALID: -+ break; -+ case VIRTIO_VSOCK_OP_RST: -+ skerr = ECONNRESET; -+ err = 0; -+ goto destroy; -+ default: -+ skerr = EPROTO; -+ err = -EINVAL; -+ goto destroy; -+ } -+ return 0; -+ -+destroy: -+ virtio_transport_reset(vsk, pkt); -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = skerr; -+ sk->sk_error_report(sk); -+ return err; -+} -+ -+static int -+virtio_transport_recv_connected(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ int err = 0; -+ -+ switch (le16_to_cpu(pkt->hdr.op)) { -+ case VIRTIO_VSOCK_OP_RW: -+ pkt->len = le32_to_cpu(pkt->hdr.len); -+ pkt->off = 0; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ virtio_transport_inc_rx_pkt(vvs, pkt); -+ list_add_tail(&pkt->list, &vvs->rx_queue); -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ sk->sk_data_ready(sk); -+ return err; -+ case VIRTIO_VSOCK_OP_CREDIT_UPDATE: -+ sk->sk_write_space(sk); -+ break; -+ case VIRTIO_VSOCK_OP_SHUTDOWN: -+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) -+ vsk->peer_shutdown |= RCV_SHUTDOWN; -+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) -+ vsk->peer_shutdown |= SEND_SHUTDOWN; -+ if (vsk->peer_shutdown == SHUTDOWN_MASK && -+ vsock_stream_has_data(vsk) <= 0) -+ sk->sk_state = SS_DISCONNECTING; -+ if (le32_to_cpu(pkt->hdr.flags)) -+ sk->sk_state_change(sk); -+ break; -+ case VIRTIO_VSOCK_OP_RST: -+ virtio_transport_do_close(vsk, true); -+ break; -+ default: -+ err = -EINVAL; -+ break; -+ } -+ -+ virtio_transport_free_pkt(pkt); -+ return err; -+} -+ -+static void -+virtio_transport_recv_disconnecting(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ -+ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ virtio_transport_do_close(vsk, true); -+} -+ -+static int -+virtio_transport_send_response(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RESPONSE, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .remote_cid = le32_to_cpu(pkt->hdr.src_cid), -+ .remote_port = le32_to_cpu(pkt->hdr.src_port), -+ .reply = true, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+/* Handle server socket */ -+static int -+virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct vsock_sock *vchild; -+ struct sock *child; -+ -+ if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { -+ virtio_transport_reset(vsk, pkt); -+ return -EINVAL; -+ } -+ -+ if (sk_acceptq_is_full(sk)) { -+ virtio_transport_reset(vsk, pkt); -+ return -ENOMEM; -+ } -+ -+ child = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, -+ sk->sk_type, 0); -+ if (!child) { -+ virtio_transport_reset(vsk, pkt); -+ return -ENOMEM; -+ } -+ -+ sk->sk_ack_backlog++; -+ -+ lock_sock_nested(child, SINGLE_DEPTH_NESTING); -+ -+ child->sk_state = SS_CONNECTED; -+ -+ vchild = vsock_sk(child); -+ vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port)); -+ vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ -+ vsock_insert_connected(vchild); -+ vsock_enqueue_accept(sk, child); -+ virtio_transport_send_response(vchild, pkt); -+ -+ release_sock(child); -+ -+ sk->sk_data_ready(sk); -+ return 0; -+} -+ -+static bool virtio_transport_space_update(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ bool space_available; -+ -+ /* buf_alloc and fwd_cnt is always included in the hdr */ -+ spin_lock_bh(&vvs->tx_lock); -+ vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); -+ vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); -+ space_available = virtio_transport_has_space(vsk); -+ spin_unlock_bh(&vvs->tx_lock); -+ return space_available; -+} -+ -+/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex -+ * lock. -+ */ -+void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct sockaddr_vm src, dst; -+ struct vsock_sock *vsk; -+ struct sock *sk; -+ bool space_available; -+ -+ vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port)); -+ -+ trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, -+ dst.svm_cid, dst.svm_port, -+ le32_to_cpu(pkt->hdr.len), -+ le16_to_cpu(pkt->hdr.type), -+ le16_to_cpu(pkt->hdr.op), -+ le32_to_cpu(pkt->hdr.flags), -+ le32_to_cpu(pkt->hdr.buf_alloc), -+ le32_to_cpu(pkt->hdr.fwd_cnt)); -+ -+ if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { -+ (void)virtio_transport_reset_no_sock(pkt); -+ goto free_pkt; -+ } -+ -+ /* The socket must be in connected or bound table -+ * otherwise send reset back -+ */ -+ sk = vsock_find_connected_socket(&src, &dst); -+ if (!sk) { -+ sk = vsock_find_bound_socket(&dst); -+ if (!sk) { -+ (void)virtio_transport_reset_no_sock(pkt); -+ goto free_pkt; -+ } -+ } -+ -+ vsk = vsock_sk(sk); -+ -+ space_available = virtio_transport_space_update(sk, pkt); -+ -+ lock_sock(sk); -+ -+ /* Update CID in case it has changed after a transport reset event */ -+ vsk->local_addr.svm_cid = dst.svm_cid; -+ -+ if (space_available) -+ sk->sk_write_space(sk); -+ -+ switch (sk->sk_state) { -+ case VSOCK_SS_LISTEN: -+ virtio_transport_recv_listen(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ case SS_CONNECTING: -+ virtio_transport_recv_connecting(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ case SS_CONNECTED: -+ virtio_transport_recv_connected(sk, pkt); -+ break; -+ case SS_DISCONNECTING: -+ virtio_transport_recv_disconnecting(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ default: -+ virtio_transport_free_pkt(pkt); -+ break; -+ } -+ release_sock(sk); -+ -+ /* Release refcnt obtained when we fetched this socket out of the -+ * bound or connected list. -+ */ -+ sock_put(sk); -+ return; -+ -+free_pkt: -+ virtio_transport_free_pkt(pkt); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); -+ -+void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ kfree(pkt->buf); -+ kfree(pkt); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("common code for virtio vsock"); --- -2.10.0 - diff --git a/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch b/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch deleted file mode 100644 index 78931bc21..000000000 --- a/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch +++ /dev/null @@ -1,663 +0,0 @@ -From c384834d9495c7b2a36b0054d08ddf3240687bdc Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:33 +0100 -Subject: [PATCH 08/42] VSOCK: Introduce virtio_transport.ko - -VM sockets virtio transport implementation. This driver runs in the -guest. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0ea9e1d3a9e3ef7d2a1462d3de6b95131dc7d872) ---- - MAINTAINERS | 1 + - net/vmw_vsock/virtio_transport.c | 624 +++++++++++++++++++++++++++++++++++++++ - 2 files changed, 625 insertions(+) - create mode 100644 net/vmw_vsock/virtio_transport.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index b93ba8b..82d1123 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11391,6 +11391,7 @@ S: Maintained - F: include/linux/virtio_vsock.h - F: include/uapi/linux/virtio_vsock.h - F: net/vmw_vsock/virtio_transport_common.c -+F: net/vmw_vsock/virtio_transport.c - - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul -diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c -new file mode 100644 -index 0000000..699dfab ---- /dev/null -+++ b/net/vmw_vsock/virtio_transport.c -@@ -0,0 +1,624 @@ -+/* -+ * virtio transport for vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * Some of the code is take from Gerd Hoffmann 's -+ * early virtio-vsock proof-of-concept bits. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static struct workqueue_struct *virtio_vsock_workqueue; -+static struct virtio_vsock *the_virtio_vsock; -+static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ -+ -+struct virtio_vsock { -+ struct virtio_device *vdev; -+ struct virtqueue *vqs[VSOCK_VQ_MAX]; -+ -+ /* Virtqueue processing is deferred to a workqueue */ -+ struct work_struct tx_work; -+ struct work_struct rx_work; -+ struct work_struct event_work; -+ -+ /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX] -+ * must be accessed with tx_lock held. -+ */ -+ struct mutex tx_lock; -+ -+ struct work_struct send_pkt_work; -+ spinlock_t send_pkt_list_lock; -+ struct list_head send_pkt_list; -+ -+ atomic_t queued_replies; -+ -+ /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] -+ * must be accessed with rx_lock held. -+ */ -+ struct mutex rx_lock; -+ int rx_buf_nr; -+ int rx_buf_max_nr; -+ -+ /* The following fields are protected by event_lock. -+ * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held. -+ */ -+ struct mutex event_lock; -+ struct virtio_vsock_event event_list[8]; -+ -+ u32 guest_cid; -+}; -+ -+static struct virtio_vsock *virtio_vsock_get(void) -+{ -+ return the_virtio_vsock; -+} -+ -+static u32 virtio_transport_get_local_cid(void) -+{ -+ struct virtio_vsock *vsock = virtio_vsock_get(); -+ -+ return vsock->guest_cid; -+} -+ -+static void -+virtio_transport_send_pkt_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, send_pkt_work); -+ struct virtqueue *vq; -+ bool added = false; -+ bool restart_rx = false; -+ -+ mutex_lock(&vsock->tx_lock); -+ -+ vq = vsock->vqs[VSOCK_VQ_TX]; -+ -+ /* Avoid unnecessary interrupts while we're processing the ring */ -+ virtqueue_disable_cb(vq); -+ -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ struct scatterlist hdr, buf, *sgs[2]; -+ int ret, in_sg = 0, out_sg = 0; -+ bool reply; -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ if (list_empty(&vsock->send_pkt_list)) { -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ virtqueue_enable_cb(vq); -+ break; -+ } -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ reply = pkt->reply; -+ -+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); -+ sgs[out_sg++] = &hdr; -+ if (pkt->buf) { -+ sg_init_one(&buf, pkt->buf, pkt->len); -+ sgs[out_sg++] = &buf; -+ } -+ -+ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); -+ if (ret < 0) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) -+ continue; /* retry now that we have more space */ -+ break; -+ } -+ -+ if (reply) { -+ struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; -+ int val; -+ -+ val = atomic_dec_return(&vsock->queued_replies); -+ -+ /* Do we now have resources to resume rx processing? */ -+ if (val + 1 == virtqueue_get_vring_size(rx_vq)) -+ restart_rx = true; -+ } -+ -+ added = true; -+ } -+ -+ if (added) -+ virtqueue_kick(vq); -+ -+ mutex_unlock(&vsock->tx_lock); -+ -+ if (restart_rx) -+ queue_work(virtio_vsock_workqueue, &vsock->rx_work); -+} -+ -+static int -+virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock *vsock; -+ int len = pkt->len; -+ -+ vsock = virtio_vsock_get(); -+ if (!vsock) { -+ virtio_transport_free_pkt(pkt); -+ return -ENODEV; -+ } -+ -+ if (pkt->reply) -+ atomic_inc(&vsock->queued_replies); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add_tail(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); -+ return len; -+} -+ -+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) -+{ -+ int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; -+ struct virtio_vsock_pkt *pkt; -+ struct scatterlist hdr, buf, *sgs[2]; -+ struct virtqueue *vq; -+ int ret; -+ -+ vq = vsock->vqs[VSOCK_VQ_RX]; -+ -+ do { -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ break; -+ -+ pkt->buf = kmalloc(buf_len, GFP_KERNEL); -+ if (!pkt->buf) { -+ virtio_transport_free_pkt(pkt); -+ break; -+ } -+ -+ pkt->len = buf_len; -+ -+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); -+ sgs[0] = &hdr; -+ -+ sg_init_one(&buf, pkt->buf, buf_len); -+ sgs[1] = &buf; -+ ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); -+ if (ret) { -+ virtio_transport_free_pkt(pkt); -+ break; -+ } -+ vsock->rx_buf_nr++; -+ } while (vq->num_free); -+ if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) -+ vsock->rx_buf_max_nr = vsock->rx_buf_nr; -+ virtqueue_kick(vq); -+} -+ -+static void virtio_transport_tx_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, tx_work); -+ struct virtqueue *vq; -+ bool added = false; -+ -+ vq = vsock->vqs[VSOCK_VQ_TX]; -+ mutex_lock(&vsock->tx_lock); -+ do { -+ struct virtio_vsock_pkt *pkt; -+ unsigned int len; -+ -+ virtqueue_disable_cb(vq); -+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { -+ virtio_transport_free_pkt(pkt); -+ added = true; -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ mutex_unlock(&vsock->tx_lock); -+ -+ if (added) -+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); -+} -+ -+/* Is there space left for replies to rx packets? */ -+static bool virtio_transport_more_replies(struct virtio_vsock *vsock) -+{ -+ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX]; -+ int val; -+ -+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ -+ val = atomic_read(&vsock->queued_replies); -+ -+ return val < virtqueue_get_vring_size(vq); -+} -+ -+static void virtio_transport_rx_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, rx_work); -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_RX]; -+ -+ mutex_lock(&vsock->rx_lock); -+ -+ do { -+ virtqueue_disable_cb(vq); -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ unsigned int len; -+ -+ if (!virtio_transport_more_replies(vsock)) { -+ /* Stop rx until the device processes already -+ * pending replies. Leave rx virtqueue -+ * callbacks disabled. -+ */ -+ goto out; -+ } -+ -+ pkt = virtqueue_get_buf(vq, &len); -+ if (!pkt) { -+ break; -+ } -+ -+ vsock->rx_buf_nr--; -+ -+ /* Drop short/long packets */ -+ if (unlikely(len < sizeof(pkt->hdr) || -+ len > sizeof(pkt->hdr) + pkt->len)) { -+ virtio_transport_free_pkt(pkt); -+ continue; -+ } -+ -+ pkt->len = len - sizeof(pkt->hdr); -+ virtio_transport_recv_pkt(pkt); -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ -+out: -+ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) -+ virtio_vsock_rx_fill(vsock); -+ mutex_unlock(&vsock->rx_lock); -+} -+ -+/* event_lock must be held */ -+static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, -+ struct virtio_vsock_event *event) -+{ -+ struct scatterlist sg; -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_EVENT]; -+ -+ sg_init_one(&sg, event, sizeof(*event)); -+ -+ return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL); -+} -+ -+/* event_lock must be held */ -+static void virtio_vsock_event_fill(struct virtio_vsock *vsock) -+{ -+ size_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) { -+ struct virtio_vsock_event *event = &vsock->event_list[i]; -+ -+ virtio_vsock_event_fill_one(vsock, event); -+ } -+ -+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); -+} -+ -+static void virtio_vsock_reset_sock(struct sock *sk) -+{ -+ lock_sock(sk); -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = ECONNRESET; -+ sk->sk_error_report(sk); -+ release_sock(sk); -+} -+ -+static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) -+{ -+ struct virtio_device *vdev = vsock->vdev; -+ u64 guest_cid; -+ -+ vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), -+ &guest_cid, sizeof(guest_cid)); -+ vsock->guest_cid = le64_to_cpu(guest_cid); -+} -+ -+/* event_lock must be held */ -+static void virtio_vsock_event_handle(struct virtio_vsock *vsock, -+ struct virtio_vsock_event *event) -+{ -+ switch (le32_to_cpu(event->id)) { -+ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: -+ virtio_vsock_update_guest_cid(vsock); -+ vsock_for_each_connected_socket(virtio_vsock_reset_sock); -+ break; -+ } -+} -+ -+static void virtio_transport_event_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, event_work); -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_EVENT]; -+ -+ mutex_lock(&vsock->event_lock); -+ -+ do { -+ struct virtio_vsock_event *event; -+ unsigned int len; -+ -+ virtqueue_disable_cb(vq); -+ while ((event = virtqueue_get_buf(vq, &len)) != NULL) { -+ if (len == sizeof(*event)) -+ virtio_vsock_event_handle(vsock, event); -+ -+ virtio_vsock_event_fill_one(vsock, event); -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ -+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); -+ -+ mutex_unlock(&vsock->event_lock); -+} -+ -+static void virtio_vsock_event_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->event_work); -+} -+ -+static void virtio_vsock_tx_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->tx_work); -+} -+ -+static void virtio_vsock_rx_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->rx_work); -+} -+ -+static struct virtio_transport virtio_transport = { -+ .transport = { -+ .get_local_cid = virtio_transport_get_local_cid, -+ -+ .init = virtio_transport_do_socket_init, -+ .destruct = virtio_transport_destruct, -+ .release = virtio_transport_release, -+ .connect = virtio_transport_connect, -+ .shutdown = virtio_transport_shutdown, -+ -+ .dgram_bind = virtio_transport_dgram_bind, -+ .dgram_dequeue = virtio_transport_dgram_dequeue, -+ .dgram_enqueue = virtio_transport_dgram_enqueue, -+ .dgram_allow = virtio_transport_dgram_allow, -+ -+ .stream_dequeue = virtio_transport_stream_dequeue, -+ .stream_enqueue = virtio_transport_stream_enqueue, -+ .stream_has_data = virtio_transport_stream_has_data, -+ .stream_has_space = virtio_transport_stream_has_space, -+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, -+ .stream_is_active = virtio_transport_stream_is_active, -+ .stream_allow = virtio_transport_stream_allow, -+ -+ .notify_poll_in = virtio_transport_notify_poll_in, -+ .notify_poll_out = virtio_transport_notify_poll_out, -+ .notify_recv_init = virtio_transport_notify_recv_init, -+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, -+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, -+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, -+ .notify_send_init = virtio_transport_notify_send_init, -+ .notify_send_pre_block = virtio_transport_notify_send_pre_block, -+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, -+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, -+ -+ .set_buffer_size = virtio_transport_set_buffer_size, -+ .set_min_buffer_size = virtio_transport_set_min_buffer_size, -+ .set_max_buffer_size = virtio_transport_set_max_buffer_size, -+ .get_buffer_size = virtio_transport_get_buffer_size, -+ .get_min_buffer_size = virtio_transport_get_min_buffer_size, -+ .get_max_buffer_size = virtio_transport_get_max_buffer_size, -+ }, -+ -+ .send_pkt = virtio_transport_send_pkt, -+}; -+ -+static int virtio_vsock_probe(struct virtio_device *vdev) -+{ -+ vq_callback_t *callbacks[] = { -+ virtio_vsock_rx_done, -+ virtio_vsock_tx_done, -+ virtio_vsock_event_done, -+ }; -+ static const char * const names[] = { -+ "rx", -+ "tx", -+ "event", -+ }; -+ struct virtio_vsock *vsock = NULL; -+ int ret; -+ -+ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); -+ if (ret) -+ return ret; -+ -+ /* Only one virtio-vsock device per guest is supported */ -+ if (the_virtio_vsock) { -+ ret = -EBUSY; -+ goto out; -+ } -+ -+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); -+ if (!vsock) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ vsock->vdev = vdev; -+ -+ ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, -+ vsock->vqs, callbacks, names); -+ if (ret < 0) -+ goto out; -+ -+ virtio_vsock_update_guest_cid(vsock); -+ -+ ret = vsock_core_init(&virtio_transport.transport); -+ if (ret < 0) -+ goto out_vqs; -+ -+ vsock->rx_buf_nr = 0; -+ vsock->rx_buf_max_nr = 0; -+ atomic_set(&vsock->queued_replies, 0); -+ -+ vdev->priv = vsock; -+ the_virtio_vsock = vsock; -+ mutex_init(&vsock->tx_lock); -+ mutex_init(&vsock->rx_lock); -+ mutex_init(&vsock->event_lock); -+ spin_lock_init(&vsock->send_pkt_list_lock); -+ INIT_LIST_HEAD(&vsock->send_pkt_list); -+ INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); -+ INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); -+ INIT_WORK(&vsock->event_work, virtio_transport_event_work); -+ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); -+ -+ mutex_lock(&vsock->rx_lock); -+ virtio_vsock_rx_fill(vsock); -+ mutex_unlock(&vsock->rx_lock); -+ -+ mutex_lock(&vsock->event_lock); -+ virtio_vsock_event_fill(vsock); -+ mutex_unlock(&vsock->event_lock); -+ -+ mutex_unlock(&the_virtio_vsock_mutex); -+ return 0; -+ -+out_vqs: -+ vsock->vdev->config->del_vqs(vsock->vdev); -+out: -+ kfree(vsock); -+ mutex_unlock(&the_virtio_vsock_mutex); -+ return ret; -+} -+ -+static void virtio_vsock_remove(struct virtio_device *vdev) -+{ -+ struct virtio_vsock *vsock = vdev->priv; -+ struct virtio_vsock_pkt *pkt; -+ -+ flush_work(&vsock->rx_work); -+ flush_work(&vsock->tx_work); -+ flush_work(&vsock->event_work); -+ flush_work(&vsock->send_pkt_work); -+ -+ vdev->config->reset(vdev); -+ -+ mutex_lock(&vsock->rx_lock); -+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) -+ virtio_transport_free_pkt(pkt); -+ mutex_unlock(&vsock->rx_lock); -+ -+ mutex_lock(&vsock->tx_lock); -+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) -+ virtio_transport_free_pkt(pkt); -+ mutex_unlock(&vsock->tx_lock); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ while (!list_empty(&vsock->send_pkt_list)) { -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ mutex_lock(&the_virtio_vsock_mutex); -+ the_virtio_vsock = NULL; -+ vsock_core_exit(); -+ mutex_unlock(&the_virtio_vsock_mutex); -+ -+ vdev->config->del_vqs(vdev); -+ -+ kfree(vsock); -+} -+ -+static struct virtio_device_id id_table[] = { -+ { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, -+ { 0 }, -+}; -+ -+static unsigned int features[] = { -+}; -+ -+static struct virtio_driver virtio_vsock_driver = { -+ .feature_table = features, -+ .feature_table_size = ARRAY_SIZE(features), -+ .driver.name = KBUILD_MODNAME, -+ .driver.owner = THIS_MODULE, -+ .id_table = id_table, -+ .probe = virtio_vsock_probe, -+ .remove = virtio_vsock_remove, -+}; -+ -+static int __init virtio_vsock_init(void) -+{ -+ int ret; -+ -+ virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); -+ if (!virtio_vsock_workqueue) -+ return -ENOMEM; -+ ret = register_virtio_driver(&virtio_vsock_driver); -+ if (ret) -+ destroy_workqueue(virtio_vsock_workqueue); -+ return ret; -+} -+ -+static void __exit virtio_vsock_exit(void) -+{ -+ unregister_virtio_driver(&virtio_vsock_driver); -+ destroy_workqueue(virtio_vsock_workqueue); -+} -+ -+module_init(virtio_vsock_init); -+module_exit(virtio_vsock_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("virtio transport for vsock"); -+MODULE_DEVICE_TABLE(virtio, id_table); --- -2.10.0 - diff --git a/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch b/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch deleted file mode 100644 index ea0d3196f..000000000 --- a/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch +++ /dev/null @@ -1,777 +0,0 @@ -From a0af1060ea091348b94bd3780e5b92a3334e64b2 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:34 +0100 -Subject: [PATCH 09/42] VSOCK: Introduce vhost_vsock.ko - -VM sockets vhost transport implementation. This driver runs on the -host. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 433fc58e6bf2c8bd97e57153ed28e64fd78207b8) ---- - MAINTAINERS | 2 + - drivers/vhost/vsock.c | 722 +++++++++++++++++++++++++++++++++++++++++++++ - include/uapi/linux/vhost.h | 5 + - 3 files changed, 729 insertions(+) - create mode 100644 drivers/vhost/vsock.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index 82d1123..12d49f5 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11392,6 +11392,8 @@ F: include/linux/virtio_vsock.h - F: include/uapi/linux/virtio_vsock.h - F: net/vmw_vsock/virtio_transport_common.c - F: net/vmw_vsock/virtio_transport.c -+F: drivers/vhost/vsock.c -+F: drivers/vhost/vsock.h - - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -new file mode 100644 -index 0000000..028ca16 ---- /dev/null -+++ b/drivers/vhost/vsock.c -@@ -0,0 +1,722 @@ -+/* -+ * vhost transport for vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "vhost.h" -+ -+#define VHOST_VSOCK_DEFAULT_HOST_CID 2 -+ -+enum { -+ VHOST_VSOCK_FEATURES = VHOST_FEATURES, -+}; -+ -+/* Used to track all the vhost_vsock instances on the system. */ -+static DEFINE_SPINLOCK(vhost_vsock_lock); -+static LIST_HEAD(vhost_vsock_list); -+ -+struct vhost_vsock { -+ struct vhost_dev dev; -+ struct vhost_virtqueue vqs[2]; -+ -+ /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ -+ struct list_head list; -+ -+ struct vhost_work send_pkt_work; -+ spinlock_t send_pkt_list_lock; -+ struct list_head send_pkt_list; /* host->guest pending packets */ -+ -+ atomic_t queued_replies; -+ -+ u32 guest_cid; -+}; -+ -+static u32 vhost_transport_get_local_cid(void) -+{ -+ return VHOST_VSOCK_DEFAULT_HOST_CID; -+} -+ -+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -+{ -+ struct vhost_vsock *vsock; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_for_each_entry(vsock, &vhost_vsock_list, list) { -+ u32 other_cid = vsock->guest_cid; -+ -+ /* Skip instances that have no CID yet */ -+ if (other_cid == 0) -+ continue; -+ -+ if (other_cid == guest_cid) { -+ spin_unlock_bh(&vhost_vsock_lock); -+ return vsock; -+ } -+ } -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ return NULL; -+} -+ -+static void -+vhost_transport_do_send_pkt(struct vhost_vsock *vsock, -+ struct vhost_virtqueue *vq) -+{ -+ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; -+ bool added = false; -+ bool restart_tx = false; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vq->private_data) -+ goto out; -+ -+ /* Avoid further vmexits, we're already processing the virtqueue */ -+ vhost_disable_notify(&vsock->dev, vq); -+ -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ struct iov_iter iov_iter; -+ unsigned out, in; -+ size_t nbytes; -+ size_t len; -+ int head; -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ if (list_empty(&vsock->send_pkt_list)) { -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ vhost_enable_notify(&vsock->dev, vq); -+ break; -+ } -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), -+ &out, &in, NULL, NULL); -+ if (head < 0) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ break; -+ } -+ -+ if (head == vq->num) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ /* We cannot finish yet if more buffers snuck in while -+ * re-enabling notify. -+ */ -+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { -+ vhost_disable_notify(&vsock->dev, vq); -+ continue; -+ } -+ break; -+ } -+ -+ if (out) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Expected 0 output buffers, got %u\n", out); -+ break; -+ } -+ -+ len = iov_length(&vq->iov[out], in); -+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); -+ -+ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); -+ if (nbytes != sizeof(pkt->hdr)) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Faulted on copying pkt hdr\n"); -+ break; -+ } -+ -+ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); -+ if (nbytes != pkt->len) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Faulted on copying pkt buf\n"); -+ break; -+ } -+ -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ added = true; -+ -+ if (pkt->reply) { -+ int val; -+ -+ val = atomic_dec_return(&vsock->queued_replies); -+ -+ /* Do we have resources to resume tx processing? */ -+ if (val + 1 == tx_vq->num) -+ restart_tx = true; -+ } -+ -+ virtio_transport_free_pkt(pkt); -+ } -+ if (added) -+ vhost_signal(&vsock->dev, vq); -+ -+out: -+ mutex_unlock(&vq->mutex); -+ -+ if (restart_tx) -+ vhost_poll_queue(&tx_vq->poll); -+} -+ -+static void vhost_transport_send_pkt_work(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq; -+ struct vhost_vsock *vsock; -+ -+ vsock = container_of(work, struct vhost_vsock, send_pkt_work); -+ vq = &vsock->vqs[VSOCK_VQ_RX]; -+ -+ vhost_transport_do_send_pkt(vsock, vq); -+} -+ -+static int -+vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct vhost_vsock *vsock; -+ struct vhost_virtqueue *vq; -+ int len = pkt->len; -+ -+ /* Find the vhost_vsock according to guest context id */ -+ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); -+ if (!vsock) { -+ virtio_transport_free_pkt(pkt); -+ return -ENODEV; -+ } -+ -+ vq = &vsock->vqs[VSOCK_VQ_RX]; -+ -+ if (pkt->reply) -+ atomic_inc(&vsock->queued_replies); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add_tail(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); -+ return len; -+} -+ -+static struct virtio_vsock_pkt * -+vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, -+ unsigned int out, unsigned int in) -+{ -+ struct virtio_vsock_pkt *pkt; -+ struct iov_iter iov_iter; -+ size_t nbytes; -+ size_t len; -+ -+ if (in != 0) { -+ vq_err(vq, "Expected 0 input buffers, got %u\n", in); -+ return NULL; -+ } -+ -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ return NULL; -+ -+ len = iov_length(vq->iov, out); -+ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); -+ -+ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); -+ if (nbytes != sizeof(pkt->hdr)) { -+ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", -+ sizeof(pkt->hdr), nbytes); -+ kfree(pkt); -+ return NULL; -+ } -+ -+ if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) -+ pkt->len = le32_to_cpu(pkt->hdr.len); -+ -+ /* No payload */ -+ if (!pkt->len) -+ return pkt; -+ -+ /* The pkt is too big */ -+ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { -+ kfree(pkt); -+ return NULL; -+ } -+ -+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL); -+ if (!pkt->buf) { -+ kfree(pkt); -+ return NULL; -+ } -+ -+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); -+ if (nbytes != pkt->len) { -+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n", -+ pkt->len, nbytes); -+ virtio_transport_free_pkt(pkt); -+ return NULL; -+ } -+ -+ return pkt; -+} -+ -+/* Is there space left for replies to rx packets? */ -+static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) -+{ -+ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; -+ int val; -+ -+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ -+ val = atomic_read(&vsock->queued_replies); -+ -+ return val < vq->num; -+} -+ -+static void vhost_vsock_handle_tx_kick(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -+ poll.work); -+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, -+ dev); -+ struct virtio_vsock_pkt *pkt; -+ int head; -+ unsigned int out, in; -+ bool added = false; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vq->private_data) -+ goto out; -+ -+ vhost_disable_notify(&vsock->dev, vq); -+ for (;;) { -+ if (!vhost_vsock_more_replies(vsock)) { -+ /* Stop tx until the device processes already -+ * pending replies. Leave tx virtqueue -+ * callbacks disabled. -+ */ -+ goto no_more_replies; -+ } -+ -+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), -+ &out, &in, NULL, NULL); -+ if (head < 0) -+ break; -+ -+ if (head == vq->num) { -+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { -+ vhost_disable_notify(&vsock->dev, vq); -+ continue; -+ } -+ break; -+ } -+ -+ pkt = vhost_vsock_alloc_pkt(vq, out, in); -+ if (!pkt) { -+ vq_err(vq, "Faulted on pkt\n"); -+ continue; -+ } -+ -+ /* Only accept correctly addressed packets */ -+ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) -+ virtio_transport_recv_pkt(pkt); -+ else -+ virtio_transport_free_pkt(pkt); -+ -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ added = true; -+ } -+ -+no_more_replies: -+ if (added) -+ vhost_signal(&vsock->dev, vq); -+ -+out: -+ mutex_unlock(&vq->mutex); -+} -+ -+static void vhost_vsock_handle_rx_kick(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -+ poll.work); -+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, -+ dev); -+ -+ vhost_transport_do_send_pkt(vsock, vq); -+} -+ -+static int vhost_vsock_start(struct vhost_vsock *vsock) -+{ -+ size_t i; -+ int ret; -+ -+ mutex_lock(&vsock->dev.mutex); -+ -+ ret = vhost_dev_check_owner(&vsock->dev); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vhost_vq_access_ok(vq)) { -+ ret = -EFAULT; -+ mutex_unlock(&vq->mutex); -+ goto err_vq; -+ } -+ -+ if (!vq->private_data) { -+ vq->private_data = vsock; -+ vhost_vq_init_access(vq); -+ } -+ -+ mutex_unlock(&vq->mutex); -+ } -+ -+ mutex_unlock(&vsock->dev.mutex); -+ return 0; -+ -+err_vq: -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ vq->private_data = NULL; -+ mutex_unlock(&vq->mutex); -+ } -+err: -+ mutex_unlock(&vsock->dev.mutex); -+ return ret; -+} -+ -+static int vhost_vsock_stop(struct vhost_vsock *vsock) -+{ -+ size_t i; -+ int ret; -+ -+ mutex_lock(&vsock->dev.mutex); -+ -+ ret = vhost_dev_check_owner(&vsock->dev); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ vq->private_data = NULL; -+ mutex_unlock(&vq->mutex); -+ } -+ -+err: -+ mutex_unlock(&vsock->dev.mutex); -+ return ret; -+} -+ -+static void vhost_vsock_free(struct vhost_vsock *vsock) -+{ -+ if (is_vmalloc_addr(vsock)) -+ vfree(vsock); -+ else -+ kfree(vsock); -+} -+ -+static int vhost_vsock_dev_open(struct inode *inode, struct file *file) -+{ -+ struct vhost_virtqueue **vqs; -+ struct vhost_vsock *vsock; -+ int ret; -+ -+ /* This struct is large and allocation could fail, fall back to vmalloc -+ * if there is no other way. -+ */ -+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); -+ if (!vsock) { -+ vsock = vmalloc(sizeof(*vsock)); -+ if (!vsock) -+ return -ENOMEM; -+ } -+ -+ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); -+ if (!vqs) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ atomic_set(&vsock->queued_replies, 0); -+ -+ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; -+ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; -+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; -+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; -+ -+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); -+ -+ file->private_data = vsock; -+ spin_lock_init(&vsock->send_pkt_list_lock); -+ INIT_LIST_HEAD(&vsock->send_pkt_list); -+ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_add_tail(&vsock->list, &vhost_vsock_list); -+ spin_unlock_bh(&vhost_vsock_lock); -+ return 0; -+ -+out: -+ vhost_vsock_free(vsock); -+ return ret; -+} -+ -+static void vhost_vsock_flush(struct vhost_vsock *vsock) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) -+ if (vsock->vqs[i].handle_kick) -+ vhost_poll_flush(&vsock->vqs[i].poll); -+ vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); -+} -+ -+static void vhost_vsock_reset_orphans(struct sock *sk) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ -+ /* vmci_transport.c doesn't take sk_lock here either. At least we're -+ * under vsock_table_lock so the sock cannot disappear while we're -+ * executing. -+ */ -+ -+ if (!vhost_vsock_get(vsk->local_addr.svm_cid)) { -+ sock_set_flag(sk, SOCK_DONE); -+ vsk->peer_shutdown = SHUTDOWN_MASK; -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = ECONNRESET; -+ sk->sk_error_report(sk); -+ } -+} -+ -+static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -+{ -+ struct vhost_vsock *vsock = file->private_data; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_del(&vsock->list); -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ /* Iterating over all connections for all CIDs to find orphans is -+ * inefficient. Room for improvement here. */ -+ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); -+ -+ vhost_vsock_stop(vsock); -+ vhost_vsock_flush(vsock); -+ vhost_dev_stop(&vsock->dev); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ while (!list_empty(&vsock->send_pkt_list)) { -+ struct virtio_vsock_pkt *pkt; -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ vhost_dev_cleanup(&vsock->dev, false); -+ kfree(vsock->dev.vqs); -+ vhost_vsock_free(vsock); -+ return 0; -+} -+ -+static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) -+{ -+ struct vhost_vsock *other; -+ -+ /* Refuse reserved CIDs */ -+ if (guest_cid <= VMADDR_CID_HOST || -+ guest_cid == U32_MAX) -+ return -EINVAL; -+ -+ /* 64-bit CIDs are not yet supported */ -+ if (guest_cid > U32_MAX) -+ return -EINVAL; -+ -+ /* Refuse if CID is already in use */ -+ other = vhost_vsock_get(guest_cid); -+ if (other && other != vsock) -+ return -EADDRINUSE; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ vsock->guest_cid = guest_cid; -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ return 0; -+} -+ -+static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) -+{ -+ struct vhost_virtqueue *vq; -+ int i; -+ -+ if (features & ~VHOST_VSOCK_FEATURES) -+ return -EOPNOTSUPP; -+ -+ mutex_lock(&vsock->dev.mutex); -+ if ((features & (1 << VHOST_F_LOG_ALL)) && -+ !vhost_log_access_ok(&vsock->dev)) { -+ mutex_unlock(&vsock->dev.mutex); -+ return -EFAULT; -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ vq = &vsock->vqs[i]; -+ mutex_lock(&vq->mutex); -+ vq->acked_features = features; -+ mutex_unlock(&vq->mutex); -+ } -+ mutex_unlock(&vsock->dev.mutex); -+ return 0; -+} -+ -+static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, -+ unsigned long arg) -+{ -+ struct vhost_vsock *vsock = f->private_data; -+ void __user *argp = (void __user *)arg; -+ u64 guest_cid; -+ u64 features; -+ int start; -+ int r; -+ -+ switch (ioctl) { -+ case VHOST_VSOCK_SET_GUEST_CID: -+ if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) -+ return -EFAULT; -+ return vhost_vsock_set_cid(vsock, guest_cid); -+ case VHOST_VSOCK_SET_RUNNING: -+ if (copy_from_user(&start, argp, sizeof(start))) -+ return -EFAULT; -+ if (start) -+ return vhost_vsock_start(vsock); -+ else -+ return vhost_vsock_stop(vsock); -+ case VHOST_GET_FEATURES: -+ features = VHOST_VSOCK_FEATURES; -+ if (copy_to_user(argp, &features, sizeof(features))) -+ return -EFAULT; -+ return 0; -+ case VHOST_SET_FEATURES: -+ if (copy_from_user(&features, argp, sizeof(features))) -+ return -EFAULT; -+ return vhost_vsock_set_features(vsock, features); -+ default: -+ mutex_lock(&vsock->dev.mutex); -+ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); -+ if (r == -ENOIOCTLCMD) -+ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); -+ else -+ vhost_vsock_flush(vsock); -+ mutex_unlock(&vsock->dev.mutex); -+ return r; -+ } -+} -+ -+static const struct file_operations vhost_vsock_fops = { -+ .owner = THIS_MODULE, -+ .open = vhost_vsock_dev_open, -+ .release = vhost_vsock_dev_release, -+ .llseek = noop_llseek, -+ .unlocked_ioctl = vhost_vsock_dev_ioctl, -+}; -+ -+static struct miscdevice vhost_vsock_misc = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "vhost-vsock", -+ .fops = &vhost_vsock_fops, -+}; -+ -+static struct virtio_transport vhost_transport = { -+ .transport = { -+ .get_local_cid = vhost_transport_get_local_cid, -+ -+ .init = virtio_transport_do_socket_init, -+ .destruct = virtio_transport_destruct, -+ .release = virtio_transport_release, -+ .connect = virtio_transport_connect, -+ .shutdown = virtio_transport_shutdown, -+ -+ .dgram_enqueue = virtio_transport_dgram_enqueue, -+ .dgram_dequeue = virtio_transport_dgram_dequeue, -+ .dgram_bind = virtio_transport_dgram_bind, -+ .dgram_allow = virtio_transport_dgram_allow, -+ -+ .stream_enqueue = virtio_transport_stream_enqueue, -+ .stream_dequeue = virtio_transport_stream_dequeue, -+ .stream_has_data = virtio_transport_stream_has_data, -+ .stream_has_space = virtio_transport_stream_has_space, -+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, -+ .stream_is_active = virtio_transport_stream_is_active, -+ .stream_allow = virtio_transport_stream_allow, -+ -+ .notify_poll_in = virtio_transport_notify_poll_in, -+ .notify_poll_out = virtio_transport_notify_poll_out, -+ .notify_recv_init = virtio_transport_notify_recv_init, -+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, -+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, -+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, -+ .notify_send_init = virtio_transport_notify_send_init, -+ .notify_send_pre_block = virtio_transport_notify_send_pre_block, -+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, -+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, -+ -+ .set_buffer_size = virtio_transport_set_buffer_size, -+ .set_min_buffer_size = virtio_transport_set_min_buffer_size, -+ .set_max_buffer_size = virtio_transport_set_max_buffer_size, -+ .get_buffer_size = virtio_transport_get_buffer_size, -+ .get_min_buffer_size = virtio_transport_get_min_buffer_size, -+ .get_max_buffer_size = virtio_transport_get_max_buffer_size, -+ }, -+ -+ .send_pkt = vhost_transport_send_pkt, -+}; -+ -+static int __init vhost_vsock_init(void) -+{ -+ int ret; -+ -+ ret = vsock_core_init(&vhost_transport.transport); -+ if (ret < 0) -+ return ret; -+ return misc_register(&vhost_vsock_misc); -+}; -+ -+static void __exit vhost_vsock_exit(void) -+{ -+ misc_deregister(&vhost_vsock_misc); -+ vsock_core_exit(); -+}; -+ -+module_init(vhost_vsock_init); -+module_exit(vhost_vsock_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("vhost transport for vsock "); -diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h -index ab373191..b306476 100644 ---- a/include/uapi/linux/vhost.h -+++ b/include/uapi/linux/vhost.h -@@ -169,4 +169,9 @@ struct vhost_scsi_target { - #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) - #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) - -+/* VHOST_VSOCK specific defines */ -+ -+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) -+#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) -+ - #endif --- -2.10.0 - diff --git a/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch b/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch deleted file mode 100644 index 8c84c7879..000000000 --- a/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 30e1801c9e9683512a0cd169edf015923497dd70 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:35 +0100 -Subject: [PATCH 10/42] VSOCK: Add Makefile and Kconfig - -Enable virtio-vsock and vhost-vsock. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 304ba62fd4e670c1a5784585da0fac9f7309ef6c) ---- - drivers/vhost/Kconfig | 14 ++++++++++++++ - drivers/vhost/Makefile | 4 ++++ - net/vmw_vsock/Kconfig | 20 ++++++++++++++++++++ - net/vmw_vsock/Makefile | 6 ++++++ - 4 files changed, 44 insertions(+) - -diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig -index 533eaf0..2b5f588 100644 ---- a/drivers/vhost/Kconfig -+++ b/drivers/vhost/Kconfig -@@ -21,6 +21,20 @@ config VHOST_SCSI - Say M here to enable the vhost_scsi TCM fabric module - for use with virtio-scsi guests - -+config VHOST_VSOCK -+ tristate "vhost virtio-vsock driver" -+ depends on VSOCKETS && EVENTFD -+ select VIRTIO_VSOCKETS_COMMON -+ select VHOST -+ default n -+ ---help--- -+ This kernel module can be loaded in the host kernel to provide AF_VSOCK -+ sockets for communicating with guests. The guests must have the -+ virtio_transport.ko driver loaded to use the virtio-vsock device. -+ -+ To compile this driver as a module, choose M here: the module will be called -+ vhost_vsock. -+ - config VHOST_RING - tristate - ---help--- -diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile -index e0441c3..6b012b9 100644 ---- a/drivers/vhost/Makefile -+++ b/drivers/vhost/Makefile -@@ -4,5 +4,9 @@ vhost_net-y := net.o - obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o - vhost_scsi-y := scsi.o - -+obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o -+vhost_vsock-y := vsock.o -+ - obj-$(CONFIG_VHOST_RING) += vringh.o -+ - obj-$(CONFIG_VHOST) += vhost.o -diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig -index 14810ab..8831e7c 100644 ---- a/net/vmw_vsock/Kconfig -+++ b/net/vmw_vsock/Kconfig -@@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS - - To compile this driver as a module, choose M here: the module - will be called vmw_vsock_vmci_transport. If unsure, say N. -+ -+config VIRTIO_VSOCKETS -+ tristate "virtio transport for Virtual Sockets" -+ depends on VSOCKETS && VIRTIO -+ select VIRTIO_VSOCKETS_COMMON -+ help -+ This module implements a virtio transport for Virtual Sockets. -+ -+ Enable this transport if your Virtual Machine host supports Virtual -+ Sockets over virtio. -+ -+ To compile this driver as a module, choose M here: the module will be -+ called vmw_vsock_virtio_transport. If unsure, say N. -+ -+config VIRTIO_VSOCKETS_COMMON -+ tristate -+ help -+ This option is selected by any driver which needs to access -+ the virtio_vsock. The module will be called -+ vmw_vsock_virtio_transport_common. -diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile -index 2ce52d7..bc27c70 100644 ---- a/net/vmw_vsock/Makefile -+++ b/net/vmw_vsock/Makefile -@@ -1,7 +1,13 @@ - obj-$(CONFIG_VSOCKETS) += vsock.o - obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o -+obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o -+obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o - - vsock-y += af_vsock.o vsock_addr.o - - vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ - vmci_transport_notify_qstate.o -+ -+vmw_vsock_virtio_transport-y += virtio_transport.o -+ -+vmw_vsock_virtio_transport_common-y += virtio_transport_common.o --- -2.10.0 - diff --git a/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch b/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch deleted file mode 100644 index e1e78d9c8..000000000 --- a/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch +++ /dev/null @@ -1,33 +0,0 @@ -From e9a09f08525c736a71d8331fd6412a0ad19ee428 Mon Sep 17 00:00:00 2001 -From: Wei Yongjun -Date: Tue, 2 Aug 2016 13:50:42 +0000 -Subject: [PATCH 11/42] VSOCK: Use kvfree() - -Use kvfree() instead of open-coding it. - -Signed-off-by: Wei Yongjun -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit b226acab2f6aaa45c2af27279b63f622b23a44bd) ---- - drivers/vhost/vsock.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -index 028ca16..0ddf3a2 100644 ---- a/drivers/vhost/vsock.c -+++ b/drivers/vhost/vsock.c -@@ -434,10 +434,7 @@ err: - - static void vhost_vsock_free(struct vhost_vsock *vsock) - { -- if (is_vmalloc_addr(vsock)) -- vfree(vsock); -- else -- kfree(vsock); -+ kvfree(vsock); - } - - static int vhost_vsock_dev_open(struct inode *inode, struct file *file) --- -2.10.0 - diff --git a/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch b/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch deleted file mode 100644 index c522808e7..000000000 --- a/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch +++ /dev/null @@ -1,53 +0,0 @@ -From f886059ea8d0ac8ed981263d91d94275b85c50d5 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 4 Aug 2016 14:52:53 +0100 -Subject: [PATCH 12/42] vhost/vsock: fix vhost virtio_vsock_pkt use-after-free - -Stash the packet length in a local variable before handing over -ownership of the packet to virtio_transport_recv_pkt() or -virtio_transport_free_pkt(). - -This patch solves the use-after-free since pkt is no longer guaranteed -to be alive. - -Reported-by: Dan Carpenter -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 3fda5d6e580193fa005014355b3a61498f1b3ae0) ---- - drivers/vhost/vsock.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -index 0ddf3a2..e3b30ea 100644 ---- a/drivers/vhost/vsock.c -+++ b/drivers/vhost/vsock.c -@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) - - vhost_disable_notify(&vsock->dev, vq); - for (;;) { -+ u32 len; -+ - if (!vhost_vsock_more_replies(vsock)) { - /* Stop tx until the device processes already - * pending replies. Leave tx virtqueue -@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) - continue; - } - -+ len = pkt->len; -+ - /* Only accept correctly addressed packets */ - if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) - virtio_transport_recv_pkt(pkt); - else - virtio_transport_free_pkt(pkt); - -- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + len); - added = true; - } - --- -2.10.0 - diff --git a/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch b/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch deleted file mode 100644 index aff3fc5c6..000000000 --- a/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 6ded3ac18eabf23a790d6b6876119d8cd0538964 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 5 Aug 2016 13:52:09 +0100 -Subject: [PATCH 13/42] virtio-vsock: fix include guard typo - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 28ad55578b8a76390d966b09da8c7fa3644f5140) ---- - include/uapi/linux/virtio_vsock.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h -index 6b011c1..1d57ed3 100644 ---- a/include/uapi/linux/virtio_vsock.h -+++ b/include/uapi/linux/virtio_vsock.h -@@ -32,7 +32,7 @@ - */ - - #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H --#define _UAPI_LINUX_VIRTIO_VOSCK_H -+#define _UAPI_LINUX_VIRTIO_VSOCK_H - - #include - #include --- -2.10.0 - diff --git a/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch b/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch deleted file mode 100644 index cb1f6e165..000000000 --- a/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 5fcd2673fadd46b0d2d5f896281113cd67a2efa7 Mon Sep 17 00:00:00 2001 -From: Gerard Garcia -Date: Wed, 10 Aug 2016 17:24:34 +0200 -Subject: [PATCH 14/42] vhost/vsock: drop space available check for TX vq - -Remove unnecessary use of enable/disable callback notifications -and the incorrect more space available check. - -The virtio_transport_tx_work handles when the TX virtqueue -has more buffers available. - -Signed-off-by: Gerard Garcia -Acked-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 21bc54fc0cdc31de72b57d2b3c79cf9c2b83cf39) ---- - net/vmw_vsock/virtio_transport.c | 10 +++------- - 1 file changed, 3 insertions(+), 7 deletions(-) - -diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c -index 699dfab..936d7ee 100644 ---- a/net/vmw_vsock/virtio_transport.c -+++ b/net/vmw_vsock/virtio_transport.c -@@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) - - vq = vsock->vqs[VSOCK_VQ_TX]; - -- /* Avoid unnecessary interrupts while we're processing the ring */ -- virtqueue_disable_cb(vq); -- - for (;;) { - struct virtio_vsock_pkt *pkt; - struct scatterlist hdr, buf, *sgs[2]; -@@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) - spin_lock_bh(&vsock->send_pkt_list_lock); - if (list_empty(&vsock->send_pkt_list)) { - spin_unlock_bh(&vsock->send_pkt_list_lock); -- virtqueue_enable_cb(vq); - break; - } - -@@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work) - } - - ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); -+ /* Usually this means that there is no more space available in -+ * the vq -+ */ - if (ret < 0) { - spin_lock_bh(&vsock->send_pkt_list_lock); - list_add(&pkt->list, &vsock->send_pkt_list); - spin_unlock_bh(&vsock->send_pkt_list_lock); -- -- if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) -- continue; /* retry now that we have more space */ - break; - } - --- -2.10.0 - diff --git a/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch b/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch deleted file mode 100644 index 4b5bf9751..000000000 --- a/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 84e1e7a4981f6ef926bb01481445def66e0982b2 Mon Sep 17 00:00:00 2001 -From: Jake Oshins -Date: Mon, 14 Dec 2015 16:01:41 -0800 -Subject: [PATCH 16/42] drivers:hv: Define the channel type for Hyper-V PCI - Express pass-through - -This defines the channel type for PCI front-ends in Hyper-V VMs. - -Signed-off-by: Jake Oshins -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 3053c762444a83ec6a8777f9476668b23b8ab180) ---- - drivers/hv/channel_mgmt.c | 3 +++ - include/linux/hyperv.h | 11 +++++++++++ - 2 files changed, 14 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 37238df..a562318 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -359,6 +359,7 @@ enum { - SCSI, - NIC, - ND_NIC, -+ PCIE, - MAX_PERF_CHN, - }; - -@@ -376,6 +377,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ - { HV_ND_GUID, }, -+ /* PCI Express Pass Through */ -+ { HV_PCIE_GUID, }, - }; - - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index ae6a711..10dda1e 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1156,6 +1156,17 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - } - - /* -+ * PCI Express Pass Through -+ * {44C4F61D-4444-4400-9D52-802E27EDE19F} -+ */ -+ -+#define HV_PCIE_GUID \ -+ .guid = { \ -+ 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ -+ 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ -+ } -+ -+/* - * Common header for Hyper-V ICs - */ - --- -2.10.0 - diff --git a/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch b/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch deleted file mode 100644 index 623cc37ff..000000000 --- a/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch +++ /dev/null @@ -1,297 +0,0 @@ -From 12fbf6bcf859c7ce33766ae450dc291d0b857197 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Mon, 14 Dec 2015 16:01:43 -0800 -Subject: [PATCH 17/42] Drivers: hv: vmbus: Use uuid_le type consistently - -Consistently use uuid_le type in the Hyper-V driver code. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit af3ff643ea91ba64dd8d0b1cbed54d44512f96cd) ---- - drivers/hv/channel_mgmt.c | 2 +- - drivers/hv/vmbus_drv.c | 10 ++--- - include/linux/hyperv.h | 92 ++++++++++++++--------------------------- - include/linux/mod_devicetable.h | 2 +- - scripts/mod/file2alias.c | 2 +- - 5 files changed, 40 insertions(+), 68 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index a562318..339277b 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -409,7 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui - struct cpumask *alloced_mask; - - for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!memcmp(type_guid->b, hp_devs[i].guid, -+ if (!memcmp(type_guid->b, &hp_devs[i].guid, - sizeof(uuid_le))) { - perf_chn = true; - break; -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 509ed97..6ce2bf8 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -533,7 +533,7 @@ static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) - - static const uuid_le null_guid; - --static inline bool is_null_guid(const __u8 *guid) -+static inline bool is_null_guid(const uuid_le *guid) - { - if (memcmp(guid, &null_guid, sizeof(uuid_le))) - return false; -@@ -546,9 +546,9 @@ static inline bool is_null_guid(const __u8 *guid) - */ - static const struct hv_vmbus_device_id *hv_vmbus_get_id( - const struct hv_vmbus_device_id *id, -- const __u8 *guid) -+ const uuid_le *guid) - { -- for (; !is_null_guid(id->guid); id++) -+ for (; !is_null_guid(&id->guid); id++) - if (!memcmp(&id->guid, guid, sizeof(uuid_le))) - return id; - -@@ -565,7 +565,7 @@ static int vmbus_match(struct device *device, struct device_driver *driver) - struct hv_driver *drv = drv_to_hv_drv(driver); - struct hv_device *hv_dev = device_to_hv_device(device); - -- if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b)) -+ if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) - return 1; - - return 0; -@@ -582,7 +582,7 @@ static int vmbus_probe(struct device *child_device) - struct hv_device *dev = device_to_hv_device(child_device); - const struct hv_vmbus_device_id *dev_id; - -- dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b); -+ dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type); - if (drv->probe) { - ret = drv->probe(dev, dev_id); - if (ret != 0) -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 10dda1e..4712d7d 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1012,6 +1012,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - .guid = { g0, g1, g2, g3, g4, g5, g6, g7, \ - g8, g9, ga, gb, gc, gd, ge, gf }, - -+ -+ - /* - * GUID definitions of various offer types - services offered to the guest. - */ -@@ -1021,118 +1023,94 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - * {f8615163-df3e-46c5-913f-f2d2f965ed0e} - */ - #define HV_NIC_GUID \ -- .guid = { \ -- 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, \ -- 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e \ -- } -+ .guid = UUID_LE(0xf8615163, 0xdf3e, 0x46c5, 0x91, 0x3f, \ -+ 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e) - - /* - * IDE GUID - * {32412632-86cb-44a2-9b5c-50d1417354f5} - */ - #define HV_IDE_GUID \ -- .guid = { \ -- 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ -- 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 \ -- } -+ .guid = UUID_LE(0x32412632, 0x86cb, 0x44a2, 0x9b, 0x5c, \ -+ 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5) - - /* - * SCSI GUID - * {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} - */ - #define HV_SCSI_GUID \ -- .guid = { \ -- 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ -- 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f \ -- } -+ .guid = UUID_LE(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \ -+ 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f) - - /* - * Shutdown GUID - * {0e0b6031-5213-4934-818b-38d90ced39db} - */ - #define HV_SHUTDOWN_GUID \ -- .guid = { \ -- 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49, \ -- 0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb \ -- } -+ .guid = UUID_LE(0x0e0b6031, 0x5213, 0x4934, 0x81, 0x8b, \ -+ 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb) - - /* - * Time Synch GUID - * {9527E630-D0AE-497b-ADCE-E80AB0175CAF} - */ - #define HV_TS_GUID \ -- .guid = { \ -- 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, \ -- 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf \ -- } -+ .guid = UUID_LE(0x9527e630, 0xd0ae, 0x497b, 0xad, 0xce, \ -+ 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf) - - /* - * Heartbeat GUID - * {57164f39-9115-4e78-ab55-382f3bd5422d} - */ - #define HV_HEART_BEAT_GUID \ -- .guid = { \ -- 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, \ -- 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d \ -- } -+ .guid = UUID_LE(0x57164f39, 0x9115, 0x4e78, 0xab, 0x55, \ -+ 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d) - - /* - * KVP GUID - * {a9a0f4e7-5a45-4d96-b827-8a841e8c03e6} - */ - #define HV_KVP_GUID \ -- .guid = { \ -- 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, \ -- 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6 \ -- } -+ .guid = UUID_LE(0xa9a0f4e7, 0x5a45, 0x4d96, 0xb8, 0x27, \ -+ 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6) - - /* - * Dynamic memory GUID - * {525074dc-8985-46e2-8057-a307dc18a502} - */ - #define HV_DM_GUID \ -- .guid = { \ -- 0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46, \ -- 0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 \ -- } -+ .guid = UUID_LE(0x525074dc, 0x8985, 0x46e2, 0x80, 0x57, \ -+ 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02) - - /* - * Mouse GUID - * {cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a} - */ - #define HV_MOUSE_GUID \ -- .guid = { \ -- 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c, \ -- 0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \ -- } -+ .guid = UUID_LE(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \ -+ 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) - - /* - * VSS (Backup/Restore) GUID - */ - #define HV_VSS_GUID \ -- .guid = { \ -- 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, \ -- 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \ -- } -+ .guid = UUID_LE(0x35fa2e29, 0xea23, 0x4236, 0x96, 0xae, \ -+ 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40) - /* - * Synthetic Video GUID - * {DA0A7802-E377-4aac-8E77-0558EB1073F8} - */ - #define HV_SYNTHVID_GUID \ -- .guid = { \ -- 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a, \ -- 0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 \ -- } -+ .guid = UUID_LE(0xda0a7802, 0xe377, 0x4aac, 0x8e, 0x77, \ -+ 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8) - - /* - * Synthetic FC GUID - * {2f9bcc4a-0069-4af3-b76b-6fd0be528cda} - */ - #define HV_SYNTHFC_GUID \ -- .guid = { \ -- 0x4A, 0xCC, 0x9B, 0x2F, 0x69, 0x00, 0xF3, 0x4A, \ -- 0xB7, 0x6B, 0x6F, 0xD0, 0xBE, 0x52, 0x8C, 0xDA \ -- } -+ .guid = UUID_LE(0x2f9bcc4a, 0x0069, 0x4af3, 0xb7, 0x6b, \ -+ 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda) - - /* - * Guest File Copy Service -@@ -1140,20 +1118,16 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - */ - - #define HV_FCOPY_GUID \ -- .guid = { \ -- 0xE3, 0x4B, 0xD1, 0x34, 0xE4, 0xDE, 0xC8, 0x41, \ -- 0x9A, 0xE7, 0x6B, 0x17, 0x49, 0x77, 0xC1, 0x92 \ -- } -+ .guid = UUID_LE(0x34d14be3, 0xdee4, 0x41c8, 0x9a, 0xe7, \ -+ 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92) - - /* - * NetworkDirect. This is the guest RDMA service. - * {8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501} - */ - #define HV_ND_GUID \ -- .guid = { \ -- 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b, \ -- 0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 \ -- } -+ .guid = UUID_LE(0x8c2eaf3d, 0x32a7, 0x4b09, 0xab, 0x99, \ -+ 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01) - - /* - * PCI Express Pass Through -@@ -1161,10 +1135,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - */ - - #define HV_PCIE_GUID \ -- .guid = { \ -- 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ -- 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ -- } -+ .guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \ -+ 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f) - - /* - * Common header for Hyper-V ICs -diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h -index 64f36e0..6e4c645 100644 ---- a/include/linux/mod_devicetable.h -+++ b/include/linux/mod_devicetable.h -@@ -404,7 +404,7 @@ struct virtio_device_id { - * For Hyper-V devices we use the device guid as the id. - */ - struct hv_vmbus_device_id { -- __u8 guid[16]; -+ uuid_le guid; - kernel_ulong_t driver_data; /* Data private to the driver */ - }; - -diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c -index 9f5cdd4..8e8c69b 100644 ---- a/scripts/mod/file2alias.c -+++ b/scripts/mod/file2alias.c -@@ -917,7 +917,7 @@ static int do_vmbus_entry(const char *filename, void *symval, - char guid_name[(sizeof(*guid) + 1) * 2]; - - for (i = 0; i < (sizeof(*guid) * 2); i += 2) -- sprintf(&guid_name[i], "%02x", TO_NATIVE((*guid)[i/2])); -+ sprintf(&guid_name[i], "%02x", TO_NATIVE((guid->b)[i/2])); - - strcpy(alias, "vmbus:"); - strcat(alias, guid_name); --- -2.10.0 - diff --git a/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch b/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch deleted file mode 100644 index fee2b87dd..000000000 --- a/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 01a403dce6afb34dd0430e12d93b7acd5f384439 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Mon, 14 Dec 2015 16:01:44 -0800 -Subject: [PATCH 18/42] Drivers: hv: vmbus: Use uuid_le_cmp() for comparing - GUIDs - -Use uuid_le_cmp() for comparing GUIDs. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 4ae9250893485f380275e7d5cb291df87c4d9710) ---- - drivers/hv/channel_mgmt.c | 3 +-- - drivers/hv/vmbus_drv.c | 4 ++-- - 2 files changed, 3 insertions(+), 4 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 339277b..9b4525c 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -409,8 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui - struct cpumask *alloced_mask; - - for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!memcmp(type_guid->b, &hp_devs[i].guid, -- sizeof(uuid_le))) { -+ if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { - perf_chn = true; - break; - } -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 6ce2bf8..7973aa5 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -535,7 +535,7 @@ static const uuid_le null_guid; - - static inline bool is_null_guid(const uuid_le *guid) - { -- if (memcmp(guid, &null_guid, sizeof(uuid_le))) -+ if (uuid_le_cmp(*guid, null_guid)) - return false; - return true; - } -@@ -549,7 +549,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id( - const uuid_le *guid) - { - for (; !is_null_guid(&id->guid); id++) -- if (!memcmp(&id->guid, guid, sizeof(uuid_le))) -+ if (!uuid_le_cmp(id->guid, *guid)) - return id; - - return NULL; --- -2.10.0 - diff --git a/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch b/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch deleted file mode 100644 index 6b228bce5..000000000 --- a/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch +++ /dev/null @@ -1,42 +0,0 @@ -From a9c4320f47b5a2d2ef7600c5f61f3d4256de2ba5 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:48 -0800 -Subject: [PATCH 19/42] Drivers: hv: vmbus: do sanity check of channel state in - vmbus_close_internal() - -This fixes an incorrect assumption of channel state in the function. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 64b7faf903dae2df94d89edf2c688b16751800e4) ---- - drivers/hv/channel.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 1ef37c7..2889d97 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -512,6 +512,18 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - tasklet = hv_context.event_dpc[channel->target_cpu]; - tasklet_disable(tasklet); - -+ /* -+ * In case a device driver's probe() fails (e.g., -+ * util_probe() -> vmbus_open() returns -ENOMEM) and the device is -+ * rescinded later (e.g., we dynamically disble an Integrated Service -+ * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): -+ * here we should skip most of the below cleanup work. -+ */ -+ if (channel->state != CHANNEL_OPENED_STATE) { -+ ret = -EINVAL; -+ goto out; -+ } -+ - channel->state = CHANNEL_OPEN_STATE; - channel->sc_creation_callback = NULL; - /* Stop callback and cancel the timer asap */ --- -2.10.0 - diff --git a/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch b/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch deleted file mode 100644 index 338568fb4..000000000 --- a/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch +++ /dev/null @@ -1,74 +0,0 @@ -From b92976804d10f78b9a50f5d8f62f3663a44f32e6 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:50 -0800 -Subject: [PATCH 20/42] Drivers: hv: vmbus: release relid on error in - vmbus_process_offer() - -We want to simplify vmbus_onoffer_rescind() by not invoking -hv_process_channel_removal(NULL, ...). - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit f52078cf5711ce47c113a58702b35c8ff5f212f5) ---- - drivers/hv/channel_mgmt.c | 21 +++++++++++++++------ - 1 file changed, 15 insertions(+), 6 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 9b4525c..8529dd2 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -178,19 +178,22 @@ static void percpu_channel_deq(void *arg) - } - - --void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -+static void vmbus_release_relid(u32 relid) - { - struct vmbus_channel_relid_released msg; -- unsigned long flags; -- struct vmbus_channel *primary_channel; - - memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); - msg.child_relid = relid; - msg.header.msgtype = CHANNELMSG_RELID_RELEASED; - vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); -+} - -- if (channel == NULL) -- return; -+void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -+{ -+ unsigned long flags; -+ struct vmbus_channel *primary_channel; -+ -+ vmbus_release_relid(relid); - - BUG_ON(!channel->rescind); - -@@ -337,6 +340,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - return; - - err_deq_chan: -+ vmbus_release_relid(newchannel->offermsg.child_relid); -+ - spin_lock_irqsave(&vmbus_connection.channel_lock, flags); - list_del(&newchannel->listentry); - spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -@@ -640,7 +645,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - channel = relid2channel(rescind->child_relid); - - if (channel == NULL) { -- hv_process_channel_removal(NULL, rescind->child_relid); -+ /* -+ * This is very impossible, because in -+ * vmbus_process_offer(), we have already invoked -+ * vmbus_release_relid() on error. -+ */ - return; - } - --- -2.10.0 - diff --git a/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch b/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch deleted file mode 100644 index f2a28b416..000000000 --- a/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch +++ /dev/null @@ -1,116 +0,0 @@ -From e34354b98924dba0128289e722bde4ca35eafa90 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:51 -0800 -Subject: [PATCH 21/42] Drivers: hv: vmbus: channge - vmbus_connection.channel_lock to mutex - -spinlock is unnecessary here. -mutex is enough. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit d6f591e339d23f434efda11917da511870891472) ---- - drivers/hv/channel_mgmt.c | 12 ++++++------ - drivers/hv/connection.c | 7 +++---- - drivers/hv/hyperv_vmbus.h | 2 +- - 3 files changed, 10 insertions(+), 11 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 8529dd2..306c7df 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -207,9 +207,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - } - - if (channel->primary_channel == NULL) { -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_del(&channel->listentry); -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - primary_channel = channel; - } else { -@@ -254,7 +254,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - unsigned long flags; - - /* Make sure this is a new offer */ -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (!uuid_le_cmp(channel->offermsg.offer.if_type, -@@ -270,7 +270,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - list_add_tail(&newchannel->listentry, - &vmbus_connection.chn_list); - -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - if (!fnew) { - /* -@@ -342,9 +342,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - err_deq_chan: - vmbus_release_relid(newchannel->offermsg.child_relid); - -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_del(&newchannel->listentry); -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); -diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 4fc2e88..521f48e 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -146,7 +146,7 @@ int vmbus_connect(void) - spin_lock_init(&vmbus_connection.channelmsg_lock); - - INIT_LIST_HEAD(&vmbus_connection.chn_list); -- spin_lock_init(&vmbus_connection.channel_lock); -+ mutex_init(&vmbus_connection.channel_mutex); - - /* - * Setup the vmbus event connection for channel interrupt -@@ -282,11 +282,10 @@ struct vmbus_channel *relid2channel(u32 relid) - { - struct vmbus_channel *channel; - struct vmbus_channel *found_channel = NULL; -- unsigned long flags; - struct list_head *cur, *tmp; - struct vmbus_channel *cur_sc; - -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (channel->offermsg.child_relid == relid) { - found_channel = channel; -@@ -305,7 +304,7 @@ struct vmbus_channel *relid2channel(u32 relid) - } - } - } -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - return found_channel; - } -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 12156db..50b1de7 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -683,7 +683,7 @@ struct vmbus_connection { - - /* List of channels */ - struct list_head chn_list; -- spinlock_t channel_lock; -+ struct mutex channel_mutex; - - struct workqueue_struct *work_queue; - }; --- -2.10.0 - diff --git a/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch b/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch deleted file mode 100644 index c993989dc..000000000 --- a/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 91a65c691fc22cc6bfb884dea29cc7c5c3e5f9a9 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Mon, 14 Dec 2015 19:02:00 -0800 -Subject: [PATCH 22/42] Drivers: hv: remove code duplication between - vmbus_recvpacket()/vmbus_recvpacket_raw() - -vmbus_recvpacket() and vmbus_recvpacket_raw() are almost identical but -there are two discrepancies: -1) vmbus_recvpacket() doesn't propagate errors from hv_ringbuffer_read() - which looks like it is not desired. -2) There is an error message printed in packetlen > bufferlen case in - vmbus_recvpacket(). I'm removing it as it is usless for users to see - such messages and /vmbus_recvpacket_raw() doesn't have it. - -Signed-off-by: Vitaly Kuznetsov -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 667d374064b0cc48b6122101b287908d1b392bdb) ---- - drivers/hv/channel.c | 65 ++++++++++++++++++---------------------------------- - 1 file changed, 22 insertions(+), 43 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 2889d97..dd6de7f 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -922,8 +922,10 @@ EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); - * - * Mainly used by Hyper-V drivers. - */ --int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -- u32 bufferlen, u32 *buffer_actual_len, u64 *requestid) -+static inline int -+__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -+ u32 bufferlen, u32 *buffer_actual_len, u64 *requestid, -+ bool raw) - { - struct vmpacket_descriptor desc; - u32 packetlen; -@@ -941,27 +943,34 @@ int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, - return 0; - - packetlen = desc.len8 << 3; -- userlen = packetlen - (desc.offset8 << 3); -+ if (!raw) -+ userlen = packetlen - (desc.offset8 << 3); -+ else -+ userlen = packetlen; - - *buffer_actual_len = userlen; - -- if (userlen > bufferlen) { -- -- pr_err("Buffer too small - got %d needs %d\n", -- bufferlen, userlen); -- return -ETOOSMALL; -- } -+ if (userlen > bufferlen) -+ return -ENOBUFS; - - *requestid = desc.trans_id; - - /* Copy over the packet to the user buffer */ - ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen, -- (desc.offset8 << 3), &signal); -+ raw ? 0 : desc.offset8 << 3, &signal); - - if (signal) - vmbus_setevent(channel); - -- return 0; -+ return ret; -+} -+ -+int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -+ u32 bufferlen, u32 *buffer_actual_len, -+ u64 *requestid) -+{ -+ return __vmbus_recvpacket(channel, buffer, bufferlen, -+ buffer_actual_len, requestid, false); - } - EXPORT_SYMBOL(vmbus_recvpacket); - -@@ -972,37 +981,7 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, - u32 bufferlen, u32 *buffer_actual_len, - u64 *requestid) - { -- struct vmpacket_descriptor desc; -- u32 packetlen; -- int ret; -- bool signal = false; -- -- *buffer_actual_len = 0; -- *requestid = 0; -- -- -- ret = hv_ringbuffer_peek(&channel->inbound, &desc, -- sizeof(struct vmpacket_descriptor)); -- if (ret != 0) -- return 0; -- -- -- packetlen = desc.len8 << 3; -- -- *buffer_actual_len = packetlen; -- -- if (packetlen > bufferlen) -- return -ENOBUFS; -- -- *requestid = desc.trans_id; -- -- /* Copy over the entire packet to the user buffer */ -- ret = hv_ringbuffer_read(&channel->inbound, buffer, packetlen, 0, -- &signal); -- -- if (signal) -- vmbus_setevent(channel); -- -- return ret; -+ return __vmbus_recvpacket(channel, buffer, bufferlen, -+ buffer_actual_len, requestid, true); - } - EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); --- -2.10.0 - diff --git a/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch b/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch deleted file mode 100644 index a52582000..000000000 --- a/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 4c754b011766c2d8a99424637656ea8096d55890 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Dec 2015 12:21:22 -0800 -Subject: [PATCH 23/42] Drivers: hv: vmbus: fix the building warning with - hyperv-keyboard - -With the recent change af3ff643ea91ba64dd8d0b1cbed54d44512f96cd -(Drivers: hv: vmbus: Use uuid_le type consistently), we always get this -warning: - - CC [M] drivers/input/serio/hyperv-keyboard.o -drivers/input/serio/hyperv-keyboard.c:427:2: warning: missing braces around - initializer [-Wmissing-braces] - { HV_KBD_GUID, }, - ^ -drivers/input/serio/hyperv-keyboard.c:427:2: warning: (near initialization - for .id_table[0].guid.b.) [-Wmissing-braces] - -The patch fixes the warning. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 2048157ad02e65f6327118dd4a7b9c9f1fd12f77) ---- - drivers/input/serio/hyperv-keyboard.c | 10 ---------- - include/linux/hyperv.h | 8 ++++++++ - 2 files changed, 8 insertions(+), 10 deletions(-) - -diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c -index e74e5d6..c948866 100644 ---- a/drivers/input/serio/hyperv-keyboard.c -+++ b/drivers/input/serio/hyperv-keyboard.c -@@ -412,16 +412,6 @@ static int hv_kbd_remove(struct hv_device *hv_dev) - return 0; - } - --/* -- * Keyboard GUID -- * {f912ad6d-2b17-48ea-bd65-f927a61c7684} -- */ --#define HV_KBD_GUID \ -- .guid = { \ -- 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48, \ -- 0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 \ -- } -- - static const struct hv_vmbus_device_id id_table[] = { - /* Keyboard guid */ - { HV_KBD_GUID, }, -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 4712d7d..9e2de6a 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1091,6 +1091,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) - - /* -+ * Keyboard GUID -+ * {f912ad6d-2b17-48ea-bd65-f927a61c7684} -+ */ -+#define HV_KBD_GUID \ -+ .guid = UUID_LE(0xf912ad6d, 0x2b17, 0x48ea, 0xbd, 0x65, \ -+ 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84) -+ -+/* - * VSS (Backup/Restore) GUID - */ - #define HV_VSS_GUID \ --- -2.10.0 - diff --git a/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch b/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch deleted file mode 100644 index 14c287bd4..000000000 --- a/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 6cb1a2f24c7b049f8a0c259afa4f5de37ac84084 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Tue, 15 Dec 2015 16:27:27 -0800 -Subject: [PATCH 24/42] Drivers: hv: vmbus: Treat Fibre Channel devices as - performance critical - -For performance critical devices, we distribute the incoming -channel interrupt load across available CPUs in the guest. -Include Fibre channel devices in the set of devices for which -we would distribute the interrupt load. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 879a650a273bc3efb9d472886b8ced12630ea8ed) ---- - drivers/hv/channel_mgmt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 306c7df..763d0c1 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -362,6 +362,7 @@ err_free_chan: - enum { - IDE = 0, - SCSI, -+ FC, - NIC, - ND_NIC, - PCIE, -@@ -378,6 +379,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { - { HV_IDE_GUID, }, - /* Storage - SCSI */ - { HV_SCSI_GUID, }, -+ /* Storage - FC */ -+ { HV_SYNTHFC_GUID, }, - /* Network */ - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ --- -2.10.0 - diff --git a/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch b/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch deleted file mode 100644 index 2e89c4ea2..000000000 --- a/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 69933a7f325a93afbb5ed819388b8b063d602066 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Fri, 25 Dec 2015 20:00:30 -0800 -Subject: [PATCH 25/42] Drivers: hv: vmbus: Add vendor and device atttributes - -Add vendor and device attributes to VMBUS devices. These will be used -by Hyper-V tools as well user-level RDMA libraries that will use the -vendor/device tuple to discover the RDMA device. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 7047f17d70fc0599563d30d0791692cb5fe42ae6) ---- - Documentation/ABI/stable/sysfs-bus-vmbus | 14 +++ - drivers/hv/channel_mgmt.c | 166 +++++++++++++++++++++++-------- - drivers/hv/vmbus_drv.c | 21 ++++ - include/linux/hyperv.h | 28 ++++++ - 4 files changed, 186 insertions(+), 43 deletions(-) - -diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus -index 636e938..5d0125f 100644 ---- a/Documentation/ABI/stable/sysfs-bus-vmbus -+++ b/Documentation/ABI/stable/sysfs-bus-vmbus -@@ -27,3 +27,17 @@ Description: The mapping of which primary/sub channels are bound to which - Virtual Processors. - Format: - Users: tools/hv/lsvmbus -+ -+What: /sys/bus/vmbus/devices/vmbus_*/device -+Date: Dec. 2015 -+KernelVersion: 4.5 -+Contact: K. Y. Srinivasan -+Description: The 16 bit device ID of the device -+Users: tools/hv/lsvmbus and user level RDMA libraries -+ -+What: /sys/bus/vmbus/devices/vmbus_*/vendor -+Date: Dec. 2015 -+KernelVersion: 4.5 -+Contact: K. Y. Srinivasan -+Description: The 16 bit vendor ID of the device -+Users: tools/hv/lsvmbus and user level RDMA libraries -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 763d0c1..d6c6114 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -33,8 +33,122 @@ - - #include "hyperv_vmbus.h" - --static void init_vp_index(struct vmbus_channel *channel, -- const uuid_le *type_guid); -+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); -+ -+static const struct vmbus_device vmbus_devs[] = { -+ /* IDE */ -+ { .dev_type = HV_IDE, -+ HV_IDE_GUID, -+ .perf_device = true, -+ }, -+ -+ /* SCSI */ -+ { .dev_type = HV_SCSI, -+ HV_SCSI_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Fibre Channel */ -+ { .dev_type = HV_FC, -+ HV_SYNTHFC_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Synthetic NIC */ -+ { .dev_type = HV_NIC, -+ HV_NIC_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Network Direct */ -+ { .dev_type = HV_ND, -+ HV_ND_GUID, -+ .perf_device = true, -+ }, -+ -+ /* PCIE */ -+ { .dev_type = HV_PCIE, -+ HV_PCIE_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Synthetic Frame Buffer */ -+ { .dev_type = HV_FB, -+ HV_SYNTHVID_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Synthetic Keyboard */ -+ { .dev_type = HV_KBD, -+ HV_KBD_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Synthetic MOUSE */ -+ { .dev_type = HV_MOUSE, -+ HV_MOUSE_GUID, -+ .perf_device = false, -+ }, -+ -+ /* KVP */ -+ { .dev_type = HV_KVP, -+ HV_KVP_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Time Synch */ -+ { .dev_type = HV_TS, -+ HV_TS_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Heartbeat */ -+ { .dev_type = HV_HB, -+ HV_HEART_BEAT_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Shutdown */ -+ { .dev_type = HV_SHUTDOWN, -+ HV_SHUTDOWN_GUID, -+ .perf_device = false, -+ }, -+ -+ /* File copy */ -+ { .dev_type = HV_FCOPY, -+ HV_FCOPY_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Backup */ -+ { .dev_type = HV_BACKUP, -+ HV_VSS_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Dynamic Memory */ -+ { .dev_type = HV_DM, -+ HV_DM_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Unknown GUID */ -+ { .dev_type = HV_UNKOWN, -+ .perf_device = false, -+ }, -+}; -+ -+static u16 hv_get_dev_type(const uuid_le *guid) -+{ -+ u16 i; -+ -+ for (i = HV_IDE; i < HV_UNKOWN; i++) { -+ if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) -+ return i; -+ } -+ pr_info("Unknown GUID: %pUl\n", guid); -+ return i; -+} - - /** - * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message -@@ -252,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - struct vmbus_channel *channel; - bool fnew = true; - unsigned long flags; -+ u16 dev_type; - - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); -@@ -289,7 +404,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - goto err_free_chan; - } - -- init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); -+ dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type); -+ -+ init_vp_index(newchannel, dev_type); - - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); -@@ -326,6 +443,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - if (!newchannel->device_obj) - goto err_deq_chan; - -+ newchannel->device_obj->device_id = dev_type; - /* - * Add the new device to the bus. This will kick off device-driver - * binding which eventually invokes the device driver's AddDevice() -@@ -359,37 +477,6 @@ err_free_chan: - free_channel(newchannel); - } - --enum { -- IDE = 0, -- SCSI, -- FC, -- NIC, -- ND_NIC, -- PCIE, -- MAX_PERF_CHN, --}; -- --/* -- * This is an array of device_ids (device types) that are performance critical. -- * We attempt to distribute the interrupt load for these devices across -- * all available CPUs. -- */ --static const struct hv_vmbus_device_id hp_devs[] = { -- /* IDE */ -- { HV_IDE_GUID, }, -- /* Storage - SCSI */ -- { HV_SCSI_GUID, }, -- /* Storage - FC */ -- { HV_SYNTHFC_GUID, }, -- /* Network */ -- { HV_NIC_GUID, }, -- /* NetworkDirect Guest RDMA */ -- { HV_ND_GUID, }, -- /* PCI Express Pass Through */ -- { HV_PCIE_GUID, }, --}; -- -- - /* - * We use this state to statically distribute the channel interrupt load. - */ -@@ -406,22 +493,15 @@ static int next_numa_node_id; - * For pre-win8 hosts or non-performance critical channels we assign the - * first CPU in the first NUMA node. - */ --static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) -+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) - { - u32 cur_cpu; -- int i; -- bool perf_chn = false; -+ bool perf_chn = vmbus_devs[dev_type].perf_device; - struct vmbus_channel *primary = channel->primary_channel; - int next_node; - struct cpumask available_mask; - struct cpumask *alloced_mask; - -- for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { -- perf_chn = true; -- break; -- } -- } - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { - /* -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 7973aa5..de7130c 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -480,6 +480,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev, - } - static DEVICE_ATTR_RO(channel_vp_mapping); - -+static ssize_t vendor_show(struct device *dev, -+ struct device_attribute *dev_attr, -+ char *buf) -+{ -+ struct hv_device *hv_dev = device_to_hv_device(dev); -+ return sprintf(buf, "0x%x\n", hv_dev->vendor_id); -+} -+static DEVICE_ATTR_RO(vendor); -+ -+static ssize_t device_show(struct device *dev, -+ struct device_attribute *dev_attr, -+ char *buf) -+{ -+ struct hv_device *hv_dev = device_to_hv_device(dev); -+ return sprintf(buf, "0x%x\n", hv_dev->device_id); -+} -+static DEVICE_ATTR_RO(device); -+ - /* Set up per device attributes in /sys/bus/vmbus/devices/ */ - static struct attribute *vmbus_attrs[] = { - &dev_attr_id.attr, -@@ -505,6 +523,8 @@ static struct attribute *vmbus_attrs[] = { - &dev_attr_in_read_bytes_avail.attr, - &dev_attr_in_write_bytes_avail.attr, - &dev_attr_channel_vp_mapping.attr, -+ &dev_attr_vendor.attr, -+ &dev_attr_device.attr, - NULL, - }; - ATTRIBUTE_GROUPS(vmbus); -@@ -963,6 +983,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type, - memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); - memcpy(&child_device_obj->dev_instance, instance, - sizeof(uuid_le)); -+ child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ - - - return child_device_obj; -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 9e2de6a..51c98fd 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -635,6 +635,32 @@ enum hv_signal_policy { - HV_SIGNAL_POLICY_EXPLICIT, - }; - -+enum vmbus_device_type { -+ HV_IDE = 0, -+ HV_SCSI, -+ HV_FC, -+ HV_NIC, -+ HV_ND, -+ HV_PCIE, -+ HV_FB, -+ HV_KBD, -+ HV_MOUSE, -+ HV_KVP, -+ HV_TS, -+ HV_HB, -+ HV_SHUTDOWN, -+ HV_FCOPY, -+ HV_BACKUP, -+ HV_DM, -+ HV_UNKOWN, -+}; -+ -+struct vmbus_device { -+ u16 dev_type; -+ uuid_le guid; -+ bool perf_device; -+}; -+ - struct vmbus_channel { - /* Unique channel id */ - int id; -@@ -961,6 +987,8 @@ struct hv_device { - - /* the device instance id of this device */ - uuid_le dev_instance; -+ u16 vendor_id; -+ u16 device_id; - - struct device device; - --- -2.10.0 - diff --git a/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch b/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch deleted file mode 100644 index 46095d238..000000000 --- a/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 64f93cfc49018e7ffa772506cfe3631b3db530b9 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:37 -0800 -Subject: [PATCH 26/42] Drivers: hv: vmbus: add a helper function to set a - channel's pending send size - -This will be used by the coming net/hvsock driver. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 3c75354d043ad546148d6992e40033ecaefc5ea5) ---- - include/linux/hyperv.h | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 51c98fd..934542a 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -818,6 +818,12 @@ static inline void *get_per_channel_state(struct vmbus_channel *c) - return c->per_channel_state; - } - -+static inline void set_channel_pending_send_size(struct vmbus_channel *c, -+ u32 size) -+{ -+ c->outbound.ring_buffer->pending_send_sz = size; -+} -+ - void vmbus_onmessage(void *context); - - int vmbus_request_offers(void); --- -2.10.0 - diff --git a/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch b/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch deleted file mode 100644 index 375a62ae1..000000000 --- a/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 8658862991789c9dca080be3d35a7e72479b91e9 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:38 -0800 -Subject: [PATCH 27/42] Drivers: hv: vmbus: define the new offer type for - Hyper-V socket (hvsock) - -A helper function is also added. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit e8d6ca023efce3bd80050dcd9e708ee3cf8babd4) ---- - include/linux/hyperv.h | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 934542a..a4f105d 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -237,6 +237,7 @@ struct vmbus_channel_offer { - #define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100 - #define VMBUS_CHANNEL_PARENT_OFFER 0x200 - #define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400 -+#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000 - - struct vmpacket_descriptor { - u16 type; -@@ -797,6 +798,12 @@ struct vmbus_channel { - enum hv_signal_policy signal_policy; - }; - -+static inline bool is_hvsock_channel(const struct vmbus_channel *c) -+{ -+ return !!(c->offermsg.offer.chn_flags & -+ VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); -+} -+ - static inline void set_channel_signal_state(struct vmbus_channel *c, - enum hv_signal_policy policy) - { --- -2.10.0 - diff --git a/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch b/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch deleted file mode 100644 index 7c93e72a1..000000000 --- a/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 60af2c3c5565e40ee66123edb9386ccaa1355dff Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:39 -0800 -Subject: [PATCH 28/42] Drivers: hv: vmbus: vmbus_sendpacket_ctl: hvsock: avoid - unnecessary signaling - -When the hvsock channel's outbound ringbuffer is full (i.e., -hv_ringbuffer_write() returns -EAGAIN), we should avoid the unnecessary -signaling the host. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 5f363bc38f810d238d1e8b19998625ddec3b8138) ---- - drivers/hv/channel.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index dd6de7f..128dcf2 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -659,6 +659,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - * If we cannot write to the ring-buffer; signal the host - * even if we may not have written anything. This is a rare - * enough condition that it should not matter. -+ * NOTE: in this case, the hvsock channel is an exception, because -+ * it looks the host side's hvsock implementation has a throttling -+ * mechanism which can hurt the performance otherwise. - */ - - if (channel->signal_policy) -@@ -666,7 +669,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - else - kick_q = true; - -- if (((ret == 0) && kick_q && signal) || (ret)) -+ if (((ret == 0) && kick_q && signal) || -+ (ret && !is_hvsock_channel(channel))) - vmbus_setevent(channel); - - return ret; --- -2.10.0 - diff --git a/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch b/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch deleted file mode 100644 index fa317eec0..000000000 --- a/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 2c5183043209906ad0a41fb1a5b4d0c4c8a8e735 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:40 -0800 -Subject: [PATCH 29/42] Drivers: hv: vmbus: define a new VMBus message type for - hvsock - -A function to send the type of message is also added. - -The coming net/hvsock driver will use this function to proactively request -the host to offer a VMBus channel for a new hvsock connection. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 5c23a1a5c60b0f472cfa61cd7d8279f8aaeb5b64) ---- - drivers/hv/channel.c | 15 +++++++++++++++ - drivers/hv/channel_mgmt.c | 4 ++++ - include/linux/hyperv.h | 13 +++++++++++++ - 3 files changed, 32 insertions(+) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 128dcf2..415f6c7 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -219,6 +219,21 @@ error0: - } - EXPORT_SYMBOL_GPL(vmbus_open); - -+/* Used for Hyper-V Socket: a guest client's connect() to the host */ -+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, -+ const uuid_le *shv_host_servie_id) -+{ -+ struct vmbus_channel_tl_connect_request conn_msg; -+ -+ memset(&conn_msg, 0, sizeof(conn_msg)); -+ conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST; -+ conn_msg.guest_endpoint_id = *shv_guest_servie_id; -+ conn_msg.host_service_id = *shv_host_servie_id; -+ -+ return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); -+} -+EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); -+ - /* - * create_gpadl_header - Creates a gpadl for the specified buffer - */ -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index d6c6114..60ca25b 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -958,6 +958,10 @@ struct vmbus_channel_message_table_entry - {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, - {CHANNELMSG_UNLOAD, 0, NULL}, - {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, -+ {CHANNELMSG_18, 0, NULL}, -+ {CHANNELMSG_19, 0, NULL}, -+ {CHANNELMSG_20, 0, NULL}, -+ {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, - }; - - /* -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index a4f105d..191bc5d 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -394,6 +394,10 @@ enum vmbus_channel_message_type { - CHANNELMSG_VERSION_RESPONSE = 15, - CHANNELMSG_UNLOAD = 16, - CHANNELMSG_UNLOAD_RESPONSE = 17, -+ CHANNELMSG_18 = 18, -+ CHANNELMSG_19 = 19, -+ CHANNELMSG_20 = 20, -+ CHANNELMSG_TL_CONNECT_REQUEST = 21, - CHANNELMSG_COUNT - }; - -@@ -564,6 +568,13 @@ struct vmbus_channel_initiate_contact { - u64 monitor_page2; - } __packed; - -+/* Hyper-V socket: guest's connect()-ing to host */ -+struct vmbus_channel_tl_connect_request { -+ struct vmbus_channel_message_header header; -+ uuid_le guest_endpoint_id; -+ uuid_le host_service_id; -+} __packed; -+ - struct vmbus_channel_version_response { - struct vmbus_channel_message_header header; - u8 version_supported; -@@ -1295,4 +1306,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); - - extern __u32 vmbus_proto_version; - -+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, -+ const uuid_le *shv_host_servie_id); - #endif /* _HYPERV_H */ --- -2.10.0 - diff --git a/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch b/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch deleted file mode 100644 index f264f65dd..000000000 --- a/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 58a10705d630bdcb5ea08c894d28851c73e9bd4f Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:41 -0800 -Subject: [PATCH 30/42] Drivers: hv: vmbus: add a hvsock flag in struct - hv_driver - -Only the coming hv_sock driver has a "true" value for this flag. - -We treat the hvsock offers/channels as special VMBus devices. -Since the hv_sock driver handles all the hvsock offers/channels, we need to -tweak vmbus_match() for hv_sock driver, so we introduce this flag. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 8981da320a11217589aa3c50f9e891bcdef07ece) ---- - drivers/hv/vmbus_drv.c | 4 ++++ - include/linux/hyperv.h | 14 ++++++++++++++ - 2 files changed, 18 insertions(+) - -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index de7130c..03fc5d3 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -585,6 +585,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver) - struct hv_driver *drv = drv_to_hv_drv(driver); - struct hv_device *hv_dev = device_to_hv_device(device); - -+ /* The hv_sock driver handles all hv_sock offers. */ -+ if (is_hvsock_channel(hv_dev->channel)) -+ return drv->hvsock; -+ - if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) - return 1; - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 191bc5d..05966e2 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -992,6 +992,20 @@ extern void vmbus_ontimer(unsigned long data); - struct hv_driver { - const char *name; - -+ /* -+ * A hvsock offer, which has a VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER -+ * channel flag, actually doesn't mean a synthetic device because the -+ * offer's if_type/if_instance can change for every new hvsock -+ * connection. -+ * -+ * However, to facilitate the notification of new-offer/rescind-offer -+ * from vmbus driver to hvsock driver, we can handle hvsock offer as -+ * a special vmbus device, and hence we need the below flag to -+ * indicate if the driver is the hvsock driver or not: we need to -+ * specially treat the hvosck offer & driver in vmbus_match(). -+ */ -+ bool hvsock; -+ - /* the device type supported by this driver */ - uuid_le dev_type; - const struct hv_vmbus_device_id *id_table; --- -2.10.0 - diff --git a/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch b/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch deleted file mode 100644 index 9768186ca..000000000 --- a/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 6dd9db116b0985dfc56b3028205549f4c52d8be0 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:42 -0800 -Subject: [PATCH 31/42] Drivers: hv: vmbus: add a per-channel rescind callback - -This will be used by the coming hv_sock driver. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 499e8401a515d04daa986b995da710d2b9737764) ---- - drivers/hv/channel_mgmt.c | 11 +++++++++++ - include/linux/hyperv.h | 9 +++++++++ - 2 files changed, 20 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 60ca25b..76864c9 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -741,6 +741,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - spin_unlock_irqrestore(&channel->lock, flags); - - if (channel->device_obj) { -+ if (channel->chn_rescind_callback) { -+ channel->chn_rescind_callback(channel); -+ return; -+ } - /* - * We will have to unregister this device from the - * driver core. -@@ -1110,3 +1114,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary) - return ret; - } - EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); -+ -+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, -+ void (*chn_rescind_cb)(struct vmbus_channel *)) -+{ -+ channel->chn_rescind_callback = chn_rescind_cb; -+} -+EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 05966e2..ad04017 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -768,6 +768,12 @@ struct vmbus_channel { - void (*sc_creation_callback)(struct vmbus_channel *new_sc); - - /* -+ * Channel rescind callback. Some channels (the hvsock ones), need to -+ * register a callback which is invoked in vmbus_onoffer_rescind(). -+ */ -+ void (*chn_rescind_callback)(struct vmbus_channel *channel); -+ -+ /* - * The spinlock to protect the structure. It is being used to protect - * test-and-set access to various attributes of the structure as well - * as all sc_list operations. -@@ -853,6 +859,9 @@ int vmbus_request_offers(void); - void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, - void (*sc_cr_cb)(struct vmbus_channel *new_sc)); - -+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, -+ void (*chn_rescind_cb)(struct vmbus_channel *)); -+ - /* - * Retrieve the (sub) channel on which to send an outgoing request. - * When a primary channel has multiple sub-channels, we choose a --- -2.10.0 - diff --git a/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch b/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch deleted file mode 100644 index cb9a4f99c..000000000 --- a/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 5e89daa5e8c0b5950b46ba77dd6248c5e61bc405 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:43 -0800 -Subject: [PATCH 32/42] Drivers: hv: vmbus: add an API - vmbus_hvsock_device_unregister() - -The hvsock driver needs this API to release all the resources related -to the channel. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 85d9aa705184a4504d0330017e3956fcdae8a9d6) ---- - drivers/hv/channel_mgmt.c | 33 ++++++++++++++++++++++++++++----- - drivers/hv/connection.c | 4 ++-- - include/linux/hyperv.h | 2 ++ - 3 files changed, 32 insertions(+), 7 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 76864c9..cf311be 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -310,6 +310,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - vmbus_release_relid(relid); - - BUG_ON(!channel->rescind); -+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - - if (channel->target_cpu != get_cpu()) { - put_cpu(); -@@ -321,9 +322,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - } - - if (channel->primary_channel == NULL) { -- mutex_lock(&vmbus_connection.channel_mutex); - list_del(&channel->listentry); -- mutex_unlock(&vmbus_connection.channel_mutex); - - primary_channel = channel; - } else { -@@ -367,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - bool fnew = true; - unsigned long flags; - u16 dev_type; -+ int ret; - - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); -@@ -449,7 +449,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - * binding which eventually invokes the device driver's AddDevice() - * method. - */ -- if (vmbus_device_register(newchannel->device_obj) != 0) { -+ mutex_lock(&vmbus_connection.channel_mutex); -+ ret = vmbus_device_register(newchannel->device_obj); -+ mutex_unlock(&vmbus_connection.channel_mutex); -+ -+ if (ret != 0) { - pr_err("unable to add child device object (relid %d)\n", - newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); -@@ -725,6 +729,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - struct device *dev; - - rescind = (struct vmbus_channel_rescind_offer *)hdr; -+ -+ mutex_lock(&vmbus_connection.channel_mutex); - channel = relid2channel(rescind->child_relid); - - if (channel == NULL) { -@@ -733,7 +739,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - * vmbus_process_offer(), we have already invoked - * vmbus_release_relid() on error. - */ -- return; -+ goto out; - } - - spin_lock_irqsave(&channel->lock, flags); -@@ -743,7 +749,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - if (channel->device_obj) { - if (channel->chn_rescind_callback) { - channel->chn_rescind_callback(channel); -- return; -+ goto out; - } - /* - * We will have to unregister this device from the -@@ -758,8 +764,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - hv_process_channel_removal(channel, - channel->offermsg.child_relid); - } -+ -+out: -+ mutex_unlock(&vmbus_connection.channel_mutex); - } - -+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) -+{ -+ mutex_lock(&vmbus_connection.channel_mutex); -+ -+ BUG_ON(!is_hvsock_channel(channel)); -+ -+ channel->rescind = true; -+ vmbus_device_unregister(channel->device_obj); -+ -+ mutex_unlock(&vmbus_connection.channel_mutex); -+} -+EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); -+ -+ - /* - * vmbus_onoffers_delivered - - * This is invoked when all offers have been delivered. -diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 521f48e..09c08b5 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -285,7 +285,8 @@ struct vmbus_channel *relid2channel(u32 relid) - struct list_head *cur, *tmp; - struct vmbus_channel *cur_sc; - -- mutex_lock(&vmbus_connection.channel_mutex); -+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); -+ - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (channel->offermsg.child_relid == relid) { - found_channel = channel; -@@ -304,7 +305,6 @@ struct vmbus_channel *relid2channel(u32 relid) - } - } - } -- mutex_unlock(&vmbus_connection.channel_mutex); - - return found_channel; - } -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index ad04017..993318a 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1071,6 +1071,8 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, - const char *mod_name); - void vmbus_driver_unregister(struct hv_driver *hv_driver); - -+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); -+ - int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - resource_size_t min, resource_size_t max, - resource_size_t size, resource_size_t align, --- -2.10.0 - diff --git a/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch b/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch deleted file mode 100644 index fa351c53c..000000000 --- a/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch +++ /dev/null @@ -1,208 +0,0 @@ -From b7e3c4ad47b7fd47a79a723ac0c1823b6782d1ff Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Wed, 27 Jan 2016 22:29:45 -0800 -Subject: [PATCH 33/42] Drivers: hv: vmbus: Give control over how the ring - access is serialized - -On the channel send side, many of the VMBUS -device drivers explicity serialize access to the -outgoing ring buffer. Give more control to the -VMBUS device drivers in terms how to serialize -accesss to the outgoing ring buffer. -The default behavior will be to aquire the -ring lock to preserve the current behavior. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit fe760e4d64fe5c17c39e86c410d41f6587ee88bc) ---- - drivers/hv/channel.c | 15 +++++++++++---- - drivers/hv/channel_mgmt.c | 1 + - drivers/hv/hyperv_vmbus.h | 2 +- - drivers/hv/ring_buffer.c | 13 ++++++++----- - include/linux/hyperv.h | 16 ++++++++++++++++ - 5 files changed, 37 insertions(+), 10 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 415f6c7..57a1b65 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -639,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - u64 aligned_data = 0; - int ret; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - int num_vecs = ((bufferlen != 0) ? 3 : 1); - - -@@ -658,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, -- &signal); -+ &signal, lock); - - /* - * Signalling the host is conditional on many factors: -@@ -738,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - - if (pagecount > MAX_PAGE_BUFFER_COUNT) - return -EINVAL; -@@ -774,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - /* - * Signalling the host is conditional on many factors: -@@ -837,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - - packetlen = desc_size + bufferlen; - packetlen_aligned = ALIGN(packetlen, sizeof(u64)); -@@ -856,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - if (ret == 0 && signal) - vmbus_setevent(channel); -@@ -881,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, - multi_pagebuffer->len); - -@@ -919,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - if (ret == 0 && signal) - vmbus_setevent(channel); -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index cf311be..b40f429 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -259,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void) - return NULL; - - channel->id = atomic_inc_return(&chan_num); -+ channel->acquire_ring_lock = true; - spin_lock_init(&channel->inbound_lock); - spin_lock_init(&channel->lock); - -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 50b1de7..89bb559 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -617,7 +617,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); - - int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, - struct kvec *kv_list, -- u32 kv_count, bool *signal); -+ u32 kv_count, bool *signal, bool lock); - - int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer, - u32 buflen); -diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c -index 70a1a9a..89a428f 100644 ---- a/drivers/hv/ring_buffer.c -+++ b/drivers/hv/ring_buffer.c -@@ -388,7 +388,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) - * - */ - int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, -- struct kvec *kv_list, u32 kv_count, bool *signal) -+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock) - { - int i = 0; - u32 bytes_avail_towrite; -@@ -398,14 +398,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - u32 next_write_location; - u32 old_write; - u64 prev_indices = 0; -- unsigned long flags; -+ unsigned long flags = 0; - - for (i = 0; i < kv_count; i++) - totalbytes_towrite += kv_list[i].iov_len; - - totalbytes_towrite += sizeof(u64); - -- spin_lock_irqsave(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_lock_irqsave(&outring_info->ring_lock, flags); - - hv_get_ringbuffer_availbytes(outring_info, - &bytes_avail_toread, -@@ -416,7 +417,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - /* Otherwise, the next time around, we think the ring buffer */ - /* is empty since the read index == write index */ - if (bytes_avail_towrite <= totalbytes_towrite) { -- spin_unlock_irqrestore(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_unlock_irqrestore(&outring_info->ring_lock, flags); - return -EAGAIN; - } - -@@ -447,7 +449,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - hv_set_next_write_location(outring_info, next_write_location); - - -- spin_unlock_irqrestore(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_unlock_irqrestore(&outring_info->ring_lock, flags); - - *signal = hv_need_to_signal(old_write, outring_info); - return 0; -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 993318a..6c9695e 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -813,8 +813,24 @@ struct vmbus_channel { - * signaling control. - */ - enum hv_signal_policy signal_policy; -+ /* -+ * On the channel send side, many of the VMBUS -+ * device drivers explicity serialize access to the -+ * outgoing ring buffer. Give more control to the -+ * VMBUS device drivers in terms how to serialize -+ * accesss to the outgoing ring buffer. -+ * The default behavior will be to aquire the -+ * ring lock to preserve the current behavior. -+ */ -+ bool acquire_ring_lock; -+ - }; - -+static inline void set_channel_lock_state(struct vmbus_channel *c, bool state) -+{ -+ c->acquire_ring_lock = state; -+} -+ - static inline bool is_hvsock_channel(const struct vmbus_channel *c) - { - return !!(c->offermsg.offer.chn_flags & --- -2.10.0 - diff --git a/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch b/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch deleted file mode 100644 index 06b9acdea..000000000 --- a/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch +++ /dev/null @@ -1,100 +0,0 @@ -From af2dd29e3cf40c789045199893c232d57f0b7057 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 26 Feb 2016 15:13:16 -0800 -Subject: [PATCH 34/42] Drivers: hv: vmbus: avoid wait_for_completion() on - crash -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -wait_for_completion() may sleep, it enables interrupts and this -is something we really want to avoid on crashes because interrupt -handlers can cause other crashes. Switch to the recently introduced -vmbus_wait_for_unload() doing busy wait instead. - -Reported-by: Radim Krcmar -Signed-off-by: Vitaly Kuznetsov -Reviewed-by: Radim Kr.má -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 75ff3a8a9168df750b5bd0589e897a6c0517a9f1) ---- - drivers/hv/channel_mgmt.c | 4 ++-- - drivers/hv/connection.c | 2 +- - drivers/hv/hyperv_vmbus.h | 2 +- - drivers/hv/vmbus_drv.c | 4 ++-- - 4 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index b40f429..f70e352 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -641,7 +641,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) - complete(&vmbus_connection.unload_event); - } - --void vmbus_initiate_unload(void) -+void vmbus_initiate_unload(bool crash) - { - struct vmbus_channel_message_header hdr; - -@@ -658,7 +658,7 @@ void vmbus_initiate_unload(void) - * vmbus_initiate_unload() is also called on crash and the crash can be - * happening in an interrupt context, where scheduling is impossible. - */ -- if (!in_interrupt()) -+ if (!crash) - wait_for_completion(&vmbus_connection.unload_event); - else - vmbus_wait_for_unload(); -diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 09c08b5..78b8be8 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -233,7 +233,7 @@ void vmbus_disconnect(void) - /* - * First send the unload request to the host. - */ -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(false); - - if (vmbus_connection.work_queue) { - drain_workqueue(vmbus_connection.work_queue); -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 89bb559..f424c2d 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -756,7 +756,7 @@ void hv_vss_onchannelcallback(void *); - int hv_fcopy_init(struct hv_util_service *); - void hv_fcopy_deinit(void); - void hv_fcopy_onchannelcallback(void *); --void vmbus_initiate_unload(void); -+void vmbus_initiate_unload(bool crash); - - static inline void hv_poll_channel(struct vmbus_channel *channel, - void (*cb)(void *)) -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 03fc5d3..b0cc6fd 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -1276,7 +1276,7 @@ static void hv_kexec_handler(void) - int cpu; - - hv_synic_clockevents_cleanup(); -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(false); - for_each_online_cpu(cpu) - smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); - hv_cleanup(); -@@ -1284,7 +1284,7 @@ static void hv_kexec_handler(void) - - static void hv_crash_handler(struct pt_regs *regs) - { -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(true); - /* - * In crash handler we can't schedule synic cleanup for all CPUs, - * doing the cleanup for current CPU only. This should be sufficient --- -2.10.0 - diff --git a/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch b/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch deleted file mode 100644 index 58ba9c17c..000000000 --- a/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch +++ /dev/null @@ -1,39 +0,0 @@ -From fa3647ae889af3cccaaee37ac0723fc1b74689e3 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 26 Feb 2016 15:13:18 -0800 -Subject: [PATCH 35/42] Drivers: hv: vmbus: avoid unneeded compiler - optimizations in vmbus_wait_for_unload() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Message header is modified by the hypervisor and we read it in a loop, -we need to prevent compilers from optimizing accesses. There are no such -optimizations at this moment, this is just a future proof. - -Suggested-by: Radim Krcmar -Signed-off-by: Vitaly Kuznetsov -Reviewed-by: Radim Kr.má -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit d452ab7b4c65dfcaee88a0d6866eeeb98a3d1884) ---- - drivers/hv/channel_mgmt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index f70e352..c892db5 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -605,7 +605,7 @@ static void vmbus_wait_for_unload(void) - bool unloaded = false; - - while (1) { -- if (msg->header.message_type == HVMSG_NONE) { -+ if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) { - mdelay(10); - continue; - } --- -2.10.0 - diff --git a/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch b/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch deleted file mode 100644 index 9353d1e53..000000000 --- a/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch +++ /dev/null @@ -1,2312 +0,0 @@ -From afde92b79d7bbdf25d3f583898cbee4773b07d41 Mon Sep 17 00:00:00 2001 -From: Tom Herbert -Date: Mon, 7 Mar 2016 14:11:06 -0800 -Subject: [PATCH 36/42] kcm: Kernel Connection Multiplexor module - -This module implements the Kernel Connection Multiplexor. - -Kernel Connection Multiplexor (KCM) is a facility that provides a -message based interface over TCP for generic application protocols. -With KCM an application can efficiently send and receive application -protocol messages over TCP using datagram sockets. - -For more information see the included Documentation/networking/kcm.txt - -Signed-off-by: Tom Herbert -Signed-off-by: David S. Miller -(cherry picked from commit ab7ac4eb9832e32a09f4e8042705484d2fb0aad3) ---- - include/linux/socket.h | 6 +- - include/net/kcm.h | 125 +++ - include/uapi/linux/kcm.h | 39 + - net/Kconfig | 1 + - net/Makefile | 1 + - net/kcm/Kconfig | 9 + - net/kcm/Makefile | 3 + - net/kcm/kcmsock.c | 2015 ++++++++++++++++++++++++++++++++++++++++++++++ - 8 files changed, 2198 insertions(+), 1 deletion(-) - create mode 100644 include/net/kcm.h - create mode 100644 include/uapi/linux/kcm.h - create mode 100644 net/kcm/Kconfig - create mode 100644 net/kcm/Makefile - create mode 100644 net/kcm/kcmsock.c - -diff --git a/include/linux/socket.h b/include/linux/socket.h -index 5bf59c8..4e1ea53 100644 ---- a/include/linux/socket.h -+++ b/include/linux/socket.h -@@ -200,7 +200,9 @@ struct ucred { - #define AF_ALG 38 /* Algorithm sockets */ - #define AF_NFC 39 /* NFC sockets */ - #define AF_VSOCK 40 /* vSockets */ --#define AF_MAX 41 /* For now.. */ -+#define AF_KCM 41 /* Kernel Connection Multiplexor*/ -+ -+#define AF_MAX 42 /* For now.. */ - - /* Protocol families, same as address families. */ - #define PF_UNSPEC AF_UNSPEC -@@ -246,6 +248,7 @@ struct ucred { - #define PF_ALG AF_ALG - #define PF_NFC AF_NFC - #define PF_VSOCK AF_VSOCK -+#define PF_KCM AF_KCM - #define PF_MAX AF_MAX - - /* Maximum queue length specifiable by listen. */ -@@ -322,6 +325,7 @@ struct ucred { - #define SOL_CAIF 278 - #define SOL_ALG 279 - #define SOL_NFC 280 -+#define SOL_KCM 281 - - /* IPX options */ - #define IPX_TYPE 1 -diff --git a/include/net/kcm.h b/include/net/kcm.h -new file mode 100644 -index 0000000..1bcae39 ---- /dev/null -+++ b/include/net/kcm.h -@@ -0,0 +1,125 @@ -+/* -+ * Kernel Connection Multiplexor -+ * -+ * Copyright (c) 2016 Tom Herbert -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation. -+ */ -+ -+#ifndef __NET_KCM_H_ -+#define __NET_KCM_H_ -+ -+#include -+#include -+#include -+ -+extern unsigned int kcm_net_id; -+ -+struct kcm_tx_msg { -+ unsigned int sent; -+ unsigned int fragidx; -+ unsigned int frag_offset; -+ unsigned int msg_flags; -+ struct sk_buff *frag_skb; -+ struct sk_buff *last_skb; -+}; -+ -+struct kcm_rx_msg { -+ int full_len; -+ int accum_len; -+ int offset; -+}; -+ -+/* Socket structure for KCM client sockets */ -+struct kcm_sock { -+ struct sock sk; -+ struct kcm_mux *mux; -+ struct list_head kcm_sock_list; -+ int index; -+ u32 done : 1; -+ struct work_struct done_work; -+ -+ /* Transmit */ -+ struct kcm_psock *tx_psock; -+ struct work_struct tx_work; -+ struct list_head wait_psock_list; -+ struct sk_buff *seq_skb; -+ -+ /* Don't use bit fields here, these are set under different locks */ -+ bool tx_wait; -+ bool tx_wait_more; -+ -+ /* Receive */ -+ struct kcm_psock *rx_psock; -+ struct list_head wait_rx_list; /* KCMs waiting for receiving */ -+ bool rx_wait; -+ u32 rx_disabled : 1; -+}; -+ -+struct bpf_prog; -+ -+/* Structure for an attached lower socket */ -+struct kcm_psock { -+ struct sock *sk; -+ struct kcm_mux *mux; -+ int index; -+ -+ u32 tx_stopped : 1; -+ u32 rx_stopped : 1; -+ u32 done : 1; -+ u32 unattaching : 1; -+ -+ void (*save_state_change)(struct sock *sk); -+ void (*save_data_ready)(struct sock *sk); -+ void (*save_write_space)(struct sock *sk); -+ -+ struct list_head psock_list; -+ -+ /* Receive */ -+ struct sk_buff *rx_skb_head; -+ struct sk_buff **rx_skb_nextp; -+ struct sk_buff *ready_rx_msg; -+ struct list_head psock_ready_list; -+ struct work_struct rx_work; -+ struct delayed_work rx_delayed_work; -+ struct bpf_prog *bpf_prog; -+ struct kcm_sock *rx_kcm; -+ -+ /* Transmit */ -+ struct kcm_sock *tx_kcm; -+ struct list_head psock_avail_list; -+}; -+ -+/* Per net MUX list */ -+struct kcm_net { -+ struct mutex mutex; -+ struct list_head mux_list; -+ int count; -+}; -+ -+/* Structure for a MUX */ -+struct kcm_mux { -+ struct list_head kcm_mux_list; -+ struct rcu_head rcu; -+ struct kcm_net *knet; -+ -+ struct list_head kcm_socks; /* All KCM sockets on MUX */ -+ int kcm_socks_cnt; /* Total KCM socket count for MUX */ -+ struct list_head psocks; /* List of all psocks on MUX */ -+ int psocks_cnt; /* Total attached sockets */ -+ -+ /* Receive */ -+ spinlock_t rx_lock ____cacheline_aligned_in_smp; -+ struct list_head kcm_rx_waiters; /* KCMs waiting for receiving */ -+ struct list_head psocks_ready; /* List of psocks with a msg ready */ -+ struct sk_buff_head rx_hold_queue; -+ -+ /* Transmit */ -+ spinlock_t lock ____cacheline_aligned_in_smp; /* TX and mux locking */ -+ struct list_head psocks_avail; /* List of available psocks */ -+ struct list_head kcm_tx_waiters; /* KCMs waiting for a TX psock */ -+}; -+ -+#endif /* __NET_KCM_H_ */ -diff --git a/include/uapi/linux/kcm.h b/include/uapi/linux/kcm.h -new file mode 100644 -index 0000000..d72350f ---- /dev/null -+++ b/include/uapi/linux/kcm.h -@@ -0,0 +1,39 @@ -+/* -+ * Kernel Connection Multiplexor -+ * -+ * Copyright (c) 2016 Tom Herbert -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation. -+ * -+ * User API to clone KCM sockets and attach transport socket to a KCM -+ * multiplexor. -+ */ -+ -+#ifndef KCM_KERNEL_H -+#define KCM_KERNEL_H -+ -+struct kcm_attach { -+ int fd; -+ int bpf_fd; -+}; -+ -+struct kcm_unattach { -+ int fd; -+}; -+ -+struct kcm_clone { -+ int fd; -+}; -+ -+#define SIOCKCMATTACH (SIOCPROTOPRIVATE + 0) -+#define SIOCKCMUNATTACH (SIOCPROTOPRIVATE + 1) -+#define SIOCKCMCLONE (SIOCPROTOPRIVATE + 2) -+ -+#define KCMPROTO_CONNECTED 0 -+ -+/* Socket options */ -+#define KCM_RECV_DISABLE 1 -+ -+#endif -diff --git a/net/Kconfig b/net/Kconfig -index 127da94..b8439e6 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -351,6 +351,7 @@ source "net/can/Kconfig" - source "net/irda/Kconfig" - source "net/bluetooth/Kconfig" - source "net/rxrpc/Kconfig" -+source "net/kcm/Kconfig" - - config FIB_RULES - bool -diff --git a/net/Makefile b/net/Makefile -index a5d0409..81d1411 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/ - obj-$(CONFIG_BT) += bluetooth/ - obj-$(CONFIG_SUNRPC) += sunrpc/ - obj-$(CONFIG_AF_RXRPC) += rxrpc/ -+obj-$(CONFIG_AF_KCM) += kcm/ - obj-$(CONFIG_ATM) += atm/ - obj-$(CONFIG_L2TP) += l2tp/ - obj-$(CONFIG_DECNET) += decnet/ -diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig -new file mode 100644 -index 0000000..4f28332 ---- /dev/null -+++ b/net/kcm/Kconfig -@@ -0,0 +1,9 @@ -+ -+config AF_KCM -+ tristate "KCM sockets" -+ depends on INET -+ select BPF_SYSCALL -+ ---help--- -+ KCM (Kernel Connection Multiplexor) sockets provide a method -+ for multiplexing messages of a message based application -+ protocol over kernel connectons (e.g. TCP connections). -diff --git a/net/kcm/Makefile b/net/kcm/Makefile -new file mode 100644 -index 0000000..cb525f7 ---- /dev/null -+++ b/net/kcm/Makefile -@@ -0,0 +1,3 @@ -+obj-$(CONFIG_AF_KCM) += kcm.o -+ -+kcm-y := kcmsock.o -diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c -new file mode 100644 -index 0000000..649d246 ---- /dev/null -+++ b/net/kcm/kcmsock.c -@@ -0,0 +1,2015 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+unsigned int kcm_net_id; -+ -+static struct kmem_cache *kcm_psockp __read_mostly; -+static struct kmem_cache *kcm_muxp __read_mostly; -+static struct workqueue_struct *kcm_wq; -+ -+static inline struct kcm_sock *kcm_sk(const struct sock *sk) -+{ -+ return (struct kcm_sock *)sk; -+} -+ -+static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) -+{ -+ return (struct kcm_tx_msg *)skb->cb; -+} -+ -+static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -+{ -+ return (struct kcm_rx_msg *)((void *)skb->cb + -+ offsetof(struct qdisc_skb_cb, data)); -+} -+ -+static void report_csk_error(struct sock *csk, int err) -+{ -+ csk->sk_err = EPIPE; -+ csk->sk_error_report(csk); -+} -+ -+/* Callback lock held */ -+static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, -+ struct sk_buff *skb) -+{ -+ struct sock *csk = psock->sk; -+ -+ /* Unrecoverable error in receive */ -+ -+ if (psock->rx_stopped) -+ return; -+ -+ psock->rx_stopped = 1; -+ -+ /* Report an error on the lower socket */ -+ report_csk_error(csk, err); -+} -+ -+static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, -+ bool wakeup_kcm) -+{ -+ struct sock *csk = psock->sk; -+ struct kcm_mux *mux = psock->mux; -+ -+ /* Unrecoverable error in transmit */ -+ -+ spin_lock_bh(&mux->lock); -+ -+ if (psock->tx_stopped) { -+ spin_unlock_bh(&mux->lock); -+ return; -+ } -+ -+ psock->tx_stopped = 1; -+ -+ if (!psock->tx_kcm) { -+ /* Take off psocks_avail list */ -+ list_del(&psock->psock_avail_list); -+ } else if (wakeup_kcm) { -+ /* In this case psock is being aborted while outside of -+ * write_msgs and psock is reserved. Schedule tx_work -+ * to handle the failure there. Need to commit tx_stopped -+ * before queuing work. -+ */ -+ smp_mb(); -+ -+ queue_work(kcm_wq, &psock->tx_kcm->tx_work); -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+ /* Report error on lower socket */ -+ report_csk_error(csk, err); -+} -+ -+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -+ -+/* KCM is ready to receive messages on its queue-- either the KCM is new or -+ * has become unblocked after being blocked on full socket buffer. Queue any -+ * pending ready messages on a psock. RX mux lock held. -+ */ -+static void kcm_rcv_ready(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ struct sk_buff *skb; -+ -+ if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled)) -+ return; -+ -+ while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) { -+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { -+ /* Assuming buffer limit has been reached */ -+ skb_queue_head(&mux->rx_hold_queue, skb); -+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); -+ return; -+ } -+ } -+ -+ while (!list_empty(&mux->psocks_ready)) { -+ psock = list_first_entry(&mux->psocks_ready, struct kcm_psock, -+ psock_ready_list); -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) { -+ /* Assuming buffer limit has been reached */ -+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); -+ return; -+ } -+ -+ /* Consumed the ready message on the psock. Schedule rx_work to -+ * get more messages. -+ */ -+ list_del(&psock->psock_ready_list); -+ psock->ready_rx_msg = NULL; -+ -+ /* Commit clearing of ready_rx_msg for queuing work */ -+ smp_mb(); -+ -+ queue_work(kcm_wq, &psock->rx_work); -+ } -+ -+ /* Buffer limit is okay now, add to ready list */ -+ list_add_tail(&kcm->wait_rx_list, -+ &kcm->mux->kcm_rx_waiters); -+ kcm->rx_wait = true; -+} -+ -+static void kcm_rfree(struct sk_buff *skb) -+{ -+ struct sock *sk = skb->sk; -+ struct kcm_sock *kcm = kcm_sk(sk); -+ struct kcm_mux *mux = kcm->mux; -+ unsigned int len = skb->truesize; -+ -+ sk_mem_uncharge(sk, len); -+ atomic_sub(len, &sk->sk_rmem_alloc); -+ -+ /* For reading rx_wait and rx_psock without holding lock */ -+ smp_mb__after_atomic(); -+ -+ if (!kcm->rx_wait && !kcm->rx_psock && -+ sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { -+ spin_lock_bh(&mux->rx_lock); -+ kcm_rcv_ready(kcm); -+ spin_unlock_bh(&mux->rx_lock); -+ } -+} -+ -+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) -+{ -+ struct sk_buff_head *list = &sk->sk_receive_queue; -+ -+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) -+ return -ENOMEM; -+ -+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) -+ return -ENOBUFS; -+ -+ skb->dev = NULL; -+ -+ skb_orphan(skb); -+ skb->sk = sk; -+ skb->destructor = kcm_rfree; -+ atomic_add(skb->truesize, &sk->sk_rmem_alloc); -+ sk_mem_charge(sk, skb->truesize); -+ -+ skb_queue_tail(list, skb); -+ -+ if (!sock_flag(sk, SOCK_DEAD)) -+ sk->sk_data_ready(sk); -+ -+ return 0; -+} -+ -+/* Requeue received messages for a kcm socket to other kcm sockets. This is -+ * called with a kcm socket is receive disabled. -+ * RX mux lock held. -+ */ -+static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) -+{ -+ struct sk_buff *skb; -+ struct kcm_sock *kcm; -+ -+ while ((skb = __skb_dequeue(head))) { -+ /* Reset destructor to avoid calling kcm_rcv_ready */ -+ skb->destructor = sock_rfree; -+ skb_orphan(skb); -+try_again: -+ if (list_empty(&mux->kcm_rx_waiters)) { -+ skb_queue_tail(&mux->rx_hold_queue, skb); -+ continue; -+ } -+ -+ kcm = list_first_entry(&mux->kcm_rx_waiters, -+ struct kcm_sock, wait_rx_list); -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { -+ /* Should mean socket buffer full */ -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ -+ /* Commit rx_wait to read in kcm_free */ -+ smp_wmb(); -+ -+ goto try_again; -+ } -+ } -+} -+ -+/* Lower sock lock held */ -+static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, -+ struct sk_buff *head) -+{ -+ struct kcm_mux *mux = psock->mux; -+ struct kcm_sock *kcm; -+ -+ WARN_ON(psock->ready_rx_msg); -+ -+ if (psock->rx_kcm) -+ return psock->rx_kcm; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ if (psock->rx_kcm) { -+ spin_unlock_bh(&mux->rx_lock); -+ return psock->rx_kcm; -+ } -+ -+ if (list_empty(&mux->kcm_rx_waiters)) { -+ psock->ready_rx_msg = head; -+ list_add_tail(&psock->psock_ready_list, -+ &mux->psocks_ready); -+ spin_unlock_bh(&mux->rx_lock); -+ return NULL; -+ } -+ -+ kcm = list_first_entry(&mux->kcm_rx_waiters, -+ struct kcm_sock, wait_rx_list); -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ -+ psock->rx_kcm = kcm; -+ kcm->rx_psock = psock; -+ -+ spin_unlock_bh(&mux->rx_lock); -+ -+ return kcm; -+} -+ -+static void kcm_done(struct kcm_sock *kcm); -+ -+static void kcm_done_work(struct work_struct *w) -+{ -+ kcm_done(container_of(w, struct kcm_sock, done_work)); -+} -+ -+/* Lower sock held */ -+static void unreserve_rx_kcm(struct kcm_psock *psock, -+ bool rcv_ready) -+{ -+ struct kcm_sock *kcm = psock->rx_kcm; -+ struct kcm_mux *mux = psock->mux; -+ -+ if (!kcm) -+ return; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ psock->rx_kcm = NULL; -+ kcm->rx_psock = NULL; -+ -+ /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with -+ * kcm_rfree -+ */ -+ smp_mb(); -+ -+ if (unlikely(kcm->done)) { -+ spin_unlock_bh(&mux->rx_lock); -+ -+ /* Need to run kcm_done in a task since we need to qcquire -+ * callback locks which may already be held here. -+ */ -+ INIT_WORK(&kcm->done_work, kcm_done_work); -+ schedule_work(&kcm->done_work); -+ return; -+ } -+ -+ if (unlikely(kcm->rx_disabled)) { -+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); -+ } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) { -+ /* Check for degenerative race with rx_wait that all -+ * data was dequeued (accounted for in kcm_rfree). -+ */ -+ kcm_rcv_ready(kcm); -+ } -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+/* Macro to invoke filter function. */ -+#define KCM_RUN_FILTER(prog, ctx) \ -+ (*prog->bpf_func)(ctx, prog->insnsi) -+ -+/* Lower socket lock held */ -+static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, -+ unsigned int orig_offset, size_t orig_len) -+{ -+ struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; -+ struct kcm_rx_msg *rxm; -+ struct kcm_sock *kcm; -+ struct sk_buff *head, *skb; -+ size_t eaten = 0, cand_len; -+ ssize_t extra; -+ int err; -+ bool cloned_orig = false; -+ -+ if (psock->ready_rx_msg) -+ return 0; -+ -+ head = psock->rx_skb_head; -+ if (head) { -+ /* Message already in progress */ -+ -+ if (unlikely(orig_offset)) { -+ /* Getting data with a non-zero offset when a message is -+ * in progress is not expected. If it does happen, we -+ * need to clone and pull since we can't deal with -+ * offsets in the skbs for a message expect in the head. -+ */ -+ orig_skb = skb_clone(orig_skb, GFP_ATOMIC); -+ if (!orig_skb) { -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ if (!pskb_pull(orig_skb, orig_offset)) { -+ kfree_skb(orig_skb); -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ cloned_orig = true; -+ orig_offset = 0; -+ } -+ -+ if (!psock->rx_skb_nextp) { -+ /* We are going to append to the frags_list of head. -+ * Need to unshare the frag_list. -+ */ -+ err = skb_unclone(head, GFP_ATOMIC); -+ if (err) { -+ desc->error = err; -+ return 0; -+ } -+ -+ if (unlikely(skb_shinfo(head)->frag_list)) { -+ /* We can't append to an sk_buff that already -+ * has a frag_list. We create a new head, point -+ * the frag_list of that to the old head, and -+ * then are able to use the old head->next for -+ * appending to the message. -+ */ -+ if (WARN_ON(head->next)) { -+ desc->error = -EINVAL; -+ return 0; -+ } -+ -+ skb = alloc_skb(0, GFP_ATOMIC); -+ if (!skb) { -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ skb->len = head->len; -+ skb->data_len = head->len; -+ skb->truesize = head->truesize; -+ *kcm_rx_msg(skb) = *kcm_rx_msg(head); -+ psock->rx_skb_nextp = &head->next; -+ skb_shinfo(skb)->frag_list = head; -+ psock->rx_skb_head = skb; -+ head = skb; -+ } else { -+ psock->rx_skb_nextp = -+ &skb_shinfo(head)->frag_list; -+ } -+ } -+ } -+ -+ while (eaten < orig_len) { -+ /* Always clone since we will consume something */ -+ skb = skb_clone(orig_skb, GFP_ATOMIC); -+ if (!skb) { -+ desc->error = -ENOMEM; -+ break; -+ } -+ -+ cand_len = orig_len - eaten; -+ -+ head = psock->rx_skb_head; -+ if (!head) { -+ head = skb; -+ psock->rx_skb_head = head; -+ /* Will set rx_skb_nextp on next packet if needed */ -+ psock->rx_skb_nextp = NULL; -+ rxm = kcm_rx_msg(head); -+ memset(rxm, 0, sizeof(*rxm)); -+ rxm->offset = orig_offset + eaten; -+ } else { -+ /* Unclone since we may be appending to an skb that we -+ * already share a frag_list with. -+ */ -+ err = skb_unclone(skb, GFP_ATOMIC); -+ if (err) { -+ desc->error = err; -+ break; -+ } -+ -+ rxm = kcm_rx_msg(head); -+ *psock->rx_skb_nextp = skb; -+ psock->rx_skb_nextp = &skb->next; -+ head->data_len += skb->len; -+ head->len += skb->len; -+ head->truesize += skb->truesize; -+ } -+ -+ if (!rxm->full_len) { -+ ssize_t len; -+ -+ len = KCM_RUN_FILTER(psock->bpf_prog, head); -+ -+ if (!len) { -+ /* Need more header to determine length */ -+ rxm->accum_len += cand_len; -+ eaten += cand_len; -+ WARN_ON(eaten != orig_len); -+ break; -+ } else if (len <= (ssize_t)head->len - -+ skb->len - rxm->offset) { -+ /* Length must be into new skb (and also -+ * greater than zero) -+ */ -+ desc->error = -EPROTO; -+ psock->rx_skb_head = NULL; -+ kcm_abort_rx_psock(psock, EPROTO, head); -+ break; -+ } -+ -+ rxm->full_len = len; -+ } -+ -+ extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; -+ -+ if (extra < 0) { -+ /* Message not complete yet. */ -+ rxm->accum_len += cand_len; -+ eaten += cand_len; -+ WARN_ON(eaten != orig_len); -+ break; -+ } -+ -+ /* Positive extra indicates ore bytes than needed for the -+ * message -+ */ -+ -+ WARN_ON(extra > cand_len); -+ -+ eaten += (cand_len - extra); -+ -+ /* Hurray, we have a new message! */ -+ psock->rx_skb_head = NULL; -+ -+try_queue: -+ kcm = reserve_rx_kcm(psock, head); -+ if (!kcm) { -+ /* Unable to reserve a KCM, message is held in psock. */ -+ break; -+ } -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, head)) { -+ /* Should mean socket buffer full */ -+ unreserve_rx_kcm(psock, false); -+ goto try_queue; -+ } -+ } -+ -+ if (cloned_orig) -+ kfree_skb(orig_skb); -+ -+ return eaten; -+} -+ -+/* Called with lock held on lower socket */ -+static int psock_tcp_read_sock(struct kcm_psock *psock) -+{ -+ read_descriptor_t desc; -+ -+ desc.arg.data = psock; -+ desc.error = 0; -+ desc.count = 1; /* give more than one skb per call */ -+ -+ /* sk should be locked here, so okay to do tcp_read_sock */ -+ tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); -+ -+ unreserve_rx_kcm(psock, true); -+ -+ return desc.error; -+} -+ -+/* Lower sock lock held */ -+static void psock_tcp_data_ready(struct sock *sk) -+{ -+ struct kcm_psock *psock; -+ -+ read_lock_bh(&sk->sk_callback_lock); -+ -+ psock = (struct kcm_psock *)sk->sk_user_data; -+ if (unlikely(!psock || psock->rx_stopped)) -+ goto out; -+ -+ if (psock->ready_rx_msg) -+ goto out; -+ -+ if (psock_tcp_read_sock(psock) == -ENOMEM) -+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); -+ -+out: -+ read_unlock_bh(&sk->sk_callback_lock); -+} -+ -+static void do_psock_rx_work(struct kcm_psock *psock) -+{ -+ read_descriptor_t rd_desc; -+ struct sock *csk = psock->sk; -+ -+ /* We need the read lock to synchronize with psock_tcp_data_ready. We -+ * need the socket lock for calling tcp_read_sock. -+ */ -+ lock_sock(csk); -+ read_lock_bh(&csk->sk_callback_lock); -+ -+ if (unlikely(csk->sk_user_data != psock)) -+ goto out; -+ -+ if (unlikely(psock->rx_stopped)) -+ goto out; -+ -+ if (psock->ready_rx_msg) -+ goto out; -+ -+ rd_desc.arg.data = psock; -+ -+ if (psock_tcp_read_sock(psock) == -ENOMEM) -+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); -+ -+out: -+ read_unlock_bh(&csk->sk_callback_lock); -+ release_sock(csk); -+} -+ -+static void psock_rx_work(struct work_struct *w) -+{ -+ do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); -+} -+ -+static void psock_rx_delayed_work(struct work_struct *w) -+{ -+ do_psock_rx_work(container_of(w, struct kcm_psock, -+ rx_delayed_work.work)); -+} -+ -+static void psock_tcp_state_change(struct sock *sk) -+{ -+ /* TCP only does a POLLIN for a half close. Do a POLLHUP here -+ * since application will normally not poll with POLLIN -+ * on the TCP sockets. -+ */ -+ -+ report_csk_error(sk, EPIPE); -+} -+ -+static void psock_tcp_write_space(struct sock *sk) -+{ -+ struct kcm_psock *psock; -+ struct kcm_mux *mux; -+ struct kcm_sock *kcm; -+ -+ read_lock_bh(&sk->sk_callback_lock); -+ -+ psock = (struct kcm_psock *)sk->sk_user_data; -+ if (unlikely(!psock)) -+ goto out; -+ -+ mux = psock->mux; -+ -+ spin_lock_bh(&mux->lock); -+ -+ /* Check if the socket is reserved so someone is waiting for sending. */ -+ kcm = psock->tx_kcm; -+ if (kcm) -+ queue_work(kcm_wq, &kcm->tx_work); -+ -+ spin_unlock_bh(&mux->lock); -+out: -+ read_unlock_bh(&sk->sk_callback_lock); -+} -+ -+static void unreserve_psock(struct kcm_sock *kcm); -+ -+/* kcm sock is locked. */ -+static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ -+ psock = kcm->tx_psock; -+ -+ smp_rmb(); /* Must read tx_psock before tx_wait */ -+ -+ if (psock) { -+ WARN_ON(kcm->tx_wait); -+ if (unlikely(psock->tx_stopped)) -+ unreserve_psock(kcm); -+ else -+ return kcm->tx_psock; -+ } -+ -+ spin_lock_bh(&mux->lock); -+ -+ /* Check again under lock to see if psock was reserved for this -+ * psock via psock_unreserve. -+ */ -+ psock = kcm->tx_psock; -+ if (unlikely(psock)) { -+ WARN_ON(kcm->tx_wait); -+ spin_unlock_bh(&mux->lock); -+ return kcm->tx_psock; -+ } -+ -+ if (!list_empty(&mux->psocks_avail)) { -+ psock = list_first_entry(&mux->psocks_avail, -+ struct kcm_psock, -+ psock_avail_list); -+ list_del(&psock->psock_avail_list); -+ if (kcm->tx_wait) { -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ } -+ kcm->tx_psock = psock; -+ psock->tx_kcm = kcm; -+ } else if (!kcm->tx_wait) { -+ list_add_tail(&kcm->wait_psock_list, -+ &mux->kcm_tx_waiters); -+ kcm->tx_wait = true; -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+ return psock; -+} -+ -+/* mux lock held */ -+static void psock_now_avail(struct kcm_psock *psock) -+{ -+ struct kcm_mux *mux = psock->mux; -+ struct kcm_sock *kcm; -+ -+ if (list_empty(&mux->kcm_tx_waiters)) { -+ list_add_tail(&psock->psock_avail_list, -+ &mux->psocks_avail); -+ } else { -+ kcm = list_first_entry(&mux->kcm_tx_waiters, -+ struct kcm_sock, -+ wait_psock_list); -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ psock->tx_kcm = kcm; -+ -+ /* Commit before changing tx_psock since that is read in -+ * reserve_psock before queuing work. -+ */ -+ smp_mb(); -+ -+ kcm->tx_psock = psock; -+ queue_work(kcm_wq, &kcm->tx_work); -+ } -+} -+ -+/* kcm sock is locked. */ -+static void unreserve_psock(struct kcm_sock *kcm) -+{ -+ struct kcm_psock *psock; -+ struct kcm_mux *mux = kcm->mux; -+ -+ spin_lock_bh(&mux->lock); -+ -+ psock = kcm->tx_psock; -+ -+ if (WARN_ON(!psock)) { -+ spin_unlock_bh(&mux->lock); -+ return; -+ } -+ -+ smp_rmb(); /* Read tx_psock before tx_wait */ -+ -+ WARN_ON(kcm->tx_wait); -+ -+ kcm->tx_psock = NULL; -+ psock->tx_kcm = NULL; -+ -+ if (unlikely(psock->tx_stopped)) { -+ if (psock->done) { -+ /* Deferred free */ -+ list_del(&psock->psock_list); -+ mux->psocks_cnt--; -+ sock_put(psock->sk); -+ fput(psock->sk->sk_socket->file); -+ kmem_cache_free(kcm_psockp, psock); -+ } -+ -+ /* Don't put back on available list */ -+ -+ spin_unlock_bh(&mux->lock); -+ -+ return; -+ } -+ -+ psock_now_avail(psock); -+ -+ spin_unlock_bh(&mux->lock); -+} -+ -+/* Write any messages ready on the kcm socket. Called with kcm sock lock -+ * held. Return bytes actually sent or error. -+ */ -+static int kcm_write_msgs(struct kcm_sock *kcm) -+{ -+ struct sock *sk = &kcm->sk; -+ struct kcm_psock *psock; -+ struct sk_buff *skb, *head; -+ struct kcm_tx_msg *txm; -+ unsigned short fragidx, frag_offset; -+ unsigned int sent, total_sent = 0; -+ int ret = 0; -+ -+ kcm->tx_wait_more = false; -+ psock = kcm->tx_psock; -+ if (unlikely(psock && psock->tx_stopped)) { -+ /* A reserved psock was aborted asynchronously. Unreserve -+ * it and we'll retry the message. -+ */ -+ unreserve_psock(kcm); -+ if (skb_queue_empty(&sk->sk_write_queue)) -+ return 0; -+ -+ kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; -+ -+ } else if (skb_queue_empty(&sk->sk_write_queue)) { -+ return 0; -+ } -+ -+ head = skb_peek(&sk->sk_write_queue); -+ txm = kcm_tx_msg(head); -+ -+ if (txm->sent) { -+ /* Send of first skbuff in queue already in progress */ -+ if (WARN_ON(!psock)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ sent = txm->sent; -+ frag_offset = txm->frag_offset; -+ fragidx = txm->fragidx; -+ skb = txm->frag_skb; -+ -+ goto do_frag; -+ } -+ -+try_again: -+ psock = reserve_psock(kcm); -+ if (!psock) -+ goto out; -+ -+ do { -+ skb = head; -+ txm = kcm_tx_msg(head); -+ sent = 0; -+ -+do_frag_list: -+ if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; -+ fragidx++) { -+ skb_frag_t *frag; -+ -+ frag_offset = 0; -+do_frag: -+ frag = &skb_shinfo(skb)->frags[fragidx]; -+ if (WARN_ON(!frag->size)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ ret = kernel_sendpage(psock->sk->sk_socket, -+ frag->page.p, -+ frag->page_offset + frag_offset, -+ frag->size - frag_offset, -+ MSG_DONTWAIT); -+ if (ret <= 0) { -+ if (ret == -EAGAIN) { -+ /* Save state to try again when there's -+ * write space on the socket -+ */ -+ txm->sent = sent; -+ txm->frag_offset = frag_offset; -+ txm->fragidx = fragidx; -+ txm->frag_skb = skb; -+ -+ ret = 0; -+ goto out; -+ } -+ -+ /* Hard failure in sending message, abort this -+ * psock since it has lost framing -+ * synchonization and retry sending the -+ * message from the beginning. -+ */ -+ kcm_abort_tx_psock(psock, ret ? -ret : EPIPE, -+ true); -+ unreserve_psock(kcm); -+ -+ txm->sent = 0; -+ ret = 0; -+ -+ goto try_again; -+ } -+ -+ sent += ret; -+ frag_offset += ret; -+ if (frag_offset < frag->size) { -+ /* Not finished with this frag */ -+ goto do_frag; -+ } -+ } -+ -+ if (skb == head) { -+ if (skb_has_frag_list(skb)) { -+ skb = skb_shinfo(skb)->frag_list; -+ goto do_frag_list; -+ } -+ } else if (skb->next) { -+ skb = skb->next; -+ goto do_frag_list; -+ } -+ -+ /* Successfully sent the whole packet, account for it. */ -+ skb_dequeue(&sk->sk_write_queue); -+ kfree_skb(head); -+ sk->sk_wmem_queued -= sent; -+ total_sent += sent; -+ } while ((head = skb_peek(&sk->sk_write_queue))); -+out: -+ if (!head) { -+ /* Done with all queued messages. */ -+ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); -+ unreserve_psock(kcm); -+ } -+ -+ /* Check if write space is available */ -+ sk->sk_write_space(sk); -+ -+ return total_sent ? : ret; -+} -+ -+static void kcm_tx_work(struct work_struct *w) -+{ -+ struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work); -+ struct sock *sk = &kcm->sk; -+ int err; -+ -+ lock_sock(sk); -+ -+ /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx -+ * aborts -+ */ -+ err = kcm_write_msgs(kcm); -+ if (err < 0) { -+ /* Hard failure in write, report error on KCM socket */ -+ pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err); -+ report_csk_error(&kcm->sk, -err); -+ goto out; -+ } -+ -+ /* Primarily for SOCK_SEQPACKET sockets */ -+ if (likely(sk->sk_socket) && -+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { -+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -+ sk->sk_write_space(sk); -+ } -+ -+out: -+ release_sock(sk); -+} -+ -+static void kcm_push(struct kcm_sock *kcm) -+{ -+ if (kcm->tx_wait_more) -+ kcm_write_msgs(kcm); -+} -+ -+static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -+{ -+ struct sock *sk = sock->sk; -+ struct kcm_sock *kcm = kcm_sk(sk); -+ struct sk_buff *skb = NULL, *head = NULL; -+ size_t copy, copied = 0; -+ long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -+ int eor = (sock->type == SOCK_DGRAM) ? -+ !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); -+ int err = -EPIPE; -+ -+ lock_sock(sk); -+ -+ /* Per tcp_sendmsg this should be in poll */ -+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); -+ -+ if (sk->sk_err) -+ goto out_error; -+ -+ if (kcm->seq_skb) { -+ /* Previously opened message */ -+ head = kcm->seq_skb; -+ skb = kcm_tx_msg(head)->last_skb; -+ goto start; -+ } -+ -+ /* Call the sk_stream functions to manage the sndbuf mem. */ -+ if (!sk_stream_memory_free(sk)) { -+ kcm_push(kcm); -+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ } -+ -+ /* New message, alloc head skb */ -+ head = alloc_skb(0, sk->sk_allocation); -+ while (!head) { -+ kcm_push(kcm); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ -+ head = alloc_skb(0, sk->sk_allocation); -+ } -+ -+ skb = head; -+ -+ /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling -+ * csum_and_copy_from_iter from skb_do_copy_data_nocache. -+ */ -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ -+start: -+ while (msg_data_left(msg)) { -+ bool merge = true; -+ int i = skb_shinfo(skb)->nr_frags; -+ struct page_frag *pfrag = sk_page_frag(sk); -+ -+ if (!sk_page_frag_refill(sk, pfrag)) -+ goto wait_for_memory; -+ -+ if (!skb_can_coalesce(skb, i, pfrag->page, -+ pfrag->offset)) { -+ if (i == MAX_SKB_FRAGS) { -+ struct sk_buff *tskb; -+ -+ tskb = alloc_skb(0, sk->sk_allocation); -+ if (!tskb) -+ goto wait_for_memory; -+ -+ if (head == skb) -+ skb_shinfo(head)->frag_list = tskb; -+ else -+ skb->next = tskb; -+ -+ skb = tskb; -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ continue; -+ } -+ merge = false; -+ } -+ -+ copy = min_t(int, msg_data_left(msg), -+ pfrag->size - pfrag->offset); -+ -+ if (!sk_wmem_schedule(sk, copy)) -+ goto wait_for_memory; -+ -+ err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, -+ pfrag->page, -+ pfrag->offset, -+ copy); -+ if (err) -+ goto out_error; -+ -+ /* Update the skb. */ -+ if (merge) { -+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); -+ } else { -+ skb_fill_page_desc(skb, i, pfrag->page, -+ pfrag->offset, copy); -+ get_page(pfrag->page); -+ } -+ -+ pfrag->offset += copy; -+ copied += copy; -+ if (head != skb) { -+ head->len += copy; -+ head->data_len += copy; -+ } -+ -+ continue; -+ -+wait_for_memory: -+ kcm_push(kcm); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ } -+ -+ if (eor) { -+ bool not_busy = skb_queue_empty(&sk->sk_write_queue); -+ -+ /* Message complete, queue it on send buffer */ -+ __skb_queue_tail(&sk->sk_write_queue, head); -+ kcm->seq_skb = NULL; -+ -+ if (msg->msg_flags & MSG_BATCH) { -+ kcm->tx_wait_more = true; -+ } else if (kcm->tx_wait_more || not_busy) { -+ err = kcm_write_msgs(kcm); -+ if (err < 0) { -+ /* We got a hard error in write_msgs but have -+ * already queued this message. Report an error -+ * in the socket, but don't affect return value -+ * from sendmsg -+ */ -+ pr_warn("KCM: Hard failure on kcm_write_msgs\n"); -+ report_csk_error(&kcm->sk, -err); -+ } -+ } -+ } else { -+ /* Message not complete, save state */ -+partial_message: -+ kcm->seq_skb = head; -+ kcm_tx_msg(head)->last_skb = skb; -+ } -+ -+ release_sock(sk); -+ return copied; -+ -+out_error: -+ kcm_push(kcm); -+ -+ if (copied && sock->type == SOCK_SEQPACKET) { -+ /* Wrote some bytes before encountering an -+ * error, return partial success. -+ */ -+ goto partial_message; -+ } -+ -+ if (head != kcm->seq_skb) -+ kfree_skb(head); -+ -+ err = sk_stream_error(sk, msg->msg_flags, err); -+ -+ /* make sure we wake any epoll edge trigger waiter */ -+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) -+ sk->sk_write_space(sk); -+ -+ release_sock(sk); -+ return err; -+} -+ -+static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, -+ long timeo, int *err) -+{ -+ struct sk_buff *skb; -+ -+ while (!(skb = skb_peek(&sk->sk_receive_queue))) { -+ if (sk->sk_err) { -+ *err = sock_error(sk); -+ return NULL; -+ } -+ -+ if (sock_flag(sk, SOCK_DONE)) -+ return NULL; -+ -+ if ((flags & MSG_DONTWAIT) || !timeo) { -+ *err = -EAGAIN; -+ return NULL; -+ } -+ -+ sk_wait_data(sk, &timeo, NULL); -+ -+ /* Handle signals */ -+ if (signal_pending(current)) { -+ *err = sock_intr_errno(timeo); -+ return NULL; -+ } -+ } -+ -+ return skb; -+} -+ -+static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, -+ size_t len, int flags) -+{ -+ struct sock *sk = sock->sk; -+ int err = 0; -+ long timeo; -+ struct kcm_rx_msg *rxm; -+ int copied = 0; -+ struct sk_buff *skb; -+ -+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -+ -+ lock_sock(sk); -+ -+ skb = kcm_wait_data(sk, flags, timeo, &err); -+ if (!skb) -+ goto out; -+ -+ /* Okay, have a message on the receive queue */ -+ -+ rxm = kcm_rx_msg(skb); -+ -+ if (len > rxm->full_len) -+ len = rxm->full_len; -+ -+ err = skb_copy_datagram_msg(skb, rxm->offset, msg, len); -+ if (err < 0) -+ goto out; -+ -+ copied = len; -+ if (likely(!(flags & MSG_PEEK))) { -+ if (copied < rxm->full_len) { -+ if (sock->type == SOCK_DGRAM) { -+ /* Truncated message */ -+ msg->msg_flags |= MSG_TRUNC; -+ goto msg_finished; -+ } -+ rxm->offset += copied; -+ rxm->full_len -= copied; -+ } else { -+msg_finished: -+ /* Finished with message */ -+ msg->msg_flags |= MSG_EOR; -+ skb_unlink(skb, &sk->sk_receive_queue); -+ kfree_skb(skb); -+ } -+ } -+ -+out: -+ release_sock(sk); -+ -+ return copied ? : err; -+} -+ -+/* kcm sock lock held */ -+static void kcm_recv_disable(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ -+ if (kcm->rx_disabled) -+ return; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ kcm->rx_disabled = 1; -+ -+ /* If a psock is reserved we'll do cleanup in unreserve */ -+ if (!kcm->rx_psock) { -+ if (kcm->rx_wait) { -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ } -+ -+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); -+ } -+ -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+/* kcm sock lock held */ -+static void kcm_recv_enable(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ -+ if (!kcm->rx_disabled) -+ return; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ kcm->rx_disabled = 0; -+ kcm_rcv_ready(kcm); -+ -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+static int kcm_setsockopt(struct socket *sock, int level, int optname, -+ char __user *optval, unsigned int optlen) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ int val, valbool; -+ int err = 0; -+ -+ if (level != SOL_KCM) -+ return -ENOPROTOOPT; -+ -+ if (optlen < sizeof(int)) -+ return -EINVAL; -+ -+ if (get_user(val, (int __user *)optval)) -+ return -EINVAL; -+ -+ valbool = val ? 1 : 0; -+ -+ switch (optname) { -+ case KCM_RECV_DISABLE: -+ lock_sock(&kcm->sk); -+ if (valbool) -+ kcm_recv_disable(kcm); -+ else -+ kcm_recv_enable(kcm); -+ release_sock(&kcm->sk); -+ break; -+ default: -+ err = -ENOPROTOOPT; -+ } -+ -+ return err; -+} -+ -+static int kcm_getsockopt(struct socket *sock, int level, int optname, -+ char __user *optval, int __user *optlen) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ int val, len; -+ -+ if (level != SOL_KCM) -+ return -ENOPROTOOPT; -+ -+ if (get_user(len, optlen)) -+ return -EFAULT; -+ -+ len = min_t(unsigned int, len, sizeof(int)); -+ if (len < 0) -+ return -EINVAL; -+ -+ switch (optname) { -+ case KCM_RECV_DISABLE: -+ val = kcm->rx_disabled; -+ break; -+ default: -+ return -ENOPROTOOPT; -+ } -+ -+ if (put_user(len, optlen)) -+ return -EFAULT; -+ if (copy_to_user(optval, &val, len)) -+ return -EFAULT; -+ return 0; -+} -+ -+static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) -+{ -+ struct kcm_sock *tkcm; -+ struct list_head *head; -+ int index = 0; -+ -+ /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so -+ * we set sk_state, otherwise epoll_wait always returns right away with -+ * POLLHUP -+ */ -+ kcm->sk.sk_state = TCP_ESTABLISHED; -+ -+ /* Add to mux's kcm sockets list */ -+ kcm->mux = mux; -+ spin_lock_bh(&mux->lock); -+ -+ head = &mux->kcm_socks; -+ list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) { -+ if (tkcm->index != index) -+ break; -+ head = &tkcm->kcm_sock_list; -+ index++; -+ } -+ -+ list_add(&kcm->kcm_sock_list, head); -+ kcm->index = index; -+ -+ mux->kcm_socks_cnt++; -+ spin_unlock_bh(&mux->lock); -+ -+ INIT_WORK(&kcm->tx_work, kcm_tx_work); -+ -+ spin_lock_bh(&mux->rx_lock); -+ kcm_rcv_ready(kcm); -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+static int kcm_attach(struct socket *sock, struct socket *csock, -+ struct bpf_prog *prog) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ struct kcm_mux *mux = kcm->mux; -+ struct sock *csk; -+ struct kcm_psock *psock = NULL, *tpsock; -+ struct list_head *head; -+ int index = 0; -+ -+ if (csock->ops->family != PF_INET && -+ csock->ops->family != PF_INET6) -+ return -EINVAL; -+ -+ csk = csock->sk; -+ if (!csk) -+ return -EINVAL; -+ -+ /* Only support TCP for now */ -+ if (csk->sk_protocol != IPPROTO_TCP) -+ return -EINVAL; -+ -+ psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); -+ if (!psock) -+ return -ENOMEM; -+ -+ psock->mux = mux; -+ psock->sk = csk; -+ psock->bpf_prog = prog; -+ INIT_WORK(&psock->rx_work, psock_rx_work); -+ INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); -+ -+ sock_hold(csk); -+ -+ write_lock_bh(&csk->sk_callback_lock); -+ psock->save_data_ready = csk->sk_data_ready; -+ psock->save_write_space = csk->sk_write_space; -+ psock->save_state_change = csk->sk_state_change; -+ csk->sk_user_data = psock; -+ csk->sk_data_ready = psock_tcp_data_ready; -+ csk->sk_write_space = psock_tcp_write_space; -+ csk->sk_state_change = psock_tcp_state_change; -+ write_unlock_bh(&csk->sk_callback_lock); -+ -+ /* Finished initialization, now add the psock to the MUX. */ -+ spin_lock_bh(&mux->lock); -+ head = &mux->psocks; -+ list_for_each_entry(tpsock, &mux->psocks, psock_list) { -+ if (tpsock->index != index) -+ break; -+ head = &tpsock->psock_list; -+ index++; -+ } -+ -+ list_add(&psock->psock_list, head); -+ psock->index = index; -+ -+ mux->psocks_cnt++; -+ psock_now_avail(psock); -+ spin_unlock_bh(&mux->lock); -+ -+ /* Schedule RX work in case there are already bytes queued */ -+ queue_work(kcm_wq, &psock->rx_work); -+ -+ return 0; -+} -+ -+static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) -+{ -+ struct socket *csock; -+ struct bpf_prog *prog; -+ int err; -+ -+ csock = sockfd_lookup(info->fd, &err); -+ if (!csock) -+ return -ENOENT; -+ -+ prog = bpf_prog_get(info->bpf_fd); -+ if (IS_ERR(prog)) { -+ err = PTR_ERR(prog); -+ goto out; -+ } -+ -+ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { -+ bpf_prog_put(prog); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ err = kcm_attach(sock, csock, prog); -+ if (err) { -+ bpf_prog_put(prog); -+ goto out; -+ } -+ -+ /* Keep reference on file also */ -+ -+ return 0; -+out: -+ fput(csock->file); -+ return err; -+} -+ -+static void kcm_unattach(struct kcm_psock *psock) -+{ -+ struct sock *csk = psock->sk; -+ struct kcm_mux *mux = psock->mux; -+ -+ /* Stop getting callbacks from TCP socket. After this there should -+ * be no way to reserve a kcm for this psock. -+ */ -+ write_lock_bh(&csk->sk_callback_lock); -+ csk->sk_user_data = NULL; -+ csk->sk_data_ready = psock->save_data_ready; -+ csk->sk_write_space = psock->save_write_space; -+ csk->sk_state_change = psock->save_state_change; -+ psock->rx_stopped = 1; -+ -+ if (WARN_ON(psock->rx_kcm)) { -+ write_unlock_bh(&csk->sk_callback_lock); -+ return; -+ } -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ /* Stop receiver activities. After this point psock should not be -+ * able to get onto ready list either through callbacks or work. -+ */ -+ if (psock->ready_rx_msg) { -+ list_del(&psock->psock_ready_list); -+ kfree_skb(psock->ready_rx_msg); -+ psock->ready_rx_msg = NULL; -+ } -+ -+ spin_unlock_bh(&mux->rx_lock); -+ -+ write_unlock_bh(&csk->sk_callback_lock); -+ -+ cancel_work_sync(&psock->rx_work); -+ cancel_delayed_work_sync(&psock->rx_delayed_work); -+ -+ bpf_prog_put(psock->bpf_prog); -+ -+ kfree_skb(psock->rx_skb_head); -+ psock->rx_skb_head = NULL; -+ -+ spin_lock_bh(&mux->lock); -+ -+ if (psock->tx_kcm) { -+ /* psock was reserved. Just mark it finished and we will clean -+ * up in the kcm paths, we need kcm lock which can not be -+ * acquired here. -+ */ -+ spin_unlock_bh(&mux->lock); -+ -+ /* We are unattaching a socket that is reserved. Abort the -+ * socket since we may be out of sync in sending on it. We need -+ * to do this without the mux lock. -+ */ -+ kcm_abort_tx_psock(psock, EPIPE, false); -+ -+ spin_lock_bh(&mux->lock); -+ if (!psock->tx_kcm) { -+ /* psock now unreserved in window mux was unlocked */ -+ goto no_reserved; -+ } -+ psock->done = 1; -+ -+ /* Commit done before queuing work to process it */ -+ smp_mb(); -+ -+ /* Queue tx work to make sure psock->done is handled */ -+ queue_work(kcm_wq, &psock->tx_kcm->tx_work); -+ spin_unlock_bh(&mux->lock); -+ } else { -+no_reserved: -+ if (!psock->tx_stopped) -+ list_del(&psock->psock_avail_list); -+ list_del(&psock->psock_list); -+ mux->psocks_cnt--; -+ spin_unlock_bh(&mux->lock); -+ -+ sock_put(csk); -+ fput(csk->sk_socket->file); -+ kmem_cache_free(kcm_psockp, psock); -+ } -+} -+ -+static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ struct socket *csock; -+ struct sock *csk; -+ int err; -+ -+ csock = sockfd_lookup(info->fd, &err); -+ if (!csock) -+ return -ENOENT; -+ -+ csk = csock->sk; -+ if (!csk) { -+ err = -EINVAL; -+ goto out; -+ } -+ -+ err = -ENOENT; -+ -+ spin_lock_bh(&mux->lock); -+ -+ list_for_each_entry(psock, &mux->psocks, psock_list) { -+ if (psock->sk != csk) -+ continue; -+ -+ /* Found the matching psock */ -+ -+ if (psock->unattaching || WARN_ON(psock->done)) { -+ err = -EALREADY; -+ break; -+ } -+ -+ psock->unattaching = 1; -+ -+ spin_unlock_bh(&mux->lock); -+ -+ kcm_unattach(psock); -+ -+ err = 0; -+ goto out; -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+out: -+ fput(csock->file); -+ return err; -+} -+ -+static struct proto kcm_proto = { -+ .name = "KCM", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct kcm_sock), -+}; -+ -+/* Clone a kcm socket. */ -+static int kcm_clone(struct socket *osock, struct kcm_clone *info, -+ struct socket **newsockp) -+{ -+ struct socket *newsock; -+ struct sock *newsk; -+ struct file *newfile; -+ int err, newfd; -+ -+ err = -ENFILE; -+ newsock = sock_alloc(); -+ if (!newsock) -+ goto out; -+ -+ newsock->type = osock->type; -+ newsock->ops = osock->ops; -+ -+ __module_get(newsock->ops->owner); -+ -+ newfd = get_unused_fd_flags(0); -+ if (unlikely(newfd < 0)) { -+ err = newfd; -+ goto out_fd_fail; -+ } -+ -+ newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); -+ if (unlikely(IS_ERR(newfile))) { -+ err = PTR_ERR(newfile); -+ goto out_sock_alloc_fail; -+ } -+ -+ newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, -+ &kcm_proto, true); -+ if (!newsk) { -+ err = -ENOMEM; -+ goto out_sk_alloc_fail; -+ } -+ -+ sock_init_data(newsock, newsk); -+ init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); -+ -+ fd_install(newfd, newfile); -+ *newsockp = newsock; -+ info->fd = newfd; -+ -+ return 0; -+ -+out_sk_alloc_fail: -+ fput(newfile); -+out_sock_alloc_fail: -+ put_unused_fd(newfd); -+out_fd_fail: -+ sock_release(newsock); -+out: -+ return err; -+} -+ -+static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -+{ -+ int err; -+ -+ switch (cmd) { -+ case SIOCKCMATTACH: { -+ struct kcm_attach info; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) -+ err = -EFAULT; -+ -+ err = kcm_attach_ioctl(sock, &info); -+ -+ break; -+ } -+ case SIOCKCMUNATTACH: { -+ struct kcm_unattach info; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) -+ err = -EFAULT; -+ -+ err = kcm_unattach_ioctl(sock, &info); -+ -+ break; -+ } -+ case SIOCKCMCLONE: { -+ struct kcm_clone info; -+ struct socket *newsock = NULL; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) -+ err = -EFAULT; -+ -+ err = kcm_clone(sock, &info, &newsock); -+ -+ if (!err) { -+ if (copy_to_user((void __user *)arg, &info, -+ sizeof(info))) { -+ err = -EFAULT; -+ sock_release(newsock); -+ } -+ } -+ -+ break; -+ } -+ default: -+ err = -ENOIOCTLCMD; -+ break; -+ } -+ -+ return err; -+} -+ -+static void free_mux(struct rcu_head *rcu) -+{ -+ struct kcm_mux *mux = container_of(rcu, -+ struct kcm_mux, rcu); -+ -+ kmem_cache_free(kcm_muxp, mux); -+} -+ -+static void release_mux(struct kcm_mux *mux) -+{ -+ struct kcm_net *knet = mux->knet; -+ struct kcm_psock *psock, *tmp_psock; -+ -+ /* Release psocks */ -+ list_for_each_entry_safe(psock, tmp_psock, -+ &mux->psocks, psock_list) { -+ if (!WARN_ON(psock->unattaching)) -+ kcm_unattach(psock); -+ } -+ -+ if (WARN_ON(mux->psocks_cnt)) -+ return; -+ -+ __skb_queue_purge(&mux->rx_hold_queue); -+ -+ mutex_lock(&knet->mutex); -+ list_del_rcu(&mux->kcm_mux_list); -+ knet->count--; -+ mutex_unlock(&knet->mutex); -+ -+ call_rcu(&mux->rcu, free_mux); -+} -+ -+static void kcm_done(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct sock *sk = &kcm->sk; -+ int socks_cnt; -+ -+ spin_lock_bh(&mux->rx_lock); -+ if (kcm->rx_psock) { -+ /* Cleanup in unreserve_rx_kcm */ -+ WARN_ON(kcm->done); -+ kcm->rx_disabled = 1; -+ kcm->done = 1; -+ spin_unlock_bh(&mux->rx_lock); -+ return; -+ } -+ -+ if (kcm->rx_wait) { -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ } -+ /* Move any pending receive messages to other kcm sockets */ -+ requeue_rx_msgs(mux, &sk->sk_receive_queue); -+ -+ spin_unlock_bh(&mux->rx_lock); -+ -+ if (WARN_ON(sk_rmem_alloc_get(sk))) -+ return; -+ -+ /* Detach from MUX */ -+ spin_lock_bh(&mux->lock); -+ -+ list_del(&kcm->kcm_sock_list); -+ mux->kcm_socks_cnt--; -+ socks_cnt = mux->kcm_socks_cnt; -+ -+ spin_unlock_bh(&mux->lock); -+ -+ if (!socks_cnt) { -+ /* We are done with the mux now. */ -+ release_mux(mux); -+ } -+ -+ WARN_ON(kcm->rx_wait); -+ -+ sock_put(&kcm->sk); -+} -+ -+/* Called by kcm_release to close a KCM socket. -+ * If this is the last KCM socket on the MUX, destroy the MUX. -+ */ -+static int kcm_release(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct kcm_sock *kcm; -+ struct kcm_mux *mux; -+ struct kcm_psock *psock; -+ -+ if (!sk) -+ return 0; -+ -+ kcm = kcm_sk(sk); -+ mux = kcm->mux; -+ -+ sock_orphan(sk); -+ kfree_skb(kcm->seq_skb); -+ -+ lock_sock(sk); -+ /* Purge queue under lock to avoid race condition with tx_work trying -+ * to act when queue is nonempty. If tx_work runs after this point -+ * it will just return. -+ */ -+ __skb_queue_purge(&sk->sk_write_queue); -+ release_sock(sk); -+ -+ spin_lock_bh(&mux->lock); -+ if (kcm->tx_wait) { -+ /* Take of tx_wait list, after this point there should be no way -+ * that a psock will be assigned to this kcm. -+ */ -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ } -+ spin_unlock_bh(&mux->lock); -+ -+ /* Cancel work. After this point there should be no outside references -+ * to the kcm socket. -+ */ -+ cancel_work_sync(&kcm->tx_work); -+ -+ lock_sock(sk); -+ psock = kcm->tx_psock; -+ if (psock) { -+ /* A psock was reserved, so we need to kill it since it -+ * may already have some bytes queued from a message. We -+ * need to do this after removing kcm from tx_wait list. -+ */ -+ kcm_abort_tx_psock(psock, EPIPE, false); -+ unreserve_psock(kcm); -+ } -+ release_sock(sk); -+ -+ WARN_ON(kcm->tx_wait); -+ WARN_ON(kcm->tx_psock); -+ -+ sock->sk = NULL; -+ -+ kcm_done(kcm); -+ -+ return 0; -+} -+ -+static const struct proto_ops kcm_ops = { -+ .family = PF_KCM, -+ .owner = THIS_MODULE, -+ .release = kcm_release, -+ .bind = sock_no_bind, -+ .connect = sock_no_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = sock_no_accept, -+ .getname = sock_no_getname, -+ .poll = datagram_poll, -+ .ioctl = kcm_ioctl, -+ .listen = sock_no_listen, -+ .shutdown = sock_no_shutdown, -+ .setsockopt = kcm_setsockopt, -+ .getsockopt = kcm_getsockopt, -+ .sendmsg = kcm_sendmsg, -+ .recvmsg = kcm_recvmsg, -+ .mmap = sock_no_mmap, -+ .sendpage = sock_no_sendpage, -+}; -+ -+/* Create proto operation for kcm sockets */ -+static int kcm_create(struct net *net, struct socket *sock, -+ int protocol, int kern) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ struct sock *sk; -+ struct kcm_mux *mux; -+ -+ switch (sock->type) { -+ case SOCK_DGRAM: -+ case SOCK_SEQPACKET: -+ sock->ops = &kcm_ops; -+ break; -+ default: -+ return -ESOCKTNOSUPPORT; -+ } -+ -+ if (protocol != KCMPROTO_CONNECTED) -+ return -EPROTONOSUPPORT; -+ -+ sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern); -+ if (!sk) -+ return -ENOMEM; -+ -+ /* Allocate a kcm mux, shared between KCM sockets */ -+ mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL); -+ if (!mux) { -+ sk_free(sk); -+ return -ENOMEM; -+ } -+ -+ spin_lock_init(&mux->lock); -+ spin_lock_init(&mux->rx_lock); -+ INIT_LIST_HEAD(&mux->kcm_socks); -+ INIT_LIST_HEAD(&mux->kcm_rx_waiters); -+ INIT_LIST_HEAD(&mux->kcm_tx_waiters); -+ -+ INIT_LIST_HEAD(&mux->psocks); -+ INIT_LIST_HEAD(&mux->psocks_ready); -+ INIT_LIST_HEAD(&mux->psocks_avail); -+ -+ mux->knet = knet; -+ -+ /* Add new MUX to list */ -+ mutex_lock(&knet->mutex); -+ list_add_rcu(&mux->kcm_mux_list, &knet->mux_list); -+ knet->count++; -+ mutex_unlock(&knet->mutex); -+ -+ skb_queue_head_init(&mux->rx_hold_queue); -+ -+ /* Init KCM socket */ -+ sock_init_data(sock, sk); -+ init_kcm_sock(kcm_sk(sk), mux); -+ -+ return 0; -+} -+ -+static struct net_proto_family kcm_family_ops = { -+ .family = PF_KCM, -+ .create = kcm_create, -+ .owner = THIS_MODULE, -+}; -+ -+static __net_init int kcm_init_net(struct net *net) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ -+ INIT_LIST_HEAD_RCU(&knet->mux_list); -+ mutex_init(&knet->mutex); -+ -+ return 0; -+} -+ -+static __net_exit void kcm_exit_net(struct net *net) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ -+ /* All KCM sockets should be closed at this point, which should mean -+ * that all multiplexors and psocks have been destroyed. -+ */ -+ WARN_ON(!list_empty(&knet->mux_list)); -+} -+ -+static struct pernet_operations kcm_net_ops = { -+ .init = kcm_init_net, -+ .exit = kcm_exit_net, -+ .id = &kcm_net_id, -+ .size = sizeof(struct kcm_net), -+}; -+ -+static int __init kcm_init(void) -+{ -+ int err = -ENOMEM; -+ -+ kcm_muxp = kmem_cache_create("kcm_mux_cache", -+ sizeof(struct kcm_mux), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -+ if (!kcm_muxp) -+ goto fail; -+ -+ kcm_psockp = kmem_cache_create("kcm_psock_cache", -+ sizeof(struct kcm_psock), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -+ if (!kcm_psockp) -+ goto fail; -+ -+ kcm_wq = create_singlethread_workqueue("kkcmd"); -+ if (!kcm_wq) -+ goto fail; -+ -+ err = proto_register(&kcm_proto, 1); -+ if (err) -+ goto fail; -+ -+ err = sock_register(&kcm_family_ops); -+ if (err) -+ goto sock_register_fail; -+ -+ err = register_pernet_device(&kcm_net_ops); -+ if (err) -+ goto net_ops_fail; -+ -+ return 0; -+ -+net_ops_fail: -+ sock_unregister(PF_KCM); -+ -+sock_register_fail: -+ proto_unregister(&kcm_proto); -+ -+fail: -+ kmem_cache_destroy(kcm_muxp); -+ kmem_cache_destroy(kcm_psockp); -+ -+ if (kcm_wq) -+ destroy_workqueue(kcm_wq); -+ -+ return err; -+} -+ -+static void __exit kcm_exit(void) -+{ -+ unregister_pernet_device(&kcm_net_ops); -+ sock_unregister(PF_KCM); -+ proto_unregister(&kcm_proto); -+ destroy_workqueue(kcm_wq); -+ -+ kmem_cache_destroy(kcm_muxp); -+ kmem_cache_destroy(kcm_psockp); -+} -+ -+module_init(kcm_init); -+module_exit(kcm_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS_NETPROTO(PF_KCM); --- -2.10.0 - diff --git a/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch b/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch deleted file mode 100644 index 3e97c57c5..000000000 --- a/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 2f2e6e31ed1b82f1658139e0abe7155ee3755da1 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Mar 2016 02:51:09 -0700 -Subject: [PATCH 37/42] net: add the AF_KCM entries to family name tables - -This is for the recent kcm driver, which introduces AF_KCM(41) in -b7ac4eb(kcm: Kernel Connection Multiplexor module). - -Signed-off-by: Dexuan Cui -Cc: Signed-off-by: Tom Herbert -Origin: https://patchwork.ozlabs.org/patch/600006 ---- - net/core/sock.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/net/core/sock.c b/net/core/sock.c -index 0d91f7d..925def4 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -263,7 +263,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , -- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" -+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , -+ "sk_lock-AF_MAX" - }; - static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , -@@ -279,7 +280,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , -- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" -+ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , -+ "slock-AF_MAX" - }; - static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , -@@ -295,7 +297,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , -- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" -+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , -+ "clock-AF_MAX" - }; - - /* --- -2.10.0 - diff --git a/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch b/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch deleted file mode 100644 index 2d3461c20..000000000 --- a/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch +++ /dev/null @@ -1,1307 +0,0 @@ -From 9e184cb0991a4cc08cd7688f2d4e23740c60e382 Mon Sep 17 00:00:00 2001 -From: Courtney Cavin -Date: Wed, 27 Apr 2016 12:13:03 -0700 -Subject: [PATCH 38/42] net: Add Qualcomm IPC router - -Add an implementation of Qualcomm's IPC router protocol, used to -communicate with service providing remote processors. - -Signed-off-by: Courtney Cavin -Signed-off-by: Bjorn Andersson -[bjorn: Cope with 0 being a valid node id and implement RTM_NEWADDR] -Signed-off-by: Bjorn Andersson -Origin: https://patchwork.ozlabs.org/patch/615774/ ---- - include/linux/socket.h | 4 +- - include/uapi/linux/qrtr.h | 12 + - net/Kconfig | 1 + - net/Makefile | 1 + - net/qrtr/Kconfig | 24 ++ - net/qrtr/Makefile | 2 + - net/qrtr/qrtr.c | 1007 +++++++++++++++++++++++++++++++++++++++++++++ - net/qrtr/qrtr.h | 31 ++ - net/qrtr/smd.c | 117 ++++++ - 9 files changed, 1198 insertions(+), 1 deletion(-) - create mode 100644 include/uapi/linux/qrtr.h - create mode 100644 net/qrtr/Kconfig - create mode 100644 net/qrtr/Makefile - create mode 100644 net/qrtr/qrtr.c - create mode 100644 net/qrtr/qrtr.h - create mode 100644 net/qrtr/smd.c - -diff --git a/include/linux/socket.h b/include/linux/socket.h -index 4e1ea53..dbd81e7 100644 ---- a/include/linux/socket.h -+++ b/include/linux/socket.h -@@ -201,8 +201,9 @@ struct ucred { - #define AF_NFC 39 /* NFC sockets */ - #define AF_VSOCK 40 /* vSockets */ - #define AF_KCM 41 /* Kernel Connection Multiplexor*/ -+#define AF_QIPCRTR 42 /* Qualcomm IPC Router */ - --#define AF_MAX 42 /* For now.. */ -+#define AF_MAX 43 /* For now.. */ - - /* Protocol families, same as address families. */ - #define PF_UNSPEC AF_UNSPEC -@@ -249,6 +250,7 @@ struct ucred { - #define PF_NFC AF_NFC - #define PF_VSOCK AF_VSOCK - #define PF_KCM AF_KCM -+#define PF_QIPCRTR AF_QIPCRTR - #define PF_MAX AF_MAX - - /* Maximum queue length specifiable by listen. */ -diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h -new file mode 100644 -index 0000000..66c0748 ---- /dev/null -+++ b/include/uapi/linux/qrtr.h -@@ -0,0 +1,12 @@ -+#ifndef _LINUX_QRTR_H -+#define _LINUX_QRTR_H -+ -+#include -+ -+struct sockaddr_qrtr { -+ __kernel_sa_family_t sq_family; -+ __u32 sq_node; -+ __u32 sq_port; -+}; -+ -+#endif /* _LINUX_QRTR_H */ -diff --git a/net/Kconfig b/net/Kconfig -index b8439e6..1c9fda1 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -233,6 +233,7 @@ source "net/mpls/Kconfig" - source "net/hsr/Kconfig" - source "net/switchdev/Kconfig" - source "net/l3mdev/Kconfig" -+source "net/qrtr/Kconfig" - - config RPS - bool -diff --git a/net/Makefile b/net/Makefile -index 81d1411..bdd1455 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -78,3 +78,4 @@ endif - ifneq ($(CONFIG_NET_L3_MASTER_DEV),) - obj-y += l3mdev/ - endif -+obj-$(CONFIG_QRTR) += qrtr/ -diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig -new file mode 100644 -index 0000000..0c2619d ---- /dev/null -+++ b/net/qrtr/Kconfig -@@ -0,0 +1,24 @@ -+# Qualcomm IPC Router configuration -+# -+ -+config QRTR -+ bool "Qualcomm IPC Router support" -+ depends on ARCH_QCOM || COMPILE_TEST -+ ---help--- -+ Say Y if you intend to use Qualcomm IPC router protocol. The -+ protocol is used to communicate with services provided by other -+ hardware blocks in the system. -+ -+ In order to do service lookups, a userspace daemon is required to -+ maintain a service listing. -+ -+if QRTR -+ -+config QRTR_SMD -+ tristate "SMD IPC Router channels" -+ depends on QCOM_SMD || COMPILE_TEST -+ ---help--- -+ Say Y here to support SMD based ipcrouter channels. SMD is the -+ most common transport for IPC Router. -+ -+endif # QRTR -diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile -new file mode 100644 -index 0000000..e282a84 ---- /dev/null -+++ b/net/qrtr/Makefile -@@ -0,0 +1,2 @@ -+obj-y := qrtr.o -+obj-$(CONFIG_QRTR_SMD) += smd.o -diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c -new file mode 100644 -index 0000000..c985ecb ---- /dev/null -+++ b/net/qrtr/qrtr.c -@@ -0,0 +1,1007 @@ -+/* -+ * Copyright (c) 2015, Sony Mobile Communications Inc. -+ * Copyright (c) 2013, The Linux Foundation. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+#include -+#include -+#include -+#include /* For TIOCINQ/OUTQ */ -+ -+#include -+ -+#include "qrtr.h" -+ -+#define QRTR_PROTO_VER 1 -+ -+/* auto-bind range */ -+#define QRTR_MIN_EPH_SOCKET 0x4000 -+#define QRTR_MAX_EPH_SOCKET 0x7fff -+ -+enum qrtr_pkt_type { -+ QRTR_TYPE_DATA = 1, -+ QRTR_TYPE_HELLO = 2, -+ QRTR_TYPE_BYE = 3, -+ QRTR_TYPE_NEW_SERVER = 4, -+ QRTR_TYPE_DEL_SERVER = 5, -+ QRTR_TYPE_DEL_CLIENT = 6, -+ QRTR_TYPE_RESUME_TX = 7, -+ QRTR_TYPE_EXIT = 8, -+ QRTR_TYPE_PING = 9, -+}; -+ -+/** -+ * struct qrtr_hdr - (I|R)PCrouter packet header -+ * @version: protocol version -+ * @type: packet type; one of QRTR_TYPE_* -+ * @src_node_id: source node -+ * @src_port_id: source port -+ * @confirm_rx: boolean; whether a resume-tx packet should be send in reply -+ * @size: length of packet, excluding this header -+ * @dst_node_id: destination node -+ * @dst_port_id: destination port -+ */ -+struct qrtr_hdr { -+ __le32 version; -+ __le32 type; -+ __le32 src_node_id; -+ __le32 src_port_id; -+ __le32 confirm_rx; -+ __le32 size; -+ __le32 dst_node_id; -+ __le32 dst_port_id; -+} __packed; -+ -+#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) -+#define QRTR_NODE_BCAST ((unsigned int)-1) -+#define QRTR_PORT_CTRL ((unsigned int)-2) -+ -+struct qrtr_sock { -+ /* WARNING: sk must be the first member */ -+ struct sock sk; -+ struct sockaddr_qrtr us; -+ struct sockaddr_qrtr peer; -+}; -+ -+static inline struct qrtr_sock *qrtr_sk(struct sock *sk) -+{ -+ BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0); -+ return container_of(sk, struct qrtr_sock, sk); -+} -+ -+static unsigned int qrtr_local_nid = -1; -+ -+/* for node ids */ -+static RADIX_TREE(qrtr_nodes, GFP_KERNEL); -+/* broadcast list */ -+static LIST_HEAD(qrtr_all_nodes); -+/* lock for qrtr_nodes, qrtr_all_nodes and node reference */ -+static DEFINE_MUTEX(qrtr_node_lock); -+ -+/* local port allocation management */ -+static DEFINE_IDR(qrtr_ports); -+static DEFINE_MUTEX(qrtr_port_lock); -+ -+/** -+ * struct qrtr_node - endpoint node -+ * @ep_lock: lock for endpoint management and callbacks -+ * @ep: endpoint -+ * @ref: reference count for node -+ * @nid: node id -+ * @rx_queue: receive queue -+ * @work: scheduled work struct for recv work -+ * @item: list item for broadcast list -+ */ -+struct qrtr_node { -+ struct mutex ep_lock; -+ struct qrtr_endpoint *ep; -+ struct kref ref; -+ unsigned int nid; -+ -+ struct sk_buff_head rx_queue; -+ struct work_struct work; -+ struct list_head item; -+}; -+ -+/* Release node resources and free the node. -+ * -+ * Do not call directly, use qrtr_node_release. To be used with -+ * kref_put_mutex. As such, the node mutex is expected to be locked on call. -+ */ -+static void __qrtr_node_release(struct kref *kref) -+{ -+ struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); -+ -+ if (node->nid != QRTR_EP_NID_AUTO) -+ radix_tree_delete(&qrtr_nodes, node->nid); -+ -+ list_del(&node->item); -+ mutex_unlock(&qrtr_node_lock); -+ -+ skb_queue_purge(&node->rx_queue); -+ kfree(node); -+} -+ -+/* Increment reference to node. */ -+static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node) -+{ -+ if (node) -+ kref_get(&node->ref); -+ return node; -+} -+ -+/* Decrement reference to node and release as necessary. */ -+static void qrtr_node_release(struct qrtr_node *node) -+{ -+ if (!node) -+ return; -+ kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock); -+} -+ -+/* Pass an outgoing packet socket buffer to the endpoint driver. */ -+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ int rc = -ENODEV; -+ -+ mutex_lock(&node->ep_lock); -+ if (node->ep) -+ rc = node->ep->xmit(node->ep, skb); -+ else -+ kfree_skb(skb); -+ mutex_unlock(&node->ep_lock); -+ -+ return rc; -+} -+ -+/* Lookup node by id. -+ * -+ * callers must release with qrtr_node_release() -+ */ -+static struct qrtr_node *qrtr_node_lookup(unsigned int nid) -+{ -+ struct qrtr_node *node; -+ -+ mutex_lock(&qrtr_node_lock); -+ node = radix_tree_lookup(&qrtr_nodes, nid); -+ node = qrtr_node_acquire(node); -+ mutex_unlock(&qrtr_node_lock); -+ -+ return node; -+} -+ -+/* Assign node id to node. -+ * -+ * This is mostly useful for automatic node id assignment, based on -+ * the source id in the incoming packet. -+ */ -+static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) -+{ -+ if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO) -+ return; -+ -+ mutex_lock(&qrtr_node_lock); -+ radix_tree_insert(&qrtr_nodes, nid, node); -+ node->nid = nid; -+ mutex_unlock(&qrtr_node_lock); -+} -+ -+/** -+ * qrtr_endpoint_post() - post incoming data -+ * @ep: endpoint handle -+ * @data: data pointer -+ * @len: size of data in bytes -+ * -+ * Return: 0 on success; negative error code on failure -+ */ -+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) -+{ -+ struct qrtr_node *node = ep->node; -+ const struct qrtr_hdr *phdr = data; -+ struct sk_buff *skb; -+ unsigned int psize; -+ unsigned int size; -+ unsigned int type; -+ unsigned int ver; -+ unsigned int dst; -+ -+ if (len < QRTR_HDR_SIZE || len & 3) -+ return -EINVAL; -+ -+ ver = le32_to_cpu(phdr->version); -+ size = le32_to_cpu(phdr->size); -+ type = le32_to_cpu(phdr->type); -+ dst = le32_to_cpu(phdr->dst_port_id); -+ -+ psize = (size + 3) & ~3; -+ -+ if (ver != QRTR_PROTO_VER) -+ return -EINVAL; -+ -+ if (len != psize + QRTR_HDR_SIZE) -+ return -EINVAL; -+ -+ if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA) -+ return -EINVAL; -+ -+ skb = netdev_alloc_skb(NULL, len); -+ if (!skb) -+ return -ENOMEM; -+ -+ skb_reset_transport_header(skb); -+ memcpy(skb_put(skb, len), data, len); -+ -+ skb_queue_tail(&node->rx_queue, skb); -+ schedule_work(&node->work); -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_post); -+ -+/* Allocate and construct a resume-tx packet. */ -+static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, -+ u32 dst_node, u32 port) -+{ -+ const int pkt_len = 20; -+ struct qrtr_hdr *hdr; -+ struct sk_buff *skb; -+ u32 *buf; -+ -+ skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); -+ if (!skb) -+ return NULL; -+ skb_reset_transport_header(skb); -+ -+ hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE); -+ hdr->version = cpu_to_le32(QRTR_PROTO_VER); -+ hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX); -+ hdr->src_node_id = cpu_to_le32(src_node); -+ hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); -+ hdr->confirm_rx = cpu_to_le32(0); -+ hdr->size = cpu_to_le32(pkt_len); -+ hdr->dst_node_id = cpu_to_le32(dst_node); -+ hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); -+ -+ buf = (u32 *)skb_put(skb, pkt_len); -+ memset(buf, 0, pkt_len); -+ buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); -+ buf[1] = cpu_to_le32(src_node); -+ buf[2] = cpu_to_le32(port); -+ -+ return skb; -+} -+ -+static struct qrtr_sock *qrtr_port_lookup(int port); -+static void qrtr_port_put(struct qrtr_sock *ipc); -+ -+/* Handle and route a received packet. -+ * -+ * This will auto-reply with resume-tx packet as necessary. -+ */ -+static void qrtr_node_rx_work(struct work_struct *work) -+{ -+ struct qrtr_node *node = container_of(work, struct qrtr_node, work); -+ struct sk_buff *skb; -+ -+ while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { -+ const struct qrtr_hdr *phdr; -+ u32 dst_node, dst_port; -+ struct qrtr_sock *ipc; -+ u32 src_node; -+ int confirm; -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ src_node = le32_to_cpu(phdr->src_node_id); -+ dst_node = le32_to_cpu(phdr->dst_node_id); -+ dst_port = le32_to_cpu(phdr->dst_port_id); -+ confirm = !!phdr->confirm_rx; -+ -+ qrtr_node_assign(node, src_node); -+ -+ ipc = qrtr_port_lookup(dst_port); -+ if (!ipc) { -+ kfree_skb(skb); -+ } else { -+ if (sock_queue_rcv_skb(&ipc->sk, skb)) -+ kfree_skb(skb); -+ -+ qrtr_port_put(ipc); -+ } -+ -+ if (confirm) { -+ skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); -+ if (!skb) -+ break; -+ if (qrtr_node_enqueue(node, skb)) -+ break; -+ } -+ } -+} -+ -+/** -+ * qrtr_endpoint_register() - register a new endpoint -+ * @ep: endpoint to register -+ * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment -+ * Return: 0 on success; negative error code on failure -+ * -+ * The specified endpoint must have the xmit function pointer set on call. -+ */ -+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) -+{ -+ struct qrtr_node *node; -+ -+ if (!ep || !ep->xmit) -+ return -EINVAL; -+ -+ node = kzalloc(sizeof(*node), GFP_KERNEL); -+ if (!node) -+ return -ENOMEM; -+ -+ INIT_WORK(&node->work, qrtr_node_rx_work); -+ kref_init(&node->ref); -+ mutex_init(&node->ep_lock); -+ skb_queue_head_init(&node->rx_queue); -+ node->nid = QRTR_EP_NID_AUTO; -+ node->ep = ep; -+ -+ qrtr_node_assign(node, nid); -+ -+ mutex_lock(&qrtr_node_lock); -+ list_add(&node->item, &qrtr_all_nodes); -+ mutex_unlock(&qrtr_node_lock); -+ ep->node = node; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_register); -+ -+/** -+ * qrtr_endpoint_unregister - unregister endpoint -+ * @ep: endpoint to unregister -+ */ -+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) -+{ -+ struct qrtr_node *node = ep->node; -+ -+ mutex_lock(&node->ep_lock); -+ node->ep = NULL; -+ mutex_unlock(&node->ep_lock); -+ -+ qrtr_node_release(node); -+ ep->node = NULL; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister); -+ -+/* Lookup socket by port. -+ * -+ * Callers must release with qrtr_port_put() -+ */ -+static struct qrtr_sock *qrtr_port_lookup(int port) -+{ -+ struct qrtr_sock *ipc; -+ -+ if (port == QRTR_PORT_CTRL) -+ port = 0; -+ -+ mutex_lock(&qrtr_port_lock); -+ ipc = idr_find(&qrtr_ports, port); -+ if (ipc) -+ sock_hold(&ipc->sk); -+ mutex_unlock(&qrtr_port_lock); -+ -+ return ipc; -+} -+ -+/* Release acquired socket. */ -+static void qrtr_port_put(struct qrtr_sock *ipc) -+{ -+ sock_put(&ipc->sk); -+} -+ -+/* Remove port assignment. */ -+static void qrtr_port_remove(struct qrtr_sock *ipc) -+{ -+ int port = ipc->us.sq_port; -+ -+ if (port == QRTR_PORT_CTRL) -+ port = 0; -+ -+ __sock_put(&ipc->sk); -+ -+ mutex_lock(&qrtr_port_lock); -+ idr_remove(&qrtr_ports, port); -+ mutex_unlock(&qrtr_port_lock); -+} -+ -+/* Assign port number to socket. -+ * -+ * Specify port in the integer pointed to by port, and it will be adjusted -+ * on return as necesssary. -+ * -+ * Port may be: -+ * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET] -+ * QRTR_MIN_EPH_SOCKET: Specified; available to all -+ */ -+static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) -+{ -+ int rc; -+ -+ mutex_lock(&qrtr_port_lock); -+ if (!*port) { -+ rc = idr_alloc(&qrtr_ports, ipc, -+ QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1, -+ GFP_ATOMIC); -+ if (rc >= 0) -+ *port = rc; -+ } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { -+ rc = -EACCES; -+ } else if (*port == QRTR_PORT_CTRL) { -+ rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC); -+ } else { -+ rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC); -+ if (rc >= 0) -+ *port = rc; -+ } -+ mutex_unlock(&qrtr_port_lock); -+ -+ if (rc == -ENOSPC) -+ return -EADDRINUSE; -+ else if (rc < 0) -+ return rc; -+ -+ sock_hold(&ipc->sk); -+ -+ return 0; -+} -+ -+/* Bind socket to address. -+ * -+ * Socket should be locked upon call. -+ */ -+static int __qrtr_bind(struct socket *sock, -+ const struct sockaddr_qrtr *addr, int zapped) -+{ -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int port; -+ int rc; -+ -+ /* rebinding ok */ -+ if (!zapped && addr->sq_port == ipc->us.sq_port) -+ return 0; -+ -+ port = addr->sq_port; -+ rc = qrtr_port_assign(ipc, &port); -+ if (rc) -+ return rc; -+ -+ /* unbind previous, if any */ -+ if (!zapped) -+ qrtr_port_remove(ipc); -+ ipc->us.sq_port = port; -+ -+ sock_reset_flag(sk, SOCK_ZAPPED); -+ -+ return 0; -+} -+ -+/* Auto bind to an ephemeral port. */ -+static int qrtr_autobind(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct sockaddr_qrtr addr; -+ -+ if (!sock_flag(sk, SOCK_ZAPPED)) -+ return 0; -+ -+ addr.sq_family = AF_QIPCRTR; -+ addr.sq_node = qrtr_local_nid; -+ addr.sq_port = 0; -+ -+ return __qrtr_bind(sock, &addr, 1); -+} -+ -+/* Bind socket to specified sockaddr. */ -+static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int rc; -+ -+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) -+ return -EINVAL; -+ -+ if (addr->sq_node != ipc->us.sq_node) -+ return -EINVAL; -+ -+ lock_sock(sk); -+ rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED)); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+/* Queue packet to local peer socket. */ -+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ const struct qrtr_hdr *phdr; -+ struct qrtr_sock *ipc; -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ -+ ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id)); -+ if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ -+ kfree_skb(skb); -+ return -ENODEV; -+ } -+ -+ if (sock_queue_rcv_skb(&ipc->sk, skb)) { -+ qrtr_port_put(ipc); -+ kfree_skb(skb); -+ return -ENOSPC; -+ } -+ -+ qrtr_port_put(ipc); -+ -+ return 0; -+} -+ -+/* Queue packet for broadcast. */ -+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ struct sk_buff *skbn; -+ -+ mutex_lock(&qrtr_node_lock); -+ list_for_each_entry(node, &qrtr_all_nodes, item) { -+ skbn = skb_clone(skb, GFP_KERNEL); -+ if (!skbn) -+ break; -+ skb_set_owner_w(skbn, skb->sk); -+ qrtr_node_enqueue(node, skbn); -+ } -+ mutex_unlock(&qrtr_node_lock); -+ -+ qrtr_local_enqueue(node, skb); -+ -+ return 0; -+} -+ -+static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); -+ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ struct qrtr_node *node; -+ struct qrtr_hdr *hdr; -+ struct sk_buff *skb; -+ size_t plen; -+ int rc; -+ -+ if (msg->msg_flags & ~(MSG_DONTWAIT)) -+ return -EINVAL; -+ -+ if (len > 65535) -+ return -EMSGSIZE; -+ -+ lock_sock(sk); -+ -+ if (addr) { -+ if (msg->msg_namelen < sizeof(*addr)) { -+ release_sock(sk); -+ return -EINVAL; -+ } -+ -+ if (addr->sq_family != AF_QIPCRTR) { -+ release_sock(sk); -+ return -EINVAL; -+ } -+ -+ rc = qrtr_autobind(sock); -+ if (rc) { -+ release_sock(sk); -+ return rc; -+ } -+ } else if (sk->sk_state == TCP_ESTABLISHED) { -+ addr = &ipc->peer; -+ } else { -+ release_sock(sk); -+ return -ENOTCONN; -+ } -+ -+ node = NULL; -+ if (addr->sq_node == QRTR_NODE_BCAST) { -+ enqueue_fn = qrtr_bcast_enqueue; -+ } else if (addr->sq_node == ipc->us.sq_node) { -+ enqueue_fn = qrtr_local_enqueue; -+ } else { -+ enqueue_fn = qrtr_node_enqueue; -+ node = qrtr_node_lookup(addr->sq_node); -+ if (!node) { -+ release_sock(sk); -+ return -ECONNRESET; -+ } -+ } -+ -+ plen = (len + 3) & ~3; -+ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, -+ msg->msg_flags & MSG_DONTWAIT, &rc); -+ if (!skb) -+ goto out_node; -+ -+ skb_reset_transport_header(skb); -+ skb_put(skb, len + QRTR_HDR_SIZE); -+ -+ hdr = (struct qrtr_hdr *)skb_transport_header(skb); -+ hdr->version = cpu_to_le32(QRTR_PROTO_VER); -+ hdr->src_node_id = cpu_to_le32(ipc->us.sq_node); -+ hdr->src_port_id = cpu_to_le32(ipc->us.sq_port); -+ hdr->confirm_rx = cpu_to_le32(0); -+ hdr->size = cpu_to_le32(len); -+ hdr->dst_node_id = cpu_to_le32(addr->sq_node); -+ hdr->dst_port_id = cpu_to_le32(addr->sq_port); -+ -+ rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, -+ &msg->msg_iter, len); -+ if (rc) { -+ kfree_skb(skb); -+ goto out_node; -+ } -+ -+ if (plen != len) { -+ skb_pad(skb, plen - len); -+ skb_put(skb, plen - len); -+ } -+ -+ if (ipc->us.sq_port == QRTR_PORT_CTRL) { -+ if (len < 4) { -+ rc = -EINVAL; -+ kfree_skb(skb); -+ goto out_node; -+ } -+ -+ /* control messages already require the type as 'command' */ -+ skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); -+ } else { -+ hdr->type = cpu_to_le32(QRTR_TYPE_DATA); -+ } -+ -+ rc = enqueue_fn(node, skb); -+ if (rc >= 0) -+ rc = len; -+ -+out_node: -+ qrtr_node_release(node); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, -+ size_t size, int flags) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); -+ const struct qrtr_hdr *phdr; -+ struct sock *sk = sock->sk; -+ struct sk_buff *skb; -+ int copied, rc; -+ -+ lock_sock(sk); -+ -+ if (sock_flag(sk, SOCK_ZAPPED)) { -+ release_sock(sk); -+ return -EADDRNOTAVAIL; -+ } -+ -+ skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, -+ flags & MSG_DONTWAIT, &rc); -+ if (!skb) { -+ release_sock(sk); -+ return rc; -+ } -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ copied = le32_to_cpu(phdr->size); -+ if (copied > size) { -+ copied = size; -+ msg->msg_flags |= MSG_TRUNC; -+ } -+ -+ rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); -+ if (rc < 0) -+ goto out; -+ rc = copied; -+ -+ if (addr) { -+ addr->sq_family = AF_QIPCRTR; -+ addr->sq_node = le32_to_cpu(phdr->src_node_id); -+ addr->sq_port = le32_to_cpu(phdr->src_port_id); -+ msg->msg_namelen = sizeof(*addr); -+ } -+ -+out: -+ skb_free_datagram(sk, skb); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, -+ int len, int flags) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int rc; -+ -+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) -+ return -EINVAL; -+ -+ lock_sock(sk); -+ -+ sk->sk_state = TCP_CLOSE; -+ sock->state = SS_UNCONNECTED; -+ -+ rc = qrtr_autobind(sock); -+ if (rc) { -+ release_sock(sk); -+ return rc; -+ } -+ -+ ipc->peer = *addr; -+ sock->state = SS_CONNECTED; -+ sk->sk_state = TCP_ESTABLISHED; -+ -+ release_sock(sk); -+ -+ return 0; -+} -+ -+static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, -+ int *len, int peer) -+{ -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sockaddr_qrtr qaddr; -+ struct sock *sk = sock->sk; -+ -+ lock_sock(sk); -+ if (peer) { -+ if (sk->sk_state != TCP_ESTABLISHED) { -+ release_sock(sk); -+ return -ENOTCONN; -+ } -+ -+ qaddr = ipc->peer; -+ } else { -+ qaddr = ipc->us; -+ } -+ release_sock(sk); -+ -+ *len = sizeof(qaddr); -+ qaddr.sq_family = AF_QIPCRTR; -+ -+ memcpy(saddr, &qaddr, sizeof(qaddr)); -+ -+ return 0; -+} -+ -+static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -+{ -+ void __user *argp = (void __user *)arg; -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ struct sockaddr_qrtr *sq; -+ struct sk_buff *skb; -+ struct ifreq ifr; -+ long len = 0; -+ int rc = 0; -+ -+ lock_sock(sk); -+ -+ switch (cmd) { -+ case TIOCOUTQ: -+ len = sk->sk_sndbuf - sk_wmem_alloc_get(sk); -+ if (len < 0) -+ len = 0; -+ rc = put_user(len, (int __user *)argp); -+ break; -+ case TIOCINQ: -+ skb = skb_peek(&sk->sk_receive_queue); -+ if (skb) -+ len = skb->len - QRTR_HDR_SIZE; -+ rc = put_user(len, (int __user *)argp); -+ break; -+ case SIOCGIFADDR: -+ if (copy_from_user(&ifr, argp, sizeof(ifr))) { -+ rc = -EFAULT; -+ break; -+ } -+ -+ sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; -+ *sq = ipc->us; -+ if (copy_to_user(argp, &ifr, sizeof(ifr))) { -+ rc = -EFAULT; -+ break; -+ } -+ break; -+ case SIOCGSTAMP: -+ rc = sock_get_timestamp(sk, argp); -+ break; -+ case SIOCADDRT: -+ case SIOCDELRT: -+ case SIOCSIFADDR: -+ case SIOCGIFDSTADDR: -+ case SIOCSIFDSTADDR: -+ case SIOCGIFBRDADDR: -+ case SIOCSIFBRDADDR: -+ case SIOCGIFNETMASK: -+ case SIOCSIFNETMASK: -+ rc = -EINVAL; -+ break; -+ default: -+ rc = -ENOIOCTLCMD; -+ break; -+ } -+ -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_release(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct qrtr_sock *ipc; -+ -+ if (!sk) -+ return 0; -+ -+ lock_sock(sk); -+ -+ ipc = qrtr_sk(sk); -+ sk->sk_shutdown = SHUTDOWN_MASK; -+ if (!sock_flag(sk, SOCK_DEAD)) -+ sk->sk_state_change(sk); -+ -+ sock_set_flag(sk, SOCK_DEAD); -+ sock->sk = NULL; -+ -+ if (!sock_flag(sk, SOCK_ZAPPED)) -+ qrtr_port_remove(ipc); -+ -+ skb_queue_purge(&sk->sk_receive_queue); -+ -+ release_sock(sk); -+ sock_put(sk); -+ -+ return 0; -+} -+ -+static const struct proto_ops qrtr_proto_ops = { -+ .owner = THIS_MODULE, -+ .family = AF_QIPCRTR, -+ .bind = qrtr_bind, -+ .connect = qrtr_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = sock_no_accept, -+ .listen = sock_no_listen, -+ .sendmsg = qrtr_sendmsg, -+ .recvmsg = qrtr_recvmsg, -+ .getname = qrtr_getname, -+ .ioctl = qrtr_ioctl, -+ .poll = datagram_poll, -+ .shutdown = sock_no_shutdown, -+ .setsockopt = sock_no_setsockopt, -+ .getsockopt = sock_no_getsockopt, -+ .release = qrtr_release, -+ .mmap = sock_no_mmap, -+ .sendpage = sock_no_sendpage, -+}; -+ -+static struct proto qrtr_proto = { -+ .name = "QIPCRTR", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct qrtr_sock), -+}; -+ -+static int qrtr_create(struct net *net, struct socket *sock, -+ int protocol, int kern) -+{ -+ struct qrtr_sock *ipc; -+ struct sock *sk; -+ -+ if (sock->type != SOCK_DGRAM) -+ return -EPROTOTYPE; -+ -+ sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern); -+ if (!sk) -+ return -ENOMEM; -+ -+ sock_set_flag(sk, SOCK_ZAPPED); -+ -+ sock_init_data(sock, sk); -+ sock->ops = &qrtr_proto_ops; -+ -+ ipc = qrtr_sk(sk); -+ ipc->us.sq_family = AF_QIPCRTR; -+ ipc->us.sq_node = qrtr_local_nid; -+ ipc->us.sq_port = 0; -+ -+ return 0; -+} -+ -+static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { -+ [IFA_LOCAL] = { .type = NLA_U32 }, -+}; -+ -+static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) -+{ -+ struct nlattr *tb[IFA_MAX + 1]; -+ struct ifaddrmsg *ifm; -+ int rc; -+ -+ if (!netlink_capable(skb, CAP_NET_ADMIN)) -+ return -EPERM; -+ -+ if (!netlink_capable(skb, CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ ASSERT_RTNL(); -+ -+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); -+ if (rc < 0) -+ return rc; -+ -+ ifm = nlmsg_data(nlh); -+ if (!tb[IFA_LOCAL]) -+ return -EINVAL; -+ -+ qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]); -+ return 0; -+} -+ -+static const struct net_proto_family qrtr_family = { -+ .owner = THIS_MODULE, -+ .family = AF_QIPCRTR, -+ .create = qrtr_create, -+}; -+ -+static int __init qrtr_proto_init(void) -+{ -+ int rc; -+ -+ rc = proto_register(&qrtr_proto, 1); -+ if (rc) -+ return rc; -+ -+ rc = sock_register(&qrtr_family); -+ if (rc) { -+ proto_unregister(&qrtr_proto); -+ return rc; -+ } -+ -+ rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); -+ -+ return 0; -+} -+module_init(qrtr_proto_init); -+ -+static void __exit qrtr_proto_fini(void) -+{ -+ rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR); -+ sock_unregister(qrtr_family.family); -+ proto_unregister(&qrtr_proto); -+} -+module_exit(qrtr_proto_fini); -+ -+MODULE_DESCRIPTION("Qualcomm IPC-router driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h -new file mode 100644 -index 0000000..2b84871 ---- /dev/null -+++ b/net/qrtr/qrtr.h -@@ -0,0 +1,31 @@ -+#ifndef __QRTR_H_ -+#define __QRTR_H_ -+ -+#include -+ -+struct sk_buff; -+ -+/* endpoint node id auto assignment */ -+#define QRTR_EP_NID_AUTO (-1) -+ -+/** -+ * struct qrtr_endpoint - endpoint handle -+ * @xmit: Callback for outgoing packets -+ * -+ * The socket buffer passed to the xmit function becomes owned by the endpoint -+ * driver. As such, when the driver is done with the buffer, it should -+ * call kfree_skb() on failure, or consume_skb() on success. -+ */ -+struct qrtr_endpoint { -+ int (*xmit)(struct qrtr_endpoint *ep, struct sk_buff *skb); -+ /* private: not for endpoint use */ -+ struct qrtr_node *node; -+}; -+ -+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid); -+ -+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); -+ -+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); -+ -+#endif -diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c -new file mode 100644 -index 0000000..84ebce7 ---- /dev/null -+++ b/net/qrtr/smd.c -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2015, Sony Mobile Communications Inc. -+ * Copyright (c) 2013, The Linux Foundation. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include -+#include -+#include -+ -+#include "qrtr.h" -+ -+struct qrtr_smd_dev { -+ struct qrtr_endpoint ep; -+ struct qcom_smd_channel *channel; -+}; -+ -+/* from smd to qrtr */ -+static int qcom_smd_qrtr_callback(struct qcom_smd_device *sdev, -+ const void *data, size_t len) -+{ -+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); -+ int rc; -+ -+ if (!qdev) -+ return -EAGAIN; -+ -+ rc = qrtr_endpoint_post(&qdev->ep, data, len); -+ if (rc == -EINVAL) { -+ dev_err(&sdev->dev, "invalid ipcrouter packet\n"); -+ /* return 0 to let smd drop the packet */ -+ rc = 0; -+ } -+ -+ return rc; -+} -+ -+/* from qrtr to smd */ -+static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) -+{ -+ struct qrtr_smd_dev *qdev = container_of(ep, struct qrtr_smd_dev, ep); -+ int rc; -+ -+ rc = skb_linearize(skb); -+ if (rc) -+ goto out; -+ -+ rc = qcom_smd_send(qdev->channel, skb->data, skb->len); -+ -+out: -+ if (rc) -+ kfree_skb(skb); -+ else -+ consume_skb(skb); -+ return rc; -+} -+ -+static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) -+{ -+ struct qrtr_smd_dev *qdev; -+ int rc; -+ -+ qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); -+ if (!qdev) -+ return -ENOMEM; -+ -+ qdev->channel = sdev->channel; -+ qdev->ep.xmit = qcom_smd_qrtr_send; -+ -+ rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); -+ if (rc) -+ return rc; -+ -+ dev_set_drvdata(&sdev->dev, qdev); -+ -+ dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); -+ -+ return 0; -+} -+ -+static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) -+{ -+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); -+ -+ qrtr_endpoint_unregister(&qdev->ep); -+ -+ dev_set_drvdata(&sdev->dev, NULL); -+} -+ -+static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { -+ { "IPCRTR" }, -+ {} -+}; -+ -+static struct qcom_smd_driver qcom_smd_qrtr_driver = { -+ .probe = qcom_smd_qrtr_probe, -+ .remove = qcom_smd_qrtr_remove, -+ .callback = qcom_smd_qrtr_callback, -+ .smd_match_table = qcom_smd_qrtr_smd_match, -+ .driver = { -+ .name = "qcom_smd_qrtr", -+ .owner = THIS_MODULE, -+ }, -+}; -+ -+module_qcom_smd_driver(qcom_smd_qrtr_driver); -+ -+MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); -+MODULE_LICENSE("GPL v2"); --- -2.10.0 - diff --git a/alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch b/alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch deleted file mode 100644 index ce4bee972..000000000 --- a/alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch +++ /dev/null @@ -1,49 +0,0 @@ -From b7da2c01ddbb00ed9ccdd3d646f6129f07016cf8 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Mar 2016 02:53:08 -0700 -Subject: [PATCH 40/42] net: add the AF_HYPERV entries to family name tables - -This is for the hv_sock driver, which introduces AF_HYPERV(42). - -Signed-off-by: Dexuan Cui -Cc: "K. Y. Srinivasan" -Cc: Haiyang Zhang -Origin: https://patchwork.ozlabs.org/patch/600009 ---- - net/core/sock.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/core/sock.c b/net/core/sock.c -index 925def4..323f7a3 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -264,7 +264,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , -- "sk_lock-AF_MAX" -+ "sk_lock-AF_HYPERV", "sk_lock-AF_MAX" - }; - static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , -@@ -281,7 +281,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , -- "slock-AF_MAX" -+ "slock-AF_HYPERV", "slock-AF_MAX" - }; - static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , -@@ -298,7 +298,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , -- "clock-AF_MAX" -+ "clock-AF_HYPERV", "clock-AF_MAX" - }; - - /* --- -2.10.0 -