From b92ef47f945cb4090a57e478cb77a3dc41ef8594 Mon Sep 17 00:00:00 2001 From: Justin Cormack Date: Mon, 24 Oct 2016 17:20:02 +0100 Subject: [PATCH] Revert "kernel: update to 4.8.2" This reverts commit 0808e359df74afa33128518a496a67fcdcd86ce8. Signed-off-by: Justin Cormack --- alpine/kernel/Dockerfile | 8 +- ...make-find_vqs-checkpatch.pl-friendly.patch | 219 ++ ...vmci_transport_notify_ops-structures.patch | 77 + ...the-area-influenced-by-prepare_to_wa.patch | 336 +++ ...istener-child-lock-ordering-explicit.patch | 63 + ...t-specific-vsock_transport-functions.patch | 59 + ...OCK-defer-sock-removal-to-transports.patch | 83 + ...OCK-Introduce-virtio_vsock_common.ko.patch | 1496 +++++++++++ ...-VSOCK-Introduce-virtio_transport.ko.patch | 663 +++++ .../0009-VSOCK-Introduce-vhost_vsock.ko.patch | 777 ++++++ .../0010-VSOCK-Add-Makefile-and-Kconfig.patch | 106 + .../patches/0011-VSOCK-Use-kvfree.patch | 33 + ...vhost-virtio_vsock_pkt-use-after-fre.patch | 53 + ...-virtio-vsock-fix-include-guard-typo.patch | 28 + ...drop-space-available-check-for-TX-vq.patch | 61 + ...host-network-namespace-to-use-AF_VS.patch} | 18 +- ...e-the-channel-type-for-Hyper-V-PCI-E.patch | 63 + ...-vmbus-Use-uuid_le-type-consistently.patch | 297 +++ ...-Use-uuid_le_cmp-for-comparing-GUIDs.patch | 55 + ...-do-sanity-check-of-channel-state-in.patch | 42 + ...-release-relid-on-error-in-vmbus_pro.patch | 74 + ...-channge-vmbus_connection.channel_lo.patch | 116 + ...e-code-duplication-between-vmbus_rec.patch | 126 + ...-fix-the-building-warning-with-hyper.patch | 72 + ...-Treat-Fibre-Channel-devices-as-perf.patch | 42 + ...us-Add-vendor-and-device-atttributes.patch | 355 +++ ...-add-a-helper-function-to-set-a-chan.patch | 36 + ...-define-the-new-offer-type-for-Hyper.patch | 44 + ...-vmbus_sendpacket_ctl-hvsock-avoid-u.patch | 45 + ...-define-a-new-VMBus-message-type-for.patch | 101 + ...-add-a-hvsock-flag-in-struct-hv_driv.patch | 64 + ...s-add-a-per-channel-rescind-callback.patch | 72 + ...-add-an-API-vmbus_hvsock_device_unre.patch | 153 ++ ...-Give-control-over-how-the-ring-acce.patch | 208 ++ ...s-avoid-wait_for_completion-on-crash.patch | 100 + ...-avoid-unneeded-compiler-optimizatio.patch | 39 + ...Kernel-Connection-Multiplexor-module.patch | 2312 +++++++++++++++++ ...AF_KCM-entries-to-family-name-tables.patch | 52 + .../0038-net-Add-Qualcomm-IPC-router.patch | 1307 ++++++++++ ...9-hv_sock-introduce-Hyper-V-Sockets.patch} | 525 ++-- ...HYPERV-entries-to-family-name-tables.patch | 49 + ...fix-the-race-when-querying-updating.patch} | 115 +- ...-t-spam-the-logs-with-unknown-GUIDs.patch} | 19 +- 43 files changed, 10230 insertions(+), 333 deletions(-) create mode 100644 alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch create mode 100644 alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch create mode 100644 alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch create mode 100644 alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch create mode 100644 alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch create mode 100644 alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch create mode 100644 alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch create mode 100644 alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch create mode 100644 alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch create mode 100644 
alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch create mode 100644 alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch create mode 100644 alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch create mode 100644 alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch create mode 100644 alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch rename alpine/kernel/patches/{0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch => 0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch} (72%) create mode 100644 alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch create mode 100644 alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch create mode 100644 alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch create mode 100644 alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch create mode 100644 alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch create mode 100644 alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch create mode 100644 alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch create mode 100644 alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch create mode 100644 alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch create mode 100644 alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch create mode 100644 alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch create mode 100644 alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch create mode 100644 alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch create mode 100644 alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch create mode 100644 alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch create mode 100644 alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch create mode 100644 alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch create mode 100644 alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch create mode 100644 alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch create mode 100644 alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch create mode 100644 alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch create mode 100644 alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch create mode 100644 alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch rename alpine/kernel/patches/{0003-hv_sock-introduce-Hyper-V-Sockets.patch => 0039-hv_sock-introduce-Hyper-V-Sockets.patch} (81%) create mode 100644 alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch rename alpine/kernel/patches/{0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch => 0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch} (67%) rename alpine/kernel/patches/{0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch => 0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch} (74%) diff --git a/alpine/kernel/Dockerfile 
b/alpine/kernel/Dockerfile index ff9c9d7c5..a27ceacc7 100644 --- a/alpine/kernel/Dockerfile +++ b/alpine/kernel/Dockerfile @@ -1,6 +1,6 @@ FROM mobylinux/alpine-build-c:7303e33e9dcd5276b8bb5269644a9bf3354008c8 -ARG KERNEL_VERSION=4.8.2 +ARG KERNEL_VERSION=4.4.25 ENV KERNEL_SOURCE=https://www.kernel.org/pub/linux/kernel/v4.x/linux-${KERNEL_VERSION}.tar.xz @@ -8,10 +8,10 @@ RUN curl -sSL -o linux-${KERNEL_VERSION}.tar.xz ${KERNEL_SOURCE} RUN cat linux-${KERNEL_VERSION}.tar.xz | tar --absolute-names -xJ && mv /linux-${KERNEL_VERSION} /linux -# this is aufs4.8 20161010 +# this is aufs4.4 20160912 ENV AUFS_REPO https://github.com/sfjro/aufs4-standalone -ENV AUFS_BRANCH aufs4.8 -ENV AUFS_COMMIT e9fd128dcb16167417683e199a5feb14f3c9eca8 +ENV AUFS_BRANCH aufs4.4 +ENV AUFS_COMMIT 7d174ae40b4c9c876ee51aa50fa4ee1f3747de23 # Download AUFS RUN git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /aufs && \ diff --git a/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch b/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch new file mode 100644 index 000000000..0fd255114 --- /dev/null +++ b/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch @@ -0,0 +1,219 @@ +From 622883ec571c468f756195c13726740bdd33a0ee Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 17 Dec 2015 16:53:43 +0800 +Subject: [PATCH 01/42] virtio: make find_vqs() checkpatch.pl-friendly + +checkpatch.pl wants arrays of strings declared as follows: + + static const char * const names[] = { "vq-1", "vq-2", "vq-3" }; + +Currently the find_vqs() function takes a const char *names[] argument +so passing checkpatch.pl's const char * const names[] results in a +compiler error due to losing the second const. + +This patch adjusts the find_vqs() prototype and updates all virtio +transports. This makes it possible for virtio_balloon.c, virtio_input.c, +virtgpu_kms.c, and virtio_rpmsg_bus.c to use the checkpatch.pl-friendly +type. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Bjorn Andersson +(cherry picked from commit f7ad26ff952b3ca2702d7da03aad0ab1f6c01d7c) +--- + drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- + drivers/misc/mic/card/mic_virtio.c | 2 +- + drivers/remoteproc/remoteproc_virtio.c | 2 +- + drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- + drivers/s390/virtio/kvm_virtio.c | 2 +- + drivers/s390/virtio/virtio_ccw.c | 2 +- + drivers/virtio/virtio_balloon.c | 2 +- + drivers/virtio/virtio_input.c | 2 +- + drivers/virtio/virtio_mmio.c | 2 +- + drivers/virtio/virtio_pci_common.c | 4 ++-- + drivers/virtio/virtio_pci_common.h | 2 +- + drivers/virtio/virtio_pci_modern.c | 2 +- + include/linux/virtio_config.h | 2 +- + 13 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c +index 06496a1..4150873 100644 +--- a/drivers/gpu/drm/virtio/virtgpu_kms.c ++++ b/drivers/gpu/drm/virtio/virtgpu_kms.c +@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) + static vq_callback_t *callbacks[] = { + virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack + }; +- static const char *names[] = { "control", "cursor" }; ++ static const char * const names[] = { "control", "cursor" }; + + struct virtio_gpu_device *vgdev; + /* this will expand later */ +diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c +index e486a0c..f6ed57d 100644 +--- a/drivers/misc/mic/card/mic_virtio.c ++++ b/drivers/misc/mic/card/mic_virtio.c +@@ -311,7 +311,7 @@ unmap: + static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]) ++ const char * const names[]) + { + struct mic_vdev *mvdev = to_micvdev(vdev); + struct mic_device_ctrl __iomem *dc = mvdev->dc; +diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c +index e1a1023..e44872f 100644 +--- a/drivers/remoteproc/remoteproc_virtio.c ++++ b/drivers/remoteproc/remoteproc_virtio.c +@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) + static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]) ++ const char * const names[]) + { + struct rproc *rproc = vdev_to_rproc(vdev); + int i, ret; +diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c +index 73354ee..1fcd27c 100644 +--- a/drivers/rpmsg/virtio_rpmsg_bus.c ++++ b/drivers/rpmsg/virtio_rpmsg_bus.c +@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len, + static int rpmsg_probe(struct virtio_device *vdev) + { + vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done }; +- const char *names[] = { "input", "output" }; ++ static const char * const names[] = { "input", "output" }; + struct virtqueue *vqs[2]; + struct virtproc_info *vrp; + void *bufs_va; +diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c +index 53fb975..1d060fd 100644 +--- a/drivers/s390/virtio/kvm_virtio.c ++++ b/drivers/s390/virtio/kvm_virtio.c +@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev) + static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]) ++ const char * const names[]) + { + struct kvm_device *kdev = to_kvmdev(vdev); + int i; +diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c +index 
1b83159..bf2d130 100644 +--- a/drivers/s390/virtio/virtio_ccw.c ++++ b/drivers/s390/virtio/virtio_ccw.c +@@ -635,7 +635,7 @@ out: + static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]) ++ const char * const names[]) + { + struct virtio_ccw_device *vcdev = to_vc_device(vdev); + unsigned long *indicatorp = NULL; +diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c +index 56f7e25..66082c9 100644 +--- a/drivers/virtio/virtio_balloon.c ++++ b/drivers/virtio/virtio_balloon.c +@@ -394,7 +394,7 @@ static int init_vqs(struct virtio_balloon *vb) + { + struct virtqueue *vqs[3]; + vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; +- const char *names[] = { "inflate", "deflate", "stats" }; ++ static const char * const names[] = { "inflate", "deflate", "stats" }; + int err, nvqs; + + /* +diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c +index c96944b..350a2a5 100644 +--- a/drivers/virtio/virtio_input.c ++++ b/drivers/virtio/virtio_input.c +@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) + struct virtqueue *vqs[2]; + vq_callback_t *cbs[] = { virtinput_recv_events, + virtinput_recv_status }; +- static const char *names[] = { "events", "status" }; ++ static const char * const names[] = { "events", "status" }; + int err; + + err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); +diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c +index f499d9d..745c6ee 100644 +--- a/drivers/virtio/virtio_mmio.c ++++ b/drivers/virtio/virtio_mmio.c +@@ -482,7 +482,7 @@ error_available: + static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]) ++ const char * const names[]) + { + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + unsigned int irq = platform_get_irq(vm_dev->pdev, 0); +diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c +index 2046a68..f6bed86 100644 +--- a/drivers/virtio/virtio_pci_common.c ++++ b/drivers/virtio/virtio_pci_common.c +@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev) + static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[], ++ const char * const names[], + bool use_msix, + bool per_vq_vectors) + { +@@ -376,7 +376,7 @@ error_find: + int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]) ++ const char * const names[]) + { + int err; + +diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h +index b976d96..2cc2522 100644 +--- a/drivers/virtio/virtio_pci_common.h ++++ b/drivers/virtio/virtio_pci_common.h +@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev); + int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]); ++ const char * const names[]); + const char *vp_bus_name(struct virtio_device *vdev); + + /* Setup the affinity for a virtqueue: +diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c +index 4469202..631021c 100644 +--- a/drivers/virtio/virtio_pci_modern.c ++++ b/drivers/virtio/virtio_pci_modern.c +@@ -423,7 +423,7 @@ err_new_queue: + static int vp_modern_find_vqs(struct 
virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]) ++ const char * const names[]) + { + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq; +diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h +index e5ce8ab..6e6cb0c 100644 +--- a/include/linux/virtio_config.h ++++ b/include/linux/virtio_config.h +@@ -70,7 +70,7 @@ struct virtio_config_ops { + int (*find_vqs)(struct virtio_device *, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], +- const char *names[]); ++ const char * const names[]); + void (*del_vqs)(struct virtio_device *); + u64 (*get_features)(struct virtio_device *vdev); + int (*finalize_features)(struct virtio_device *vdev); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch b/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch new file mode 100644 index 000000000..ff2c9634d --- /dev/null +++ b/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch @@ -0,0 +1,77 @@ +From 8dc15fd8fab55e076a640d1a5d6f34b77e196632 Mon Sep 17 00:00:00 2001 +From: Julia Lawall +Date: Sat, 21 Nov 2015 18:39:17 +0100 +Subject: [PATCH 02/42] VSOCK: constify vmci_transport_notify_ops structures + +The vmci_transport_notify_ops structures are never modified, so declare +them as const. + +Done with the help of Coccinelle. + +Signed-off-by: Julia Lawall +Signed-off-by: David S. Miller +(cherry picked from commit 3b22dae38db1cea9ead3229f08cfb0b69aca5706) +--- + net/vmw_vsock/vmci_transport.h | 2 +- + net/vmw_vsock/vmci_transport_notify.c | 2 +- + net/vmw_vsock/vmci_transport_notify.h | 5 +++-- + net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +- + 4 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h +index 2ad46f3..1820e74 100644 +--- a/net/vmw_vsock/vmci_transport.h ++++ b/net/vmw_vsock/vmci_transport.h +@@ -121,7 +121,7 @@ struct vmci_transport { + u64 queue_pair_max_size; + u32 detach_sub_id; + union vmci_transport_notify notify; +- struct vmci_transport_notify_ops *notify_ops; ++ const struct vmci_transport_notify_ops *notify_ops; + struct list_head elem; + struct sock *sk; + spinlock_t lock; /* protects sk. */ +diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c +index 9b7f207..fd8cf02 100644 +--- a/net/vmw_vsock/vmci_transport_notify.c ++++ b/net/vmw_vsock/vmci_transport_notify.c +@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) + } + + /* Socket control packet based operations. 
*/ +-struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { ++const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, +diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h +index 7df7932..3c464d3 100644 +--- a/net/vmw_vsock/vmci_transport_notify.h ++++ b/net/vmw_vsock/vmci_transport_notify.h +@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { + void (*process_negotiate) (struct sock *sk); + }; + +-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +-extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; ++extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; ++extern const ++struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + + #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ +diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c +index dc9c792..21e591d 100644 +--- a/net/vmw_vsock/vmci_transport_notify_qstate.c ++++ b/net/vmw_vsock/vmci_transport_notify_qstate.c +@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( + } + + /* Socket always on control packet based operations. */ +-struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { ++const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, +-- +2.10.0 + diff --git a/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch b/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch new file mode 100644 index 000000000..8c0d62952 --- /dev/null +++ b/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch @@ -0,0 +1,336 @@ +From 761aa629641afa804127aea0e3ce5c95dddfcb17 Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Tue, 22 Mar 2016 17:05:52 +0100 +Subject: [PATCH 03/42] AF_VSOCK: Shrink the area influenced by prepare_to_wait + +When a thread is prepared for waiting by calling prepare_to_wait, sleeping +is not allowed until either the wait has taken place or finish_wait has +been called. The existing code in af_vsock imposed unnecessary no-sleep +assumptions to a broad list of backend functions. +This patch shrinks the influence of prepare_to_wait to the area where it +is strictly needed, therefore relaxing the no-sleep restriction there. + +Signed-off-by: Claudio Imbrenda +Signed-off-by: David S. 
Miller +(cherry picked from commit f7f9b5e7f8eccfd68ffa7b8d74b07c478bb9e7f0) +--- + net/vmw_vsock/af_vsock.c | 158 +++++++++++++++++++++++++---------------------- + 1 file changed, 85 insertions(+), 73 deletions(-) + +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index 9b5bd6d..b5f1221 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); +- goto out_wait_error; ++ sk->sk_state = SS_UNCONNECTED; ++ sock->state = SS_UNCONNECTED; ++ goto out_wait; + } else if (timeout == 0) { + err = -ETIMEDOUT; +- goto out_wait_error; ++ sk->sk_state = SS_UNCONNECTED; ++ sock->state = SS_UNCONNECTED; ++ goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); +@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + + if (sk->sk_err) { + err = -sk->sk_err; +- goto out_wait_error; +- } else ++ sk->sk_state = SS_UNCONNECTED; ++ sock->state = SS_UNCONNECTED; ++ } else { + err = 0; ++ } + + out_wait: + finish_wait(sk_sleep(sk), &wait); + out: + release_sock(sk); + return err; +- +-out_wait_error: +- sk->sk_state = SS_UNCONNECTED; +- sock->state = SS_UNCONNECTED; +- goto out_wait; + } + + static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); ++ finish_wait(sk_sleep(listener), &wait); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); +- goto out_wait; ++ goto out; + } else if (timeout == 0) { + err = -EAGAIN; +- goto out_wait; ++ goto out; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } ++ finish_wait(sk_sleep(listener), &wait); + + if (listener->sk_err) + err = -listener->sk_err; +@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) + */ + if (err) { + vconnected->rejected = true; +- release_sock(connected); +- sock_put(connected); +- goto out_wait; ++ } else { ++ newsock->state = SS_CONNECTED; ++ sock_graft(connected, newsock); + } + +- newsock->state = SS_CONNECTED; +- sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +-out_wait: +- finish_wait(sk_sleep(listener), &wait); + out: + release_sock(listener); + return err; +@@ -1557,11 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + if (err < 0) + goto out; + +- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && +@@ -1570,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + /* Don't wait for non-blocking sockets. 
*/ + if (timeout == 0) { + err = -EAGAIN; +- goto out_wait; ++ finish_wait(sk_sleep(sk), &wait); ++ goto out_err; + } + + err = transport->notify_send_pre_block(vsk, &send_data); +- if (err < 0) +- goto out_wait; ++ if (err < 0) { ++ finish_wait(sk_sleep(sk), &wait); ++ goto out_err; ++ } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); +- goto out_wait; ++ finish_wait(sk_sleep(sk), &wait); ++ goto out_err; + } else if (timeout == 0) { + err = -EAGAIN; +- goto out_wait; ++ finish_wait(sk_sleep(sk), &wait); ++ goto out_err; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } ++ finish_wait(sk_sleep(sk), &wait); + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after +@@ -1598,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + */ + if (sk->sk_err) { + err = -sk->sk_err; +- goto out_wait; ++ goto out_err; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; +- goto out_wait; ++ goto out_err; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) +- goto out_wait; ++ goto out_err; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is +@@ -1620,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + len - total_written); + if (written < 0) { + err = -ENOMEM; +- goto out_wait; ++ goto out_err; + } + + total_written += written; +@@ -1628,14 +1633,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) +- goto out_wait; ++ goto out_err; + + } + +-out_wait: ++out_err: + if (total_written > 0) + err = total_written; +- finish_wait(sk_sleep(sk), &wait); + out: + release_sock(sk); + return err; +@@ -1716,21 +1720,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + if (err < 0) + goto out; + +- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { +- s64 ready = vsock_stream_has_data(vsk); ++ s64 ready; + +- if (ready < 0) { +- /* Invalid queue pair content. XXX This should be +- * changed to a connection reset in a later change. +- */ ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ ready = vsock_stream_has_data(vsk); + +- err = -ENOMEM; +- goto out_wait; +- } else if (ready > 0) { ++ if (ready == 0) { ++ if (sk->sk_err != 0 || ++ (sk->sk_shutdown & RCV_SHUTDOWN) || ++ (vsk->peer_shutdown & SEND_SHUTDOWN)) { ++ finish_wait(sk_sleep(sk), &wait); ++ break; ++ } ++ /* Don't wait for non-blocking sockets. */ ++ if (timeout == 0) { ++ err = -EAGAIN; ++ finish_wait(sk_sleep(sk), &wait); ++ break; ++ } ++ ++ err = transport->notify_recv_pre_block( ++ vsk, target, &recv_data); ++ if (err < 0) { ++ finish_wait(sk_sleep(sk), &wait); ++ break; ++ } ++ release_sock(sk); ++ timeout = schedule_timeout(timeout); ++ lock_sock(sk); ++ ++ if (signal_pending(current)) { ++ err = sock_intr_errno(timeout); ++ finish_wait(sk_sleep(sk), &wait); ++ break; ++ } else if (timeout == 0) { ++ err = -EAGAIN; ++ finish_wait(sk_sleep(sk), &wait); ++ break; ++ } ++ } else { + ssize_t read; + ++ finish_wait(sk_sleep(sk), &wait); ++ ++ if (ready < 0) { ++ /* Invalid queue pair content. XXX This should ++ * be changed to a connection reset in a later ++ * change. 
++ */ ++ ++ err = -ENOMEM; ++ goto out; ++ } ++ + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) +@@ -1750,42 +1794,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) +- goto out_wait; ++ goto out; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; +- } else { +- if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) +- || (vsk->peer_shutdown & SEND_SHUTDOWN)) { +- break; +- } +- /* Don't wait for non-blocking sockets. */ +- if (timeout == 0) { +- err = -EAGAIN; +- break; +- } +- +- err = transport->notify_recv_pre_block( +- vsk, target, &recv_data); +- if (err < 0) +- break; +- +- release_sock(sk); +- timeout = schedule_timeout(timeout); +- lock_sock(sk); +- +- if (signal_pending(current)) { +- err = sock_intr_errno(timeout); +- break; +- } else if (timeout == 0) { +- err = -EAGAIN; +- break; +- } +- +- prepare_to_wait(sk_sleep(sk), &wait, +- TASK_INTERRUPTIBLE); + } + } + +@@ -1797,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + if (copied > 0) + err = copied; + +-out_wait: +- finish_wait(sk_sleep(sk), &wait); + out: + release_sock(sk); + return err; +-- +2.10.0 + diff --git a/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch b/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch new file mode 100644 index 000000000..ecaa8e3e0 --- /dev/null +++ b/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch @@ -0,0 +1,63 @@ +From 8386f4e436f280cec08f95338ae5e44bc8aa5b5e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 23 Jun 2016 16:28:58 +0100 +Subject: [PATCH 04/42] vsock: make listener child lock ordering explicit + +There are several places where the listener and pending or accept queue +child sockets are accessed at the same time. Lockdep is unhappy that +two locks from the same class are held. + +Tell lockdep that it is safe and document the lock ordering. + +Originally Claudio Imbrenda sent a similar +patch asking whether this is safe. I have audited the code and also +covered the vsock_pending_work() function. + +Suggested-by: Claudio Imbrenda +Signed-off-by: Stefan Hajnoczi +Signed-off-by: David S. Miller +(cherry picked from commit 4192f672fae559f32d82de72a677701853cc98a7) +--- + net/vmw_vsock/af_vsock.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index b5f1221..b96ac91 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -61,6 +61,14 @@ + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * ++ * - Lock ordering for pending or accept queue sockets is: ++ * ++ * lock_sock(listener); ++ * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); ++ * ++ * Using explicit nested locking keeps lockdep happy since normally only one ++ * lock of a given class may be taken at a time. ++ * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. 
Our release + * implementation will perform some cleanup then drop the last reference so our +@@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) + cleanup = true; + + lock_sock(listener); +- lock_sock(sk); ++ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); +@@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) + if (connected) { + listener->sk_ack_backlog--; + +- lock_sock(connected); ++ lock_sock_nested(connected, SINGLE_DEPTH_NESTING); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should +-- +2.10.0 + diff --git a/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch b/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch new file mode 100644 index 000000000..edf521c29 --- /dev/null +++ b/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch @@ -0,0 +1,59 @@ +From ae6d39c3a4cd08ce37606ab36b202702a48f5440 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 28 Jul 2016 15:36:30 +0100 +Subject: [PATCH 05/42] VSOCK: transport-specific vsock_transport functions + +struct vsock_transport contains function pointers called by AF_VSOCK +core code. The transport may want its own transport-specific function +pointers and they can be added after struct vsock_transport. + +Allow the transport to fetch vsock_transport. It can downcast it to +access transport-specific function pointers. + +The virtio transport will use this. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 0b01aeb3d2fbf16787f0c9629f4ca52ae792f732) +--- + include/net/af_vsock.h | 3 +++ + net/vmw_vsock/af_vsock.c | 9 +++++++++ + 2 files changed, 12 insertions(+) + +diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h +index e9eb2d6..23f5525 100644 +--- a/include/net/af_vsock.h ++++ b/include/net/af_vsock.h +@@ -165,6 +165,9 @@ static inline int vsock_core_init(const struct vsock_transport *t) + } + void vsock_core_exit(void); + ++/* The transport may downcast this to access transport-specific functions */ ++const struct vsock_transport *vsock_core_get_transport(void); ++ + /**** UTILS ****/ + + void vsock_release_pending(struct sock *pending); +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index b96ac91..e34d96f 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1995,6 +1995,15 @@ void vsock_core_exit(void) + } + EXPORT_SYMBOL_GPL(vsock_core_exit); + ++const struct vsock_transport *vsock_core_get_transport(void) ++{ ++ /* vsock_register_mutex not taken since only the transport uses this ++ * function and only while registered. 
++ */ ++ return transport; ++} ++EXPORT_SYMBOL_GPL(vsock_core_get_transport); ++ + MODULE_AUTHOR("VMware, Inc."); + MODULE_DESCRIPTION("VMware Virtual Socket Family"); + MODULE_VERSION("1.0.1.0-k"); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch b/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch new file mode 100644 index 000000000..36f65a70d --- /dev/null +++ b/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch @@ -0,0 +1,83 @@ +From 816c87fe6ecfa46981c0ca332d21f1e0d8bfd8a0 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 28 Jul 2016 15:36:31 +0100 +Subject: [PATCH 06/42] VSOCK: defer sock removal to transports + +The virtio transport will implement graceful shutdown and the related +SO_LINGER socket option. This requires orphaning the sock but keeping +it in the table of connections after .release(). + +This patch adds the vsock_remove_sock() function and leaves it up to the +transport when to remove the sock. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6773b7dc39f165bd9d824b50ac52cbb3f87d53c8) +--- + include/net/af_vsock.h | 1 + + net/vmw_vsock/af_vsock.c | 16 ++++++++++------ + net/vmw_vsock/vmci_transport.c | 2 ++ + 3 files changed, 13 insertions(+), 6 deletions(-) + +diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h +index 23f5525..3af0b22 100644 +--- a/include/net/af_vsock.h ++++ b/include/net/af_vsock.h +@@ -180,6 +180,7 @@ void vsock_remove_connected(struct vsock_sock *vsk); + struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); + struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); ++void vsock_remove_sock(struct vsock_sock *vsk); + void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + + #endif /* __AF_VSOCK_H__ */ +diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c +index e34d96f..17dbbe6 100644 +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -344,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk) + return ret; + } + ++void vsock_remove_sock(struct vsock_sock *vsk) ++{ ++ if (vsock_in_bound_table(vsk)) ++ vsock_remove_bound(vsk); ++ ++ if (vsock_in_connected_table(vsk)) ++ vsock_remove_connected(vsk); ++} ++EXPORT_SYMBOL_GPL(vsock_remove_sock); ++ + void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) + { + int i; +@@ -660,12 +670,6 @@ static void __vsock_release(struct sock *sk) + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. 
*/ + +- if (vsock_in_bound_table(vsk)) +- vsock_remove_bound(vsk); +- +- if (vsock_in_connected_table(vsk)) +- vsock_remove_connected(vsk); +- + transport->release(vsk); + + lock_sock(sk); +diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c +index 0a369bb..706991e 100644 +--- a/net/vmw_vsock/vmci_transport.c ++++ b/net/vmw_vsock/vmci_transport.c +@@ -1644,6 +1644,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk) + + static void vmci_transport_release(struct vsock_sock *vsk) + { ++ vsock_remove_sock(vsk); ++ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; +-- +2.10.0 + diff --git a/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch b/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch new file mode 100644 index 000000000..aaa7a887a --- /dev/null +++ b/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch @@ -0,0 +1,1496 @@ +From fe9f8cb30a5c819adabb5b9b598f7776cbbdc4f0 Mon Sep 17 00:00:00 2001 +From: Asias He +Date: Thu, 28 Jul 2016 15:36:32 +0100 +Subject: [PATCH 07/42] VSOCK: Introduce virtio_vsock_common.ko + +This module contains the common code and header files for the following +virtio_transport and vhost_vsock kernel modules. + +Signed-off-by: Asias He +Signed-off-by: Claudio Imbrenda +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 06a8fc78367d070720af960dcecec917d3ae5f3b) +--- + MAINTAINERS | 10 + + include/linux/virtio_vsock.h | 154 ++++ + include/net/af_vsock.h | 2 + + .../trace/events/vsock_virtio_transport_common.h | 144 +++ + include/uapi/linux/Kbuild | 1 + + include/uapi/linux/virtio_ids.h | 1 + + include/uapi/linux/virtio_vsock.h | 94 ++ + net/vmw_vsock/virtio_transport_common.c | 992 +++++++++++++++++++++ + 8 files changed, 1398 insertions(+) + create mode 100644 include/linux/virtio_vsock.h + create mode 100644 include/trace/events/vsock_virtio_transport_common.h + create mode 100644 include/uapi/linux/virtio_vsock.h + create mode 100644 net/vmw_vsock/virtio_transport_common.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index ab65bbe..b93ba8b 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -11382,6 +11382,16 @@ S: Maintained + F: drivers/media/v4l2-core/videobuf2-* + F: include/media/videobuf2-* + ++VIRTIO AND VHOST VSOCK DRIVER ++M: Stefan Hajnoczi ++L: kvm@vger.kernel.org ++L: virtualization@lists.linux-foundation.org ++L: netdev@vger.kernel.org ++S: Maintained ++F: include/linux/virtio_vsock.h ++F: include/uapi/linux/virtio_vsock.h ++F: net/vmw_vsock/virtio_transport_common.c ++ + VIRTUAL SERIO DEVICE DRIVER + M: Stephen Chandler Paul + S: Maintained +diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h +new file mode 100644 +index 0000000..9638bfe +--- /dev/null ++++ b/include/linux/virtio_vsock.h +@@ -0,0 +1,154 @@ ++#ifndef _LINUX_VIRTIO_VSOCK_H ++#define _LINUX_VIRTIO_VSOCK_H ++ ++#include ++#include ++#include ++#include ++ ++#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 ++#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) ++#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) ++#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) ++#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL ++#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) ++ ++enum { ++ VSOCK_VQ_RX = 0, /* for host to guest data */ ++ VSOCK_VQ_TX = 1, /* for guest to host data */ ++ VSOCK_VQ_EVENT = 2, ++ VSOCK_VQ_MAX =
3, ++}; ++ ++/* Per-socket state (accessed via vsk->trans) */ ++struct virtio_vsock_sock { ++ struct vsock_sock *vsk; ++ ++ /* Protected by lock_sock(sk_vsock(trans->vsk)) */ ++ u32 buf_size; ++ u32 buf_size_min; ++ u32 buf_size_max; ++ ++ spinlock_t tx_lock; ++ spinlock_t rx_lock; ++ ++ /* Protected by tx_lock */ ++ u32 tx_cnt; ++ u32 buf_alloc; ++ u32 peer_fwd_cnt; ++ u32 peer_buf_alloc; ++ ++ /* Protected by rx_lock */ ++ u32 fwd_cnt; ++ u32 rx_bytes; ++ struct list_head rx_queue; ++}; ++ ++struct virtio_vsock_pkt { ++ struct virtio_vsock_hdr hdr; ++ struct work_struct work; ++ struct list_head list; ++ void *buf; ++ u32 len; ++ u32 off; ++ bool reply; ++}; ++ ++struct virtio_vsock_pkt_info { ++ u32 remote_cid, remote_port; ++ struct msghdr *msg; ++ u32 pkt_len; ++ u16 type; ++ u16 op; ++ u32 flags; ++ bool reply; ++}; ++ ++struct virtio_transport { ++ /* This must be the first field */ ++ struct vsock_transport transport; ++ ++ /* Takes ownership of the packet */ ++ int (*send_pkt)(struct virtio_vsock_pkt *pkt); ++}; ++ ++ssize_t ++virtio_transport_stream_dequeue(struct vsock_sock *vsk, ++ struct msghdr *msg, ++ size_t len, ++ int type); ++int ++virtio_transport_dgram_dequeue(struct vsock_sock *vsk, ++ struct msghdr *msg, ++ size_t len, int flags); ++ ++s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); ++s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); ++ ++int virtio_transport_do_socket_init(struct vsock_sock *vsk, ++ struct vsock_sock *psk); ++u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); ++u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); ++u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); ++void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); ++void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); ++void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); ++int ++virtio_transport_notify_poll_in(struct vsock_sock *vsk, ++ size_t target, ++ bool *data_ready_now); ++int ++virtio_transport_notify_poll_out(struct vsock_sock *vsk, ++ size_t target, ++ bool *space_available_now); ++ ++int virtio_transport_notify_recv_init(struct vsock_sock *vsk, ++ size_t target, struct vsock_transport_recv_notify_data *data); ++int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, ++ size_t target, struct vsock_transport_recv_notify_data *data); ++int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, ++ size_t target, struct vsock_transport_recv_notify_data *data); ++int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, ++ size_t target, ssize_t copied, bool data_read, ++ struct vsock_transport_recv_notify_data *data); ++int virtio_transport_notify_send_init(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *data); ++int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *data); ++int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *data); ++int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, ++ ssize_t written, struct vsock_transport_send_notify_data *data); ++ ++u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); ++bool virtio_transport_stream_is_active(struct vsock_sock *vsk); ++bool virtio_transport_stream_allow(u32 cid, u32 port); ++int virtio_transport_dgram_bind(struct vsock_sock *vsk, ++ struct sockaddr_vm *addr); ++bool virtio_transport_dgram_allow(u32 cid, u32 
port); ++ ++int virtio_transport_connect(struct vsock_sock *vsk); ++ ++int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); ++ ++void virtio_transport_release(struct vsock_sock *vsk); ++ ++ssize_t ++virtio_transport_stream_enqueue(struct vsock_sock *vsk, ++ struct msghdr *msg, ++ size_t len); ++int ++virtio_transport_dgram_enqueue(struct vsock_sock *vsk, ++ struct sockaddr_vm *remote_addr, ++ struct msghdr *msg, ++ size_t len); ++ ++void virtio_transport_destruct(struct vsock_sock *vsk); ++ ++void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); ++void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); ++void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); ++u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); ++void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); ++ ++#endif /* _LINUX_VIRTIO_VSOCK_H */ +diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h +index 3af0b22..f275896 100644 +--- a/include/net/af_vsock.h ++++ b/include/net/af_vsock.h +@@ -63,6 +63,8 @@ struct vsock_sock { + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; ++ struct delayed_work close_work; ++ bool close_work_scheduled; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; +diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h +new file mode 100644 +index 0000000..b7f1d62 +--- /dev/null ++++ b/include/trace/events/vsock_virtio_transport_common.h +@@ -0,0 +1,144 @@ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM vsock ++ ++#if !defined(_TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H) || \ ++ defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H ++ ++#include ++ ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM); ++ ++#define show_type(val) \ ++ __print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }) ++ ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID); ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST); ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RESPONSE); ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RST); ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN); ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW); ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE); ++TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST); ++ ++#define show_op(val) \ ++ __print_symbolic(val, \ ++ { VIRTIO_VSOCK_OP_INVALID, "INVALID" }, \ ++ { VIRTIO_VSOCK_OP_REQUEST, "REQUEST" }, \ ++ { VIRTIO_VSOCK_OP_RESPONSE, "RESPONSE" }, \ ++ { VIRTIO_VSOCK_OP_RST, "RST" }, \ ++ { VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \ ++ { VIRTIO_VSOCK_OP_RW, "RW" }, \ ++ { VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \ ++ { VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" }) ++ ++TRACE_EVENT(virtio_transport_alloc_pkt, ++ TP_PROTO( ++ __u32 src_cid, __u32 src_port, ++ __u32 dst_cid, __u32 dst_port, ++ __u32 len, ++ __u16 type, ++ __u16 op, ++ __u32 flags ++ ), ++ TP_ARGS( ++ src_cid, src_port, ++ dst_cid, dst_port, ++ len, ++ type, ++ op, ++ flags ++ ), ++ TP_STRUCT__entry( ++ __field(__u32, src_cid) ++ __field(__u32, src_port) ++ __field(__u32, dst_cid) ++ __field(__u32, dst_port) ++ __field(__u32, len) ++ __field(__u16, type) ++ __field(__u16, op) ++ __field(__u32, flags) ++ ), ++ TP_fast_assign( ++ __entry->src_cid = src_cid; ++ __entry->src_port = src_port; ++ __entry->dst_cid = dst_cid; ++ __entry->dst_port = dst_port; ++ __entry->len = len; ++ __entry->type = type; ++ __entry->op = op; ++ __entry->flags = flags; ++ ), ++ TP_printk("%u:%u -> %u:%u 
len=%u type=%s op=%s flags=%#x", ++ __entry->src_cid, __entry->src_port, ++ __entry->dst_cid, __entry->dst_port, ++ __entry->len, ++ show_type(__entry->type), ++ show_op(__entry->op), ++ __entry->flags) ++); ++ ++TRACE_EVENT(virtio_transport_recv_pkt, ++ TP_PROTO( ++ __u32 src_cid, __u32 src_port, ++ __u32 dst_cid, __u32 dst_port, ++ __u32 len, ++ __u16 type, ++ __u16 op, ++ __u32 flags, ++ __u32 buf_alloc, ++ __u32 fwd_cnt ++ ), ++ TP_ARGS( ++ src_cid, src_port, ++ dst_cid, dst_port, ++ len, ++ type, ++ op, ++ flags, ++ buf_alloc, ++ fwd_cnt ++ ), ++ TP_STRUCT__entry( ++ __field(__u32, src_cid) ++ __field(__u32, src_port) ++ __field(__u32, dst_cid) ++ __field(__u32, dst_port) ++ __field(__u32, len) ++ __field(__u16, type) ++ __field(__u16, op) ++ __field(__u32, flags) ++ __field(__u32, buf_alloc) ++ __field(__u32, fwd_cnt) ++ ), ++ TP_fast_assign( ++ __entry->src_cid = src_cid; ++ __entry->src_port = src_port; ++ __entry->dst_cid = dst_cid; ++ __entry->dst_port = dst_port; ++ __entry->len = len; ++ __entry->type = type; ++ __entry->op = op; ++ __entry->flags = flags; ++ __entry->buf_alloc = buf_alloc; ++ __entry->fwd_cnt = fwd_cnt; ++ ), ++ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x " ++ "buf_alloc=%u fwd_cnt=%u", ++ __entry->src_cid, __entry->src_port, ++ __entry->dst_cid, __entry->dst_port, ++ __entry->len, ++ show_type(__entry->type), ++ show_op(__entry->op), ++ __entry->flags, ++ __entry->buf_alloc, ++ __entry->fwd_cnt) ++); ++ ++#endif /* _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H */ ++ ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE vsock_virtio_transport_common ++ ++/* This part must be outside protection */ ++#include +diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild +index ebd10e6..6c51a4d 100644 +--- a/include/uapi/linux/Kbuild ++++ b/include/uapi/linux/Kbuild +@@ -447,6 +447,7 @@ header-y += virtio_ring.h + header-y += virtio_rng.h + header-y += virtio_scsi.h + header-y += virtio_types.h ++header-y += virtio_vsock.h + header-y += vm_sockets.h + header-y += vt.h + header-y += wait.h +diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h +index 77925f5..3228d58 100644 +--- a/include/uapi/linux/virtio_ids.h ++++ b/include/uapi/linux/virtio_ids.h +@@ -41,5 +41,6 @@ + #define VIRTIO_ID_CAIF 12 /* Virtio caif */ + #define VIRTIO_ID_GPU 16 /* virtio GPU */ + #define VIRTIO_ID_INPUT 18 /* virtio input */ ++#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ + + #endif /* _LINUX_VIRTIO_IDS_H */ +diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h +new file mode 100644 +index 0000000..6b011c1 +--- /dev/null ++++ b/include/uapi/linux/virtio_vsock.h +@@ -0,0 +1,94 @@ ++/* ++ * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so ++ * anyone can use the definitions to implement compatible drivers/servers: ++ * ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of IBM nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' ++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * Copyright (C) Red Hat, Inc., 2013-2015 ++ * Copyright (C) Asias He , 2013 ++ * Copyright (C) Stefan Hajnoczi , 2015 ++ */ ++ ++#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H ++#define _UAPI_LINUX_VIRTIO_VOSCK_H ++ ++#include ++#include ++#include ++ ++struct virtio_vsock_config { ++ __le64 guest_cid; ++} __attribute__((packed)); ++ ++enum virtio_vsock_event_id { ++ VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, ++}; ++ ++struct virtio_vsock_event { ++ __le32 id; ++} __attribute__((packed)); ++ ++struct virtio_vsock_hdr { ++ __le64 src_cid; ++ __le64 dst_cid; ++ __le32 src_port; ++ __le32 dst_port; ++ __le32 len; ++ __le16 type; /* enum virtio_vsock_type */ ++ __le16 op; /* enum virtio_vsock_op */ ++ __le32 flags; ++ __le32 buf_alloc; ++ __le32 fwd_cnt; ++} __attribute__((packed)); ++ ++enum virtio_vsock_type { ++ VIRTIO_VSOCK_TYPE_STREAM = 1, ++}; ++ ++enum virtio_vsock_op { ++ VIRTIO_VSOCK_OP_INVALID = 0, ++ ++ /* Connect operations */ ++ VIRTIO_VSOCK_OP_REQUEST = 1, ++ VIRTIO_VSOCK_OP_RESPONSE = 2, ++ VIRTIO_VSOCK_OP_RST = 3, ++ VIRTIO_VSOCK_OP_SHUTDOWN = 4, ++ ++ /* To send payload */ ++ VIRTIO_VSOCK_OP_RW = 5, ++ ++ /* Tell the peer our credit info */ ++ VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, ++ /* Request the peer to send the credit info to us */ ++ VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, ++}; ++ ++/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ ++enum virtio_vsock_shutdown { ++ VIRTIO_VSOCK_SHUTDOWN_RCV = 1, ++ VIRTIO_VSOCK_SHUTDOWN_SEND = 2, ++}; ++ ++#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ +diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c +new file mode 100644 +index 0000000..a53b3a1 +--- /dev/null ++++ b/net/vmw_vsock/virtio_transport_common.c +@@ -0,0 +1,992 @@ ++/* ++ * common code for virtio vsock ++ * ++ * Copyright (C) 2013-2015 Red Hat, Inc. ++ * Author: Asias He ++ * Stefan Hajnoczi ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++/* How long to wait for graceful shutdown of a connection */ ++#define VSOCK_CLOSE_TIMEOUT (8 * HZ) ++ ++static const struct virtio_transport *virtio_transport_get_ops(void) ++{ ++ const struct vsock_transport *t = vsock_core_get_transport(); ++ ++ return container_of(t, struct virtio_transport, transport); ++} ++ ++struct virtio_vsock_pkt * ++virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, ++ size_t len, ++ u32 src_cid, ++ u32 src_port, ++ u32 dst_cid, ++ u32 dst_port) ++{ ++ struct virtio_vsock_pkt *pkt; ++ int err; ++ ++ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); ++ if (!pkt) ++ return NULL; ++ ++ pkt->hdr.type = cpu_to_le16(info->type); ++ pkt->hdr.op = cpu_to_le16(info->op); ++ pkt->hdr.src_cid = cpu_to_le64(src_cid); ++ pkt->hdr.dst_cid = cpu_to_le64(dst_cid); ++ pkt->hdr.src_port = cpu_to_le32(src_port); ++ pkt->hdr.dst_port = cpu_to_le32(dst_port); ++ pkt->hdr.flags = cpu_to_le32(info->flags); ++ pkt->len = len; ++ pkt->hdr.len = cpu_to_le32(len); ++ pkt->reply = info->reply; ++ ++ if (info->msg && len > 0) { ++ pkt->buf = kmalloc(len, GFP_KERNEL); ++ if (!pkt->buf) ++ goto out_pkt; ++ err = memcpy_from_msg(pkt->buf, info->msg, len); ++ if (err) ++ goto out; ++ } ++ ++ trace_virtio_transport_alloc_pkt(src_cid, src_port, ++ dst_cid, dst_port, ++ len, ++ info->type, ++ info->op, ++ info->flags); ++ ++ return pkt; ++ ++out: ++ kfree(pkt->buf); ++out_pkt: ++ kfree(pkt); ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); ++ ++static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, ++ struct virtio_vsock_pkt_info *info) ++{ ++ u32 src_cid, src_port, dst_cid, dst_port; ++ struct virtio_vsock_sock *vvs; ++ struct virtio_vsock_pkt *pkt; ++ u32 pkt_len = info->pkt_len; ++ ++ src_cid = vm_sockets_get_local_cid(); ++ src_port = vsk->local_addr.svm_port; ++ if (!info->remote_cid) { ++ dst_cid = vsk->remote_addr.svm_cid; ++ dst_port = vsk->remote_addr.svm_port; ++ } else { ++ dst_cid = info->remote_cid; ++ dst_port = info->remote_port; ++ } ++ ++ vvs = vsk->trans; ++ ++ /* we can send less than pkt_len bytes */ ++ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) ++ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; ++ ++ /* virtio_transport_get_credit might return less than pkt_len credit */ ++ pkt_len = virtio_transport_get_credit(vvs, pkt_len); ++ ++ /* Do not send zero length OP_RW pkt */ ++ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) ++ return pkt_len; ++ ++ pkt = virtio_transport_alloc_pkt(info, pkt_len, ++ src_cid, src_port, ++ dst_cid, dst_port); ++ if (!pkt) { ++ virtio_transport_put_credit(vvs, pkt_len); ++ return -ENOMEM; ++ } ++ ++ virtio_transport_inc_tx_pkt(vvs, pkt); ++ ++ return virtio_transport_get_ops()->send_pkt(pkt); ++} ++ ++static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, ++ struct virtio_vsock_pkt *pkt) ++{ ++ vvs->rx_bytes += pkt->len; ++} ++ ++static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, ++ struct virtio_vsock_pkt *pkt) ++{ ++ vvs->rx_bytes -= pkt->len; ++ vvs->fwd_cnt += pkt->len; ++} ++ ++void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) ++{ ++ spin_lock_bh(&vvs->tx_lock); ++ pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); ++ pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); ++ spin_unlock_bh(&vvs->tx_lock); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); ++ ++u32 
virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) ++{ ++ u32 ret; ++ ++ spin_lock_bh(&vvs->tx_lock); ++ ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); ++ if (ret > credit) ++ ret = credit; ++ vvs->tx_cnt += ret; ++ spin_unlock_bh(&vvs->tx_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_get_credit); ++ ++void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) ++{ ++ spin_lock_bh(&vvs->tx_lock); ++ vvs->tx_cnt -= credit; ++ spin_unlock_bh(&vvs->tx_lock); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_put_credit); ++ ++static int virtio_transport_send_credit_update(struct vsock_sock *vsk, ++ int type, ++ struct virtio_vsock_hdr *hdr) ++{ ++ struct virtio_vsock_pkt_info info = { ++ .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, ++ .type = type, ++ }; ++ ++ return virtio_transport_send_pkt_info(vsk, &info); ++} ++ ++static ssize_t ++virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, ++ struct msghdr *msg, ++ size_t len) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ struct virtio_vsock_pkt *pkt; ++ size_t bytes, total = 0; ++ int err = -EFAULT; ++ ++ spin_lock_bh(&vvs->rx_lock); ++ while (total < len && !list_empty(&vvs->rx_queue)) { ++ pkt = list_first_entry(&vvs->rx_queue, ++ struct virtio_vsock_pkt, list); ++ ++ bytes = len - total; ++ if (bytes > pkt->len - pkt->off) ++ bytes = pkt->len - pkt->off; ++ ++ /* sk_lock is held by caller so no one else can dequeue. ++ * Unlock rx_lock since memcpy_to_msg() may sleep. ++ */ ++ spin_unlock_bh(&vvs->rx_lock); ++ ++ err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); ++ if (err) ++ goto out; ++ ++ spin_lock_bh(&vvs->rx_lock); ++ ++ total += bytes; ++ pkt->off += bytes; ++ if (pkt->off == pkt->len) { ++ virtio_transport_dec_rx_pkt(vvs, pkt); ++ list_del(&pkt->list); ++ virtio_transport_free_pkt(pkt); ++ } ++ } ++ spin_unlock_bh(&vvs->rx_lock); ++ ++ /* Send a credit pkt to peer */ ++ virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, ++ NULL); ++ ++ return total; ++ ++out: ++ if (total) ++ err = total; ++ return err; ++} ++ ++ssize_t ++virtio_transport_stream_dequeue(struct vsock_sock *vsk, ++ struct msghdr *msg, ++ size_t len, int flags) ++{ ++ if (flags & MSG_PEEK) ++ return -EOPNOTSUPP; ++ ++ return virtio_transport_stream_do_dequeue(vsk, msg, len); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); ++ ++int ++virtio_transport_dgram_dequeue(struct vsock_sock *vsk, ++ struct msghdr *msg, ++ size_t len, int flags) ++{ ++ return -EOPNOTSUPP; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); ++ ++s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ s64 bytes; ++ ++ spin_lock_bh(&vvs->rx_lock); ++ bytes = vvs->rx_bytes; ++ spin_unlock_bh(&vvs->rx_lock); ++ ++ return bytes; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); ++ ++static s64 virtio_transport_has_space(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ s64 bytes; ++ ++ bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); ++ if (bytes < 0) ++ bytes = 0; ++ ++ return bytes; ++} ++ ++s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ s64 bytes; ++ ++ spin_lock_bh(&vvs->tx_lock); ++ bytes = virtio_transport_has_space(vsk); ++ spin_unlock_bh(&vvs->tx_lock); ++ ++ return bytes; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); ++ ++int virtio_transport_do_socket_init(struct vsock_sock *vsk, ++ struct vsock_sock 
*psk) ++{ ++ struct virtio_vsock_sock *vvs; ++ ++ vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); ++ if (!vvs) ++ return -ENOMEM; ++ ++ vsk->trans = vvs; ++ vvs->vsk = vsk; ++ if (psk) { ++ struct virtio_vsock_sock *ptrans = psk->trans; ++ ++ vvs->buf_size = ptrans->buf_size; ++ vvs->buf_size_min = ptrans->buf_size_min; ++ vvs->buf_size_max = ptrans->buf_size_max; ++ vvs->peer_buf_alloc = ptrans->peer_buf_alloc; ++ } else { ++ vvs->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; ++ vvs->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; ++ vvs->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; ++ } ++ ++ vvs->buf_alloc = vvs->buf_size; ++ ++ spin_lock_init(&vvs->rx_lock); ++ spin_lock_init(&vvs->tx_lock); ++ INIT_LIST_HEAD(&vvs->rx_queue); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); ++ ++u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ return vvs->buf_size; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); ++ ++u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ return vvs->buf_size_min; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); ++ ++u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ return vvs->buf_size_max; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); ++ ++void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) ++ val = VIRTIO_VSOCK_MAX_BUF_SIZE; ++ if (val < vvs->buf_size_min) ++ vvs->buf_size_min = val; ++ if (val > vvs->buf_size_max) ++ vvs->buf_size_max = val; ++ vvs->buf_size = val; ++ vvs->buf_alloc = val; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); ++ ++void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) ++ val = VIRTIO_VSOCK_MAX_BUF_SIZE; ++ if (val > vvs->buf_size) ++ vvs->buf_size = val; ++ vvs->buf_size_min = val; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); ++ ++void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) ++ val = VIRTIO_VSOCK_MAX_BUF_SIZE; ++ if (val < vvs->buf_size) ++ vvs->buf_size = val; ++ vvs->buf_size_max = val; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); ++ ++int ++virtio_transport_notify_poll_in(struct vsock_sock *vsk, ++ size_t target, ++ bool *data_ready_now) ++{ ++ if (vsock_stream_has_data(vsk)) ++ *data_ready_now = true; ++ else ++ *data_ready_now = false; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); ++ ++int ++virtio_transport_notify_poll_out(struct vsock_sock *vsk, ++ size_t target, ++ bool *space_avail_now) ++{ ++ s64 free_space; ++ ++ free_space = vsock_stream_has_space(vsk); ++ if (free_space > 0) ++ *space_avail_now = true; ++ else if (free_space == 0) ++ *space_avail_now = false; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); ++ ++int virtio_transport_notify_recv_init(struct vsock_sock *vsk, ++ size_t target, struct vsock_transport_recv_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); ++ ++int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, ++ size_t target, struct 
vsock_transport_recv_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); ++ ++int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, ++ size_t target, struct vsock_transport_recv_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); ++ ++int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, ++ size_t target, ssize_t copied, bool data_read, ++ struct vsock_transport_recv_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); ++ ++int virtio_transport_notify_send_init(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); ++ ++int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); ++ ++int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); ++ ++int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, ++ ssize_t written, struct vsock_transport_send_notify_data *data) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); ++ ++u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ return vvs->buf_size; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); ++ ++bool virtio_transport_stream_is_active(struct vsock_sock *vsk) ++{ ++ return true; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); ++ ++bool virtio_transport_stream_allow(u32 cid, u32 port) ++{ ++ return true; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); ++ ++int virtio_transport_dgram_bind(struct vsock_sock *vsk, ++ struct sockaddr_vm *addr) ++{ ++ return -EOPNOTSUPP; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); ++ ++bool virtio_transport_dgram_allow(u32 cid, u32 port) ++{ ++ return false; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); ++ ++int virtio_transport_connect(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_pkt_info info = { ++ .op = VIRTIO_VSOCK_OP_REQUEST, ++ .type = VIRTIO_VSOCK_TYPE_STREAM, ++ }; ++ ++ return virtio_transport_send_pkt_info(vsk, &info); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_connect); ++ ++int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) ++{ ++ struct virtio_vsock_pkt_info info = { ++ .op = VIRTIO_VSOCK_OP_SHUTDOWN, ++ .type = VIRTIO_VSOCK_TYPE_STREAM, ++ .flags = (mode & RCV_SHUTDOWN ? ++ VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | ++ (mode & SEND_SHUTDOWN ? 
++ VIRTIO_VSOCK_SHUTDOWN_SEND : 0), ++ }; ++ ++ return virtio_transport_send_pkt_info(vsk, &info); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_shutdown); ++ ++int ++virtio_transport_dgram_enqueue(struct vsock_sock *vsk, ++ struct sockaddr_vm *remote_addr, ++ struct msghdr *msg, ++ size_t dgram_len) ++{ ++ return -EOPNOTSUPP; ++} ++EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); ++ ++ssize_t ++virtio_transport_stream_enqueue(struct vsock_sock *vsk, ++ struct msghdr *msg, ++ size_t len) ++{ ++ struct virtio_vsock_pkt_info info = { ++ .op = VIRTIO_VSOCK_OP_RW, ++ .type = VIRTIO_VSOCK_TYPE_STREAM, ++ .msg = msg, ++ .pkt_len = len, ++ }; ++ ++ return virtio_transport_send_pkt_info(vsk, &info); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); ++ ++void virtio_transport_destruct(struct vsock_sock *vsk) ++{ ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ ++ kfree(vvs); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_destruct); ++ ++static int virtio_transport_reset(struct vsock_sock *vsk, ++ struct virtio_vsock_pkt *pkt) ++{ ++ struct virtio_vsock_pkt_info info = { ++ .op = VIRTIO_VSOCK_OP_RST, ++ .type = VIRTIO_VSOCK_TYPE_STREAM, ++ .reply = !!pkt, ++ }; ++ ++ /* Send RST only if the original pkt is not a RST pkt */ ++ if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) ++ return 0; ++ ++ return virtio_transport_send_pkt_info(vsk, &info); ++} ++ ++/* Normally packets are associated with a socket. There may be no socket if an ++ * attempt was made to connect to a socket that does not exist. ++ */ ++static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) ++{ ++ struct virtio_vsock_pkt_info info = { ++ .op = VIRTIO_VSOCK_OP_RST, ++ .type = le16_to_cpu(pkt->hdr.type), ++ .reply = true, ++ }; ++ ++ /* Send RST only if the original pkt is not a RST pkt */ ++ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) ++ return 0; ++ ++ pkt = virtio_transport_alloc_pkt(&info, 0, ++ le32_to_cpu(pkt->hdr.dst_cid), ++ le32_to_cpu(pkt->hdr.dst_port), ++ le32_to_cpu(pkt->hdr.src_cid), ++ le32_to_cpu(pkt->hdr.src_port)); ++ if (!pkt) ++ return -ENOMEM; ++ ++ return virtio_transport_get_ops()->send_pkt(pkt); ++} ++ ++static void virtio_transport_wait_close(struct sock *sk, long timeout) ++{ ++ if (timeout) { ++ DEFINE_WAIT(wait); ++ ++ do { ++ prepare_to_wait(sk_sleep(sk), &wait, ++ TASK_INTERRUPTIBLE); ++ if (sk_wait_event(sk, &timeout, ++ sock_flag(sk, SOCK_DONE))) ++ break; ++ } while (!signal_pending(current) && timeout); ++ ++ finish_wait(sk_sleep(sk), &wait); ++ } ++} ++ ++static void virtio_transport_do_close(struct vsock_sock *vsk, ++ bool cancel_timeout) ++{ ++ struct sock *sk = sk_vsock(vsk); ++ ++ sock_set_flag(sk, SOCK_DONE); ++ vsk->peer_shutdown = SHUTDOWN_MASK; ++ if (vsock_stream_has_data(vsk) <= 0) ++ sk->sk_state = SS_DISCONNECTING; ++ sk->sk_state_change(sk); ++ ++ if (vsk->close_work_scheduled && ++ (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { ++ vsk->close_work_scheduled = false; ++ ++ vsock_remove_sock(vsk); ++ ++ /* Release refcnt obtained when we scheduled the timeout */ ++ sock_put(sk); ++ } ++} ++ ++static void virtio_transport_close_timeout(struct work_struct *work) ++{ ++ struct vsock_sock *vsk = ++ container_of(work, struct vsock_sock, close_work.work); ++ struct sock *sk = sk_vsock(vsk); ++ ++ sock_hold(sk); ++ lock_sock(sk); ++ ++ if (!sock_flag(sk, SOCK_DONE)) { ++ (void)virtio_transport_reset(vsk, NULL); ++ ++ virtio_transport_do_close(vsk, false); ++ } ++ ++ vsk->close_work_scheduled = false; ++ ++ release_sock(sk); ++ sock_put(sk); ++} ++ 
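++/* The close path below is two-phase: virtio_transport_close() attempts a
++ * graceful SHUTDOWN (lingering if SO_LINGER is set); if the peer never
++ * answers, close_work fires after VSOCK_CLOSE_TIMEOUT (8 seconds) and the
++ * handler above sends a RST and releases the socket.
++ */
++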
++/* User context, vsk->sk is locked */ ++static bool virtio_transport_close(struct vsock_sock *vsk) ++{ ++ struct sock *sk = &vsk->sk; ++ ++ if (!(sk->sk_state == SS_CONNECTED || ++ sk->sk_state == SS_DISCONNECTING)) ++ return true; ++ ++ /* Already received SHUTDOWN from peer, reply with RST */ ++ if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { ++ (void)virtio_transport_reset(vsk, NULL); ++ return true; ++ } ++ ++ if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) ++ (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); ++ ++ if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) ++ virtio_transport_wait_close(sk, sk->sk_lingertime); ++ ++ if (sock_flag(sk, SOCK_DONE)) { ++ return true; ++ } ++ ++ sock_hold(sk); ++ INIT_DELAYED_WORK(&vsk->close_work, ++ virtio_transport_close_timeout); ++ vsk->close_work_scheduled = true; ++ schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT); ++ return false; ++} ++ ++void virtio_transport_release(struct vsock_sock *vsk) ++{ ++ struct sock *sk = &vsk->sk; ++ bool remove_sock = true; ++ ++ lock_sock(sk); ++ if (sk->sk_type == SOCK_STREAM) ++ remove_sock = virtio_transport_close(vsk); ++ release_sock(sk); ++ ++ if (remove_sock) ++ vsock_remove_sock(vsk); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_release); ++ ++static int ++virtio_transport_recv_connecting(struct sock *sk, ++ struct virtio_vsock_pkt *pkt) ++{ ++ struct vsock_sock *vsk = vsock_sk(sk); ++ int err; ++ int skerr; ++ ++ switch (le16_to_cpu(pkt->hdr.op)) { ++ case VIRTIO_VSOCK_OP_RESPONSE: ++ sk->sk_state = SS_CONNECTED; ++ sk->sk_socket->state = SS_CONNECTED; ++ vsock_insert_connected(vsk); ++ sk->sk_state_change(sk); ++ break; ++ case VIRTIO_VSOCK_OP_INVALID: ++ break; ++ case VIRTIO_VSOCK_OP_RST: ++ skerr = ECONNRESET; ++ err = 0; ++ goto destroy; ++ default: ++ skerr = EPROTO; ++ err = -EINVAL; ++ goto destroy; ++ } ++ return 0; ++ ++destroy: ++ virtio_transport_reset(vsk, pkt); ++ sk->sk_state = SS_UNCONNECTED; ++ sk->sk_err = skerr; ++ sk->sk_error_report(sk); ++ return err; ++} ++ ++static int ++virtio_transport_recv_connected(struct sock *sk, ++ struct virtio_vsock_pkt *pkt) ++{ ++ struct vsock_sock *vsk = vsock_sk(sk); ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ int err = 0; ++ ++ switch (le16_to_cpu(pkt->hdr.op)) { ++ case VIRTIO_VSOCK_OP_RW: ++ pkt->len = le32_to_cpu(pkt->hdr.len); ++ pkt->off = 0; ++ ++ spin_lock_bh(&vvs->rx_lock); ++ virtio_transport_inc_rx_pkt(vvs, pkt); ++ list_add_tail(&pkt->list, &vvs->rx_queue); ++ spin_unlock_bh(&vvs->rx_lock); ++ ++ sk->sk_data_ready(sk); ++ return err; ++ case VIRTIO_VSOCK_OP_CREDIT_UPDATE: ++ sk->sk_write_space(sk); ++ break; ++ case VIRTIO_VSOCK_OP_SHUTDOWN: ++ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) ++ vsk->peer_shutdown |= RCV_SHUTDOWN; ++ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) ++ vsk->peer_shutdown |= SEND_SHUTDOWN; ++ if (vsk->peer_shutdown == SHUTDOWN_MASK && ++ vsock_stream_has_data(vsk) <= 0) ++ sk->sk_state = SS_DISCONNECTING; ++ if (le32_to_cpu(pkt->hdr.flags)) ++ sk->sk_state_change(sk); ++ break; ++ case VIRTIO_VSOCK_OP_RST: ++ virtio_transport_do_close(vsk, true); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++ ++ virtio_transport_free_pkt(pkt); ++ return err; ++} ++ ++static void ++virtio_transport_recv_disconnecting(struct sock *sk, ++ struct virtio_vsock_pkt *pkt) ++{ ++ struct vsock_sock *vsk = vsock_sk(sk); ++ ++ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) ++ virtio_transport_do_close(vsk, true); ++} ++ ++static int 
++virtio_transport_send_response(struct vsock_sock *vsk, ++ struct virtio_vsock_pkt *pkt) ++{ ++ struct virtio_vsock_pkt_info info = { ++ .op = VIRTIO_VSOCK_OP_RESPONSE, ++ .type = VIRTIO_VSOCK_TYPE_STREAM, ++ .remote_cid = le32_to_cpu(pkt->hdr.src_cid), ++ .remote_port = le32_to_cpu(pkt->hdr.src_port), ++ .reply = true, ++ }; ++ ++ return virtio_transport_send_pkt_info(vsk, &info); ++} ++ ++/* Handle server socket */ ++static int ++virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) ++{ ++ struct vsock_sock *vsk = vsock_sk(sk); ++ struct vsock_sock *vchild; ++ struct sock *child; ++ ++ if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { ++ virtio_transport_reset(vsk, pkt); ++ return -EINVAL; ++ } ++ ++ if (sk_acceptq_is_full(sk)) { ++ virtio_transport_reset(vsk, pkt); ++ return -ENOMEM; ++ } ++ ++ child = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, ++ sk->sk_type, 0); ++ if (!child) { ++ virtio_transport_reset(vsk, pkt); ++ return -ENOMEM; ++ } ++ ++ sk->sk_ack_backlog++; ++ ++ lock_sock_nested(child, SINGLE_DEPTH_NESTING); ++ ++ child->sk_state = SS_CONNECTED; ++ ++ vchild = vsock_sk(child); ++ vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid), ++ le32_to_cpu(pkt->hdr.dst_port)); ++ vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid), ++ le32_to_cpu(pkt->hdr.src_port)); ++ ++ vsock_insert_connected(vchild); ++ vsock_enqueue_accept(sk, child); ++ virtio_transport_send_response(vchild, pkt); ++ ++ release_sock(child); ++ ++ sk->sk_data_ready(sk); ++ return 0; ++} ++ ++static bool virtio_transport_space_update(struct sock *sk, ++ struct virtio_vsock_pkt *pkt) ++{ ++ struct vsock_sock *vsk = vsock_sk(sk); ++ struct virtio_vsock_sock *vvs = vsk->trans; ++ bool space_available; ++ ++ /* buf_alloc and fwd_cnt is always included in the hdr */ ++ spin_lock_bh(&vvs->tx_lock); ++ vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); ++ vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); ++ space_available = virtio_transport_has_space(vsk); ++ spin_unlock_bh(&vvs->tx_lock); ++ return space_available; ++} ++ ++/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex ++ * lock. 
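++ * The socket lock is not held yet: virtio_transport_recv_pkt() first copies
++ * the peer's buf_alloc/fwd_cnt out of the header, then takes lock_sock()
++ * and, if credit became available, wakes writers via sk_write_space().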
++ */ ++void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) ++{ ++ struct sockaddr_vm src, dst; ++ struct vsock_sock *vsk; ++ struct sock *sk; ++ bool space_available; ++ ++ vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), ++ le32_to_cpu(pkt->hdr.src_port)); ++ vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), ++ le32_to_cpu(pkt->hdr.dst_port)); ++ ++ trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, ++ dst.svm_cid, dst.svm_port, ++ le32_to_cpu(pkt->hdr.len), ++ le16_to_cpu(pkt->hdr.type), ++ le16_to_cpu(pkt->hdr.op), ++ le32_to_cpu(pkt->hdr.flags), ++ le32_to_cpu(pkt->hdr.buf_alloc), ++ le32_to_cpu(pkt->hdr.fwd_cnt)); ++ ++ if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { ++ (void)virtio_transport_reset_no_sock(pkt); ++ goto free_pkt; ++ } ++ ++ /* The socket must be in connected or bound table ++ * otherwise send reset back ++ */ ++ sk = vsock_find_connected_socket(&src, &dst); ++ if (!sk) { ++ sk = vsock_find_bound_socket(&dst); ++ if (!sk) { ++ (void)virtio_transport_reset_no_sock(pkt); ++ goto free_pkt; ++ } ++ } ++ ++ vsk = vsock_sk(sk); ++ ++ space_available = virtio_transport_space_update(sk, pkt); ++ ++ lock_sock(sk); ++ ++ /* Update CID in case it has changed after a transport reset event */ ++ vsk->local_addr.svm_cid = dst.svm_cid; ++ ++ if (space_available) ++ sk->sk_write_space(sk); ++ ++ switch (sk->sk_state) { ++ case VSOCK_SS_LISTEN: ++ virtio_transport_recv_listen(sk, pkt); ++ virtio_transport_free_pkt(pkt); ++ break; ++ case SS_CONNECTING: ++ virtio_transport_recv_connecting(sk, pkt); ++ virtio_transport_free_pkt(pkt); ++ break; ++ case SS_CONNECTED: ++ virtio_transport_recv_connected(sk, pkt); ++ break; ++ case SS_DISCONNECTING: ++ virtio_transport_recv_disconnecting(sk, pkt); ++ virtio_transport_free_pkt(pkt); ++ break; ++ default: ++ virtio_transport_free_pkt(pkt); ++ break; ++ } ++ release_sock(sk); ++ ++ /* Release refcnt obtained when we fetched this socket out of the ++ * bound or connected list. ++ */ ++ sock_put(sk); ++ return; ++ ++free_pkt: ++ virtio_transport_free_pkt(pkt); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); ++ ++void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) ++{ ++ kfree(pkt->buf); ++ kfree(pkt); ++} ++EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); ++ ++MODULE_LICENSE("GPL v2"); ++MODULE_AUTHOR("Asias He"); ++MODULE_DESCRIPTION("common code for virtio vsock"); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch b/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch new file mode 100644 index 000000000..78931bc21 --- /dev/null +++ b/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch @@ -0,0 +1,663 @@ +From c384834d9495c7b2a36b0054d08ddf3240687bdc Mon Sep 17 00:00:00 2001 +From: Asias He +Date: Thu, 28 Jul 2016 15:36:33 +0100 +Subject: [PATCH 08/42] VSOCK: Introduce virtio_transport.ko + +VM sockets virtio transport implementation. This driver runs in the +guest. + +Signed-off-by: Asias He +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 0ea9e1d3a9e3ef7d2a1462d3de6b95131dc7d872) +--- + MAINTAINERS | 1 + + net/vmw_vsock/virtio_transport.c | 624 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 625 insertions(+) + create mode 100644 net/vmw_vsock/virtio_transport.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index b93ba8b..82d1123 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -11391,6 +11391,7 @@ S: Maintained + F: include/linux/virtio_vsock.h + F: include/uapi/linux/virtio_vsock.h + F: net/vmw_vsock/virtio_transport_common.c ++F: net/vmw_vsock/virtio_transport.c + + VIRTUAL SERIO DEVICE DRIVER + M: Stephen Chandler Paul +diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c +new file mode 100644 +index 0000000..699dfab +--- /dev/null ++++ b/net/vmw_vsock/virtio_transport.c +@@ -0,0 +1,624 @@ ++/* ++ * virtio transport for vsock ++ * ++ * Copyright (C) 2013-2015 Red Hat, Inc. ++ * Author: Asias He <asias@redhat.com> ++ * Stefan Hajnoczi <stefanha@redhat.com> ++ * ++ * Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s ++ * early virtio-vsock proof-of-concept bits. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. ++ */ ++#include <linux/spinlock.h> ++#include <linux/module.h> ++#include <linux/list.h> ++#include <linux/atomic.h> ++#include <linux/virtio.h> ++#include <linux/virtio_ids.h> ++#include <linux/virtio_config.h> ++#include <linux/virtio_vsock.h> ++#include <net/sock.h> ++#include <linux/mutex.h> ++#include <net/af_vsock.h> ++ ++static struct workqueue_struct *virtio_vsock_workqueue; ++static struct virtio_vsock *the_virtio_vsock; ++static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ ++ ++struct virtio_vsock { ++ struct virtio_device *vdev; ++ struct virtqueue *vqs[VSOCK_VQ_MAX]; ++ ++ /* Virtqueue processing is deferred to a workqueue */ ++ struct work_struct tx_work; ++ struct work_struct rx_work; ++ struct work_struct event_work; ++ ++ /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX] ++ * must be accessed with tx_lock held. ++ */ ++ struct mutex tx_lock; ++ ++ struct work_struct send_pkt_work; ++ spinlock_t send_pkt_list_lock; ++ struct list_head send_pkt_list; ++ ++ atomic_t queued_replies; ++ ++ /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] ++ * must be accessed with rx_lock held. ++ */ ++ struct mutex rx_lock; ++ int rx_buf_nr; ++ int rx_buf_max_nr; ++ ++ /* The following fields are protected by event_lock. ++ * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
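++ * The eight event_list entries below are preallocated and kept posted
++ * on the event virtqueue (see virtio_vsock_event_fill()).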
++ */ ++ struct mutex event_lock; ++ struct virtio_vsock_event event_list[8]; ++ ++ u32 guest_cid; ++}; ++ ++static struct virtio_vsock *virtio_vsock_get(void) ++{ ++ return the_virtio_vsock; ++} ++ ++static u32 virtio_transport_get_local_cid(void) ++{ ++ struct virtio_vsock *vsock = virtio_vsock_get(); ++ ++ return vsock->guest_cid; ++} ++ ++static void ++virtio_transport_send_pkt_work(struct work_struct *work) ++{ ++ struct virtio_vsock *vsock = ++ container_of(work, struct virtio_vsock, send_pkt_work); ++ struct virtqueue *vq; ++ bool added = false; ++ bool restart_rx = false; ++ ++ mutex_lock(&vsock->tx_lock); ++ ++ vq = vsock->vqs[VSOCK_VQ_TX]; ++ ++ /* Avoid unnecessary interrupts while we're processing the ring */ ++ virtqueue_disable_cb(vq); ++ ++ for (;;) { ++ struct virtio_vsock_pkt *pkt; ++ struct scatterlist hdr, buf, *sgs[2]; ++ int ret, in_sg = 0, out_sg = 0; ++ bool reply; ++ ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ if (list_empty(&vsock->send_pkt_list)) { ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ virtqueue_enable_cb(vq); ++ break; ++ } ++ ++ pkt = list_first_entry(&vsock->send_pkt_list, ++ struct virtio_vsock_pkt, list); ++ list_del_init(&pkt->list); ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ reply = pkt->reply; ++ ++ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); ++ sgs[out_sg++] = &hdr; ++ if (pkt->buf) { ++ sg_init_one(&buf, pkt->buf, pkt->len); ++ sgs[out_sg++] = &buf; ++ } ++ ++ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); ++ if (ret < 0) { ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ list_add(&pkt->list, &vsock->send_pkt_list); ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) ++ continue; /* retry now that we have more space */ ++ break; ++ } ++ ++ if (reply) { ++ struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; ++ int val; ++ ++ val = atomic_dec_return(&vsock->queued_replies); ++ ++ /* Do we now have resources to resume rx processing? 
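++ * rx is stalled once queued_replies reaches the rx vring size, so a
++ * pre-decrement value (val + 1) equal to that size means this reply
++ * just freed the first slot.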
*/ ++ if (val + 1 == virtqueue_get_vring_size(rx_vq)) ++ restart_rx = true; ++ } ++ ++ added = true; ++ } ++ ++ if (added) ++ virtqueue_kick(vq); ++ ++ mutex_unlock(&vsock->tx_lock); ++ ++ if (restart_rx) ++ queue_work(virtio_vsock_workqueue, &vsock->rx_work); ++} ++ ++static int ++virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) ++{ ++ struct virtio_vsock *vsock; ++ int len = pkt->len; ++ ++ vsock = virtio_vsock_get(); ++ if (!vsock) { ++ virtio_transport_free_pkt(pkt); ++ return -ENODEV; ++ } ++ ++ if (pkt->reply) ++ atomic_inc(&vsock->queued_replies); ++ ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ list_add_tail(&pkt->list, &vsock->send_pkt_list); ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); ++ return len; ++} ++ ++static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) ++{ ++ int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; ++ struct virtio_vsock_pkt *pkt; ++ struct scatterlist hdr, buf, *sgs[2]; ++ struct virtqueue *vq; ++ int ret; ++ ++ vq = vsock->vqs[VSOCK_VQ_RX]; ++ ++ do { ++ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); ++ if (!pkt) ++ break; ++ ++ pkt->buf = kmalloc(buf_len, GFP_KERNEL); ++ if (!pkt->buf) { ++ virtio_transport_free_pkt(pkt); ++ break; ++ } ++ ++ pkt->len = buf_len; ++ ++ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); ++ sgs[0] = &hdr; ++ ++ sg_init_one(&buf, pkt->buf, buf_len); ++ sgs[1] = &buf; ++ ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); ++ if (ret) { ++ virtio_transport_free_pkt(pkt); ++ break; ++ } ++ vsock->rx_buf_nr++; ++ } while (vq->num_free); ++ if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) ++ vsock->rx_buf_max_nr = vsock->rx_buf_nr; ++ virtqueue_kick(vq); ++} ++ ++static void virtio_transport_tx_work(struct work_struct *work) ++{ ++ struct virtio_vsock *vsock = ++ container_of(work, struct virtio_vsock, tx_work); ++ struct virtqueue *vq; ++ bool added = false; ++ ++ vq = vsock->vqs[VSOCK_VQ_TX]; ++ mutex_lock(&vsock->tx_lock); ++ do { ++ struct virtio_vsock_pkt *pkt; ++ unsigned int len; ++ ++ virtqueue_disable_cb(vq); ++ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { ++ virtio_transport_free_pkt(pkt); ++ added = true; ++ } ++ } while (!virtqueue_enable_cb(vq)); ++ mutex_unlock(&vsock->tx_lock); ++ ++ if (added) ++ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); ++} ++ ++/* Is there space left for replies to rx packets? */ ++static bool virtio_transport_more_replies(struct virtio_vsock *vsock) ++{ ++ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX]; ++ int val; ++ ++ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ ++ val = atomic_read(&vsock->queued_replies); ++ ++ return val < virtqueue_get_vring_size(vq); ++} ++ ++static void virtio_transport_rx_work(struct work_struct *work) ++{ ++ struct virtio_vsock *vsock = ++ container_of(work, struct virtio_vsock, rx_work); ++ struct virtqueue *vq; ++ ++ vq = vsock->vqs[VSOCK_VQ_RX]; ++ ++ mutex_lock(&vsock->rx_lock); ++ ++ do { ++ virtqueue_disable_cb(vq); ++ for (;;) { ++ struct virtio_vsock_pkt *pkt; ++ unsigned int len; ++ ++ if (!virtio_transport_more_replies(vsock)) { ++ /* Stop rx until the device processes already ++ * pending replies. Leave rx virtqueue ++ * callbacks disabled. 
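++ * (virtio_transport_send_pkt_work() requeues rx_work once a reply
++ * slot frees up.)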
++ */ ++ goto out; ++ } ++ ++ pkt = virtqueue_get_buf(vq, &len); ++ if (!pkt) { ++ break; ++ } ++ ++ vsock->rx_buf_nr--; ++ ++ /* Drop short/long packets */ ++ if (unlikely(len < sizeof(pkt->hdr) || ++ len > sizeof(pkt->hdr) + pkt->len)) { ++ virtio_transport_free_pkt(pkt); ++ continue; ++ } ++ ++ pkt->len = len - sizeof(pkt->hdr); ++ virtio_transport_recv_pkt(pkt); ++ } ++ } while (!virtqueue_enable_cb(vq)); ++ ++out: ++ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) ++ virtio_vsock_rx_fill(vsock); ++ mutex_unlock(&vsock->rx_lock); ++} ++ ++/* event_lock must be held */ ++static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, ++ struct virtio_vsock_event *event) ++{ ++ struct scatterlist sg; ++ struct virtqueue *vq; ++ ++ vq = vsock->vqs[VSOCK_VQ_EVENT]; ++ ++ sg_init_one(&sg, event, sizeof(*event)); ++ ++ return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL); ++} ++ ++/* event_lock must be held */ ++static void virtio_vsock_event_fill(struct virtio_vsock *vsock) ++{ ++ size_t i; ++ ++ for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) { ++ struct virtio_vsock_event *event = &vsock->event_list[i]; ++ ++ virtio_vsock_event_fill_one(vsock, event); ++ } ++ ++ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); ++} ++ ++static void virtio_vsock_reset_sock(struct sock *sk) ++{ ++ lock_sock(sk); ++ sk->sk_state = SS_UNCONNECTED; ++ sk->sk_err = ECONNRESET; ++ sk->sk_error_report(sk); ++ release_sock(sk); ++} ++ ++static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) ++{ ++ struct virtio_device *vdev = vsock->vdev; ++ u64 guest_cid; ++ ++ vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), ++ &guest_cid, sizeof(guest_cid)); ++ vsock->guest_cid = le64_to_cpu(guest_cid); ++} ++ ++/* event_lock must be held */ ++static void virtio_vsock_event_handle(struct virtio_vsock *vsock, ++ struct virtio_vsock_event *event) ++{ ++ switch (le32_to_cpu(event->id)) { ++ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: ++ virtio_vsock_update_guest_cid(vsock); ++ vsock_for_each_connected_socket(virtio_vsock_reset_sock); ++ break; ++ } ++} ++ ++static void virtio_transport_event_work(struct work_struct *work) ++{ ++ struct virtio_vsock *vsock = ++ container_of(work, struct virtio_vsock, event_work); ++ struct virtqueue *vq; ++ ++ vq = vsock->vqs[VSOCK_VQ_EVENT]; ++ ++ mutex_lock(&vsock->event_lock); ++ ++ do { ++ struct virtio_vsock_event *event; ++ unsigned int len; ++ ++ virtqueue_disable_cb(vq); ++ while ((event = virtqueue_get_buf(vq, &len)) != NULL) { ++ if (len == sizeof(*event)) ++ virtio_vsock_event_handle(vsock, event); ++ ++ virtio_vsock_event_fill_one(vsock, event); ++ } ++ } while (!virtqueue_enable_cb(vq)); ++ ++ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); ++ ++ mutex_unlock(&vsock->event_lock); ++} ++ ++static void virtio_vsock_event_done(struct virtqueue *vq) ++{ ++ struct virtio_vsock *vsock = vq->vdev->priv; ++ ++ if (!vsock) ++ return; ++ queue_work(virtio_vsock_workqueue, &vsock->event_work); ++} ++ ++static void virtio_vsock_tx_done(struct virtqueue *vq) ++{ ++ struct virtio_vsock *vsock = vq->vdev->priv; ++ ++ if (!vsock) ++ return; ++ queue_work(virtio_vsock_workqueue, &vsock->tx_work); ++} ++ ++static void virtio_vsock_rx_done(struct virtqueue *vq) ++{ ++ struct virtio_vsock *vsock = vq->vdev->priv; ++ ++ if (!vsock) ++ return; ++ queue_work(virtio_vsock_workqueue, &vsock->rx_work); ++} ++ ++static struct virtio_transport virtio_transport = { ++ .transport = { ++ .get_local_cid = virtio_transport_get_local_cid, ++ ++ .init = 
virtio_transport_do_socket_init, ++ .destruct = virtio_transport_destruct, ++ .release = virtio_transport_release, ++ .connect = virtio_transport_connect, ++ .shutdown = virtio_transport_shutdown, ++ ++ .dgram_bind = virtio_transport_dgram_bind, ++ .dgram_dequeue = virtio_transport_dgram_dequeue, ++ .dgram_enqueue = virtio_transport_dgram_enqueue, ++ .dgram_allow = virtio_transport_dgram_allow, ++ ++ .stream_dequeue = virtio_transport_stream_dequeue, ++ .stream_enqueue = virtio_transport_stream_enqueue, ++ .stream_has_data = virtio_transport_stream_has_data, ++ .stream_has_space = virtio_transport_stream_has_space, ++ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, ++ .stream_is_active = virtio_transport_stream_is_active, ++ .stream_allow = virtio_transport_stream_allow, ++ ++ .notify_poll_in = virtio_transport_notify_poll_in, ++ .notify_poll_out = virtio_transport_notify_poll_out, ++ .notify_recv_init = virtio_transport_notify_recv_init, ++ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, ++ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, ++ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, ++ .notify_send_init = virtio_transport_notify_send_init, ++ .notify_send_pre_block = virtio_transport_notify_send_pre_block, ++ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, ++ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, ++ ++ .set_buffer_size = virtio_transport_set_buffer_size, ++ .set_min_buffer_size = virtio_transport_set_min_buffer_size, ++ .set_max_buffer_size = virtio_transport_set_max_buffer_size, ++ .get_buffer_size = virtio_transport_get_buffer_size, ++ .get_min_buffer_size = virtio_transport_get_min_buffer_size, ++ .get_max_buffer_size = virtio_transport_get_max_buffer_size, ++ }, ++ ++ .send_pkt = virtio_transport_send_pkt, ++}; ++ ++static int virtio_vsock_probe(struct virtio_device *vdev) ++{ ++ vq_callback_t *callbacks[] = { ++ virtio_vsock_rx_done, ++ virtio_vsock_tx_done, ++ virtio_vsock_event_done, ++ }; ++ static const char * const names[] = { ++ "rx", ++ "tx", ++ "event", ++ }; ++ struct virtio_vsock *vsock = NULL; ++ int ret; ++ ++ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); ++ if (ret) ++ return ret; ++ ++ /* Only one virtio-vsock device per guest is supported */ ++ if (the_virtio_vsock) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); ++ if (!vsock) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ vsock->vdev = vdev; ++ ++ ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, ++ vsock->vqs, callbacks, names); ++ if (ret < 0) ++ goto out; ++ ++ virtio_vsock_update_guest_cid(vsock); ++ ++ ret = vsock_core_init(&virtio_transport.transport); ++ if (ret < 0) ++ goto out_vqs; ++ ++ vsock->rx_buf_nr = 0; ++ vsock->rx_buf_max_nr = 0; ++ atomic_set(&vsock->queued_replies, 0); ++ ++ vdev->priv = vsock; ++ the_virtio_vsock = vsock; ++ mutex_init(&vsock->tx_lock); ++ mutex_init(&vsock->rx_lock); ++ mutex_init(&vsock->event_lock); ++ spin_lock_init(&vsock->send_pkt_list_lock); ++ INIT_LIST_HEAD(&vsock->send_pkt_list); ++ INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); ++ INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); ++ INIT_WORK(&vsock->event_work, virtio_transport_event_work); ++ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); ++ ++ mutex_lock(&vsock->rx_lock); ++ virtio_vsock_rx_fill(vsock); ++ mutex_unlock(&vsock->rx_lock); ++ ++ mutex_lock(&vsock->event_lock); ++ 
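/* Preload every event_list slot so the device can raise
++ * VIRTIO_VSOCK_EVENT_TRANSPORT_RESET at any time; the handler re-reads
++ * the guest CID and resets existing connections.
++ */
++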
virtio_vsock_event_fill(vsock); ++ mutex_unlock(&vsock->event_lock); ++ ++ mutex_unlock(&the_virtio_vsock_mutex); ++ return 0; ++ ++out_vqs: ++ vsock->vdev->config->del_vqs(vsock->vdev); ++out: ++ kfree(vsock); ++ mutex_unlock(&the_virtio_vsock_mutex); ++ return ret; ++} ++ ++static void virtio_vsock_remove(struct virtio_device *vdev) ++{ ++ struct virtio_vsock *vsock = vdev->priv; ++ struct virtio_vsock_pkt *pkt; ++ ++ flush_work(&vsock->rx_work); ++ flush_work(&vsock->tx_work); ++ flush_work(&vsock->event_work); ++ flush_work(&vsock->send_pkt_work); ++ ++ vdev->config->reset(vdev); ++ ++ mutex_lock(&vsock->rx_lock); ++ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) ++ virtio_transport_free_pkt(pkt); ++ mutex_unlock(&vsock->rx_lock); ++ ++ mutex_lock(&vsock->tx_lock); ++ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) ++ virtio_transport_free_pkt(pkt); ++ mutex_unlock(&vsock->tx_lock); ++ ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ while (!list_empty(&vsock->send_pkt_list)) { ++ pkt = list_first_entry(&vsock->send_pkt_list, ++ struct virtio_vsock_pkt, list); ++ list_del(&pkt->list); ++ virtio_transport_free_pkt(pkt); ++ } ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ mutex_lock(&the_virtio_vsock_mutex); ++ the_virtio_vsock = NULL; ++ vsock_core_exit(); ++ mutex_unlock(&the_virtio_vsock_mutex); ++ ++ vdev->config->del_vqs(vdev); ++ ++ kfree(vsock); ++} ++ ++static struct virtio_device_id id_table[] = { ++ { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, ++ { 0 }, ++}; ++ ++static unsigned int features[] = { ++}; ++ ++static struct virtio_driver virtio_vsock_driver = { ++ .feature_table = features, ++ .feature_table_size = ARRAY_SIZE(features), ++ .driver.name = KBUILD_MODNAME, ++ .driver.owner = THIS_MODULE, ++ .id_table = id_table, ++ .probe = virtio_vsock_probe, ++ .remove = virtio_vsock_remove, ++}; ++ ++static int __init virtio_vsock_init(void) ++{ ++ int ret; ++ ++ virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); ++ if (!virtio_vsock_workqueue) ++ return -ENOMEM; ++ ret = register_virtio_driver(&virtio_vsock_driver); ++ if (ret) ++ destroy_workqueue(virtio_vsock_workqueue); ++ return ret; ++} ++ ++static void __exit virtio_vsock_exit(void) ++{ ++ unregister_virtio_driver(&virtio_vsock_driver); ++ destroy_workqueue(virtio_vsock_workqueue); ++} ++ ++module_init(virtio_vsock_init); ++module_exit(virtio_vsock_exit); ++MODULE_LICENSE("GPL v2"); ++MODULE_AUTHOR("Asias He"); ++MODULE_DESCRIPTION("virtio transport for vsock"); ++MODULE_DEVICE_TABLE(virtio, id_table); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch b/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch new file mode 100644 index 000000000..ea0d3196f --- /dev/null +++ b/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch @@ -0,0 +1,777 @@ +From a0af1060ea091348b94bd3780e5b92a3334e64b2 Mon Sep 17 00:00:00 2001 +From: Asias He +Date: Thu, 28 Jul 2016 15:36:34 +0100 +Subject: [PATCH 09/42] VSOCK: Introduce vhost_vsock.ko + +VM sockets vhost transport implementation. This driver runs on the +host. + +Signed-off-by: Asias He +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 433fc58e6bf2c8bd97e57153ed28e64fd78207b8) +--- + MAINTAINERS | 2 + + drivers/vhost/vsock.c | 722 +++++++++++++++++++++++++++++++++++++++++++++ + include/uapi/linux/vhost.h | 5 + + 3 files changed, 729 insertions(+) + create mode 100644 drivers/vhost/vsock.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 82d1123..12d49f5 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -11392,6 +11392,8 @@ F: include/linux/virtio_vsock.h + F: include/uapi/linux/virtio_vsock.h + F: net/vmw_vsock/virtio_transport_common.c + F: net/vmw_vsock/virtio_transport.c ++F: drivers/vhost/vsock.c ++F: drivers/vhost/vsock.h + + VIRTUAL SERIO DEVICE DRIVER + M: Stephen Chandler Paul +diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c +new file mode 100644 +index 0000000..028ca16 +--- /dev/null ++++ b/drivers/vhost/vsock.c +@@ -0,0 +1,722 @@ ++/* ++ * vhost transport for vsock ++ * ++ * Copyright (C) 2013-2015 Red Hat, Inc. ++ * Author: Asias He ++ * Stefan Hajnoczi ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "vhost.h" ++ ++#define VHOST_VSOCK_DEFAULT_HOST_CID 2 ++ ++enum { ++ VHOST_VSOCK_FEATURES = VHOST_FEATURES, ++}; ++ ++/* Used to track all the vhost_vsock instances on the system. */ ++static DEFINE_SPINLOCK(vhost_vsock_lock); ++static LIST_HEAD(vhost_vsock_list); ++ ++struct vhost_vsock { ++ struct vhost_dev dev; ++ struct vhost_virtqueue vqs[2]; ++ ++ /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ ++ struct list_head list; ++ ++ struct vhost_work send_pkt_work; ++ spinlock_t send_pkt_list_lock; ++ struct list_head send_pkt_list; /* host->guest pending packets */ ++ ++ atomic_t queued_replies; ++ ++ u32 guest_cid; ++}; ++ ++static u32 vhost_transport_get_local_cid(void) ++{ ++ return VHOST_VSOCK_DEFAULT_HOST_CID; ++} ++ ++static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) ++{ ++ struct vhost_vsock *vsock; ++ ++ spin_lock_bh(&vhost_vsock_lock); ++ list_for_each_entry(vsock, &vhost_vsock_list, list) { ++ u32 other_cid = vsock->guest_cid; ++ ++ /* Skip instances that have no CID yet */ ++ if (other_cid == 0) ++ continue; ++ ++ if (other_cid == guest_cid) { ++ spin_unlock_bh(&vhost_vsock_lock); ++ return vsock; ++ } ++ } ++ spin_unlock_bh(&vhost_vsock_lock); ++ ++ return NULL; ++} ++ ++static void ++vhost_transport_do_send_pkt(struct vhost_vsock *vsock, ++ struct vhost_virtqueue *vq) ++{ ++ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; ++ bool added = false; ++ bool restart_tx = false; ++ ++ mutex_lock(&vq->mutex); ++ ++ if (!vq->private_data) ++ goto out; ++ ++ /* Avoid further vmexits, we're already processing the virtqueue */ ++ vhost_disable_notify(&vsock->dev, vq); ++ ++ for (;;) { ++ struct virtio_vsock_pkt *pkt; ++ struct iov_iter iov_iter; ++ unsigned out, in; ++ size_t nbytes; ++ size_t len; ++ int head; ++ ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ if (list_empty(&vsock->send_pkt_list)) { ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ vhost_enable_notify(&vsock->dev, vq); ++ break; ++ } ++ ++ pkt = list_first_entry(&vsock->send_pkt_list, ++ struct virtio_vsock_pkt, list); ++ list_del_init(&pkt->list); ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), ++ &out, &in, NULL, NULL); ++ if (head < 0) { ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ list_add(&pkt->list, &vsock->send_pkt_list); ++ 
spin_unlock_bh(&vsock->send_pkt_list_lock); ++ break; ++ } ++ ++ if (head == vq->num) { ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ list_add(&pkt->list, &vsock->send_pkt_list); ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ /* We cannot finish yet if more buffers snuck in while ++ * re-enabling notify. ++ */ ++ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { ++ vhost_disable_notify(&vsock->dev, vq); ++ continue; ++ } ++ break; ++ } ++ ++ if (out) { ++ virtio_transport_free_pkt(pkt); ++ vq_err(vq, "Expected 0 output buffers, got %u\n", out); ++ break; ++ } ++ ++ len = iov_length(&vq->iov[out], in); ++ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); ++ ++ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); ++ if (nbytes != sizeof(pkt->hdr)) { ++ virtio_transport_free_pkt(pkt); ++ vq_err(vq, "Faulted on copying pkt hdr\n"); ++ break; ++ } ++ ++ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); ++ if (nbytes != pkt->len) { ++ virtio_transport_free_pkt(pkt); ++ vq_err(vq, "Faulted on copying pkt buf\n"); ++ break; ++ } ++ ++ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); ++ added = true; ++ ++ if (pkt->reply) { ++ int val; ++ ++ val = atomic_dec_return(&vsock->queued_replies); ++ ++ /* Do we have resources to resume tx processing? */ ++ if (val + 1 == tx_vq->num) ++ restart_tx = true; ++ } ++ ++ virtio_transport_free_pkt(pkt); ++ } ++ if (added) ++ vhost_signal(&vsock->dev, vq); ++ ++out: ++ mutex_unlock(&vq->mutex); ++ ++ if (restart_tx) ++ vhost_poll_queue(&tx_vq->poll); ++} ++ ++static void vhost_transport_send_pkt_work(struct vhost_work *work) ++{ ++ struct vhost_virtqueue *vq; ++ struct vhost_vsock *vsock; ++ ++ vsock = container_of(work, struct vhost_vsock, send_pkt_work); ++ vq = &vsock->vqs[VSOCK_VQ_RX]; ++ ++ vhost_transport_do_send_pkt(vsock, vq); ++} ++ ++static int ++vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) ++{ ++ struct vhost_vsock *vsock; ++ struct vhost_virtqueue *vq; ++ int len = pkt->len; ++ ++ /* Find the vhost_vsock according to guest context id */ ++ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); ++ if (!vsock) { ++ virtio_transport_free_pkt(pkt); ++ return -ENODEV; ++ } ++ ++ vq = &vsock->vqs[VSOCK_VQ_RX]; ++ ++ if (pkt->reply) ++ atomic_inc(&vsock->queued_replies); ++ ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ list_add_tail(&pkt->list, &vsock->send_pkt_list); ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); ++ return len; ++} ++ ++static struct virtio_vsock_pkt * ++vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, ++ unsigned int out, unsigned int in) ++{ ++ struct virtio_vsock_pkt *pkt; ++ struct iov_iter iov_iter; ++ size_t nbytes; ++ size_t len; ++ ++ if (in != 0) { ++ vq_err(vq, "Expected 0 input buffers, got %u\n", in); ++ return NULL; ++ } ++ ++ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); ++ if (!pkt) ++ return NULL; ++ ++ len = iov_length(vq->iov, out); ++ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); ++ ++ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); ++ if (nbytes != sizeof(pkt->hdr)) { ++ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", ++ sizeof(pkt->hdr), nbytes); ++ kfree(pkt); ++ return NULL; ++ } ++ ++ if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) ++ pkt->len = le32_to_cpu(pkt->hdr.len); ++ ++ /* No payload */ ++ if (!pkt->len) ++ return pkt; ++ ++ /* The pkt is too big */ ++ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { ++ kfree(pkt); ++ return NULL; ++ } ++ ++ pkt->buf = 
kmalloc(pkt->len, GFP_KERNEL); ++ if (!pkt->buf) { ++ kfree(pkt); ++ return NULL; ++ } ++ ++ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); ++ if (nbytes != pkt->len) { ++ vq_err(vq, "Expected %u byte payload, got %zu bytes\n", ++ pkt->len, nbytes); ++ virtio_transport_free_pkt(pkt); ++ return NULL; ++ } ++ ++ return pkt; ++} ++ ++/* Is there space left for replies to rx packets? */ ++static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) ++{ ++ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; ++ int val; ++ ++ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ ++ val = atomic_read(&vsock->queued_replies); ++ ++ return val < vq->num; ++} ++ ++static void vhost_vsock_handle_tx_kick(struct vhost_work *work) ++{ ++ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, ++ poll.work); ++ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, ++ dev); ++ struct virtio_vsock_pkt *pkt; ++ int head; ++ unsigned int out, in; ++ bool added = false; ++ ++ mutex_lock(&vq->mutex); ++ ++ if (!vq->private_data) ++ goto out; ++ ++ vhost_disable_notify(&vsock->dev, vq); ++ for (;;) { ++ if (!vhost_vsock_more_replies(vsock)) { ++ /* Stop tx until the device processes already ++ * pending replies. Leave tx virtqueue ++ * callbacks disabled. ++ */ ++ goto no_more_replies; ++ } ++ ++ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), ++ &out, &in, NULL, NULL); ++ if (head < 0) ++ break; ++ ++ if (head == vq->num) { ++ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { ++ vhost_disable_notify(&vsock->dev, vq); ++ continue; ++ } ++ break; ++ } ++ ++ pkt = vhost_vsock_alloc_pkt(vq, out, in); ++ if (!pkt) { ++ vq_err(vq, "Faulted on pkt\n"); ++ continue; ++ } ++ ++ /* Only accept correctly addressed packets */ ++ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) ++ virtio_transport_recv_pkt(pkt); ++ else ++ virtio_transport_free_pkt(pkt); ++ ++ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); ++ added = true; ++ } ++ ++no_more_replies: ++ if (added) ++ vhost_signal(&vsock->dev, vq); ++ ++out: ++ mutex_unlock(&vq->mutex); ++} ++ ++static void vhost_vsock_handle_rx_kick(struct vhost_work *work) ++{ ++ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, ++ poll.work); ++ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, ++ dev); ++ ++ vhost_transport_do_send_pkt(vsock, vq); ++} ++ ++static int vhost_vsock_start(struct vhost_vsock *vsock) ++{ ++ size_t i; ++ int ret; ++ ++ mutex_lock(&vsock->dev.mutex); ++ ++ ret = vhost_dev_check_owner(&vsock->dev); ++ if (ret) ++ goto err; ++ ++ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { ++ struct vhost_virtqueue *vq = &vsock->vqs[i]; ++ ++ mutex_lock(&vq->mutex); ++ ++ if (!vhost_vq_access_ok(vq)) { ++ ret = -EFAULT; ++ mutex_unlock(&vq->mutex); ++ goto err_vq; ++ } ++ ++ if (!vq->private_data) { ++ vq->private_data = vsock; ++ vhost_vq_init_access(vq); ++ } ++ ++ mutex_unlock(&vq->mutex); ++ } ++ ++ mutex_unlock(&vsock->dev.mutex); ++ return 0; ++ ++err_vq: ++ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { ++ struct vhost_virtqueue *vq = &vsock->vqs[i]; ++ ++ mutex_lock(&vq->mutex); ++ vq->private_data = NULL; ++ mutex_unlock(&vq->mutex); ++ } ++err: ++ mutex_unlock(&vsock->dev.mutex); ++ return ret; ++} ++ ++static int vhost_vsock_stop(struct vhost_vsock *vsock) ++{ ++ size_t i; ++ int ret; ++ ++ mutex_lock(&vsock->dev.mutex); ++ ++ ret = vhost_dev_check_owner(&vsock->dev); ++ if (ret) ++ goto err; ++ ++ for (i = 0; i < ARRAY_SIZE(vsock->vqs); 
i++) { ++ struct vhost_virtqueue *vq = &vsock->vqs[i]; ++ ++ mutex_lock(&vq->mutex); ++ vq->private_data = NULL; ++ mutex_unlock(&vq->mutex); ++ } ++ ++err: ++ mutex_unlock(&vsock->dev.mutex); ++ return ret; ++} ++ ++static void vhost_vsock_free(struct vhost_vsock *vsock) ++{ ++ if (is_vmalloc_addr(vsock)) ++ vfree(vsock); ++ else ++ kfree(vsock); ++} ++ ++static int vhost_vsock_dev_open(struct inode *inode, struct file *file) ++{ ++ struct vhost_virtqueue **vqs; ++ struct vhost_vsock *vsock; ++ int ret; ++ ++ /* This struct is large and allocation could fail, fall back to vmalloc ++ * if there is no other way. ++ */ ++ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); ++ if (!vsock) { ++ vsock = vmalloc(sizeof(*vsock)); ++ if (!vsock) ++ return -ENOMEM; ++ } ++ ++ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); ++ if (!vqs) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ atomic_set(&vsock->queued_replies, 0); ++ ++ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; ++ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; ++ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; ++ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; ++ ++ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); ++ ++ file->private_data = vsock; ++ spin_lock_init(&vsock->send_pkt_list_lock); ++ INIT_LIST_HEAD(&vsock->send_pkt_list); ++ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); ++ ++ spin_lock_bh(&vhost_vsock_lock); ++ list_add_tail(&vsock->list, &vhost_vsock_list); ++ spin_unlock_bh(&vhost_vsock_lock); ++ return 0; ++ ++out: ++ vhost_vsock_free(vsock); ++ return ret; ++} ++ ++static void vhost_vsock_flush(struct vhost_vsock *vsock) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) ++ if (vsock->vqs[i].handle_kick) ++ vhost_poll_flush(&vsock->vqs[i].poll); ++ vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); ++} ++ ++static void vhost_vsock_reset_orphans(struct sock *sk) ++{ ++ struct vsock_sock *vsk = vsock_sk(sk); ++ ++ /* vmci_transport.c doesn't take sk_lock here either. At least we're ++ * under vsock_table_lock so the sock cannot disappear while we're ++ * executing. ++ */ ++ ++ if (!vhost_vsock_get(vsk->local_addr.svm_cid)) { ++ sock_set_flag(sk, SOCK_DONE); ++ vsk->peer_shutdown = SHUTDOWN_MASK; ++ sk->sk_state = SS_UNCONNECTED; ++ sk->sk_err = ECONNRESET; ++ sk->sk_error_report(sk); ++ } ++} ++ ++static int vhost_vsock_dev_release(struct inode *inode, struct file *file) ++{ ++ struct vhost_vsock *vsock = file->private_data; ++ ++ spin_lock_bh(&vhost_vsock_lock); ++ list_del(&vsock->list); ++ spin_unlock_bh(&vhost_vsock_lock); ++ ++ /* Iterating over all connections for all CIDs to find orphans is ++ * inefficient. Room for improvement here. 
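++ * Every connected socket in the system is visited, though only those
++ * whose CID no longer resolves to a live vhost_vsock instance get reset.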
*/ ++ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); ++ ++ vhost_vsock_stop(vsock); ++ vhost_vsock_flush(vsock); ++ vhost_dev_stop(&vsock->dev); ++ ++ spin_lock_bh(&vsock->send_pkt_list_lock); ++ while (!list_empty(&vsock->send_pkt_list)) { ++ struct virtio_vsock_pkt *pkt; ++ ++ pkt = list_first_entry(&vsock->send_pkt_list, ++ struct virtio_vsock_pkt, list); ++ list_del_init(&pkt->list); ++ virtio_transport_free_pkt(pkt); ++ } ++ spin_unlock_bh(&vsock->send_pkt_list_lock); ++ ++ vhost_dev_cleanup(&vsock->dev, false); ++ kfree(vsock->dev.vqs); ++ vhost_vsock_free(vsock); ++ return 0; ++} ++ ++static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) ++{ ++ struct vhost_vsock *other; ++ ++ /* Refuse reserved CIDs */ ++ if (guest_cid <= VMADDR_CID_HOST || ++ guest_cid == U32_MAX) ++ return -EINVAL; ++ ++ /* 64-bit CIDs are not yet supported */ ++ if (guest_cid > U32_MAX) ++ return -EINVAL; ++ ++ /* Refuse if CID is already in use */ ++ other = vhost_vsock_get(guest_cid); ++ if (other && other != vsock) ++ return -EADDRINUSE; ++ ++ spin_lock_bh(&vhost_vsock_lock); ++ vsock->guest_cid = guest_cid; ++ spin_unlock_bh(&vhost_vsock_lock); ++ ++ return 0; ++} ++ ++static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) ++{ ++ struct vhost_virtqueue *vq; ++ int i; ++ ++ if (features & ~VHOST_VSOCK_FEATURES) ++ return -EOPNOTSUPP; ++ ++ mutex_lock(&vsock->dev.mutex); ++ if ((features & (1 << VHOST_F_LOG_ALL)) && ++ !vhost_log_access_ok(&vsock->dev)) { ++ mutex_unlock(&vsock->dev.mutex); ++ return -EFAULT; ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { ++ vq = &vsock->vqs[i]; ++ mutex_lock(&vq->mutex); ++ vq->acked_features = features; ++ mutex_unlock(&vq->mutex); ++ } ++ mutex_unlock(&vsock->dev.mutex); ++ return 0; ++} ++ ++static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, ++ unsigned long arg) ++{ ++ struct vhost_vsock *vsock = f->private_data; ++ void __user *argp = (void __user *)arg; ++ u64 guest_cid; ++ u64 features; ++ int start; ++ int r; ++ ++ switch (ioctl) { ++ case VHOST_VSOCK_SET_GUEST_CID: ++ if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) ++ return -EFAULT; ++ return vhost_vsock_set_cid(vsock, guest_cid); ++ case VHOST_VSOCK_SET_RUNNING: ++ if (copy_from_user(&start, argp, sizeof(start))) ++ return -EFAULT; ++ if (start) ++ return vhost_vsock_start(vsock); ++ else ++ return vhost_vsock_stop(vsock); ++ case VHOST_GET_FEATURES: ++ features = VHOST_VSOCK_FEATURES; ++ if (copy_to_user(argp, &features, sizeof(features))) ++ return -EFAULT; ++ return 0; ++ case VHOST_SET_FEATURES: ++ if (copy_from_user(&features, argp, sizeof(features))) ++ return -EFAULT; ++ return vhost_vsock_set_features(vsock, features); ++ default: ++ mutex_lock(&vsock->dev.mutex); ++ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); ++ if (r == -ENOIOCTLCMD) ++ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); ++ else ++ vhost_vsock_flush(vsock); ++ mutex_unlock(&vsock->dev.mutex); ++ return r; ++ } ++} ++ ++static const struct file_operations vhost_vsock_fops = { ++ .owner = THIS_MODULE, ++ .open = vhost_vsock_dev_open, ++ .release = vhost_vsock_dev_release, ++ .llseek = noop_llseek, ++ .unlocked_ioctl = vhost_vsock_dev_ioctl, ++}; ++ ++static struct miscdevice vhost_vsock_misc = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = "vhost-vsock", ++ .fops = &vhost_vsock_fops, ++}; ++ ++static struct virtio_transport vhost_transport = { ++ .transport = { ++ .get_local_cid = vhost_transport_get_local_cid, ++ ++ .init = 
virtio_transport_do_socket_init, ++ .destruct = virtio_transport_destruct, ++ .release = virtio_transport_release, ++ .connect = virtio_transport_connect, ++ .shutdown = virtio_transport_shutdown, ++ ++ .dgram_enqueue = virtio_transport_dgram_enqueue, ++ .dgram_dequeue = virtio_transport_dgram_dequeue, ++ .dgram_bind = virtio_transport_dgram_bind, ++ .dgram_allow = virtio_transport_dgram_allow, ++ ++ .stream_enqueue = virtio_transport_stream_enqueue, ++ .stream_dequeue = virtio_transport_stream_dequeue, ++ .stream_has_data = virtio_transport_stream_has_data, ++ .stream_has_space = virtio_transport_stream_has_space, ++ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, ++ .stream_is_active = virtio_transport_stream_is_active, ++ .stream_allow = virtio_transport_stream_allow, ++ ++ .notify_poll_in = virtio_transport_notify_poll_in, ++ .notify_poll_out = virtio_transport_notify_poll_out, ++ .notify_recv_init = virtio_transport_notify_recv_init, ++ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, ++ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, ++ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, ++ .notify_send_init = virtio_transport_notify_send_init, ++ .notify_send_pre_block = virtio_transport_notify_send_pre_block, ++ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, ++ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, ++ ++ .set_buffer_size = virtio_transport_set_buffer_size, ++ .set_min_buffer_size = virtio_transport_set_min_buffer_size, ++ .set_max_buffer_size = virtio_transport_set_max_buffer_size, ++ .get_buffer_size = virtio_transport_get_buffer_size, ++ .get_min_buffer_size = virtio_transport_get_min_buffer_size, ++ .get_max_buffer_size = virtio_transport_get_max_buffer_size, ++ }, ++ ++ .send_pkt = vhost_transport_send_pkt, ++}; ++ ++static int __init vhost_vsock_init(void) ++{ ++ int ret; ++ ++ ret = vsock_core_init(&vhost_transport.transport); ++ if (ret < 0) ++ return ret; ++ return misc_register(&vhost_vsock_misc); ++}; ++ ++static void __exit vhost_vsock_exit(void) ++{ ++ misc_deregister(&vhost_vsock_misc); ++ vsock_core_exit(); ++}; ++ ++module_init(vhost_vsock_init); ++module_exit(vhost_vsock_exit); ++MODULE_LICENSE("GPL v2"); ++MODULE_AUTHOR("Asias He"); ++MODULE_DESCRIPTION("vhost transport for vsock "); +diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h +index ab373191..b306476 100644 +--- a/include/uapi/linux/vhost.h ++++ b/include/uapi/linux/vhost.h +@@ -169,4 +169,9 @@ struct vhost_scsi_target { + #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) + #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) + ++/* VHOST_VSOCK specific defines */ ++ ++#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) ++#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) ++ + #endif +-- +2.10.0 + diff --git a/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch b/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch new file mode 100644 index 000000000..8c84c7879 --- /dev/null +++ b/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch @@ -0,0 +1,106 @@ +From 30e1801c9e9683512a0cd169edf015923497dd70 Mon Sep 17 00:00:00 2001 +From: Asias He +Date: Thu, 28 Jul 2016 15:36:35 +0100 +Subject: [PATCH 10/42] VSOCK: Add Makefile and Kconfig + +Enable virtio-vsock and vhost-vsock. + +Signed-off-by: Asias He +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 304ba62fd4e670c1a5784585da0fac9f7309ef6c) +--- + drivers/vhost/Kconfig | 14 ++++++++++++++ + drivers/vhost/Makefile | 4 ++++ + net/vmw_vsock/Kconfig | 20 ++++++++++++++++++++ + net/vmw_vsock/Makefile | 6 ++++++ + 4 files changed, 44 insertions(+) + +diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig +index 533eaf0..2b5f588 100644 +--- a/drivers/vhost/Kconfig ++++ b/drivers/vhost/Kconfig +@@ -21,6 +21,20 @@ config VHOST_SCSI + Say M here to enable the vhost_scsi TCM fabric module + for use with virtio-scsi guests + ++config VHOST_VSOCK ++ tristate "vhost virtio-vsock driver" ++ depends on VSOCKETS && EVENTFD ++ select VIRTIO_VSOCKETS_COMMON ++ select VHOST ++ default n ++ ---help--- ++ This kernel module can be loaded in the host kernel to provide AF_VSOCK ++ sockets for communicating with guests. The guests must have the ++ virtio_transport.ko driver loaded to use the virtio-vsock device. ++ ++ To compile this driver as a module, choose M here: the module will be called ++ vhost_vsock. ++ + config VHOST_RING + tristate + ---help--- +diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile +index e0441c3..6b012b9 100644 +--- a/drivers/vhost/Makefile ++++ b/drivers/vhost/Makefile +@@ -4,5 +4,9 @@ vhost_net-y := net.o + obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o + vhost_scsi-y := scsi.o + ++obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o ++vhost_vsock-y := vsock.o ++ + obj-$(CONFIG_VHOST_RING) += vringh.o ++ + obj-$(CONFIG_VHOST) += vhost.o +diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig +index 14810ab..8831e7c 100644 +--- a/net/vmw_vsock/Kconfig ++++ b/net/vmw_vsock/Kconfig +@@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. ++ ++config VIRTIO_VSOCKETS ++ tristate "virtio transport for Virtual Sockets" ++ depends on VSOCKETS && VIRTIO ++ select VIRTIO_VSOCKETS_COMMON ++ help ++ This module implements a virtio transport for Virtual Sockets. ++ ++ Enable this transport if your Virtual Machine host supports Virtual ++ Sockets over virtio. ++ ++ To compile this driver as a module, choose M here: the module will be ++ called vmw_vsock_virtio_transport. If unsure, say N. ++ ++config VIRTIO_VSOCKETS_COMMON ++ tristate ++ help ++ This option is selected by any driver which needs to access ++ the virtio_vsock. The module will be called ++ vmw_vsock_virtio_transport_common. 
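For orientation, the options above compose as follows: VHOST_VSOCK is the host-side device and pulls in VHOST plus the shared VIRTIO_VSOCKETS_COMMON code, while VIRTIO_VSOCKETS is the guest-side transport. A host .config fragment that builds everything as modules might look like the sketch below — the option names come from the Kconfig hunks above, but the =m choices are only an assumption for illustration.

# host kernel .config fragment (illustrative only)
CONFIG_VSOCKETS=m
CONFIG_VHOST=m                   # selected by VHOST_VSOCK
CONFIG_VHOST_VSOCK=m
CONFIG_VIRTIO_VSOCKETS_COMMON=m  # selected by VHOST_VSOCK
# a guest kernel would instead enable the transport driver:
# CONFIG_VIRTIO_VSOCKETS=m

The resulting module names match the Makefile hunks in this patch: vhost_vsock.ko on the host, and vmw_vsock_virtio_transport.ko plus vmw_vsock_virtio_transport_common.ko on the guest.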
+diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile +index 2ce52d7..bc27c70 100644 +--- a/net/vmw_vsock/Makefile ++++ b/net/vmw_vsock/Makefile +@@ -1,7 +1,13 @@ + obj-$(CONFIG_VSOCKETS) += vsock.o + obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o ++obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o ++obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o + + vsock-y += af_vsock.o vsock_addr.o + + vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o ++ ++vmw_vsock_virtio_transport-y += virtio_transport.o ++ ++vmw_vsock_virtio_transport_common-y += virtio_transport_common.o +-- +2.10.0 + diff --git a/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch b/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch new file mode 100644 index 000000000..e1e78d9c8 --- /dev/null +++ b/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch @@ -0,0 +1,33 @@ +From e9a09f08525c736a71d8331fd6412a0ad19ee428 Mon Sep 17 00:00:00 2001 +From: Wei Yongjun +Date: Tue, 2 Aug 2016 13:50:42 +0000 +Subject: [PATCH 11/42] VSOCK: Use kvfree() + +Use kvfree() instead of open-coding it. + +Signed-off-by: Wei Yongjun +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit b226acab2f6aaa45c2af27279b63f622b23a44bd) +--- + drivers/vhost/vsock.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c +index 028ca16..0ddf3a2 100644 +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -434,10 +434,7 @@ err: + + static void vhost_vsock_free(struct vhost_vsock *vsock) + { +- if (is_vmalloc_addr(vsock)) +- vfree(vsock); +- else +- kfree(vsock); ++ kvfree(vsock); + } + + static int vhost_vsock_dev_open(struct inode *inode, struct file *file) +-- +2.10.0 + diff --git a/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch b/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch new file mode 100644 index 000000000..c522808e7 --- /dev/null +++ b/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch @@ -0,0 +1,53 @@ +From f886059ea8d0ac8ed981263d91d94275b85c50d5 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 4 Aug 2016 14:52:53 +0100 +Subject: [PATCH 12/42] vhost/vsock: fix vhost virtio_vsock_pkt use-after-free + +Stash the packet length in a local variable before handing over +ownership of the packet to virtio_transport_recv_pkt() or +virtio_transport_free_pkt(). + +This patch solves the use-after-free since pkt is no longer guaranteed +to be alive. + +Reported-by: Dan Carpenter +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 3fda5d6e580193fa005014355b3a61498f1b3ae0) +--- + drivers/vhost/vsock.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c +index 0ddf3a2..e3b30ea 100644 +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) + + vhost_disable_notify(&vsock->dev, vq); + for (;;) { ++ u32 len; ++ + if (!vhost_vsock_more_replies(vsock)) { + /* Stop tx until the device processes already + * pending replies. 
Leave tx virtqueue +@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) + continue; + } + ++ len = pkt->len; ++ + /* Only accept correctly addressed packets */ + if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) + virtio_transport_recv_pkt(pkt); + else + virtio_transport_free_pkt(pkt); + +- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); ++ vhost_add_used(vq, head, sizeof(pkt->hdr) + len); + added = true; + } + +-- +2.10.0 + diff --git a/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch b/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch new file mode 100644 index 000000000..aff3fc5c6 --- /dev/null +++ b/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch @@ -0,0 +1,28 @@ +From 6ded3ac18eabf23a790d6b6876119d8cd0538964 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 5 Aug 2016 13:52:09 +0100 +Subject: [PATCH 13/42] virtio-vsock: fix include guard typo + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 28ad55578b8a76390d966b09da8c7fa3644f5140) +--- + include/uapi/linux/virtio_vsock.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h +index 6b011c1..1d57ed3 100644 +--- a/include/uapi/linux/virtio_vsock.h ++++ b/include/uapi/linux/virtio_vsock.h +@@ -32,7 +32,7 @@ + */ + + #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H +-#define _UAPI_LINUX_VIRTIO_VOSCK_H ++#define _UAPI_LINUX_VIRTIO_VSOCK_H + + #include + #include +-- +2.10.0 + diff --git a/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch b/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch new file mode 100644 index 000000000..cb1f6e165 --- /dev/null +++ b/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch @@ -0,0 +1,61 @@ +From 5fcd2673fadd46b0d2d5f896281113cd67a2efa7 Mon Sep 17 00:00:00 2001 +From: Gerard Garcia +Date: Wed, 10 Aug 2016 17:24:34 +0200 +Subject: [PATCH 14/42] vhost/vsock: drop space available check for TX vq + +Remove unnecessary use of enable/disable callback notifications +and the incorrect more space available check. + +The virtio_transport_tx_work handles when the TX virtqueue +has more buffers available. + +Signed-off-by: Gerard Garcia +Acked-by: Stefan Hajnoczi +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 21bc54fc0cdc31de72b57d2b3c79cf9c2b83cf39) +--- + net/vmw_vsock/virtio_transport.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c +index 699dfab..936d7ee 100644 +--- a/net/vmw_vsock/virtio_transport.c ++++ b/net/vmw_vsock/virtio_transport.c +@@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) + + vq = vsock->vqs[VSOCK_VQ_TX]; + +- /* Avoid unnecessary interrupts while we're processing the ring */ +- virtqueue_disable_cb(vq); +- + for (;;) { + struct virtio_vsock_pkt *pkt; + struct scatterlist hdr, buf, *sgs[2]; +@@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) + spin_lock_bh(&vsock->send_pkt_list_lock); + if (list_empty(&vsock->send_pkt_list)) { + spin_unlock_bh(&vsock->send_pkt_list_lock); +- virtqueue_enable_cb(vq); + break; + } + +@@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work) + } + + ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); ++ /* Usually this means that there is no more space available in ++ * the vq ++ */ + if (ret < 0) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); +- +- if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) +- continue; /* retry now that we have more space */ + break; + } + +-- +2.10.0 + diff --git a/alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch b/alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch similarity index 72% rename from alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch rename to alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch index ef3345f85..cc76287a8 100644 --- a/alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch +++ b/alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch @@ -1,7 +1,8 @@ -From 98286199f2ba568d31ec78535c12e1818efd7daf Mon Sep 17 00:00:00 2001 +From 8719b508f509c06a7821d6f8e2fc1fcad84d6fbb Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 4 Apr 2016 14:50:10 +0100 -Subject: [PATCH 1/4] VSOCK: Only allow host network namespace to use AF_VSOCK. +Subject: [PATCH 15/42] VSOCK: Only allow host network namespace to use + AF_VSOCK. The VSOCK addressing schema does not really lend itself to simply creating an alternative end point address within a namespace. 
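To make the behavioral change concrete, here is a minimal userspace probe — an illustration, not part of the patch, and it assumes a libc that defines AF_VSOCK. Run inside a non-host network namespace (for example under "ip netns exec"), socket(AF_VSOCK, ...) is expected to fail with EAFNOSUPPORT after this change; in the host namespace it should still succeed.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef AF_VSOCK
#define AF_VSOCK 40	/* value from linux/socket.h, for older libcs */
#endif

int main(void)
{
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0) {
		/* expected inside a non-host netns: EAFNOSUPPORT */
		printf("socket(AF_VSOCK): %s\n", strerror(errno));
		return 1;
	}
	puts("socket(AF_VSOCK): ok (host network namespace)");
	close(fd);
	return 0;
}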
@@ -12,18 +13,19 @@ Signed-off-by: Ian Campbell 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index 8a398b3..0edc54c 100644 +index 17dbbe6..1bb1b01 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1852,6 +1852,9 @@ static const struct proto_ops vsock_stream_ops = { static int vsock_create(struct net *net, struct socket *sock, - int protocol, int kern) + int protocol, int kern) { + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; + - if (!sock) - return -EINVAL; + if (!sock) + return -EINVAL; + +-- +2.10.0 --- -2.10.1 diff --git a/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch b/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch new file mode 100644 index 000000000..4b5bf9751 --- /dev/null +++ b/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch @@ -0,0 +1,63 @@ +From 84e1e7a4981f6ef926bb01481445def66e0982b2 Mon Sep 17 00:00:00 2001 +From: Jake Oshins +Date: Mon, 14 Dec 2015 16:01:41 -0800 +Subject: [PATCH 16/42] drivers:hv: Define the channel type for Hyper-V PCI + Express pass-through + +This defines the channel type for PCI front-ends in Hyper-V VMs. + +Signed-off-by: Jake Oshins +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 3053c762444a83ec6a8777f9476668b23b8ab180) +--- + drivers/hv/channel_mgmt.c | 3 +++ + include/linux/hyperv.h | 11 +++++++++++ + 2 files changed, 14 insertions(+) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 37238df..a562318 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -359,6 +359,7 @@ enum { + SCSI, + NIC, + ND_NIC, ++ PCIE, + MAX_PERF_CHN, + }; + +@@ -376,6 +377,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { + { HV_NIC_GUID, }, + /* NetworkDirect Guest RDMA */ + { HV_ND_GUID, }, ++ /* PCI Express Pass Through */ ++ { HV_PCIE_GUID, }, + }; + + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index ae6a711..10dda1e 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1156,6 +1156,17 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + } + + /* ++ * PCI Express Pass Through ++ * {44C4F61D-4444-4400-9D52-802E27EDE19F} ++ */ ++ ++#define HV_PCIE_GUID \ ++ .guid = { \ ++ 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ ++ 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ ++ } ++ ++/* + * Common header for Hyper-V ICs + */ + +-- +2.10.0 + diff --git a/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch b/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch new file mode 100644 index 000000000..623cc37ff --- /dev/null +++ b/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch @@ -0,0 +1,297 @@ +From 12fbf6bcf859c7ce33766ae450dc291d0b857197 Mon Sep 17 00:00:00 2001 +From: "K. Y. Srinivasan" +Date: Mon, 14 Dec 2015 16:01:43 -0800 +Subject: [PATCH 17/42] Drivers: hv: vmbus: Use uuid_le type consistently + +Consistently use uuid_le type in the Hyper-V driver code. + +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit af3ff643ea91ba64dd8d0b1cbed54d44512f96cd) +--- + drivers/hv/channel_mgmt.c | 2 +- + drivers/hv/vmbus_drv.c | 10 ++--- + include/linux/hyperv.h | 92 ++++++++++++++--------------------------- + include/linux/mod_devicetable.h | 2 +- + scripts/mod/file2alias.c | 2 +- + 5 files changed, 40 insertions(+), 68 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index a562318..339277b 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -409,7 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui + struct cpumask *alloced_mask; + + for (i = IDE; i < MAX_PERF_CHN; i++) { +- if (!memcmp(type_guid->b, hp_devs[i].guid, ++ if (!memcmp(type_guid->b, &hp_devs[i].guid, + sizeof(uuid_le))) { + perf_chn = true; + break; +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 509ed97..6ce2bf8 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -533,7 +533,7 @@ static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) + + static const uuid_le null_guid; + +-static inline bool is_null_guid(const __u8 *guid) ++static inline bool is_null_guid(const uuid_le *guid) + { + if (memcmp(guid, &null_guid, sizeof(uuid_le))) + return false; +@@ -546,9 +546,9 @@ static inline bool is_null_guid(const __u8 *guid) + */ + static const struct hv_vmbus_device_id *hv_vmbus_get_id( + const struct hv_vmbus_device_id *id, +- const __u8 *guid) ++ const uuid_le *guid) + { +- for (; !is_null_guid(id->guid); id++) ++ for (; !is_null_guid(&id->guid); id++) + if (!memcmp(&id->guid, guid, sizeof(uuid_le))) + return id; + +@@ -565,7 +565,7 @@ static int vmbus_match(struct device *device, struct device_driver *driver) + struct hv_driver *drv = drv_to_hv_drv(driver); + struct hv_device *hv_dev = device_to_hv_device(device); + +- if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b)) ++ if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) + return 1; + + return 0; +@@ -582,7 +582,7 @@ static int vmbus_probe(struct device *child_device) + struct hv_device *dev = device_to_hv_device(child_device); + const struct hv_vmbus_device_id *dev_id; + +- dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b); ++ dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type); + if (drv->probe) { + ret = drv->probe(dev, dev_id); + if (ret != 0) +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 10dda1e..4712d7d 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1012,6 +1012,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + .guid = { g0, g1, g2, g3, g4, g5, g6, g7, \ + g8, g9, ga, gb, gc, gd, ge, gf }, + ++ ++ + /* + * GUID definitions of various offer types - services offered to the guest. 
+ */ +@@ -1021,118 +1023,94 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + * {f8615163-df3e-46c5-913f-f2d2f965ed0e} + */ + #define HV_NIC_GUID \ +- .guid = { \ +- 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, \ +- 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e \ +- } ++ .guid = UUID_LE(0xf8615163, 0xdf3e, 0x46c5, 0x91, 0x3f, \ ++ 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e) + + /* + * IDE GUID + * {32412632-86cb-44a2-9b5c-50d1417354f5} + */ + #define HV_IDE_GUID \ +- .guid = { \ +- 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ +- 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 \ +- } ++ .guid = UUID_LE(0x32412632, 0x86cb, 0x44a2, 0x9b, 0x5c, \ ++ 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5) + + /* + * SCSI GUID + * {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} + */ + #define HV_SCSI_GUID \ +- .guid = { \ +- 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ +- 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f \ +- } ++ .guid = UUID_LE(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \ ++ 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f) + + /* + * Shutdown GUID + * {0e0b6031-5213-4934-818b-38d90ced39db} + */ + #define HV_SHUTDOWN_GUID \ +- .guid = { \ +- 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49, \ +- 0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb \ +- } ++ .guid = UUID_LE(0x0e0b6031, 0x5213, 0x4934, 0x81, 0x8b, \ ++ 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb) + + /* + * Time Synch GUID + * {9527E630-D0AE-497b-ADCE-E80AB0175CAF} + */ + #define HV_TS_GUID \ +- .guid = { \ +- 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, \ +- 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf \ +- } ++ .guid = UUID_LE(0x9527e630, 0xd0ae, 0x497b, 0xad, 0xce, \ ++ 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf) + + /* + * Heartbeat GUID + * {57164f39-9115-4e78-ab55-382f3bd5422d} + */ + #define HV_HEART_BEAT_GUID \ +- .guid = { \ +- 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, \ +- 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d \ +- } ++ .guid = UUID_LE(0x57164f39, 0x9115, 0x4e78, 0xab, 0x55, \ ++ 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d) + + /* + * KVP GUID + * {a9a0f4e7-5a45-4d96-b827-8a841e8c03e6} + */ + #define HV_KVP_GUID \ +- .guid = { \ +- 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, \ +- 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6 \ +- } ++ .guid = UUID_LE(0xa9a0f4e7, 0x5a45, 0x4d96, 0xb8, 0x27, \ ++ 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6) + + /* + * Dynamic memory GUID + * {525074dc-8985-46e2-8057-a307dc18a502} + */ + #define HV_DM_GUID \ +- .guid = { \ +- 0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46, \ +- 0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 \ +- } ++ .guid = UUID_LE(0x525074dc, 0x8985, 0x46e2, 0x80, 0x57, \ ++ 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02) + + /* + * Mouse GUID + * {cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a} + */ + #define HV_MOUSE_GUID \ +- .guid = { \ +- 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c, \ +- 0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \ +- } ++ .guid = UUID_LE(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \ ++ 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) + + /* + * VSS (Backup/Restore) GUID + */ + #define HV_VSS_GUID \ +- .guid = { \ +- 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, \ +- 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \ +- } ++ .guid = UUID_LE(0x35fa2e29, 0xea23, 0x4236, 0x96, 0xae, \ ++ 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40) + /* + * Synthetic Video GUID + * {DA0A7802-E377-4aac-8E77-0558EB1073F8} + */ + #define HV_SYNTHVID_GUID \ +- .guid = { \ +- 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a, \ +- 0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 \ +- } ++ .guid = UUID_LE(0xda0a7802, 0xe377, 0x4aac, 
0x8e, 0x77, \ ++ 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8) + + /* + * Synthetic FC GUID + * {2f9bcc4a-0069-4af3-b76b-6fd0be528cda} + */ + #define HV_SYNTHFC_GUID \ +- .guid = { \ +- 0x4A, 0xCC, 0x9B, 0x2F, 0x69, 0x00, 0xF3, 0x4A, \ +- 0xB7, 0x6B, 0x6F, 0xD0, 0xBE, 0x52, 0x8C, 0xDA \ +- } ++ .guid = UUID_LE(0x2f9bcc4a, 0x0069, 0x4af3, 0xb7, 0x6b, \ ++ 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda) + + /* + * Guest File Copy Service +@@ -1140,20 +1118,16 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + */ + + #define HV_FCOPY_GUID \ +- .guid = { \ +- 0xE3, 0x4B, 0xD1, 0x34, 0xE4, 0xDE, 0xC8, 0x41, \ +- 0x9A, 0xE7, 0x6B, 0x17, 0x49, 0x77, 0xC1, 0x92 \ +- } ++ .guid = UUID_LE(0x34d14be3, 0xdee4, 0x41c8, 0x9a, 0xe7, \ ++ 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92) + + /* + * NetworkDirect. This is the guest RDMA service. + * {8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501} + */ + #define HV_ND_GUID \ +- .guid = { \ +- 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b, \ +- 0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 \ +- } ++ .guid = UUID_LE(0x8c2eaf3d, 0x32a7, 0x4b09, 0xab, 0x99, \ ++ 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01) + + /* + * PCI Express Pass Through +@@ -1161,10 +1135,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + */ + + #define HV_PCIE_GUID \ +- .guid = { \ +- 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ +- 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ +- } ++ .guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \ ++ 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f) + + /* + * Common header for Hyper-V ICs +diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h +index 64f36e0..6e4c645 100644 +--- a/include/linux/mod_devicetable.h ++++ b/include/linux/mod_devicetable.h +@@ -404,7 +404,7 @@ struct virtio_device_id { + * For Hyper-V devices we use the device guid as the id. + */ + struct hv_vmbus_device_id { +- __u8 guid[16]; ++ uuid_le guid; + kernel_ulong_t driver_data; /* Data private to the driver */ + }; + +diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c +index 9f5cdd4..8e8c69b 100644 +--- a/scripts/mod/file2alias.c ++++ b/scripts/mod/file2alias.c +@@ -917,7 +917,7 @@ static int do_vmbus_entry(const char *filename, void *symval, + char guid_name[(sizeof(*guid) + 1) * 2]; + + for (i = 0; i < (sizeof(*guid) * 2); i += 2) +- sprintf(&guid_name[i], "%02x", TO_NATIVE((*guid)[i/2])); ++ sprintf(&guid_name[i], "%02x", TO_NATIVE((guid->b)[i/2])); + + strcpy(alias, "vmbus:"); + strcat(alias, guid_name); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch b/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch new file mode 100644 index 000000000..fee2b87dd --- /dev/null +++ b/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch @@ -0,0 +1,55 @@ +From 01a403dce6afb34dd0430e12d93b7acd5f384439 Mon Sep 17 00:00:00 2001 +From: "K. Y. Srinivasan" +Date: Mon, 14 Dec 2015 16:01:44 -0800 +Subject: [PATCH 18/42] Drivers: hv: vmbus: Use uuid_le_cmp() for comparing + GUIDs + +Use uuid_le_cmp() for comparing GUIDs. + +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 4ae9250893485f380275e7d5cb291df87c4d9710) +--- + drivers/hv/channel_mgmt.c | 3 +-- + drivers/hv/vmbus_drv.c | 4 ++-- + 2 files changed, 3 insertions(+), 4 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 339277b..9b4525c 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -409,8 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui + struct cpumask *alloced_mask; + + for (i = IDE; i < MAX_PERF_CHN; i++) { +- if (!memcmp(type_guid->b, &hp_devs[i].guid, +- sizeof(uuid_le))) { ++ if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { + perf_chn = true; + break; + } +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 6ce2bf8..7973aa5 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -535,7 +535,7 @@ static const uuid_le null_guid; + + static inline bool is_null_guid(const uuid_le *guid) + { +- if (memcmp(guid, &null_guid, sizeof(uuid_le))) ++ if (uuid_le_cmp(*guid, null_guid)) + return false; + return true; + } +@@ -549,7 +549,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id( + const uuid_le *guid) + { + for (; !is_null_guid(&id->guid); id++) +- if (!memcmp(&id->guid, guid, sizeof(uuid_le))) ++ if (!uuid_le_cmp(id->guid, *guid)) + return id; + + return NULL; +-- +2.10.0 + diff --git a/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch b/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch new file mode 100644 index 000000000..6b228bce5 --- /dev/null +++ b/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch @@ -0,0 +1,42 @@ +From a9c4320f47b5a2d2ef7600c5f61f3d4256de2ba5 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Mon, 14 Dec 2015 16:01:48 -0800 +Subject: [PATCH 19/42] Drivers: hv: vmbus: do sanity check of channel state in + vmbus_close_internal() + +This fixes an incorrect assumption of channel state in the function. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 64b7faf903dae2df94d89edf2c688b16751800e4) +--- + drivers/hv/channel.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 1ef37c7..2889d97 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -512,6 +512,18 @@ static int vmbus_close_internal(struct vmbus_channel *channel) + tasklet = hv_context.event_dpc[channel->target_cpu]; + tasklet_disable(tasklet); + ++ /* ++ * In case a device driver's probe() fails (e.g., ++ * util_probe() -> vmbus_open() returns -ENOMEM) and the device is ++ * rescinded later (e.g., we dynamically disble an Integrated Service ++ * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): ++ * here we should skip most of the below cleanup work. 
++ */ ++ if (channel->state != CHANNEL_OPENED_STATE) { ++ ret = -EINVAL; ++ goto out; ++ } ++ + channel->state = CHANNEL_OPEN_STATE; + channel->sc_creation_callback = NULL; + /* Stop callback and cancel the timer asap */ +-- +2.10.0 + diff --git a/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch b/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch new file mode 100644 index 000000000..338568fb4 --- /dev/null +++ b/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch @@ -0,0 +1,74 @@ +From b92976804d10f78b9a50f5d8f62f3663a44f32e6 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Mon, 14 Dec 2015 16:01:50 -0800 +Subject: [PATCH 20/42] Drivers: hv: vmbus: release relid on error in + vmbus_process_offer() + +We want to simplify vmbus_onoffer_rescind() by not invoking +hv_process_channel_removal(NULL, ...). + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit f52078cf5711ce47c113a58702b35c8ff5f212f5) +--- + drivers/hv/channel_mgmt.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 9b4525c..8529dd2 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -178,19 +178,22 @@ static void percpu_channel_deq(void *arg) + } + + +-void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) ++static void vmbus_release_relid(u32 relid) + { + struct vmbus_channel_relid_released msg; +- unsigned long flags; +- struct vmbus_channel *primary_channel; + + memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); + msg.child_relid = relid; + msg.header.msgtype = CHANNELMSG_RELID_RELEASED; + vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); ++} + +- if (channel == NULL) +- return; ++void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) ++{ ++ unsigned long flags; ++ struct vmbus_channel *primary_channel; ++ ++ vmbus_release_relid(relid); + + BUG_ON(!channel->rescind); + +@@ -337,6 +340,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + return; + + err_deq_chan: ++ vmbus_release_relid(newchannel->offermsg.child_relid); ++ + spin_lock_irqsave(&vmbus_connection.channel_lock, flags); + list_del(&newchannel->listentry); + spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); +@@ -640,7 +645,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) + channel = relid2channel(rescind->child_relid); + + if (channel == NULL) { +- hv_process_channel_removal(NULL, rescind->child_relid); ++ /* ++ * This is very impossible, because in ++ * vmbus_process_offer(), we have already invoked ++ * vmbus_release_relid() on error. ++ */ + return; + } + +-- +2.10.0 + diff --git a/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch b/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch new file mode 100644 index 000000000..f2a28b416 --- /dev/null +++ b/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch @@ -0,0 +1,116 @@ +From e34354b98924dba0128289e722bde4ca35eafa90 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Mon, 14 Dec 2015 16:01:51 -0800 +Subject: [PATCH 21/42] Drivers: hv: vmbus: channge + vmbus_connection.channel_lock to mutex + +spinlock is unnecessary here. +mutex is enough. 
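A minimal sketch of the locking pattern this commit moves to, condensed from the relid2channel() hunk below (the real function also walks sub-channels). With channel_mutex held instead of the old spinlock, the walk over chn_list is free to sleep:

/* Illustrative only -- names taken from the hunks in this patch. */
static struct vmbus_channel *find_by_relid(u32 relid)
{
	struct vmbus_channel *channel, *found = NULL;

	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->offermsg.child_relid == relid) {
			found = channel;
			break;
		}
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	return found;
}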
+ +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit d6f591e339d23f434efda11917da511870891472) +--- + drivers/hv/channel_mgmt.c | 12 ++++++------ + drivers/hv/connection.c | 7 +++---- + drivers/hv/hyperv_vmbus.h | 2 +- + 3 files changed, 10 insertions(+), 11 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 8529dd2..306c7df 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -207,9 +207,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) + } + + if (channel->primary_channel == NULL) { +- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); ++ mutex_lock(&vmbus_connection.channel_mutex); + list_del(&channel->listentry); +- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); ++ mutex_unlock(&vmbus_connection.channel_mutex); + + primary_channel = channel; + } else { +@@ -254,7 +254,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + unsigned long flags; + + /* Make sure this is a new offer */ +- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); ++ mutex_lock(&vmbus_connection.channel_mutex); + + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (!uuid_le_cmp(channel->offermsg.offer.if_type, +@@ -270,7 +270,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + list_add_tail(&newchannel->listentry, + &vmbus_connection.chn_list); + +- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); ++ mutex_unlock(&vmbus_connection.channel_mutex); + + if (!fnew) { + /* +@@ -342,9 +342,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + err_deq_chan: + vmbus_release_relid(newchannel->offermsg.child_relid); + +- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); ++ mutex_lock(&vmbus_connection.channel_mutex); + list_del(&newchannel->listentry); +- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); ++ mutex_unlock(&vmbus_connection.channel_mutex); + + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); +diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c +index 4fc2e88..521f48e 100644 +--- a/drivers/hv/connection.c ++++ b/drivers/hv/connection.c +@@ -146,7 +146,7 @@ int vmbus_connect(void) + spin_lock_init(&vmbus_connection.channelmsg_lock); + + INIT_LIST_HEAD(&vmbus_connection.chn_list); +- spin_lock_init(&vmbus_connection.channel_lock); ++ mutex_init(&vmbus_connection.channel_mutex); + + /* + * Setup the vmbus event connection for channel interrupt +@@ -282,11 +282,10 @@ struct vmbus_channel *relid2channel(u32 relid) + { + struct vmbus_channel *channel; + struct vmbus_channel *found_channel = NULL; +- unsigned long flags; + struct list_head *cur, *tmp; + struct vmbus_channel *cur_sc; + +- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); ++ mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (channel->offermsg.child_relid == relid) { + found_channel = channel; +@@ -305,7 +304,7 @@ struct vmbus_channel *relid2channel(u32 relid) + } + } + } +- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); ++ mutex_unlock(&vmbus_connection.channel_mutex); + + return found_channel; + } +diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h +index 12156db..50b1de7 100644 +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -683,7 +683,7 @@ struct vmbus_connection { + + /* List of channels */ + 
struct list_head chn_list; +- spinlock_t channel_lock; ++ struct mutex channel_mutex; + + struct workqueue_struct *work_queue; + }; +-- +2.10.0 + diff --git a/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch b/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch new file mode 100644 index 000000000..c993989dc --- /dev/null +++ b/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch @@ -0,0 +1,126 @@ +From 91a65c691fc22cc6bfb884dea29cc7c5c3e5f9a9 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Mon, 14 Dec 2015 19:02:00 -0800 +Subject: [PATCH 22/42] Drivers: hv: remove code duplication between + vmbus_recvpacket()/vmbus_recvpacket_raw() + +vmbus_recvpacket() and vmbus_recvpacket_raw() are almost identical but +there are two discrepancies: +1) vmbus_recvpacket() doesn't propagate errors from hv_ringbuffer_read() + which looks like it is not desired. +2) There is an error message printed in packetlen > bufferlen case in + vmbus_recvpacket(). I'm removing it as it is usless for users to see + such messages and /vmbus_recvpacket_raw() doesn't have it. + +Signed-off-by: Vitaly Kuznetsov +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 667d374064b0cc48b6122101b287908d1b392bdb) +--- + drivers/hv/channel.c | 65 ++++++++++++++++++---------------------------------- + 1 file changed, 22 insertions(+), 43 deletions(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 2889d97..dd6de7f 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -922,8 +922,10 @@ EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); + * + * Mainly used by Hyper-V drivers. + */ +-int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, +- u32 bufferlen, u32 *buffer_actual_len, u64 *requestid) ++static inline int ++__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, ++ u32 bufferlen, u32 *buffer_actual_len, u64 *requestid, ++ bool raw) + { + struct vmpacket_descriptor desc; + u32 packetlen; +@@ -941,27 +943,34 @@ int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, + return 0; + + packetlen = desc.len8 << 3; +- userlen = packetlen - (desc.offset8 << 3); ++ if (!raw) ++ userlen = packetlen - (desc.offset8 << 3); ++ else ++ userlen = packetlen; + + *buffer_actual_len = userlen; + +- if (userlen > bufferlen) { +- +- pr_err("Buffer too small - got %d needs %d\n", +- bufferlen, userlen); +- return -ETOOSMALL; +- } ++ if (userlen > bufferlen) ++ return -ENOBUFS; + + *requestid = desc.trans_id; + + /* Copy over the packet to the user buffer */ + ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen, +- (desc.offset8 << 3), &signal); ++ raw ? 
0 : desc.offset8 << 3, &signal); + + if (signal) + vmbus_setevent(channel); + +- return 0; ++ return ret; ++} ++ ++int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, ++ u32 bufferlen, u32 *buffer_actual_len, ++ u64 *requestid) ++{ ++ return __vmbus_recvpacket(channel, buffer, bufferlen, ++ buffer_actual_len, requestid, false); + } + EXPORT_SYMBOL(vmbus_recvpacket); + +@@ -972,37 +981,7 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, + u32 bufferlen, u32 *buffer_actual_len, + u64 *requestid) + { +- struct vmpacket_descriptor desc; +- u32 packetlen; +- int ret; +- bool signal = false; +- +- *buffer_actual_len = 0; +- *requestid = 0; +- +- +- ret = hv_ringbuffer_peek(&channel->inbound, &desc, +- sizeof(struct vmpacket_descriptor)); +- if (ret != 0) +- return 0; +- +- +- packetlen = desc.len8 << 3; +- +- *buffer_actual_len = packetlen; +- +- if (packetlen > bufferlen) +- return -ENOBUFS; +- +- *requestid = desc.trans_id; +- +- /* Copy over the entire packet to the user buffer */ +- ret = hv_ringbuffer_read(&channel->inbound, buffer, packetlen, 0, +- &signal); +- +- if (signal) +- vmbus_setevent(channel); +- +- return ret; ++ return __vmbus_recvpacket(channel, buffer, bufferlen, ++ buffer_actual_len, requestid, true); + } + EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch b/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch new file mode 100644 index 000000000..a52582000 --- /dev/null +++ b/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch @@ -0,0 +1,72 @@ +From 4c754b011766c2d8a99424637656ea8096d55890 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Mon, 21 Dec 2015 12:21:22 -0800 +Subject: [PATCH 23/42] Drivers: hv: vmbus: fix the building warning with + hyperv-keyboard + +With the recent change af3ff643ea91ba64dd8d0b1cbed54d44512f96cd +(Drivers: hv: vmbus: Use uuid_le type consistently), we always get this +warning: + + CC [M] drivers/input/serio/hyperv-keyboard.o +drivers/input/serio/hyperv-keyboard.c:427:2: warning: missing braces around + initializer [-Wmissing-braces] + { HV_KBD_GUID, }, + ^ +drivers/input/serio/hyperv-keyboard.c:427:2: warning: (near initialization + for .id_table[0].guid.b.) [-Wmissing-braces] + +The patch fixes the warning. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 2048157ad02e65f6327118dd4a7b9c9f1fd12f77) +--- + drivers/input/serio/hyperv-keyboard.c | 10 ---------- + include/linux/hyperv.h | 8 ++++++++ + 2 files changed, 8 insertions(+), 10 deletions(-) + +diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c +index e74e5d6..c948866 100644 +--- a/drivers/input/serio/hyperv-keyboard.c ++++ b/drivers/input/serio/hyperv-keyboard.c +@@ -412,16 +412,6 @@ static int hv_kbd_remove(struct hv_device *hv_dev) + return 0; + } + +-/* +- * Keyboard GUID +- * {f912ad6d-2b17-48ea-bd65-f927a61c7684} +- */ +-#define HV_KBD_GUID \ +- .guid = { \ +- 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48, \ +- 0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 \ +- } +- + static const struct hv_vmbus_device_id id_table[] = { + /* Keyboard guid */ + { HV_KBD_GUID, }, +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 4712d7d..9e2de6a 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1091,6 +1091,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) + + /* ++ * Keyboard GUID ++ * {f912ad6d-2b17-48ea-bd65-f927a61c7684} ++ */ ++#define HV_KBD_GUID \ ++ .guid = UUID_LE(0xf912ad6d, 0x2b17, 0x48ea, 0xbd, 0x65, \ ++ 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84) ++ ++/* + * VSS (Backup/Restore) GUID + */ + #define HV_VSS_GUID \ +-- +2.10.0 + diff --git a/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch b/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch new file mode 100644 index 000000000..14c287bd4 --- /dev/null +++ b/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch @@ -0,0 +1,42 @@ +From 6cb1a2f24c7b049f8a0c259afa4f5de37ac84084 Mon Sep 17 00:00:00 2001 +From: "K. Y. Srinivasan" +Date: Tue, 15 Dec 2015 16:27:27 -0800 +Subject: [PATCH 24/42] Drivers: hv: vmbus: Treat Fibre Channel devices as + performance critical + +For performance critical devices, we distribute the incoming +channel interrupt load across available CPUs in the guest. +Include Fibre channel devices in the set of devices for which +we would distribute the interrupt load. + +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 879a650a273bc3efb9d472886b8ced12630ea8ed) +--- + drivers/hv/channel_mgmt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 306c7df..763d0c1 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -362,6 +362,7 @@ err_free_chan: + enum { + IDE = 0, + SCSI, ++ FC, + NIC, + ND_NIC, + PCIE, +@@ -378,6 +379,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { + { HV_IDE_GUID, }, + /* Storage - SCSI */ + { HV_SCSI_GUID, }, ++ /* Storage - FC */ ++ { HV_SYNTHFC_GUID, }, + /* Network */ + { HV_NIC_GUID, }, + /* NetworkDirect Guest RDMA */ +-- +2.10.0 + diff --git a/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch b/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch new file mode 100644 index 000000000..2e89c4ea2 --- /dev/null +++ b/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch @@ -0,0 +1,355 @@ +From 69933a7f325a93afbb5ed819388b8b063d602066 Mon Sep 17 00:00:00 2001 +From: "K. Y. 
Srinivasan" +Date: Fri, 25 Dec 2015 20:00:30 -0800 +Subject: [PATCH 25/42] Drivers: hv: vmbus: Add vendor and device atttributes + +Add vendor and device attributes to VMBUS devices. These will be used +by Hyper-V tools as well user-level RDMA libraries that will use the +vendor/device tuple to discover the RDMA device. + +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 7047f17d70fc0599563d30d0791692cb5fe42ae6) +--- + Documentation/ABI/stable/sysfs-bus-vmbus | 14 +++ + drivers/hv/channel_mgmt.c | 166 +++++++++++++++++++++++-------- + drivers/hv/vmbus_drv.c | 21 ++++ + include/linux/hyperv.h | 28 ++++++ + 4 files changed, 186 insertions(+), 43 deletions(-) + +diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus +index 636e938..5d0125f 100644 +--- a/Documentation/ABI/stable/sysfs-bus-vmbus ++++ b/Documentation/ABI/stable/sysfs-bus-vmbus +@@ -27,3 +27,17 @@ Description: The mapping of which primary/sub channels are bound to which + Virtual Processors. + Format: + Users: tools/hv/lsvmbus ++ ++What: /sys/bus/vmbus/devices/vmbus_*/device ++Date: Dec. 2015 ++KernelVersion: 4.5 ++Contact: K. Y. Srinivasan ++Description: The 16 bit device ID of the device ++Users: tools/hv/lsvmbus and user level RDMA libraries ++ ++What: /sys/bus/vmbus/devices/vmbus_*/vendor ++Date: Dec. 2015 ++KernelVersion: 4.5 ++Contact: K. Y. Srinivasan ++Description: The 16 bit vendor ID of the device ++Users: tools/hv/lsvmbus and user level RDMA libraries +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 763d0c1..d6c6114 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -33,8 +33,122 @@ + + #include "hyperv_vmbus.h" + +-static void init_vp_index(struct vmbus_channel *channel, +- const uuid_le *type_guid); ++static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); ++ ++static const struct vmbus_device vmbus_devs[] = { ++ /* IDE */ ++ { .dev_type = HV_IDE, ++ HV_IDE_GUID, ++ .perf_device = true, ++ }, ++ ++ /* SCSI */ ++ { .dev_type = HV_SCSI, ++ HV_SCSI_GUID, ++ .perf_device = true, ++ }, ++ ++ /* Fibre Channel */ ++ { .dev_type = HV_FC, ++ HV_SYNTHFC_GUID, ++ .perf_device = true, ++ }, ++ ++ /* Synthetic NIC */ ++ { .dev_type = HV_NIC, ++ HV_NIC_GUID, ++ .perf_device = true, ++ }, ++ ++ /* Network Direct */ ++ { .dev_type = HV_ND, ++ HV_ND_GUID, ++ .perf_device = true, ++ }, ++ ++ /* PCIE */ ++ { .dev_type = HV_PCIE, ++ HV_PCIE_GUID, ++ .perf_device = true, ++ }, ++ ++ /* Synthetic Frame Buffer */ ++ { .dev_type = HV_FB, ++ HV_SYNTHVID_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Synthetic Keyboard */ ++ { .dev_type = HV_KBD, ++ HV_KBD_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Synthetic MOUSE */ ++ { .dev_type = HV_MOUSE, ++ HV_MOUSE_GUID, ++ .perf_device = false, ++ }, ++ ++ /* KVP */ ++ { .dev_type = HV_KVP, ++ HV_KVP_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Time Synch */ ++ { .dev_type = HV_TS, ++ HV_TS_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Heartbeat */ ++ { .dev_type = HV_HB, ++ HV_HEART_BEAT_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Shutdown */ ++ { .dev_type = HV_SHUTDOWN, ++ HV_SHUTDOWN_GUID, ++ .perf_device = false, ++ }, ++ ++ /* File copy */ ++ { .dev_type = HV_FCOPY, ++ HV_FCOPY_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Backup */ ++ { .dev_type = HV_BACKUP, ++ HV_VSS_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Dynamic Memory */ ++ { .dev_type = HV_DM, ++ HV_DM_GUID, ++ .perf_device = false, ++ }, ++ ++ /* Unknown GUID */ ++ { 
.dev_type = HV_UNKOWN, ++ .perf_device = false, ++ }, ++}; ++ ++static u16 hv_get_dev_type(const uuid_le *guid) ++{ ++ u16 i; ++ ++ for (i = HV_IDE; i < HV_UNKOWN; i++) { ++ if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) ++ return i; ++ } ++ pr_info("Unknown GUID: %pUl\n", guid); ++ return i; ++} + + /** + * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message +@@ -252,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + struct vmbus_channel *channel; + bool fnew = true; + unsigned long flags; ++ u16 dev_type; + + /* Make sure this is a new offer */ + mutex_lock(&vmbus_connection.channel_mutex); +@@ -289,7 +404,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + goto err_free_chan; + } + +- init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); ++ dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type); ++ ++ init_vp_index(newchannel, dev_type); + + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); +@@ -326,6 +443,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + if (!newchannel->device_obj) + goto err_deq_chan; + ++ newchannel->device_obj->device_id = dev_type; + /* + * Add the new device to the bus. This will kick off device-driver + * binding which eventually invokes the device driver's AddDevice() +@@ -359,37 +477,6 @@ err_free_chan: + free_channel(newchannel); + } + +-enum { +- IDE = 0, +- SCSI, +- FC, +- NIC, +- ND_NIC, +- PCIE, +- MAX_PERF_CHN, +-}; +- +-/* +- * This is an array of device_ids (device types) that are performance critical. +- * We attempt to distribute the interrupt load for these devices across +- * all available CPUs. +- */ +-static const struct hv_vmbus_device_id hp_devs[] = { +- /* IDE */ +- { HV_IDE_GUID, }, +- /* Storage - SCSI */ +- { HV_SCSI_GUID, }, +- /* Storage - FC */ +- { HV_SYNTHFC_GUID, }, +- /* Network */ +- { HV_NIC_GUID, }, +- /* NetworkDirect Guest RDMA */ +- { HV_ND_GUID, }, +- /* PCI Express Pass Through */ +- { HV_PCIE_GUID, }, +-}; +- +- + /* + * We use this state to statically distribute the channel interrupt load. + */ +@@ -406,22 +493,15 @@ static int next_numa_node_id; + * For pre-win8 hosts or non-performance critical channels we assign the + * first CPU in the first NUMA node. 
+ */ +-static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) ++static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) + { + u32 cur_cpu; +- int i; +- bool perf_chn = false; ++ bool perf_chn = vmbus_devs[dev_type].perf_device; + struct vmbus_channel *primary = channel->primary_channel; + int next_node; + struct cpumask available_mask; + struct cpumask *alloced_mask; + +- for (i = IDE; i < MAX_PERF_CHN; i++) { +- if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { +- perf_chn = true; +- break; +- } +- } + if ((vmbus_proto_version == VERSION_WS2008) || + (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { + /* +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 7973aa5..de7130c 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -480,6 +480,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev, + } + static DEVICE_ATTR_RO(channel_vp_mapping); + ++static ssize_t vendor_show(struct device *dev, ++ struct device_attribute *dev_attr, ++ char *buf) ++{ ++ struct hv_device *hv_dev = device_to_hv_device(dev); ++ return sprintf(buf, "0x%x\n", hv_dev->vendor_id); ++} ++static DEVICE_ATTR_RO(vendor); ++ ++static ssize_t device_show(struct device *dev, ++ struct device_attribute *dev_attr, ++ char *buf) ++{ ++ struct hv_device *hv_dev = device_to_hv_device(dev); ++ return sprintf(buf, "0x%x\n", hv_dev->device_id); ++} ++static DEVICE_ATTR_RO(device); ++ + /* Set up per device attributes in /sys/bus/vmbus/devices/ */ + static struct attribute *vmbus_attrs[] = { + &dev_attr_id.attr, +@@ -505,6 +523,8 @@ static struct attribute *vmbus_attrs[] = { + &dev_attr_in_read_bytes_avail.attr, + &dev_attr_in_write_bytes_avail.attr, + &dev_attr_channel_vp_mapping.attr, ++ &dev_attr_vendor.attr, ++ &dev_attr_device.attr, + NULL, + }; + ATTRIBUTE_GROUPS(vmbus); +@@ -963,6 +983,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type, + memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); + memcpy(&child_device_obj->dev_instance, instance, + sizeof(uuid_le)); ++ child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ + + + return child_device_obj; +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 9e2de6a..51c98fd 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -635,6 +635,32 @@ enum hv_signal_policy { + HV_SIGNAL_POLICY_EXPLICIT, + }; + ++enum vmbus_device_type { ++ HV_IDE = 0, ++ HV_SCSI, ++ HV_FC, ++ HV_NIC, ++ HV_ND, ++ HV_PCIE, ++ HV_FB, ++ HV_KBD, ++ HV_MOUSE, ++ HV_KVP, ++ HV_TS, ++ HV_HB, ++ HV_SHUTDOWN, ++ HV_FCOPY, ++ HV_BACKUP, ++ HV_DM, ++ HV_UNKOWN, ++}; ++ ++struct vmbus_device { ++ u16 dev_type; ++ uuid_le guid; ++ bool perf_device; ++}; ++ + struct vmbus_channel { + /* Unique channel id */ + int id; +@@ -961,6 +987,8 @@ struct hv_device { + + /* the device instance id of this device */ + uuid_le dev_instance; ++ u16 vendor_id; ++ u16 device_id; + + struct device device; + +-- +2.10.0 + diff --git a/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch b/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch new file mode 100644 index 000000000..46095d238 --- /dev/null +++ b/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch @@ -0,0 +1,36 @@ +From 64f93cfc49018e7ffa772506cfe3631b3db530b9 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 27 Jan 2016 22:29:37 -0800 +Subject: [PATCH 26/42] Drivers: hv: vmbus: add a helper function to set a + channel's pending 
send size + +This will be used by the coming net/hvsock driver. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 3c75354d043ad546148d6992e40033ecaefc5ea5) +--- + include/linux/hyperv.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 51c98fd..934542a 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -818,6 +818,12 @@ static inline void *get_per_channel_state(struct vmbus_channel *c) + return c->per_channel_state; + } + ++static inline void set_channel_pending_send_size(struct vmbus_channel *c, ++ u32 size) ++{ ++ c->outbound.ring_buffer->pending_send_sz = size; ++} ++ + void vmbus_onmessage(void *context); + + int vmbus_request_offers(void); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch b/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch new file mode 100644 index 000000000..375a62ae1 --- /dev/null +++ b/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch @@ -0,0 +1,44 @@ +From 8658862991789c9dca080be3d35a7e72479b91e9 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 27 Jan 2016 22:29:38 -0800 +Subject: [PATCH 27/42] Drivers: hv: vmbus: define the new offer type for + Hyper-V socket (hvsock) + +A helper function is also added. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit e8d6ca023efce3bd80050dcd9e708ee3cf8babd4) +--- + include/linux/hyperv.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 934542a..a4f105d 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -237,6 +237,7 @@ struct vmbus_channel_offer { + #define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100 + #define VMBUS_CHANNEL_PARENT_OFFER 0x200 + #define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400 ++#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000 + + struct vmpacket_descriptor { + u16 type; +@@ -797,6 +798,12 @@ struct vmbus_channel { + enum hv_signal_policy signal_policy; + }; + ++static inline bool is_hvsock_channel(const struct vmbus_channel *c) ++{ ++ return !!(c->offermsg.offer.chn_flags & ++ VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); ++} ++ + static inline void set_channel_signal_state(struct vmbus_channel *c, + enum hv_signal_policy policy) + { +-- +2.10.0 + diff --git a/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch b/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch new file mode 100644 index 000000000..7c93e72a1 --- /dev/null +++ b/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch @@ -0,0 +1,45 @@ +From 60af2c3c5565e40ee66123edb9386ccaa1355dff Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 27 Jan 2016 22:29:39 -0800 +Subject: [PATCH 28/42] Drivers: hv: vmbus: vmbus_sendpacket_ctl: hvsock: avoid + unnecessary signaling + +When the hvsock channel's outbound ringbuffer is full (i.e., +hv_ringbuffer_write() returns -EAGAIN), we should avoid the unnecessary +signaling the host. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 5f363bc38f810d238d1e8b19998625ddec3b8138) +--- + drivers/hv/channel.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index dd6de7f..128dcf2 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -659,6 +659,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, + * If we cannot write to the ring-buffer; signal the host + * even if we may not have written anything. This is a rare + * enough condition that it should not matter. ++ * NOTE: in this case, the hvsock channel is an exception, because ++ * it looks the host side's hvsock implementation has a throttling ++ * mechanism which can hurt the performance otherwise. + */ + + if (channel->signal_policy) +@@ -666,7 +669,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, + else + kick_q = true; + +- if (((ret == 0) && kick_q && signal) || (ret)) ++ if (((ret == 0) && kick_q && signal) || ++ (ret && !is_hvsock_channel(channel))) + vmbus_setevent(channel); + + return ret; +-- +2.10.0 + diff --git a/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch b/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch new file mode 100644 index 000000000..fa317eec0 --- /dev/null +++ b/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch @@ -0,0 +1,101 @@ +From 2c5183043209906ad0a41fb1a5b4d0c4c8a8e735 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 27 Jan 2016 22:29:40 -0800 +Subject: [PATCH 29/42] Drivers: hv: vmbus: define a new VMBus message type for + hvsock + +A function to send the type of message is also added. + +The coming net/hvsock driver will use this function to proactively request +the host to offer a VMBus channel for a new hvsock connection. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 5c23a1a5c60b0f472cfa61cd7d8279f8aaeb5b64) +--- + drivers/hv/channel.c | 15 +++++++++++++++ + drivers/hv/channel_mgmt.c | 4 ++++ + include/linux/hyperv.h | 13 +++++++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 128dcf2..415f6c7 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -219,6 +219,21 @@ error0: + } + EXPORT_SYMBOL_GPL(vmbus_open); + ++/* Used for Hyper-V Socket: a guest client's connect() to the host */ ++int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, ++ const uuid_le *shv_host_servie_id) ++{ ++ struct vmbus_channel_tl_connect_request conn_msg; ++ ++ memset(&conn_msg, 0, sizeof(conn_msg)); ++ conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST; ++ conn_msg.guest_endpoint_id = *shv_guest_servie_id; ++ conn_msg.host_service_id = *shv_host_servie_id; ++ ++ return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); ++} ++EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); ++ + /* + * create_gpadl_header - Creates a gpadl for the specified buffer + */ +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index d6c6114..60ca25b 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -958,6 +958,10 @@ struct vmbus_channel_message_table_entry + {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, + {CHANNELMSG_UNLOAD, 0, NULL}, + {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, ++ {CHANNELMSG_18, 0, NULL}, ++ {CHANNELMSG_19, 0, NULL}, ++ {CHANNELMSG_20, 0, NULL}, ++ {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, + }; + + /* +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index a4f105d..191bc5d 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -394,6 +394,10 @@ enum vmbus_channel_message_type { + CHANNELMSG_VERSION_RESPONSE = 15, + CHANNELMSG_UNLOAD = 16, + CHANNELMSG_UNLOAD_RESPONSE = 17, ++ CHANNELMSG_18 = 18, ++ CHANNELMSG_19 = 19, ++ CHANNELMSG_20 = 20, ++ CHANNELMSG_TL_CONNECT_REQUEST = 21, + CHANNELMSG_COUNT + }; + +@@ -564,6 +568,13 @@ struct vmbus_channel_initiate_contact { + u64 monitor_page2; + } __packed; + ++/* Hyper-V socket: guest's connect()-ing to host */ ++struct vmbus_channel_tl_connect_request { ++ struct vmbus_channel_message_header header; ++ uuid_le guest_endpoint_id; ++ uuid_le host_service_id; ++} __packed; ++ + struct vmbus_channel_version_response { + struct vmbus_channel_message_header header; + u8 version_supported; +@@ -1295,4 +1306,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); + + extern __u32 vmbus_proto_version; + ++int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, ++ const uuid_le *shv_host_servie_id); + #endif /* _HYPERV_H */ +-- +2.10.0 + diff --git a/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch b/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch new file mode 100644 index 000000000..f264f65dd --- /dev/null +++ b/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch @@ -0,0 +1,64 @@ +From 58a10705d630bdcb5ea08c894d28851c73e9bd4f Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 27 Jan 2016 22:29:41 -0800 +Subject: [PATCH 30/42] Drivers: hv: vmbus: add a hvsock flag in struct + hv_driver + +Only the coming hv_sock driver has a "true" value for this flag. + +We treat the hvsock offers/channels as special VMBus devices. 
+Since the hv_sock driver handles all the hvsock offers/channels, we need to +tweak vmbus_match() for hv_sock driver, so we introduce this flag. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 8981da320a11217589aa3c50f9e891bcdef07ece) +--- + drivers/hv/vmbus_drv.c | 4 ++++ + include/linux/hyperv.h | 14 ++++++++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index de7130c..03fc5d3 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -585,6 +585,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver) + struct hv_driver *drv = drv_to_hv_drv(driver); + struct hv_device *hv_dev = device_to_hv_device(device); + ++ /* The hv_sock driver handles all hv_sock offers. */ ++ if (is_hvsock_channel(hv_dev->channel)) ++ return drv->hvsock; ++ + if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) + return 1; + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 191bc5d..05966e2 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -992,6 +992,20 @@ extern void vmbus_ontimer(unsigned long data); + struct hv_driver { + const char *name; + ++ /* ++ * A hvsock offer, which has a VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER ++ * channel flag, actually doesn't mean a synthetic device because the ++ * offer's if_type/if_instance can change for every new hvsock ++ * connection. ++ * ++ * However, to facilitate the notification of new-offer/rescind-offer ++ * from vmbus driver to hvsock driver, we can handle hvsock offer as ++ * a special vmbus device, and hence we need the below flag to ++ * indicate if the driver is the hvsock driver or not: we need to ++ * specially treat the hvosck offer & driver in vmbus_match(). ++ */ ++ bool hvsock; ++ + /* the device type supported by this driver */ + uuid_le dev_type; + const struct hv_vmbus_device_id *id_table; +-- +2.10.0 + diff --git a/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch b/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch new file mode 100644 index 000000000..9768186ca --- /dev/null +++ b/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch @@ -0,0 +1,72 @@ +From 6dd9db116b0985dfc56b3028205549f4c52d8be0 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 27 Jan 2016 22:29:42 -0800 +Subject: [PATCH 31/42] Drivers: hv: vmbus: add a per-channel rescind callback + +This will be used by the coming hv_sock driver. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 499e8401a515d04daa986b995da710d2b9737764) +--- + drivers/hv/channel_mgmt.c | 11 +++++++++++ + include/linux/hyperv.h | 9 +++++++++ + 2 files changed, 20 insertions(+) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 60ca25b..76864c9 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -741,6 +741,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) + spin_unlock_irqrestore(&channel->lock, flags); + + if (channel->device_obj) { ++ if (channel->chn_rescind_callback) { ++ channel->chn_rescind_callback(channel); ++ return; ++ } + /* + * We will have to unregister this device from the + * driver core. 
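For illustration, a channel owner such as the coming hv_sock driver would hook this path roughly as follows (a minimal sketch; the handler name and body are hypothetical, only the chn_rescind_callback field and the setter added below come from this patch):

	/* Hypothetical hvsock-side handler: runs from vmbus_onoffer_rescind()
	 * in place of the generic device-unregister path.
	 */
	static void hvsock_chn_rescind(struct vmbus_channel *channel)
	{
		pr_info("hvsock: relid %u rescinded by host\n",
			channel->offermsg.child_relid);
		/* wake up and error out any socket bound to this channel */
	}

	/* Registered once the driver owns the channel: */
	vmbus_set_chn_rescind_callback(channel, hvsock_chn_rescind);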
+@@ -1110,3 +1114,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary) + return ret; + } + EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); ++ ++void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, ++ void (*chn_rescind_cb)(struct vmbus_channel *)) ++{ ++ channel->chn_rescind_callback = chn_rescind_cb; ++} ++EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 05966e2..ad04017 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -768,6 +768,12 @@ struct vmbus_channel { + void (*sc_creation_callback)(struct vmbus_channel *new_sc); + + /* ++ * Channel rescind callback. Some channels (the hvsock ones), need to ++ * register a callback which is invoked in vmbus_onoffer_rescind(). ++ */ ++ void (*chn_rescind_callback)(struct vmbus_channel *channel); ++ ++ /* + * The spinlock to protect the structure. It is being used to protect + * test-and-set access to various attributes of the structure as well + * as all sc_list operations. +@@ -853,6 +859,9 @@ int vmbus_request_offers(void); + void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, + void (*sc_cr_cb)(struct vmbus_channel *new_sc)); + ++void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, ++ void (*chn_rescind_cb)(struct vmbus_channel *)); ++ + /* + * Retrieve the (sub) channel on which to send an outgoing request. + * When a primary channel has multiple sub-channels, we choose a +-- +2.10.0 + diff --git a/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch b/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch new file mode 100644 index 000000000..cb9a4f99c --- /dev/null +++ b/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch @@ -0,0 +1,153 @@ +From 5e89daa5e8c0b5950b46ba77dd6248c5e61bc405 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 27 Jan 2016 22:29:43 -0800 +Subject: [PATCH 32/42] Drivers: hv: vmbus: add an API + vmbus_hvsock_device_unregister() + +The hvsock driver needs this API to release all the resources related +to the channel. + +Signed-off-by: Dexuan Cui +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 85d9aa705184a4504d0330017e3956fcdae8a9d6) +--- + drivers/hv/channel_mgmt.c | 33 ++++++++++++++++++++++++++++----- + drivers/hv/connection.c | 4 ++-- + include/linux/hyperv.h | 2 ++ + 3 files changed, 32 insertions(+), 7 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 76864c9..cf311be 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -310,6 +310,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) + vmbus_release_relid(relid); + + BUG_ON(!channel->rescind); ++ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + + if (channel->target_cpu != get_cpu()) { + put_cpu(); +@@ -321,9 +322,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) + } + + if (channel->primary_channel == NULL) { +- mutex_lock(&vmbus_connection.channel_mutex); + list_del(&channel->listentry); +- mutex_unlock(&vmbus_connection.channel_mutex); + + primary_channel = channel; + } else { +@@ -367,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + bool fnew = true; + unsigned long flags; + u16 dev_type; ++ int ret; + + /* Make sure this is a new offer */ + mutex_lock(&vmbus_connection.channel_mutex); +@@ -449,7 +449,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + * binding which eventually invokes the device driver's AddDevice() + * method. + */ +- if (vmbus_device_register(newchannel->device_obj) != 0) { ++ mutex_lock(&vmbus_connection.channel_mutex); ++ ret = vmbus_device_register(newchannel->device_obj); ++ mutex_unlock(&vmbus_connection.channel_mutex); ++ ++ if (ret != 0) { + pr_err("unable to add child device object (relid %d)\n", + newchannel->offermsg.child_relid); + kfree(newchannel->device_obj); +@@ -725,6 +729,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) + struct device *dev; + + rescind = (struct vmbus_channel_rescind_offer *)hdr; ++ ++ mutex_lock(&vmbus_connection.channel_mutex); + channel = relid2channel(rescind->child_relid); + + if (channel == NULL) { +@@ -733,7 +739,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) + * vmbus_process_offer(), we have already invoked + * vmbus_release_relid() on error. + */ +- return; ++ goto out; + } + + spin_lock_irqsave(&channel->lock, flags); +@@ -743,7 +749,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) + if (channel->device_obj) { + if (channel->chn_rescind_callback) { + channel->chn_rescind_callback(channel); +- return; ++ goto out; + } + /* + * We will have to unregister this device from the +@@ -758,8 +764,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) + hv_process_channel_removal(channel, + channel->offermsg.child_relid); + } ++ ++out: ++ mutex_unlock(&vmbus_connection.channel_mutex); + } + ++void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) ++{ ++ mutex_lock(&vmbus_connection.channel_mutex); ++ ++ BUG_ON(!is_hvsock_channel(channel)); ++ ++ channel->rescind = true; ++ vmbus_device_unregister(channel->device_obj); ++ ++ mutex_unlock(&vmbus_connection.channel_mutex); ++} ++EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); ++ ++ + /* + * vmbus_onoffers_delivered - + * This is invoked when all offers have been delivered. 
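The intended caller of the new API is the hvsock driver's per-connection teardown. A minimal sketch under that assumption (the close path is hypothetical; only vmbus_hvsock_device_unregister() comes from this patch):

	static void hvsock_close_connection(struct vmbus_channel *channel)
	{
		/*
		 * Marks the channel rescinded and unregisters its device
		 * while holding vmbus_connection.channel_mutex, which is
		 * exactly the locking that the BUG_ON() added above and
		 * the relid2channel() change below now require.
		 */
		vmbus_hvsock_device_unregister(channel);
	}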
+diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c +index 521f48e..09c08b5 100644 +--- a/drivers/hv/connection.c ++++ b/drivers/hv/connection.c +@@ -285,7 +285,8 @@ struct vmbus_channel *relid2channel(u32 relid) + struct list_head *cur, *tmp; + struct vmbus_channel *cur_sc; + +- mutex_lock(&vmbus_connection.channel_mutex); ++ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); ++ + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { + if (channel->offermsg.child_relid == relid) { + found_channel = channel; +@@ -304,7 +305,6 @@ struct vmbus_channel *relid2channel(u32 relid) + } + } + } +- mutex_unlock(&vmbus_connection.channel_mutex); + + return found_channel; + } +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index ad04017..993318a 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1071,6 +1071,8 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, + const char *mod_name); + void vmbus_driver_unregister(struct hv_driver *hv_driver); + ++void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); ++ + int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + resource_size_t min, resource_size_t max, + resource_size_t size, resource_size_t align, +-- +2.10.0 + diff --git a/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch b/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch new file mode 100644 index 000000000..fa351c53c --- /dev/null +++ b/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch @@ -0,0 +1,208 @@ +From b7e3c4ad47b7fd47a79a723ac0c1823b6782d1ff Mon Sep 17 00:00:00 2001 +From: "K. Y. Srinivasan" +Date: Wed, 27 Jan 2016 22:29:45 -0800 +Subject: [PATCH 33/42] Drivers: hv: vmbus: Give control over how the ring + access is serialized + +On the channel send side, many of the VMBUS +device drivers explicity serialize access to the +outgoing ring buffer. Give more control to the +VMBUS device drivers in terms how to serialize +accesss to the outgoing ring buffer. +The default behavior will be to aquire the +ring lock to preserve the current behavior. + +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit fe760e4d64fe5c17c39e86c410d41f6587ee88bc) +--- + drivers/hv/channel.c | 15 +++++++++++---- + drivers/hv/channel_mgmt.c | 1 + + drivers/hv/hyperv_vmbus.h | 2 +- + drivers/hv/ring_buffer.c | 13 ++++++++----- + include/linux/hyperv.h | 16 ++++++++++++++++ + 5 files changed, 37 insertions(+), 10 deletions(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 415f6c7..57a1b65 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -639,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, + u64 aligned_data = 0; + int ret; + bool signal = false; ++ bool lock = channel->acquire_ring_lock; + int num_vecs = ((bufferlen != 0) ? 
3 : 1); + + +@@ -658,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, +- &signal); ++ &signal, lock); + + /* + * Signalling the host is conditional on many factors: +@@ -738,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, + struct kvec bufferlist[3]; + u64 aligned_data = 0; + bool signal = false; ++ bool lock = channel->acquire_ring_lock; + + if (pagecount > MAX_PAGE_BUFFER_COUNT) + return -EINVAL; +@@ -774,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, + bufferlist[2].iov_base = &aligned_data; + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + +- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); ++ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, ++ &signal, lock); + + /* + * Signalling the host is conditional on many factors: +@@ -837,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, + struct kvec bufferlist[3]; + u64 aligned_data = 0; + bool signal = false; ++ bool lock = channel->acquire_ring_lock; + + packetlen = desc_size + bufferlen; + packetlen_aligned = ALIGN(packetlen, sizeof(u64)); +@@ -856,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, + bufferlist[2].iov_base = &aligned_data; + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + +- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); ++ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, ++ &signal, lock); + + if (ret == 0 && signal) + vmbus_setevent(channel); +@@ -881,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, + struct kvec bufferlist[3]; + u64 aligned_data = 0; + bool signal = false; ++ bool lock = channel->acquire_ring_lock; + u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, + multi_pagebuffer->len); + +@@ -919,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, + bufferlist[2].iov_base = &aligned_data; + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + +- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); ++ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, ++ &signal, lock); + + if (ret == 0 && signal) + vmbus_setevent(channel); +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index cf311be..b40f429 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -259,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void) + return NULL; + + channel->id = atomic_inc_return(&chan_num); ++ channel->acquire_ring_lock = true; + spin_lock_init(&channel->inbound_lock); + spin_lock_init(&channel->lock); + +diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h +index 50b1de7..89bb559 100644 +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -617,7 +617,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); + + int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, + struct kvec *kv_list, +- u32 kv_count, bool *signal); ++ u32 kv_count, bool *signal, bool lock); + + int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer, + u32 buflen); +diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c +index 70a1a9a..89a428f 100644 +--- a/drivers/hv/ring_buffer.c ++++ b/drivers/hv/ring_buffer.c +@@ -388,7 +388,7 @@ void hv_ringbuffer_cleanup(struct 
hv_ring_buffer_info *ring_info)
+ *
+ */
+ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
+-		    struct kvec *kv_list, u32 kv_count, bool *signal)
++		    struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
+ {
+ 	int i = 0;
+ 	u32 bytes_avail_towrite;
+@@ -398,14 +398,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
+ 	u32 next_write_location;
+ 	u32 old_write;
+ 	u64 prev_indices = 0;
+-	unsigned long flags;
++	unsigned long flags = 0;
+ 
+ 	for (i = 0; i < kv_count; i++)
+ 		totalbytes_towrite += kv_list[i].iov_len;
+ 
+ 	totalbytes_towrite += sizeof(u64);
+ 
+-	spin_lock_irqsave(&outring_info->ring_lock, flags);
++	if (lock)
++		spin_lock_irqsave(&outring_info->ring_lock, flags);
+ 
+ 	hv_get_ringbuffer_availbytes(outring_info,
+ 				     &bytes_avail_toread,
+@@ -416,7 +417,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
+ 	/* Otherwise, the next time around, we think the ring buffer */
+ 	/* is empty since the read index == write index */
+ 	if (bytes_avail_towrite <= totalbytes_towrite) {
+-		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
++		if (lock)
++			spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+ 		return -EAGAIN;
+ 	}
+ 
+@@ -447,7 +449,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
+ 	hv_set_next_write_location(outring_info, next_write_location);
+ 
+ 
+-	spin_unlock_irqrestore(&outring_info->ring_lock, flags);
++	if (lock)
++		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+ 
+ 	*signal = hv_need_to_signal(old_write, outring_info);
+ 	return 0;
+diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
+index 993318a..6c9695e 100644
+--- a/include/linux/hyperv.h
++++ b/include/linux/hyperv.h
+@@ -813,8 +813,24 @@ struct vmbus_channel {
+ 	 * signaling control.
+ 	 */
+ 	enum hv_signal_policy  signal_policy;
++	/*
++	 * On the channel send side, many of the VMBUS
++	 * device drivers explicity serialize access to the
++	 * outgoing ring buffer. Give more control to the
++	 * VMBUS device drivers in terms how to serialize
++	 * accesss to the outgoing ring buffer.
++	 * The default behavior will be to aquire the
++	 * ring lock to preserve the current behavior.
++	 */
++	bool acquire_ring_lock;
++
+ };
+ 
++static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
++{
++	c->acquire_ring_lock = state;
++}
++
+ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
+ {
+ 	return !!(c->offermsg.offer.chn_flags &
+-- 
+2.10.0
+
diff --git a/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch b/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch
new file mode 100644
index 000000000..06b9acdea
--- /dev/null
+++ b/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch
@@ -0,0 +1,100 @@
+From af2dd29e3cf40c789045199893c232d57f0b7057 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov
+Date: Fri, 26 Feb 2016 15:13:16 -0800
+Subject: [PATCH 34/42] Drivers: hv: vmbus: avoid wait_for_completion() on
+ crash
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+wait_for_completion() may sleep, it enables interrupts and this
+is something we really want to avoid on crashes because interrupt
+handlers can cause other crashes. Switch to the recently introduced
+vmbus_wait_for_unload() doing busy wait instead.
+
+Reported-by: Radim Krcmar
+Signed-off-by: Vitaly Kuznetsov
+Reviewed-by: Radim Krčmář
+Signed-off-by: K. Y.
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 75ff3a8a9168df750b5bd0589e897a6c0517a9f1) +--- + drivers/hv/channel_mgmt.c | 4 ++-- + drivers/hv/connection.c | 2 +- + drivers/hv/hyperv_vmbus.h | 2 +- + drivers/hv/vmbus_drv.c | 4 ++-- + 4 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index b40f429..f70e352 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -641,7 +641,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) + complete(&vmbus_connection.unload_event); + } + +-void vmbus_initiate_unload(void) ++void vmbus_initiate_unload(bool crash) + { + struct vmbus_channel_message_header hdr; + +@@ -658,7 +658,7 @@ void vmbus_initiate_unload(void) + * vmbus_initiate_unload() is also called on crash and the crash can be + * happening in an interrupt context, where scheduling is impossible. + */ +- if (!in_interrupt()) ++ if (!crash) + wait_for_completion(&vmbus_connection.unload_event); + else + vmbus_wait_for_unload(); +diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c +index 09c08b5..78b8be8 100644 +--- a/drivers/hv/connection.c ++++ b/drivers/hv/connection.c +@@ -233,7 +233,7 @@ void vmbus_disconnect(void) + /* + * First send the unload request to the host. + */ +- vmbus_initiate_unload(); ++ vmbus_initiate_unload(false); + + if (vmbus_connection.work_queue) { + drain_workqueue(vmbus_connection.work_queue); +diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h +index 89bb559..f424c2d 100644 +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -756,7 +756,7 @@ void hv_vss_onchannelcallback(void *); + int hv_fcopy_init(struct hv_util_service *); + void hv_fcopy_deinit(void); + void hv_fcopy_onchannelcallback(void *); +-void vmbus_initiate_unload(void); ++void vmbus_initiate_unload(bool crash); + + static inline void hv_poll_channel(struct vmbus_channel *channel, + void (*cb)(void *)) +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 03fc5d3..b0cc6fd 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -1276,7 +1276,7 @@ static void hv_kexec_handler(void) + int cpu; + + hv_synic_clockevents_cleanup(); +- vmbus_initiate_unload(); ++ vmbus_initiate_unload(false); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); + hv_cleanup(); +@@ -1284,7 +1284,7 @@ static void hv_kexec_handler(void) + + static void hv_crash_handler(struct pt_regs *regs) + { +- vmbus_initiate_unload(); ++ vmbus_initiate_unload(true); + /* + * In crash handler we can't schedule synic cleanup for all CPUs, + * doing the cleanup for current CPU only. 
This should be sufficient
+-- 
+2.10.0
+
diff --git a/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch b/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch
new file mode 100644
index 000000000..58ba9c17c
--- /dev/null
+++ b/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch
@@ -0,0 +1,39 @@
+From fa3647ae889af3cccaaee37ac0723fc1b74689e3 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov
+Date: Fri, 26 Feb 2016 15:13:18 -0800
+Subject: [PATCH 35/42] Drivers: hv: vmbus: avoid unneeded compiler
+ optimizations in vmbus_wait_for_unload()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Message header is modified by the hypervisor and we read it in a loop,
+we need to prevent compilers from optimizing accesses. There are no such
+optimizations at this moment, this is just a future proof.
+
+Suggested-by: Radim Krcmar
+Signed-off-by: Vitaly Kuznetsov
+Reviewed-by: Radim Krčmář
+Signed-off-by: K. Y. Srinivasan
+Signed-off-by: Greg Kroah-Hartman
+(cherry picked from commit d452ab7b4c65dfcaee88a0d6866eeeb98a3d1884)
+---
+ drivers/hv/channel_mgmt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
+index f70e352..c892db5 100644
+--- a/drivers/hv/channel_mgmt.c
++++ b/drivers/hv/channel_mgmt.c
+@@ -605,7 +605,7 @@ static void vmbus_wait_for_unload(void)
+ 	bool unloaded = false;
+ 
+ 	while (1) {
+-		if (msg->header.message_type == HVMSG_NONE) {
++		if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) {
+ 			mdelay(10);
+ 			continue;
+ 		}
+-- 
+2.10.0
+
diff --git a/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch b/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch
new file mode 100644
index 000000000..9353d1e53
--- /dev/null
+++ b/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch
@@ -0,0 +1,2312 @@
+From afde92b79d7bbdf25d3f583898cbee4773b07d41 Mon Sep 17 00:00:00 2001
+From: Tom Herbert
+Date: Mon, 7 Mar 2016 14:11:06 -0800
+Subject: [PATCH 36/42] kcm: Kernel Connection Multiplexor module
+
+This module implements the Kernel Connection Multiplexor.
+
+Kernel Connection Multiplexor (KCM) is a facility that provides a
+message based interface over TCP for generic application protocols.
+With KCM an application can efficiently send and receive application
+protocol messages over TCP using datagram sockets.
+
+For more information see the included Documentation/networking/kcm.txt
+
+Signed-off-by: Tom Herbert
+Signed-off-by: David S.
Miller +(cherry picked from commit ab7ac4eb9832e32a09f4e8042705484d2fb0aad3) +--- + include/linux/socket.h | 6 +- + include/net/kcm.h | 125 +++ + include/uapi/linux/kcm.h | 39 + + net/Kconfig | 1 + + net/Makefile | 1 + + net/kcm/Kconfig | 9 + + net/kcm/Makefile | 3 + + net/kcm/kcmsock.c | 2015 ++++++++++++++++++++++++++++++++++++++++++++++ + 8 files changed, 2198 insertions(+), 1 deletion(-) + create mode 100644 include/net/kcm.h + create mode 100644 include/uapi/linux/kcm.h + create mode 100644 net/kcm/Kconfig + create mode 100644 net/kcm/Makefile + create mode 100644 net/kcm/kcmsock.c + +diff --git a/include/linux/socket.h b/include/linux/socket.h +index 5bf59c8..4e1ea53 100644 +--- a/include/linux/socket.h ++++ b/include/linux/socket.h +@@ -200,7 +200,9 @@ struct ucred { + #define AF_ALG 38 /* Algorithm sockets */ + #define AF_NFC 39 /* NFC sockets */ + #define AF_VSOCK 40 /* vSockets */ +-#define AF_MAX 41 /* For now.. */ ++#define AF_KCM 41 /* Kernel Connection Multiplexor*/ ++ ++#define AF_MAX 42 /* For now.. */ + + /* Protocol families, same as address families. */ + #define PF_UNSPEC AF_UNSPEC +@@ -246,6 +248,7 @@ struct ucred { + #define PF_ALG AF_ALG + #define PF_NFC AF_NFC + #define PF_VSOCK AF_VSOCK ++#define PF_KCM AF_KCM + #define PF_MAX AF_MAX + + /* Maximum queue length specifiable by listen. */ +@@ -322,6 +325,7 @@ struct ucred { + #define SOL_CAIF 278 + #define SOL_ALG 279 + #define SOL_NFC 280 ++#define SOL_KCM 281 + + /* IPX options */ + #define IPX_TYPE 1 +diff --git a/include/net/kcm.h b/include/net/kcm.h +new file mode 100644 +index 0000000..1bcae39 +--- /dev/null ++++ b/include/net/kcm.h +@@ -0,0 +1,125 @@ ++/* ++ * Kernel Connection Multiplexor ++ * ++ * Copyright (c) 2016 Tom Herbert ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation. 
++ */ ++ ++#ifndef __NET_KCM_H_ ++#define __NET_KCM_H_ ++ ++#include ++#include ++#include ++ ++extern unsigned int kcm_net_id; ++ ++struct kcm_tx_msg { ++ unsigned int sent; ++ unsigned int fragidx; ++ unsigned int frag_offset; ++ unsigned int msg_flags; ++ struct sk_buff *frag_skb; ++ struct sk_buff *last_skb; ++}; ++ ++struct kcm_rx_msg { ++ int full_len; ++ int accum_len; ++ int offset; ++}; ++ ++/* Socket structure for KCM client sockets */ ++struct kcm_sock { ++ struct sock sk; ++ struct kcm_mux *mux; ++ struct list_head kcm_sock_list; ++ int index; ++ u32 done : 1; ++ struct work_struct done_work; ++ ++ /* Transmit */ ++ struct kcm_psock *tx_psock; ++ struct work_struct tx_work; ++ struct list_head wait_psock_list; ++ struct sk_buff *seq_skb; ++ ++ /* Don't use bit fields here, these are set under different locks */ ++ bool tx_wait; ++ bool tx_wait_more; ++ ++ /* Receive */ ++ struct kcm_psock *rx_psock; ++ struct list_head wait_rx_list; /* KCMs waiting for receiving */ ++ bool rx_wait; ++ u32 rx_disabled : 1; ++}; ++ ++struct bpf_prog; ++ ++/* Structure for an attached lower socket */ ++struct kcm_psock { ++ struct sock *sk; ++ struct kcm_mux *mux; ++ int index; ++ ++ u32 tx_stopped : 1; ++ u32 rx_stopped : 1; ++ u32 done : 1; ++ u32 unattaching : 1; ++ ++ void (*save_state_change)(struct sock *sk); ++ void (*save_data_ready)(struct sock *sk); ++ void (*save_write_space)(struct sock *sk); ++ ++ struct list_head psock_list; ++ ++ /* Receive */ ++ struct sk_buff *rx_skb_head; ++ struct sk_buff **rx_skb_nextp; ++ struct sk_buff *ready_rx_msg; ++ struct list_head psock_ready_list; ++ struct work_struct rx_work; ++ struct delayed_work rx_delayed_work; ++ struct bpf_prog *bpf_prog; ++ struct kcm_sock *rx_kcm; ++ ++ /* Transmit */ ++ struct kcm_sock *tx_kcm; ++ struct list_head psock_avail_list; ++}; ++ ++/* Per net MUX list */ ++struct kcm_net { ++ struct mutex mutex; ++ struct list_head mux_list; ++ int count; ++}; ++ ++/* Structure for a MUX */ ++struct kcm_mux { ++ struct list_head kcm_mux_list; ++ struct rcu_head rcu; ++ struct kcm_net *knet; ++ ++ struct list_head kcm_socks; /* All KCM sockets on MUX */ ++ int kcm_socks_cnt; /* Total KCM socket count for MUX */ ++ struct list_head psocks; /* List of all psocks on MUX */ ++ int psocks_cnt; /* Total attached sockets */ ++ ++ /* Receive */ ++ spinlock_t rx_lock ____cacheline_aligned_in_smp; ++ struct list_head kcm_rx_waiters; /* KCMs waiting for receiving */ ++ struct list_head psocks_ready; /* List of psocks with a msg ready */ ++ struct sk_buff_head rx_hold_queue; ++ ++ /* Transmit */ ++ spinlock_t lock ____cacheline_aligned_in_smp; /* TX and mux locking */ ++ struct list_head psocks_avail; /* List of available psocks */ ++ struct list_head kcm_tx_waiters; /* KCMs waiting for a TX psock */ ++}; ++ ++#endif /* __NET_KCM_H_ */ +diff --git a/include/uapi/linux/kcm.h b/include/uapi/linux/kcm.h +new file mode 100644 +index 0000000..d72350f +--- /dev/null ++++ b/include/uapi/linux/kcm.h +@@ -0,0 +1,39 @@ ++/* ++ * Kernel Connection Multiplexor ++ * ++ * Copyright (c) 2016 Tom Herbert ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation. ++ * ++ * User API to clone KCM sockets and attach transport socket to a KCM ++ * multiplexor. 
++ */ ++ ++#ifndef KCM_KERNEL_H ++#define KCM_KERNEL_H ++ ++struct kcm_attach { ++ int fd; ++ int bpf_fd; ++}; ++ ++struct kcm_unattach { ++ int fd; ++}; ++ ++struct kcm_clone { ++ int fd; ++}; ++ ++#define SIOCKCMATTACH (SIOCPROTOPRIVATE + 0) ++#define SIOCKCMUNATTACH (SIOCPROTOPRIVATE + 1) ++#define SIOCKCMCLONE (SIOCPROTOPRIVATE + 2) ++ ++#define KCMPROTO_CONNECTED 0 ++ ++/* Socket options */ ++#define KCM_RECV_DISABLE 1 ++ ++#endif +diff --git a/net/Kconfig b/net/Kconfig +index 127da94..b8439e6 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -351,6 +351,7 @@ source "net/can/Kconfig" + source "net/irda/Kconfig" + source "net/bluetooth/Kconfig" + source "net/rxrpc/Kconfig" ++source "net/kcm/Kconfig" + + config FIB_RULES + bool +diff --git a/net/Makefile b/net/Makefile +index a5d0409..81d1411 100644 +--- a/net/Makefile ++++ b/net/Makefile +@@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/ + obj-$(CONFIG_BT) += bluetooth/ + obj-$(CONFIG_SUNRPC) += sunrpc/ + obj-$(CONFIG_AF_RXRPC) += rxrpc/ ++obj-$(CONFIG_AF_KCM) += kcm/ + obj-$(CONFIG_ATM) += atm/ + obj-$(CONFIG_L2TP) += l2tp/ + obj-$(CONFIG_DECNET) += decnet/ +diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig +new file mode 100644 +index 0000000..4f28332 +--- /dev/null ++++ b/net/kcm/Kconfig +@@ -0,0 +1,9 @@ ++ ++config AF_KCM ++ tristate "KCM sockets" ++ depends on INET ++ select BPF_SYSCALL ++ ---help--- ++ KCM (Kernel Connection Multiplexor) sockets provide a method ++ for multiplexing messages of a message based application ++ protocol over kernel connectons (e.g. TCP connections). +diff --git a/net/kcm/Makefile b/net/kcm/Makefile +new file mode 100644 +index 0000000..cb525f7 +--- /dev/null ++++ b/net/kcm/Makefile +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_AF_KCM) += kcm.o ++ ++kcm-y := kcmsock.o +diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c +new file mode 100644 +index 0000000..649d246 +--- /dev/null ++++ b/net/kcm/kcmsock.c +@@ -0,0 +1,2015 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned int kcm_net_id; ++ ++static struct kmem_cache *kcm_psockp __read_mostly; ++static struct kmem_cache *kcm_muxp __read_mostly; ++static struct workqueue_struct *kcm_wq; ++ ++static inline struct kcm_sock *kcm_sk(const struct sock *sk) ++{ ++ return (struct kcm_sock *)sk; ++} ++ ++static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) ++{ ++ return (struct kcm_tx_msg *)skb->cb; ++} ++ ++static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) ++{ ++ return (struct kcm_rx_msg *)((void *)skb->cb + ++ offsetof(struct qdisc_skb_cb, data)); ++} ++ ++static void report_csk_error(struct sock *csk, int err) ++{ ++ csk->sk_err = EPIPE; ++ csk->sk_error_report(csk); ++} ++ ++/* Callback lock held */ ++static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, ++ struct sk_buff *skb) ++{ ++ struct sock *csk = psock->sk; ++ ++ /* Unrecoverable error in receive */ ++ ++ if (psock->rx_stopped) ++ return; ++ ++ psock->rx_stopped = 1; ++ ++ /* Report an error on the lower socket */ ++ report_csk_error(csk, err); ++} ++ ++static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, ++ bool wakeup_kcm) ++{ ++ struct sock *csk = psock->sk; ++ struct kcm_mux *mux = psock->mux; ++ ++ /* Unrecoverable error in transmit */ ++ ++ spin_lock_bh(&mux->lock); ++ ++ if (psock->tx_stopped) { ++ spin_unlock_bh(&mux->lock); ++ return; ++ } ++ ++ psock->tx_stopped = 1; ++ ++ if 
(!psock->tx_kcm) { ++ /* Take off psocks_avail list */ ++ list_del(&psock->psock_avail_list); ++ } else if (wakeup_kcm) { ++ /* In this case psock is being aborted while outside of ++ * write_msgs and psock is reserved. Schedule tx_work ++ * to handle the failure there. Need to commit tx_stopped ++ * before queuing work. ++ */ ++ smp_mb(); ++ ++ queue_work(kcm_wq, &psock->tx_kcm->tx_work); ++ } ++ ++ spin_unlock_bh(&mux->lock); ++ ++ /* Report error on lower socket */ ++ report_csk_error(csk, err); ++} ++ ++static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); ++ ++/* KCM is ready to receive messages on its queue-- either the KCM is new or ++ * has become unblocked after being blocked on full socket buffer. Queue any ++ * pending ready messages on a psock. RX mux lock held. ++ */ ++static void kcm_rcv_ready(struct kcm_sock *kcm) ++{ ++ struct kcm_mux *mux = kcm->mux; ++ struct kcm_psock *psock; ++ struct sk_buff *skb; ++ ++ if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled)) ++ return; ++ ++ while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) { ++ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { ++ /* Assuming buffer limit has been reached */ ++ skb_queue_head(&mux->rx_hold_queue, skb); ++ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); ++ return; ++ } ++ } ++ ++ while (!list_empty(&mux->psocks_ready)) { ++ psock = list_first_entry(&mux->psocks_ready, struct kcm_psock, ++ psock_ready_list); ++ ++ if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) { ++ /* Assuming buffer limit has been reached */ ++ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); ++ return; ++ } ++ ++ /* Consumed the ready message on the psock. Schedule rx_work to ++ * get more messages. ++ */ ++ list_del(&psock->psock_ready_list); ++ psock->ready_rx_msg = NULL; ++ ++ /* Commit clearing of ready_rx_msg for queuing work */ ++ smp_mb(); ++ ++ queue_work(kcm_wq, &psock->rx_work); ++ } ++ ++ /* Buffer limit is okay now, add to ready list */ ++ list_add_tail(&kcm->wait_rx_list, ++ &kcm->mux->kcm_rx_waiters); ++ kcm->rx_wait = true; ++} ++ ++static void kcm_rfree(struct sk_buff *skb) ++{ ++ struct sock *sk = skb->sk; ++ struct kcm_sock *kcm = kcm_sk(sk); ++ struct kcm_mux *mux = kcm->mux; ++ unsigned int len = skb->truesize; ++ ++ sk_mem_uncharge(sk, len); ++ atomic_sub(len, &sk->sk_rmem_alloc); ++ ++ /* For reading rx_wait and rx_psock without holding lock */ ++ smp_mb__after_atomic(); ++ ++ if (!kcm->rx_wait && !kcm->rx_psock && ++ sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { ++ spin_lock_bh(&mux->rx_lock); ++ kcm_rcv_ready(kcm); ++ spin_unlock_bh(&mux->rx_lock); ++ } ++} ++ ++static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ++{ ++ struct sk_buff_head *list = &sk->sk_receive_queue; ++ ++ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) ++ return -ENOMEM; ++ ++ if (!sk_rmem_schedule(sk, skb, skb->truesize)) ++ return -ENOBUFS; ++ ++ skb->dev = NULL; ++ ++ skb_orphan(skb); ++ skb->sk = sk; ++ skb->destructor = kcm_rfree; ++ atomic_add(skb->truesize, &sk->sk_rmem_alloc); ++ sk_mem_charge(sk, skb->truesize); ++ ++ skb_queue_tail(list, skb); ++ ++ if (!sock_flag(sk, SOCK_DEAD)) ++ sk->sk_data_ready(sk); ++ ++ return 0; ++} ++ ++/* Requeue received messages for a kcm socket to other kcm sockets. This is ++ * called with a kcm socket is receive disabled. ++ * RX mux lock held. 
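++ *
++ * Illustrative call pattern (a sketch; it matches the use in
++ * unreserve_rx_kcm() below):
++ *
++ *	spin_lock_bh(&mux->rx_lock);
++ *	requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
++ *	spin_unlock_bh(&mux->rx_lock);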
++ */ ++static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) ++{ ++ struct sk_buff *skb; ++ struct kcm_sock *kcm; ++ ++ while ((skb = __skb_dequeue(head))) { ++ /* Reset destructor to avoid calling kcm_rcv_ready */ ++ skb->destructor = sock_rfree; ++ skb_orphan(skb); ++try_again: ++ if (list_empty(&mux->kcm_rx_waiters)) { ++ skb_queue_tail(&mux->rx_hold_queue, skb); ++ continue; ++ } ++ ++ kcm = list_first_entry(&mux->kcm_rx_waiters, ++ struct kcm_sock, wait_rx_list); ++ ++ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { ++ /* Should mean socket buffer full */ ++ list_del(&kcm->wait_rx_list); ++ kcm->rx_wait = false; ++ ++ /* Commit rx_wait to read in kcm_free */ ++ smp_wmb(); ++ ++ goto try_again; ++ } ++ } ++} ++ ++/* Lower sock lock held */ ++static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, ++ struct sk_buff *head) ++{ ++ struct kcm_mux *mux = psock->mux; ++ struct kcm_sock *kcm; ++ ++ WARN_ON(psock->ready_rx_msg); ++ ++ if (psock->rx_kcm) ++ return psock->rx_kcm; ++ ++ spin_lock_bh(&mux->rx_lock); ++ ++ if (psock->rx_kcm) { ++ spin_unlock_bh(&mux->rx_lock); ++ return psock->rx_kcm; ++ } ++ ++ if (list_empty(&mux->kcm_rx_waiters)) { ++ psock->ready_rx_msg = head; ++ list_add_tail(&psock->psock_ready_list, ++ &mux->psocks_ready); ++ spin_unlock_bh(&mux->rx_lock); ++ return NULL; ++ } ++ ++ kcm = list_first_entry(&mux->kcm_rx_waiters, ++ struct kcm_sock, wait_rx_list); ++ list_del(&kcm->wait_rx_list); ++ kcm->rx_wait = false; ++ ++ psock->rx_kcm = kcm; ++ kcm->rx_psock = psock; ++ ++ spin_unlock_bh(&mux->rx_lock); ++ ++ return kcm; ++} ++ ++static void kcm_done(struct kcm_sock *kcm); ++ ++static void kcm_done_work(struct work_struct *w) ++{ ++ kcm_done(container_of(w, struct kcm_sock, done_work)); ++} ++ ++/* Lower sock held */ ++static void unreserve_rx_kcm(struct kcm_psock *psock, ++ bool rcv_ready) ++{ ++ struct kcm_sock *kcm = psock->rx_kcm; ++ struct kcm_mux *mux = psock->mux; ++ ++ if (!kcm) ++ return; ++ ++ spin_lock_bh(&mux->rx_lock); ++ ++ psock->rx_kcm = NULL; ++ kcm->rx_psock = NULL; ++ ++ /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with ++ * kcm_rfree ++ */ ++ smp_mb(); ++ ++ if (unlikely(kcm->done)) { ++ spin_unlock_bh(&mux->rx_lock); ++ ++ /* Need to run kcm_done in a task since we need to qcquire ++ * callback locks which may already be held here. ++ */ ++ INIT_WORK(&kcm->done_work, kcm_done_work); ++ schedule_work(&kcm->done_work); ++ return; ++ } ++ ++ if (unlikely(kcm->rx_disabled)) { ++ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); ++ } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) { ++ /* Check for degenerative race with rx_wait that all ++ * data was dequeued (accounted for in kcm_rfree). ++ */ ++ kcm_rcv_ready(kcm); ++ } ++ spin_unlock_bh(&mux->rx_lock); ++} ++ ++/* Macro to invoke filter function. 
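++ * The program attached via SIOCKCMATTACH parses the header at the
++ * front of a candidate message and returns the full message length,
++ * or 0 if more header bytes are needed; kcm_tcp_recv() below stores
++ * the result in rxm->full_len. Call form (sketch):
++ *
++ *	len = KCM_RUN_FILTER(psock->bpf_prog, head);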
*/ ++#define KCM_RUN_FILTER(prog, ctx) \ ++ (*prog->bpf_func)(ctx, prog->insnsi) ++ ++/* Lower socket lock held */ ++static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, ++ unsigned int orig_offset, size_t orig_len) ++{ ++ struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; ++ struct kcm_rx_msg *rxm; ++ struct kcm_sock *kcm; ++ struct sk_buff *head, *skb; ++ size_t eaten = 0, cand_len; ++ ssize_t extra; ++ int err; ++ bool cloned_orig = false; ++ ++ if (psock->ready_rx_msg) ++ return 0; ++ ++ head = psock->rx_skb_head; ++ if (head) { ++ /* Message already in progress */ ++ ++ if (unlikely(orig_offset)) { ++ /* Getting data with a non-zero offset when a message is ++ * in progress is not expected. If it does happen, we ++ * need to clone and pull since we can't deal with ++ * offsets in the skbs for a message expect in the head. ++ */ ++ orig_skb = skb_clone(orig_skb, GFP_ATOMIC); ++ if (!orig_skb) { ++ desc->error = -ENOMEM; ++ return 0; ++ } ++ if (!pskb_pull(orig_skb, orig_offset)) { ++ kfree_skb(orig_skb); ++ desc->error = -ENOMEM; ++ return 0; ++ } ++ cloned_orig = true; ++ orig_offset = 0; ++ } ++ ++ if (!psock->rx_skb_nextp) { ++ /* We are going to append to the frags_list of head. ++ * Need to unshare the frag_list. ++ */ ++ err = skb_unclone(head, GFP_ATOMIC); ++ if (err) { ++ desc->error = err; ++ return 0; ++ } ++ ++ if (unlikely(skb_shinfo(head)->frag_list)) { ++ /* We can't append to an sk_buff that already ++ * has a frag_list. We create a new head, point ++ * the frag_list of that to the old head, and ++ * then are able to use the old head->next for ++ * appending to the message. ++ */ ++ if (WARN_ON(head->next)) { ++ desc->error = -EINVAL; ++ return 0; ++ } ++ ++ skb = alloc_skb(0, GFP_ATOMIC); ++ if (!skb) { ++ desc->error = -ENOMEM; ++ return 0; ++ } ++ skb->len = head->len; ++ skb->data_len = head->len; ++ skb->truesize = head->truesize; ++ *kcm_rx_msg(skb) = *kcm_rx_msg(head); ++ psock->rx_skb_nextp = &head->next; ++ skb_shinfo(skb)->frag_list = head; ++ psock->rx_skb_head = skb; ++ head = skb; ++ } else { ++ psock->rx_skb_nextp = ++ &skb_shinfo(head)->frag_list; ++ } ++ } ++ } ++ ++ while (eaten < orig_len) { ++ /* Always clone since we will consume something */ ++ skb = skb_clone(orig_skb, GFP_ATOMIC); ++ if (!skb) { ++ desc->error = -ENOMEM; ++ break; ++ } ++ ++ cand_len = orig_len - eaten; ++ ++ head = psock->rx_skb_head; ++ if (!head) { ++ head = skb; ++ psock->rx_skb_head = head; ++ /* Will set rx_skb_nextp on next packet if needed */ ++ psock->rx_skb_nextp = NULL; ++ rxm = kcm_rx_msg(head); ++ memset(rxm, 0, sizeof(*rxm)); ++ rxm->offset = orig_offset + eaten; ++ } else { ++ /* Unclone since we may be appending to an skb that we ++ * already share a frag_list with. 
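++			 *
++			 * Message layout at this point, for reference: the
++			 * head skb carries the running totals, its frag_list
++			 * chains the appended clones, and rx_skb_nextp points
++			 * at the link where the next clone will be attached:
++			 *
++			 *	head -> frag_list: skb -> skb -> (*rx_skb_nextp)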
++ */ ++ err = skb_unclone(skb, GFP_ATOMIC); ++ if (err) { ++ desc->error = err; ++ break; ++ } ++ ++ rxm = kcm_rx_msg(head); ++ *psock->rx_skb_nextp = skb; ++ psock->rx_skb_nextp = &skb->next; ++ head->data_len += skb->len; ++ head->len += skb->len; ++ head->truesize += skb->truesize; ++ } ++ ++ if (!rxm->full_len) { ++ ssize_t len; ++ ++ len = KCM_RUN_FILTER(psock->bpf_prog, head); ++ ++ if (!len) { ++ /* Need more header to determine length */ ++ rxm->accum_len += cand_len; ++ eaten += cand_len; ++ WARN_ON(eaten != orig_len); ++ break; ++ } else if (len <= (ssize_t)head->len - ++ skb->len - rxm->offset) { ++ /* Length must be into new skb (and also ++ * greater than zero) ++ */ ++ desc->error = -EPROTO; ++ psock->rx_skb_head = NULL; ++ kcm_abort_rx_psock(psock, EPROTO, head); ++ break; ++ } ++ ++ rxm->full_len = len; ++ } ++ ++ extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; ++ ++ if (extra < 0) { ++ /* Message not complete yet. */ ++ rxm->accum_len += cand_len; ++ eaten += cand_len; ++ WARN_ON(eaten != orig_len); ++ break; ++ } ++ ++ /* Positive extra indicates ore bytes than needed for the ++ * message ++ */ ++ ++ WARN_ON(extra > cand_len); ++ ++ eaten += (cand_len - extra); ++ ++ /* Hurray, we have a new message! */ ++ psock->rx_skb_head = NULL; ++ ++try_queue: ++ kcm = reserve_rx_kcm(psock, head); ++ if (!kcm) { ++ /* Unable to reserve a KCM, message is held in psock. */ ++ break; ++ } ++ ++ if (kcm_queue_rcv_skb(&kcm->sk, head)) { ++ /* Should mean socket buffer full */ ++ unreserve_rx_kcm(psock, false); ++ goto try_queue; ++ } ++ } ++ ++ if (cloned_orig) ++ kfree_skb(orig_skb); ++ ++ return eaten; ++} ++ ++/* Called with lock held on lower socket */ ++static int psock_tcp_read_sock(struct kcm_psock *psock) ++{ ++ read_descriptor_t desc; ++ ++ desc.arg.data = psock; ++ desc.error = 0; ++ desc.count = 1; /* give more than one skb per call */ ++ ++ /* sk should be locked here, so okay to do tcp_read_sock */ ++ tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); ++ ++ unreserve_rx_kcm(psock, true); ++ ++ return desc.error; ++} ++ ++/* Lower sock lock held */ ++static void psock_tcp_data_ready(struct sock *sk) ++{ ++ struct kcm_psock *psock; ++ ++ read_lock_bh(&sk->sk_callback_lock); ++ ++ psock = (struct kcm_psock *)sk->sk_user_data; ++ if (unlikely(!psock || psock->rx_stopped)) ++ goto out; ++ ++ if (psock->ready_rx_msg) ++ goto out; ++ ++ if (psock_tcp_read_sock(psock) == -ENOMEM) ++ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); ++ ++out: ++ read_unlock_bh(&sk->sk_callback_lock); ++} ++ ++static void do_psock_rx_work(struct kcm_psock *psock) ++{ ++ read_descriptor_t rd_desc; ++ struct sock *csk = psock->sk; ++ ++ /* We need the read lock to synchronize with psock_tcp_data_ready. We ++ * need the socket lock for calling tcp_read_sock. 
++ */ ++ lock_sock(csk); ++ read_lock_bh(&csk->sk_callback_lock); ++ ++ if (unlikely(csk->sk_user_data != psock)) ++ goto out; ++ ++ if (unlikely(psock->rx_stopped)) ++ goto out; ++ ++ if (psock->ready_rx_msg) ++ goto out; ++ ++ rd_desc.arg.data = psock; ++ ++ if (psock_tcp_read_sock(psock) == -ENOMEM) ++ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); ++ ++out: ++ read_unlock_bh(&csk->sk_callback_lock); ++ release_sock(csk); ++} ++ ++static void psock_rx_work(struct work_struct *w) ++{ ++ do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); ++} ++ ++static void psock_rx_delayed_work(struct work_struct *w) ++{ ++ do_psock_rx_work(container_of(w, struct kcm_psock, ++ rx_delayed_work.work)); ++} ++ ++static void psock_tcp_state_change(struct sock *sk) ++{ ++ /* TCP only does a POLLIN for a half close. Do a POLLHUP here ++ * since application will normally not poll with POLLIN ++ * on the TCP sockets. ++ */ ++ ++ report_csk_error(sk, EPIPE); ++} ++ ++static void psock_tcp_write_space(struct sock *sk) ++{ ++ struct kcm_psock *psock; ++ struct kcm_mux *mux; ++ struct kcm_sock *kcm; ++ ++ read_lock_bh(&sk->sk_callback_lock); ++ ++ psock = (struct kcm_psock *)sk->sk_user_data; ++ if (unlikely(!psock)) ++ goto out; ++ ++ mux = psock->mux; ++ ++ spin_lock_bh(&mux->lock); ++ ++ /* Check if the socket is reserved so someone is waiting for sending. */ ++ kcm = psock->tx_kcm; ++ if (kcm) ++ queue_work(kcm_wq, &kcm->tx_work); ++ ++ spin_unlock_bh(&mux->lock); ++out: ++ read_unlock_bh(&sk->sk_callback_lock); ++} ++ ++static void unreserve_psock(struct kcm_sock *kcm); ++ ++/* kcm sock is locked. */ ++static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) ++{ ++ struct kcm_mux *mux = kcm->mux; ++ struct kcm_psock *psock; ++ ++ psock = kcm->tx_psock; ++ ++ smp_rmb(); /* Must read tx_psock before tx_wait */ ++ ++ if (psock) { ++ WARN_ON(kcm->tx_wait); ++ if (unlikely(psock->tx_stopped)) ++ unreserve_psock(kcm); ++ else ++ return kcm->tx_psock; ++ } ++ ++ spin_lock_bh(&mux->lock); ++ ++ /* Check again under lock to see if psock was reserved for this ++ * psock via psock_unreserve. ++ */ ++ psock = kcm->tx_psock; ++ if (unlikely(psock)) { ++ WARN_ON(kcm->tx_wait); ++ spin_unlock_bh(&mux->lock); ++ return kcm->tx_psock; ++ } ++ ++ if (!list_empty(&mux->psocks_avail)) { ++ psock = list_first_entry(&mux->psocks_avail, ++ struct kcm_psock, ++ psock_avail_list); ++ list_del(&psock->psock_avail_list); ++ if (kcm->tx_wait) { ++ list_del(&kcm->wait_psock_list); ++ kcm->tx_wait = false; ++ } ++ kcm->tx_psock = psock; ++ psock->tx_kcm = kcm; ++ } else if (!kcm->tx_wait) { ++ list_add_tail(&kcm->wait_psock_list, ++ &mux->kcm_tx_waiters); ++ kcm->tx_wait = true; ++ } ++ ++ spin_unlock_bh(&mux->lock); ++ ++ return psock; ++} ++ ++/* mux lock held */ ++static void psock_now_avail(struct kcm_psock *psock) ++{ ++ struct kcm_mux *mux = psock->mux; ++ struct kcm_sock *kcm; ++ ++ if (list_empty(&mux->kcm_tx_waiters)) { ++ list_add_tail(&psock->psock_avail_list, ++ &mux->psocks_avail); ++ } else { ++ kcm = list_first_entry(&mux->kcm_tx_waiters, ++ struct kcm_sock, ++ wait_psock_list); ++ list_del(&kcm->wait_psock_list); ++ kcm->tx_wait = false; ++ psock->tx_kcm = kcm; ++ ++ /* Commit before changing tx_psock since that is read in ++ * reserve_psock before queuing work. ++ */ ++ smp_mb(); ++ ++ kcm->tx_psock = psock; ++ queue_work(kcm_wq, &kcm->tx_work); ++ } ++} ++ ++/* kcm sock is locked. 
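++ * Undoes reserve_psock(): detaches the psock from this KCM socket
++ * and, unless transmission on it was stopped, hands it back to the
++ * mux via psock_now_avail(); a stopped psock that is also done is
++ * freed here instead (the deferred-free path below).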
*/ ++static void unreserve_psock(struct kcm_sock *kcm) ++{ ++ struct kcm_psock *psock; ++ struct kcm_mux *mux = kcm->mux; ++ ++ spin_lock_bh(&mux->lock); ++ ++ psock = kcm->tx_psock; ++ ++ if (WARN_ON(!psock)) { ++ spin_unlock_bh(&mux->lock); ++ return; ++ } ++ ++ smp_rmb(); /* Read tx_psock before tx_wait */ ++ ++ WARN_ON(kcm->tx_wait); ++ ++ kcm->tx_psock = NULL; ++ psock->tx_kcm = NULL; ++ ++ if (unlikely(psock->tx_stopped)) { ++ if (psock->done) { ++ /* Deferred free */ ++ list_del(&psock->psock_list); ++ mux->psocks_cnt--; ++ sock_put(psock->sk); ++ fput(psock->sk->sk_socket->file); ++ kmem_cache_free(kcm_psockp, psock); ++ } ++ ++ /* Don't put back on available list */ ++ ++ spin_unlock_bh(&mux->lock); ++ ++ return; ++ } ++ ++ psock_now_avail(psock); ++ ++ spin_unlock_bh(&mux->lock); ++} ++ ++/* Write any messages ready on the kcm socket. Called with kcm sock lock ++ * held. Return bytes actually sent or error. ++ */ ++static int kcm_write_msgs(struct kcm_sock *kcm) ++{ ++ struct sock *sk = &kcm->sk; ++ struct kcm_psock *psock; ++ struct sk_buff *skb, *head; ++ struct kcm_tx_msg *txm; ++ unsigned short fragidx, frag_offset; ++ unsigned int sent, total_sent = 0; ++ int ret = 0; ++ ++ kcm->tx_wait_more = false; ++ psock = kcm->tx_psock; ++ if (unlikely(psock && psock->tx_stopped)) { ++ /* A reserved psock was aborted asynchronously. Unreserve ++ * it and we'll retry the message. ++ */ ++ unreserve_psock(kcm); ++ if (skb_queue_empty(&sk->sk_write_queue)) ++ return 0; ++ ++ kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; ++ ++ } else if (skb_queue_empty(&sk->sk_write_queue)) { ++ return 0; ++ } ++ ++ head = skb_peek(&sk->sk_write_queue); ++ txm = kcm_tx_msg(head); ++ ++ if (txm->sent) { ++ /* Send of first skbuff in queue already in progress */ ++ if (WARN_ON(!psock)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ sent = txm->sent; ++ frag_offset = txm->frag_offset; ++ fragidx = txm->fragidx; ++ skb = txm->frag_skb; ++ ++ goto do_frag; ++ } ++ ++try_again: ++ psock = reserve_psock(kcm); ++ if (!psock) ++ goto out; ++ ++ do { ++ skb = head; ++ txm = kcm_tx_msg(head); ++ sent = 0; ++ ++do_frag_list: ++ if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; ++ fragidx++) { ++ skb_frag_t *frag; ++ ++ frag_offset = 0; ++do_frag: ++ frag = &skb_shinfo(skb)->frags[fragidx]; ++ if (WARN_ON(!frag->size)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ ret = kernel_sendpage(psock->sk->sk_socket, ++ frag->page.p, ++ frag->page_offset + frag_offset, ++ frag->size - frag_offset, ++ MSG_DONTWAIT); ++ if (ret <= 0) { ++ if (ret == -EAGAIN) { ++ /* Save state to try again when there's ++ * write space on the socket ++ */ ++ txm->sent = sent; ++ txm->frag_offset = frag_offset; ++ txm->fragidx = fragidx; ++ txm->frag_skb = skb; ++ ++ ret = 0; ++ goto out; ++ } ++ ++ /* Hard failure in sending message, abort this ++ * psock since it has lost framing ++ * synchonization and retry sending the ++ * message from the beginning. ++ */ ++ kcm_abort_tx_psock(psock, ret ? 
-ret : EPIPE, ++ true); ++ unreserve_psock(kcm); ++ ++ txm->sent = 0; ++ ret = 0; ++ ++ goto try_again; ++ } ++ ++ sent += ret; ++ frag_offset += ret; ++ if (frag_offset < frag->size) { ++ /* Not finished with this frag */ ++ goto do_frag; ++ } ++ } ++ ++ if (skb == head) { ++ if (skb_has_frag_list(skb)) { ++ skb = skb_shinfo(skb)->frag_list; ++ goto do_frag_list; ++ } ++ } else if (skb->next) { ++ skb = skb->next; ++ goto do_frag_list; ++ } ++ ++ /* Successfully sent the whole packet, account for it. */ ++ skb_dequeue(&sk->sk_write_queue); ++ kfree_skb(head); ++ sk->sk_wmem_queued -= sent; ++ total_sent += sent; ++ } while ((head = skb_peek(&sk->sk_write_queue))); ++out: ++ if (!head) { ++ /* Done with all queued messages. */ ++ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); ++ unreserve_psock(kcm); ++ } ++ ++ /* Check if write space is available */ ++ sk->sk_write_space(sk); ++ ++ return total_sent ? : ret; ++} ++ ++static void kcm_tx_work(struct work_struct *w) ++{ ++ struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work); ++ struct sock *sk = &kcm->sk; ++ int err; ++ ++ lock_sock(sk); ++ ++ /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx ++ * aborts ++ */ ++ err = kcm_write_msgs(kcm); ++ if (err < 0) { ++ /* Hard failure in write, report error on KCM socket */ ++ pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err); ++ report_csk_error(&kcm->sk, -err); ++ goto out; ++ } ++ ++ /* Primarily for SOCK_SEQPACKET sockets */ ++ if (likely(sk->sk_socket) && ++ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { ++ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); ++ sk->sk_write_space(sk); ++ } ++ ++out: ++ release_sock(sk); ++} ++ ++static void kcm_push(struct kcm_sock *kcm) ++{ ++ if (kcm->tx_wait_more) ++ kcm_write_msgs(kcm); ++} ++ ++static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ++{ ++ struct sock *sk = sock->sk; ++ struct kcm_sock *kcm = kcm_sk(sk); ++ struct sk_buff *skb = NULL, *head = NULL; ++ size_t copy, copied = 0; ++ long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); ++ int eor = (sock->type == SOCK_DGRAM) ? ++ !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); ++ int err = -EPIPE; ++ ++ lock_sock(sk); ++ ++ /* Per tcp_sendmsg this should be in poll */ ++ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); ++ ++ if (sk->sk_err) ++ goto out_error; ++ ++ if (kcm->seq_skb) { ++ /* Previously opened message */ ++ head = kcm->seq_skb; ++ skb = kcm_tx_msg(head)->last_skb; ++ goto start; ++ } ++ ++ /* Call the sk_stream functions to manage the sndbuf mem. */ ++ if (!sk_stream_memory_free(sk)) { ++ kcm_push(kcm); ++ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); ++ err = sk_stream_wait_memory(sk, &timeo); ++ if (err) ++ goto out_error; ++ } ++ ++ /* New message, alloc head skb */ ++ head = alloc_skb(0, sk->sk_allocation); ++ while (!head) { ++ kcm_push(kcm); ++ err = sk_stream_wait_memory(sk, &timeo); ++ if (err) ++ goto out_error; ++ ++ head = alloc_skb(0, sk->sk_allocation); ++ } ++ ++ skb = head; ++ ++ /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling ++ * csum_and_copy_from_iter from skb_do_copy_data_nocache. 
++ */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++start: ++ while (msg_data_left(msg)) { ++ bool merge = true; ++ int i = skb_shinfo(skb)->nr_frags; ++ struct page_frag *pfrag = sk_page_frag(sk); ++ ++ if (!sk_page_frag_refill(sk, pfrag)) ++ goto wait_for_memory; ++ ++ if (!skb_can_coalesce(skb, i, pfrag->page, ++ pfrag->offset)) { ++ if (i == MAX_SKB_FRAGS) { ++ struct sk_buff *tskb; ++ ++ tskb = alloc_skb(0, sk->sk_allocation); ++ if (!tskb) ++ goto wait_for_memory; ++ ++ if (head == skb) ++ skb_shinfo(head)->frag_list = tskb; ++ else ++ skb->next = tskb; ++ ++ skb = tskb; ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ continue; ++ } ++ merge = false; ++ } ++ ++ copy = min_t(int, msg_data_left(msg), ++ pfrag->size - pfrag->offset); ++ ++ if (!sk_wmem_schedule(sk, copy)) ++ goto wait_for_memory; ++ ++ err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, ++ pfrag->page, ++ pfrag->offset, ++ copy); ++ if (err) ++ goto out_error; ++ ++ /* Update the skb. */ ++ if (merge) { ++ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); ++ } else { ++ skb_fill_page_desc(skb, i, pfrag->page, ++ pfrag->offset, copy); ++ get_page(pfrag->page); ++ } ++ ++ pfrag->offset += copy; ++ copied += copy; ++ if (head != skb) { ++ head->len += copy; ++ head->data_len += copy; ++ } ++ ++ continue; ++ ++wait_for_memory: ++ kcm_push(kcm); ++ err = sk_stream_wait_memory(sk, &timeo); ++ if (err) ++ goto out_error; ++ } ++ ++ if (eor) { ++ bool not_busy = skb_queue_empty(&sk->sk_write_queue); ++ ++ /* Message complete, queue it on send buffer */ ++ __skb_queue_tail(&sk->sk_write_queue, head); ++ kcm->seq_skb = NULL; ++ ++ if (msg->msg_flags & MSG_BATCH) { ++ kcm->tx_wait_more = true; ++ } else if (kcm->tx_wait_more || not_busy) { ++ err = kcm_write_msgs(kcm); ++ if (err < 0) { ++ /* We got a hard error in write_msgs but have ++ * already queued this message. Report an error ++ * in the socket, but don't affect return value ++ * from sendmsg ++ */ ++ pr_warn("KCM: Hard failure on kcm_write_msgs\n"); ++ report_csk_error(&kcm->sk, -err); ++ } ++ } ++ } else { ++ /* Message not complete, save state */ ++partial_message: ++ kcm->seq_skb = head; ++ kcm_tx_msg(head)->last_skb = skb; ++ } ++ ++ release_sock(sk); ++ return copied; ++ ++out_error: ++ kcm_push(kcm); ++ ++ if (copied && sock->type == SOCK_SEQPACKET) { ++ /* Wrote some bytes before encountering an ++ * error, return partial success. 
++ */ ++ goto partial_message; ++ } ++ ++ if (head != kcm->seq_skb) ++ kfree_skb(head); ++ ++ err = sk_stream_error(sk, msg->msg_flags, err); ++ ++ /* make sure we wake any epoll edge trigger waiter */ ++ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) ++ sk->sk_write_space(sk); ++ ++ release_sock(sk); ++ return err; ++} ++ ++static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, ++ long timeo, int *err) ++{ ++ struct sk_buff *skb; ++ ++ while (!(skb = skb_peek(&sk->sk_receive_queue))) { ++ if (sk->sk_err) { ++ *err = sock_error(sk); ++ return NULL; ++ } ++ ++ if (sock_flag(sk, SOCK_DONE)) ++ return NULL; ++ ++ if ((flags & MSG_DONTWAIT) || !timeo) { ++ *err = -EAGAIN; ++ return NULL; ++ } ++ ++ sk_wait_data(sk, &timeo, NULL); ++ ++ /* Handle signals */ ++ if (signal_pending(current)) { ++ *err = sock_intr_errno(timeo); ++ return NULL; ++ } ++ } ++ ++ return skb; ++} ++ ++static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ struct sock *sk = sock->sk; ++ int err = 0; ++ long timeo; ++ struct kcm_rx_msg *rxm; ++ int copied = 0; ++ struct sk_buff *skb; ++ ++ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); ++ ++ lock_sock(sk); ++ ++ skb = kcm_wait_data(sk, flags, timeo, &err); ++ if (!skb) ++ goto out; ++ ++ /* Okay, have a message on the receive queue */ ++ ++ rxm = kcm_rx_msg(skb); ++ ++ if (len > rxm->full_len) ++ len = rxm->full_len; ++ ++ err = skb_copy_datagram_msg(skb, rxm->offset, msg, len); ++ if (err < 0) ++ goto out; ++ ++ copied = len; ++ if (likely(!(flags & MSG_PEEK))) { ++ if (copied < rxm->full_len) { ++ if (sock->type == SOCK_DGRAM) { ++ /* Truncated message */ ++ msg->msg_flags |= MSG_TRUNC; ++ goto msg_finished; ++ } ++ rxm->offset += copied; ++ rxm->full_len -= copied; ++ } else { ++msg_finished: ++ /* Finished with message */ ++ msg->msg_flags |= MSG_EOR; ++ skb_unlink(skb, &sk->sk_receive_queue); ++ kfree_skb(skb); ++ } ++ } ++ ++out: ++ release_sock(sk); ++ ++ return copied ? : err; ++} ++ ++/* kcm sock lock held */ ++static void kcm_recv_disable(struct kcm_sock *kcm) ++{ ++ struct kcm_mux *mux = kcm->mux; ++ ++ if (kcm->rx_disabled) ++ return; ++ ++ spin_lock_bh(&mux->rx_lock); ++ ++ kcm->rx_disabled = 1; ++ ++ /* If a psock is reserved we'll do cleanup in unreserve */ ++ if (!kcm->rx_psock) { ++ if (kcm->rx_wait) { ++ list_del(&kcm->wait_rx_list); ++ kcm->rx_wait = false; ++ } ++ ++ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); ++ } ++ ++ spin_unlock_bh(&mux->rx_lock); ++} ++ ++/* kcm sock lock held */ ++static void kcm_recv_enable(struct kcm_sock *kcm) ++{ ++ struct kcm_mux *mux = kcm->mux; ++ ++ if (!kcm->rx_disabled) ++ return; ++ ++ spin_lock_bh(&mux->rx_lock); ++ ++ kcm->rx_disabled = 0; ++ kcm_rcv_ready(kcm); ++ ++ spin_unlock_bh(&mux->rx_lock); ++} ++ ++static int kcm_setsockopt(struct socket *sock, int level, int optname, ++ char __user *optval, unsigned int optlen) ++{ ++ struct kcm_sock *kcm = kcm_sk(sock->sk); ++ int val, valbool; ++ int err = 0; ++ ++ if (level != SOL_KCM) ++ return -ENOPROTOOPT; ++ ++ if (optlen < sizeof(int)) ++ return -EINVAL; ++ ++ if (get_user(val, (int __user *)optval)) ++ return -EINVAL; ++ ++ valbool = val ? 
1 : 0; ++ ++ switch (optname) { ++ case KCM_RECV_DISABLE: ++ lock_sock(&kcm->sk); ++ if (valbool) ++ kcm_recv_disable(kcm); ++ else ++ kcm_recv_enable(kcm); ++ release_sock(&kcm->sk); ++ break; ++ default: ++ err = -ENOPROTOOPT; ++ } ++ ++ return err; ++} ++ ++static int kcm_getsockopt(struct socket *sock, int level, int optname, ++ char __user *optval, int __user *optlen) ++{ ++ struct kcm_sock *kcm = kcm_sk(sock->sk); ++ int val, len; ++ ++ if (level != SOL_KCM) ++ return -ENOPROTOOPT; ++ ++ if (get_user(len, optlen)) ++ return -EFAULT; ++ ++ len = min_t(unsigned int, len, sizeof(int)); ++ if (len < 0) ++ return -EINVAL; ++ ++ switch (optname) { ++ case KCM_RECV_DISABLE: ++ val = kcm->rx_disabled; ++ break; ++ default: ++ return -ENOPROTOOPT; ++ } ++ ++ if (put_user(len, optlen)) ++ return -EFAULT; ++ if (copy_to_user(optval, &val, len)) ++ return -EFAULT; ++ return 0; ++} ++ ++static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) ++{ ++ struct kcm_sock *tkcm; ++ struct list_head *head; ++ int index = 0; ++ ++ /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so ++ * we set sk_state, otherwise epoll_wait always returns right away with ++ * POLLHUP ++ */ ++ kcm->sk.sk_state = TCP_ESTABLISHED; ++ ++ /* Add to mux's kcm sockets list */ ++ kcm->mux = mux; ++ spin_lock_bh(&mux->lock); ++ ++ head = &mux->kcm_socks; ++ list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) { ++ if (tkcm->index != index) ++ break; ++ head = &tkcm->kcm_sock_list; ++ index++; ++ } ++ ++ list_add(&kcm->kcm_sock_list, head); ++ kcm->index = index; ++ ++ mux->kcm_socks_cnt++; ++ spin_unlock_bh(&mux->lock); ++ ++ INIT_WORK(&kcm->tx_work, kcm_tx_work); ++ ++ spin_lock_bh(&mux->rx_lock); ++ kcm_rcv_ready(kcm); ++ spin_unlock_bh(&mux->rx_lock); ++} ++ ++static int kcm_attach(struct socket *sock, struct socket *csock, ++ struct bpf_prog *prog) ++{ ++ struct kcm_sock *kcm = kcm_sk(sock->sk); ++ struct kcm_mux *mux = kcm->mux; ++ struct sock *csk; ++ struct kcm_psock *psock = NULL, *tpsock; ++ struct list_head *head; ++ int index = 0; ++ ++ if (csock->ops->family != PF_INET && ++ csock->ops->family != PF_INET6) ++ return -EINVAL; ++ ++ csk = csock->sk; ++ if (!csk) ++ return -EINVAL; ++ ++ /* Only support TCP for now */ ++ if (csk->sk_protocol != IPPROTO_TCP) ++ return -EINVAL; ++ ++ psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); ++ if (!psock) ++ return -ENOMEM; ++ ++ psock->mux = mux; ++ psock->sk = csk; ++ psock->bpf_prog = prog; ++ INIT_WORK(&psock->rx_work, psock_rx_work); ++ INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); ++ ++ sock_hold(csk); ++ ++ write_lock_bh(&csk->sk_callback_lock); ++ psock->save_data_ready = csk->sk_data_ready; ++ psock->save_write_space = csk->sk_write_space; ++ psock->save_state_change = csk->sk_state_change; ++ csk->sk_user_data = psock; ++ csk->sk_data_ready = psock_tcp_data_ready; ++ csk->sk_write_space = psock_tcp_write_space; ++ csk->sk_state_change = psock_tcp_state_change; ++ write_unlock_bh(&csk->sk_callback_lock); ++ ++ /* Finished initialization, now add the psock to the MUX. 
*/ ++ spin_lock_bh(&mux->lock); ++ head = &mux->psocks; ++ list_for_each_entry(tpsock, &mux->psocks, psock_list) { ++ if (tpsock->index != index) ++ break; ++ head = &tpsock->psock_list; ++ index++; ++ } ++ ++ list_add(&psock->psock_list, head); ++ psock->index = index; ++ ++ mux->psocks_cnt++; ++ psock_now_avail(psock); ++ spin_unlock_bh(&mux->lock); ++ ++ /* Schedule RX work in case there are already bytes queued */ ++ queue_work(kcm_wq, &psock->rx_work); ++ ++ return 0; ++} ++ ++static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) ++{ ++ struct socket *csock; ++ struct bpf_prog *prog; ++ int err; ++ ++ csock = sockfd_lookup(info->fd, &err); ++ if (!csock) ++ return -ENOENT; ++ ++ prog = bpf_prog_get(info->bpf_fd); ++ if (IS_ERR(prog)) { ++ err = PTR_ERR(prog); ++ goto out; ++ } ++ ++ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { ++ bpf_prog_put(prog); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = kcm_attach(sock, csock, prog); ++ if (err) { ++ bpf_prog_put(prog); ++ goto out; ++ } ++ ++ /* Keep reference on file also */ ++ ++ return 0; ++out: ++ fput(csock->file); ++ return err; ++} ++ ++static void kcm_unattach(struct kcm_psock *psock) ++{ ++ struct sock *csk = psock->sk; ++ struct kcm_mux *mux = psock->mux; ++ ++ /* Stop getting callbacks from TCP socket. After this there should ++ * be no way to reserve a kcm for this psock. ++ */ ++ write_lock_bh(&csk->sk_callback_lock); ++ csk->sk_user_data = NULL; ++ csk->sk_data_ready = psock->save_data_ready; ++ csk->sk_write_space = psock->save_write_space; ++ csk->sk_state_change = psock->save_state_change; ++ psock->rx_stopped = 1; ++ ++ if (WARN_ON(psock->rx_kcm)) { ++ write_unlock_bh(&csk->sk_callback_lock); ++ return; ++ } ++ ++ spin_lock_bh(&mux->rx_lock); ++ ++ /* Stop receiver activities. After this point psock should not be ++ * able to get onto ready list either through callbacks or work. ++ */ ++ if (psock->ready_rx_msg) { ++ list_del(&psock->psock_ready_list); ++ kfree_skb(psock->ready_rx_msg); ++ psock->ready_rx_msg = NULL; ++ } ++ ++ spin_unlock_bh(&mux->rx_lock); ++ ++ write_unlock_bh(&csk->sk_callback_lock); ++ ++ cancel_work_sync(&psock->rx_work); ++ cancel_delayed_work_sync(&psock->rx_delayed_work); ++ ++ bpf_prog_put(psock->bpf_prog); ++ ++ kfree_skb(psock->rx_skb_head); ++ psock->rx_skb_head = NULL; ++ ++ spin_lock_bh(&mux->lock); ++ ++ if (psock->tx_kcm) { ++ /* psock was reserved. Just mark it finished and we will clean ++ * up in the kcm paths, we need kcm lock which can not be ++ * acquired here. ++ */ ++ spin_unlock_bh(&mux->lock); ++ ++ /* We are unattaching a socket that is reserved. Abort the ++ * socket since we may be out of sync in sending on it. We need ++ * to do this without the mux lock. 
++ */ ++ kcm_abort_tx_psock(psock, EPIPE, false); ++ ++ spin_lock_bh(&mux->lock); ++ if (!psock->tx_kcm) { ++ /* psock now unreserved in window mux was unlocked */ ++ goto no_reserved; ++ } ++ psock->done = 1; ++ ++ /* Commit done before queuing work to process it */ ++ smp_mb(); ++ ++ /* Queue tx work to make sure psock->done is handled */ ++ queue_work(kcm_wq, &psock->tx_kcm->tx_work); ++ spin_unlock_bh(&mux->lock); ++ } else { ++no_reserved: ++ if (!psock->tx_stopped) ++ list_del(&psock->psock_avail_list); ++ list_del(&psock->psock_list); ++ mux->psocks_cnt--; ++ spin_unlock_bh(&mux->lock); ++ ++ sock_put(csk); ++ fput(csk->sk_socket->file); ++ kmem_cache_free(kcm_psockp, psock); ++ } ++} ++ ++static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) ++{ ++ struct kcm_sock *kcm = kcm_sk(sock->sk); ++ struct kcm_mux *mux = kcm->mux; ++ struct kcm_psock *psock; ++ struct socket *csock; ++ struct sock *csk; ++ int err; ++ ++ csock = sockfd_lookup(info->fd, &err); ++ if (!csock) ++ return -ENOENT; ++ ++ csk = csock->sk; ++ if (!csk) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = -ENOENT; ++ ++ spin_lock_bh(&mux->lock); ++ ++ list_for_each_entry(psock, &mux->psocks, psock_list) { ++ if (psock->sk != csk) ++ continue; ++ ++ /* Found the matching psock */ ++ ++ if (psock->unattaching || WARN_ON(psock->done)) { ++ err = -EALREADY; ++ break; ++ } ++ ++ psock->unattaching = 1; ++ ++ spin_unlock_bh(&mux->lock); ++ ++ kcm_unattach(psock); ++ ++ err = 0; ++ goto out; ++ } ++ ++ spin_unlock_bh(&mux->lock); ++ ++out: ++ fput(csock->file); ++ return err; ++} ++ ++static struct proto kcm_proto = { ++ .name = "KCM", ++ .owner = THIS_MODULE, ++ .obj_size = sizeof(struct kcm_sock), ++}; ++ ++/* Clone a kcm socket. */ ++static int kcm_clone(struct socket *osock, struct kcm_clone *info, ++ struct socket **newsockp) ++{ ++ struct socket *newsock; ++ struct sock *newsk; ++ struct file *newfile; ++ int err, newfd; ++ ++ err = -ENFILE; ++ newsock = sock_alloc(); ++ if (!newsock) ++ goto out; ++ ++ newsock->type = osock->type; ++ newsock->ops = osock->ops; ++ ++ __module_get(newsock->ops->owner); ++ ++ newfd = get_unused_fd_flags(0); ++ if (unlikely(newfd < 0)) { ++ err = newfd; ++ goto out_fd_fail; ++ } ++ ++ newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); ++ if (unlikely(IS_ERR(newfile))) { ++ err = PTR_ERR(newfile); ++ goto out_sock_alloc_fail; ++ } ++ ++ newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, ++ &kcm_proto, true); ++ if (!newsk) { ++ err = -ENOMEM; ++ goto out_sk_alloc_fail; ++ } ++ ++ sock_init_data(newsock, newsk); ++ init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); ++ ++ fd_install(newfd, newfile); ++ *newsockp = newsock; ++ info->fd = newfd; ++ ++ return 0; ++ ++out_sk_alloc_fail: ++ fput(newfile); ++out_sock_alloc_fail: ++ put_unused_fd(newfd); ++out_fd_fail: ++ sock_release(newsock); ++out: ++ return err; ++} ++ ++static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) ++{ ++ int err; ++ ++ switch (cmd) { ++ case SIOCKCMATTACH: { ++ struct kcm_attach info; ++ ++ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) ++ err = -EFAULT; ++ ++ err = kcm_attach_ioctl(sock, &info); ++ ++ break; ++ } ++ case SIOCKCMUNATTACH: { ++ struct kcm_unattach info; ++ ++ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) ++ err = -EFAULT; ++ ++ err = kcm_unattach_ioctl(sock, &info); ++ ++ break; ++ } ++ case SIOCKCMCLONE: { ++ struct kcm_clone info; ++ struct socket *newsock = NULL; ++ ++ if 
(copy_from_user(&info, (void __user *)arg, sizeof(info))) ++ err = -EFAULT; ++ ++ err = kcm_clone(sock, &info, &newsock); ++ ++ if (!err) { ++ if (copy_to_user((void __user *)arg, &info, ++ sizeof(info))) { ++ err = -EFAULT; ++ sock_release(newsock); ++ } ++ } ++ ++ break; ++ } ++ default: ++ err = -ENOIOCTLCMD; ++ break; ++ } ++ ++ return err; ++} ++ ++static void free_mux(struct rcu_head *rcu) ++{ ++ struct kcm_mux *mux = container_of(rcu, ++ struct kcm_mux, rcu); ++ ++ kmem_cache_free(kcm_muxp, mux); ++} ++ ++static void release_mux(struct kcm_mux *mux) ++{ ++ struct kcm_net *knet = mux->knet; ++ struct kcm_psock *psock, *tmp_psock; ++ ++ /* Release psocks */ ++ list_for_each_entry_safe(psock, tmp_psock, ++ &mux->psocks, psock_list) { ++ if (!WARN_ON(psock->unattaching)) ++ kcm_unattach(psock); ++ } ++ ++ if (WARN_ON(mux->psocks_cnt)) ++ return; ++ ++ __skb_queue_purge(&mux->rx_hold_queue); ++ ++ mutex_lock(&knet->mutex); ++ list_del_rcu(&mux->kcm_mux_list); ++ knet->count--; ++ mutex_unlock(&knet->mutex); ++ ++ call_rcu(&mux->rcu, free_mux); ++} ++ ++static void kcm_done(struct kcm_sock *kcm) ++{ ++ struct kcm_mux *mux = kcm->mux; ++ struct sock *sk = &kcm->sk; ++ int socks_cnt; ++ ++ spin_lock_bh(&mux->rx_lock); ++ if (kcm->rx_psock) { ++ /* Cleanup in unreserve_rx_kcm */ ++ WARN_ON(kcm->done); ++ kcm->rx_disabled = 1; ++ kcm->done = 1; ++ spin_unlock_bh(&mux->rx_lock); ++ return; ++ } ++ ++ if (kcm->rx_wait) { ++ list_del(&kcm->wait_rx_list); ++ kcm->rx_wait = false; ++ } ++ /* Move any pending receive messages to other kcm sockets */ ++ requeue_rx_msgs(mux, &sk->sk_receive_queue); ++ ++ spin_unlock_bh(&mux->rx_lock); ++ ++ if (WARN_ON(sk_rmem_alloc_get(sk))) ++ return; ++ ++ /* Detach from MUX */ ++ spin_lock_bh(&mux->lock); ++ ++ list_del(&kcm->kcm_sock_list); ++ mux->kcm_socks_cnt--; ++ socks_cnt = mux->kcm_socks_cnt; ++ ++ spin_unlock_bh(&mux->lock); ++ ++ if (!socks_cnt) { ++ /* We are done with the mux now. */ ++ release_mux(mux); ++ } ++ ++ WARN_ON(kcm->rx_wait); ++ ++ sock_put(&kcm->sk); ++} ++ ++/* Called by kcm_release to close a KCM socket. ++ * If this is the last KCM socket on the MUX, destroy the MUX. ++ */ ++static int kcm_release(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ struct kcm_sock *kcm; ++ struct kcm_mux *mux; ++ struct kcm_psock *psock; ++ ++ if (!sk) ++ return 0; ++ ++ kcm = kcm_sk(sk); ++ mux = kcm->mux; ++ ++ sock_orphan(sk); ++ kfree_skb(kcm->seq_skb); ++ ++ lock_sock(sk); ++ /* Purge queue under lock to avoid race condition with tx_work trying ++ * to act when queue is nonempty. If tx_work runs after this point ++ * it will just return. ++ */ ++ __skb_queue_purge(&sk->sk_write_queue); ++ release_sock(sk); ++ ++ spin_lock_bh(&mux->lock); ++ if (kcm->tx_wait) { ++ /* Take off the tx_wait list; after this point there should be ++ * no way that a psock will be assigned to this kcm. ++ */ ++ list_del(&kcm->wait_psock_list); ++ kcm->tx_wait = false; ++ } ++ spin_unlock_bh(&mux->lock); ++ ++ /* Cancel work. After this point there should be no outside references ++ * to the kcm socket. ++ */ ++ cancel_work_sync(&kcm->tx_work); ++ ++ lock_sock(sk); ++ psock = kcm->tx_psock; ++ if (psock) { ++ /* A psock was reserved, so we need to kill it since it ++ * may already have some bytes queued from a message. We ++ * need to do this after removing kcm from tx_wait list.
++ */ ++ kcm_abort_tx_psock(psock, EPIPE, false); ++ unreserve_psock(kcm); ++ } ++ release_sock(sk); ++ ++ WARN_ON(kcm->tx_wait); ++ WARN_ON(kcm->tx_psock); ++ ++ sock->sk = NULL; ++ ++ kcm_done(kcm); ++ ++ return 0; ++} ++ ++static const struct proto_ops kcm_ops = { ++ .family = PF_KCM, ++ .owner = THIS_MODULE, ++ .release = kcm_release, ++ .bind = sock_no_bind, ++ .connect = sock_no_connect, ++ .socketpair = sock_no_socketpair, ++ .accept = sock_no_accept, ++ .getname = sock_no_getname, ++ .poll = datagram_poll, ++ .ioctl = kcm_ioctl, ++ .listen = sock_no_listen, ++ .shutdown = sock_no_shutdown, ++ .setsockopt = kcm_setsockopt, ++ .getsockopt = kcm_getsockopt, ++ .sendmsg = kcm_sendmsg, ++ .recvmsg = kcm_recvmsg, ++ .mmap = sock_no_mmap, ++ .sendpage = sock_no_sendpage, ++}; ++ ++/* Create proto operation for kcm sockets */ ++static int kcm_create(struct net *net, struct socket *sock, ++ int protocol, int kern) ++{ ++ struct kcm_net *knet = net_generic(net, kcm_net_id); ++ struct sock *sk; ++ struct kcm_mux *mux; ++ ++ switch (sock->type) { ++ case SOCK_DGRAM: ++ case SOCK_SEQPACKET: ++ sock->ops = &kcm_ops; ++ break; ++ default: ++ return -ESOCKTNOSUPPORT; ++ } ++ ++ if (protocol != KCMPROTO_CONNECTED) ++ return -EPROTONOSUPPORT; ++ ++ sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern); ++ if (!sk) ++ return -ENOMEM; ++ ++ /* Allocate a kcm mux, shared between KCM sockets */ ++ mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL); ++ if (!mux) { ++ sk_free(sk); ++ return -ENOMEM; ++ } ++ ++ spin_lock_init(&mux->lock); ++ spin_lock_init(&mux->rx_lock); ++ INIT_LIST_HEAD(&mux->kcm_socks); ++ INIT_LIST_HEAD(&mux->kcm_rx_waiters); ++ INIT_LIST_HEAD(&mux->kcm_tx_waiters); ++ ++ INIT_LIST_HEAD(&mux->psocks); ++ INIT_LIST_HEAD(&mux->psocks_ready); ++ INIT_LIST_HEAD(&mux->psocks_avail); ++ ++ mux->knet = knet; ++ ++ /* Add new MUX to list */ ++ mutex_lock(&knet->mutex); ++ list_add_rcu(&mux->kcm_mux_list, &knet->mux_list); ++ knet->count++; ++ mutex_unlock(&knet->mutex); ++ ++ skb_queue_head_init(&mux->rx_hold_queue); ++ ++ /* Init KCM socket */ ++ sock_init_data(sock, sk); ++ init_kcm_sock(kcm_sk(sk), mux); ++ ++ return 0; ++} ++ ++static struct net_proto_family kcm_family_ops = { ++ .family = PF_KCM, ++ .create = kcm_create, ++ .owner = THIS_MODULE, ++}; ++ ++static __net_init int kcm_init_net(struct net *net) ++{ ++ struct kcm_net *knet = net_generic(net, kcm_net_id); ++ ++ INIT_LIST_HEAD_RCU(&knet->mux_list); ++ mutex_init(&knet->mutex); ++ ++ return 0; ++} ++ ++static __net_exit void kcm_exit_net(struct net *net) ++{ ++ struct kcm_net *knet = net_generic(net, kcm_net_id); ++ ++ /* All KCM sockets should be closed at this point, which should mean ++ * that all multiplexors and psocks have been destroyed. 
++ */ ++ WARN_ON(!list_empty(&knet->mux_list)); ++} ++ ++static struct pernet_operations kcm_net_ops = { ++ .init = kcm_init_net, ++ .exit = kcm_exit_net, ++ .id = &kcm_net_id, ++ .size = sizeof(struct kcm_net), ++}; ++ ++static int __init kcm_init(void) ++{ ++ int err = -ENOMEM; ++ ++ kcm_muxp = kmem_cache_create("kcm_mux_cache", ++ sizeof(struct kcm_mux), 0, ++ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); ++ if (!kcm_muxp) ++ goto fail; ++ ++ kcm_psockp = kmem_cache_create("kcm_psock_cache", ++ sizeof(struct kcm_psock), 0, ++ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); ++ if (!kcm_psockp) ++ goto fail; ++ ++ kcm_wq = create_singlethread_workqueue("kkcmd"); ++ if (!kcm_wq) ++ goto fail; ++ ++ err = proto_register(&kcm_proto, 1); ++ if (err) ++ goto fail; ++ ++ err = sock_register(&kcm_family_ops); ++ if (err) ++ goto sock_register_fail; ++ ++ err = register_pernet_device(&kcm_net_ops); ++ if (err) ++ goto net_ops_fail; ++ ++ return 0; ++ ++net_ops_fail: ++ sock_unregister(PF_KCM); ++ ++sock_register_fail: ++ proto_unregister(&kcm_proto); ++ ++fail: ++ kmem_cache_destroy(kcm_muxp); ++ kmem_cache_destroy(kcm_psockp); ++ ++ if (kcm_wq) ++ destroy_workqueue(kcm_wq); ++ ++ return err; ++} ++ ++static void __exit kcm_exit(void) ++{ ++ unregister_pernet_device(&kcm_net_ops); ++ sock_unregister(PF_KCM); ++ proto_unregister(&kcm_proto); ++ destroy_workqueue(kcm_wq); ++ ++ kmem_cache_destroy(kcm_muxp); ++ kmem_cache_destroy(kcm_psockp); ++} ++ ++module_init(kcm_init); ++module_exit(kcm_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_ALIAS_NETPROTO(PF_KCM); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch b/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch new file mode 100644 index 000000000..3e97c57c5 --- /dev/null +++ b/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch @@ -0,0 +1,52 @@ +From 2f2e6e31ed1b82f1658139e0abe7155ee3755da1 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Mon, 21 Mar 2016 02:51:09 -0700 +Subject: [PATCH 37/42] net: add the AF_KCM entries to family name tables + +This is for the recent kcm driver, which introduces AF_KCM(41) in +b7ac4eb(kcm: Kernel Connection Multiplexor module). 
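+
+For illustration, userspace consumption of the new family could look
+roughly like the sketch below (the UAPI header name <linux/kcm.h> and the
+helper function are assumptions here, not taken from the upstream commit):
+
+	#include <unistd.h>
+	#include <sys/ioctl.h>
+	#include <sys/socket.h>
+	#include <linux/kcm.h>
+
+	/* Create a KCM socket and attach a connected TCP socket to it,
+	 * using the BPF program in bpf_prog_fd to parse message frames.
+	 */
+	int kcm_open(int tcp_fd, int bpf_prog_fd)
+	{
+		struct kcm_attach attach = {
+			.fd = tcp_fd,
+			.bpf_fd = bpf_prog_fd,
+		};
+		int fd = socket(AF_KCM, SOCK_SEQPACKET, KCMPROTO_CONNECTED);
+
+		if (fd < 0)
+			return -1;
+		if (ioctl(fd, SIOCKCMATTACH, &attach) < 0) {
+			close(fd);
+			return -1;
+		}
+		return fd;
+	}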
+ +Signed-off-by: Dexuan Cui +Cc: Signed-off-by: Tom Herbert +Origin: https://patchwork.ozlabs.org/patch/600006 +--- + net/core/sock.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index 0d91f7d..925def4 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -263,7 +263,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { + "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , +- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" ++ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , ++ "sk_lock-AF_MAX" + }; + static const char *const af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , +@@ -279,7 +280,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , +- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" ++ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , ++ "slock-AF_MAX" + }; + static const char *const af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , +@@ -295,7 +297,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , +- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" ++ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , ++ "clock-AF_MAX" + }; + + /* +-- +2.10.0 + diff --git a/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch b/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch new file mode 100644 index 000000000..2d3461c20 --- /dev/null +++ b/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch @@ -0,0 +1,1307 @@ +From 9e184cb0991a4cc08cd7688f2d4e23740c60e382 Mon Sep 17 00:00:00 2001 +From: Courtney Cavin +Date: Wed, 27 Apr 2016 12:13:03 -0700 +Subject: [PATCH 38/42] net: Add Qualcomm IPC router + +Add an implementation of Qualcomm's IPC router protocol, used to +communicate with service providing remote processors. 
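+
+For illustration, a userspace endpoint of the new family could be opened
+as in the sketch below (the helper function is an assumption, not part of
+the upstream commit; bind() only succeeds when sq_node matches the local
+node id, which defaults to -1 until configured via RTM_NEWADDR):
+
+	#include <string.h>
+	#include <unistd.h>
+	#include <sys/socket.h>
+	#include <linux/qrtr.h>
+
+	/* Open a datagram socket and bind it to a port on the local node. */
+	int qrtr_open(unsigned int node, unsigned int port)
+	{
+		struct sockaddr_qrtr sq;
+		int fd = socket(AF_QIPCRTR, SOCK_DGRAM, 0);
+
+		if (fd < 0)
+			return -1;
+
+		memset(&sq, 0, sizeof(sq));
+		sq.sq_family = AF_QIPCRTR;
+		sq.sq_node = node;	/* must match the local node id */
+		sq.sq_port = port;	/* 0 picks an ephemeral port */
+
+		if (bind(fd, (struct sockaddr *)&sq, sizeof(sq)) < 0) {
+			close(fd);
+			return -1;
+		}
+		return fd;
+	}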
+ +Signed-off-by: Courtney Cavin +Signed-off-by: Bjorn Andersson +[bjorn: Cope with 0 being a valid node id and implement RTM_NEWADDR] +Signed-off-by: Bjorn Andersson +Origin: https://patchwork.ozlabs.org/patch/615774/ +--- + include/linux/socket.h | 4 +- + include/uapi/linux/qrtr.h | 12 + + net/Kconfig | 1 + + net/Makefile | 1 + + net/qrtr/Kconfig | 24 ++ + net/qrtr/Makefile | 2 + + net/qrtr/qrtr.c | 1007 +++++++++++++++++++++++++++++++++++++++++++++ + net/qrtr/qrtr.h | 31 ++ + net/qrtr/smd.c | 117 ++++++ + 9 files changed, 1198 insertions(+), 1 deletion(-) + create mode 100644 include/uapi/linux/qrtr.h + create mode 100644 net/qrtr/Kconfig + create mode 100644 net/qrtr/Makefile + create mode 100644 net/qrtr/qrtr.c + create mode 100644 net/qrtr/qrtr.h + create mode 100644 net/qrtr/smd.c + +diff --git a/include/linux/socket.h b/include/linux/socket.h +index 4e1ea53..dbd81e7 100644 +--- a/include/linux/socket.h ++++ b/include/linux/socket.h +@@ -201,8 +201,9 @@ struct ucred { + #define AF_NFC 39 /* NFC sockets */ + #define AF_VSOCK 40 /* vSockets */ + #define AF_KCM 41 /* Kernel Connection Multiplexor*/ ++#define AF_QIPCRTR 42 /* Qualcomm IPC Router */ + +-#define AF_MAX 42 /* For now.. */ ++#define AF_MAX 43 /* For now.. */ + + /* Protocol families, same as address families. */ + #define PF_UNSPEC AF_UNSPEC +@@ -249,6 +250,7 @@ struct ucred { + #define PF_NFC AF_NFC + #define PF_VSOCK AF_VSOCK + #define PF_KCM AF_KCM ++#define PF_QIPCRTR AF_QIPCRTR + #define PF_MAX AF_MAX + + /* Maximum queue length specifiable by listen. */ +diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h +new file mode 100644 +index 0000000..66c0748 +--- /dev/null ++++ b/include/uapi/linux/qrtr.h +@@ -0,0 +1,12 @@ ++#ifndef _LINUX_QRTR_H ++#define _LINUX_QRTR_H ++ ++#include <linux/socket.h> ++ ++struct sockaddr_qrtr { ++ __kernel_sa_family_t sq_family; ++ __u32 sq_node; ++ __u32 sq_port; ++}; ++ ++#endif /* _LINUX_QRTR_H */ +diff --git a/net/Kconfig b/net/Kconfig +index b8439e6..1c9fda1 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -233,6 +233,7 @@ source "net/mpls/Kconfig" + source "net/hsr/Kconfig" + source "net/switchdev/Kconfig" + source "net/l3mdev/Kconfig" ++source "net/qrtr/Kconfig" + + config RPS + bool +diff --git a/net/Makefile b/net/Makefile +index 81d1411..bdd1455 100644 +--- a/net/Makefile ++++ b/net/Makefile +@@ -78,3 +78,4 @@ endif + ifneq ($(CONFIG_NET_L3_MASTER_DEV),) + obj-y += l3mdev/ + endif ++obj-$(CONFIG_QRTR) += qrtr/ +diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig +new file mode 100644 +index 0000000..0c2619d +--- /dev/null ++++ b/net/qrtr/Kconfig +@@ -0,0 +1,24 @@ ++# Qualcomm IPC Router configuration ++# ++ ++config QRTR ++ bool "Qualcomm IPC Router support" ++ depends on ARCH_QCOM || COMPILE_TEST ++ ---help--- ++ Say Y if you intend to use Qualcomm IPC router protocol. The ++ protocol is used to communicate with services provided by other ++ hardware blocks in the system. ++ ++ In order to do service lookups, a userspace daemon is required to ++ maintain a service listing. ++ ++if QRTR ++ ++config QRTR_SMD ++ tristate "SMD IPC Router channels" ++ depends on QCOM_SMD || COMPILE_TEST ++ ---help--- ++ Say Y here to support SMD based ipcrouter channels. SMD is the ++ most common transport for IPC Router.
++ ++endif # QRTR +diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile +new file mode 100644 +index 0000000..e282a84 +--- /dev/null ++++ b/net/qrtr/Makefile +@@ -0,0 +1,2 @@ ++obj-y := qrtr.o ++obj-$(CONFIG_QRTR_SMD) += smd.o +diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c +new file mode 100644 +index 0000000..c985ecb +--- /dev/null ++++ b/net/qrtr/qrtr.c +@@ -0,0 +1,1007 @@ ++/* ++ * Copyright (c) 2015, Sony Mobile Communications Inc. ++ * Copyright (c) 2013, The Linux Foundation. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++#include <linux/module.h> ++#include <linux/netlink.h> ++#include <linux/qrtr.h> ++#include <linux/termios.h> /* For TIOCINQ/OUTQ */ ++ ++#include <net/sock.h> ++ ++#include "qrtr.h" ++ ++#define QRTR_PROTO_VER 1 ++ ++/* auto-bind range */ ++#define QRTR_MIN_EPH_SOCKET 0x4000 ++#define QRTR_MAX_EPH_SOCKET 0x7fff ++ ++enum qrtr_pkt_type { ++ QRTR_TYPE_DATA = 1, ++ QRTR_TYPE_HELLO = 2, ++ QRTR_TYPE_BYE = 3, ++ QRTR_TYPE_NEW_SERVER = 4, ++ QRTR_TYPE_DEL_SERVER = 5, ++ QRTR_TYPE_DEL_CLIENT = 6, ++ QRTR_TYPE_RESUME_TX = 7, ++ QRTR_TYPE_EXIT = 8, ++ QRTR_TYPE_PING = 9, ++}; ++ ++/** ++ * struct qrtr_hdr - (I|R)PCrouter packet header ++ * @version: protocol version ++ * @type: packet type; one of QRTR_TYPE_* ++ * @src_node_id: source node ++ * @src_port_id: source port ++ * @confirm_rx: boolean; whether a resume-tx packet should be sent in reply ++ * @size: length of packet, excluding this header ++ * @dst_node_id: destination node ++ * @dst_port_id: destination port ++ */ ++struct qrtr_hdr { ++ __le32 version; ++ __le32 type; ++ __le32 src_node_id; ++ __le32 src_port_id; ++ __le32 confirm_rx; ++ __le32 size; ++ __le32 dst_node_id; ++ __le32 dst_port_id; ++} __packed; ++ ++#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) ++#define QRTR_NODE_BCAST ((unsigned int)-1) ++#define QRTR_PORT_CTRL ((unsigned int)-2) ++ ++struct qrtr_sock { ++ /* WARNING: sk must be the first member */ ++ struct sock sk; ++ struct sockaddr_qrtr us; ++ struct sockaddr_qrtr peer; ++}; ++ ++static inline struct qrtr_sock *qrtr_sk(struct sock *sk) ++{ ++ BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0); ++ return container_of(sk, struct qrtr_sock, sk); ++} ++ ++static unsigned int qrtr_local_nid = -1; ++ ++/* for node ids */ ++static RADIX_TREE(qrtr_nodes, GFP_KERNEL); ++/* broadcast list */ ++static LIST_HEAD(qrtr_all_nodes); ++/* lock for qrtr_nodes, qrtr_all_nodes and node reference */ ++static DEFINE_MUTEX(qrtr_node_lock); ++ ++/* local port allocation management */ ++static DEFINE_IDR(qrtr_ports); ++static DEFINE_MUTEX(qrtr_port_lock); ++ ++/** ++ * struct qrtr_node - endpoint node ++ * @ep_lock: lock for endpoint management and callbacks ++ * @ep: endpoint ++ * @ref: reference count for node ++ * @nid: node id ++ * @rx_queue: receive queue ++ * @work: scheduled work struct for recv work ++ * @item: list item for broadcast list ++ */ ++struct qrtr_node { ++ struct mutex ep_lock; ++ struct qrtr_endpoint *ep; ++ struct kref ref; ++ unsigned int nid; ++ ++ struct sk_buff_head rx_queue; ++ struct work_struct work; ++ struct list_head item; ++}; ++ ++/* Release node resources and free the node.
++ * ++ * Do not call directly, use qrtr_node_release. To be used with ++ * kref_put_mutex. As such, the node mutex is expected to be locked on call. ++ */ ++static void __qrtr_node_release(struct kref *kref) ++{ ++ struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); ++ ++ if (node->nid != QRTR_EP_NID_AUTO) ++ radix_tree_delete(&qrtr_nodes, node->nid); ++ ++ list_del(&node->item); ++ mutex_unlock(&qrtr_node_lock); ++ ++ skb_queue_purge(&node->rx_queue); ++ kfree(node); ++} ++ ++/* Increment reference to node. */ ++static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node) ++{ ++ if (node) ++ kref_get(&node->ref); ++ return node; ++} ++ ++/* Decrement reference to node and release as necessary. */ ++static void qrtr_node_release(struct qrtr_node *node) ++{ ++ if (!node) ++ return; ++ kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock); ++} ++ ++/* Pass an outgoing packet socket buffer to the endpoint driver. */ ++static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) ++{ ++ int rc = -ENODEV; ++ ++ mutex_lock(&node->ep_lock); ++ if (node->ep) ++ rc = node->ep->xmit(node->ep, skb); ++ else ++ kfree_skb(skb); ++ mutex_unlock(&node->ep_lock); ++ ++ return rc; ++} ++ ++/* Lookup node by id. ++ * ++ * callers must release with qrtr_node_release() ++ */ ++static struct qrtr_node *qrtr_node_lookup(unsigned int nid) ++{ ++ struct qrtr_node *node; ++ ++ mutex_lock(&qrtr_node_lock); ++ node = radix_tree_lookup(&qrtr_nodes, nid); ++ node = qrtr_node_acquire(node); ++ mutex_unlock(&qrtr_node_lock); ++ ++ return node; ++} ++ ++/* Assign node id to node. ++ * ++ * This is mostly useful for automatic node id assignment, based on ++ * the source id in the incoming packet. ++ */ ++static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) ++{ ++ if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO) ++ return; ++ ++ mutex_lock(&qrtr_node_lock); ++ radix_tree_insert(&qrtr_nodes, nid, node); ++ node->nid = nid; ++ mutex_unlock(&qrtr_node_lock); ++} ++ ++/** ++ * qrtr_endpoint_post() - post incoming data ++ * @ep: endpoint handle ++ * @data: data pointer ++ * @len: size of data in bytes ++ * ++ * Return: 0 on success; negative error code on failure ++ */ ++int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) ++{ ++ struct qrtr_node *node = ep->node; ++ const struct qrtr_hdr *phdr = data; ++ struct sk_buff *skb; ++ unsigned int psize; ++ unsigned int size; ++ unsigned int type; ++ unsigned int ver; ++ unsigned int dst; ++ ++ if (len < QRTR_HDR_SIZE || len & 3) ++ return -EINVAL; ++ ++ ver = le32_to_cpu(phdr->version); ++ size = le32_to_cpu(phdr->size); ++ type = le32_to_cpu(phdr->type); ++ dst = le32_to_cpu(phdr->dst_port_id); ++ ++ psize = (size + 3) & ~3; ++ ++ if (ver != QRTR_PROTO_VER) ++ return -EINVAL; ++ ++ if (len != psize + QRTR_HDR_SIZE) ++ return -EINVAL; ++ ++ if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA) ++ return -EINVAL; ++ ++ skb = netdev_alloc_skb(NULL, len); ++ if (!skb) ++ return -ENOMEM; ++ ++ skb_reset_transport_header(skb); ++ memcpy(skb_put(skb, len), data, len); ++ ++ skb_queue_tail(&node->rx_queue, skb); ++ schedule_work(&node->work); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(qrtr_endpoint_post); ++ ++/* Allocate and construct a resume-tx packet. 
*/ ++static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, ++ u32 dst_node, u32 port) ++{ ++ const int pkt_len = 20; ++ struct qrtr_hdr *hdr; ++ struct sk_buff *skb; ++ u32 *buf; ++ ++ skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); ++ if (!skb) ++ return NULL; ++ skb_reset_transport_header(skb); ++ ++ hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE); ++ hdr->version = cpu_to_le32(QRTR_PROTO_VER); ++ hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX); ++ hdr->src_node_id = cpu_to_le32(src_node); ++ hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); ++ hdr->confirm_rx = cpu_to_le32(0); ++ hdr->size = cpu_to_le32(pkt_len); ++ hdr->dst_node_id = cpu_to_le32(dst_node); ++ hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); ++ ++ buf = (u32 *)skb_put(skb, pkt_len); ++ memset(buf, 0, pkt_len); ++ buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); ++ buf[1] = cpu_to_le32(src_node); ++ buf[2] = cpu_to_le32(port); ++ ++ return skb; ++} ++ ++static struct qrtr_sock *qrtr_port_lookup(int port); ++static void qrtr_port_put(struct qrtr_sock *ipc); ++ ++/* Handle and route a received packet. ++ * ++ * This will auto-reply with resume-tx packet as necessary. ++ */ ++static void qrtr_node_rx_work(struct work_struct *work) ++{ ++ struct qrtr_node *node = container_of(work, struct qrtr_node, work); ++ struct sk_buff *skb; ++ ++ while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { ++ const struct qrtr_hdr *phdr; ++ u32 dst_node, dst_port; ++ struct qrtr_sock *ipc; ++ u32 src_node; ++ int confirm; ++ ++ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); ++ src_node = le32_to_cpu(phdr->src_node_id); ++ dst_node = le32_to_cpu(phdr->dst_node_id); ++ dst_port = le32_to_cpu(phdr->dst_port_id); ++ confirm = !!phdr->confirm_rx; ++ ++ qrtr_node_assign(node, src_node); ++ ++ ipc = qrtr_port_lookup(dst_port); ++ if (!ipc) { ++ kfree_skb(skb); ++ } else { ++ if (sock_queue_rcv_skb(&ipc->sk, skb)) ++ kfree_skb(skb); ++ ++ qrtr_port_put(ipc); ++ } ++ ++ if (confirm) { ++ skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); ++ if (!skb) ++ break; ++ if (qrtr_node_enqueue(node, skb)) ++ break; ++ } ++ } ++} ++ ++/** ++ * qrtr_endpoint_register() - register a new endpoint ++ * @ep: endpoint to register ++ * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment ++ * Return: 0 on success; negative error code on failure ++ * ++ * The specified endpoint must have the xmit function pointer set on call. ++ */ ++int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) ++{ ++ struct qrtr_node *node; ++ ++ if (!ep || !ep->xmit) ++ return -EINVAL; ++ ++ node = kzalloc(sizeof(*node), GFP_KERNEL); ++ if (!node) ++ return -ENOMEM; ++ ++ INIT_WORK(&node->work, qrtr_node_rx_work); ++ kref_init(&node->ref); ++ mutex_init(&node->ep_lock); ++ skb_queue_head_init(&node->rx_queue); ++ node->nid = QRTR_EP_NID_AUTO; ++ node->ep = ep; ++ ++ qrtr_node_assign(node, nid); ++ ++ mutex_lock(&qrtr_node_lock); ++ list_add(&node->item, &qrtr_all_nodes); ++ mutex_unlock(&qrtr_node_lock); ++ ep->node = node; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(qrtr_endpoint_register); ++ ++/** ++ * qrtr_endpoint_unregister - unregister endpoint ++ * @ep: endpoint to unregister ++ */ ++void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) ++{ ++ struct qrtr_node *node = ep->node; ++ ++ mutex_lock(&node->ep_lock); ++ node->ep = NULL; ++ mutex_unlock(&node->ep_lock); ++ ++ qrtr_node_release(node); ++ ep->node = NULL; ++} ++EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister); ++ ++/* Lookup socket by port. 
++ * ++ * Callers must release with qrtr_port_put() ++ */ ++static struct qrtr_sock *qrtr_port_lookup(int port) ++{ ++ struct qrtr_sock *ipc; ++ ++ if (port == QRTR_PORT_CTRL) ++ port = 0; ++ ++ mutex_lock(&qrtr_port_lock); ++ ipc = idr_find(&qrtr_ports, port); ++ if (ipc) ++ sock_hold(&ipc->sk); ++ mutex_unlock(&qrtr_port_lock); ++ ++ return ipc; ++} ++ ++/* Release acquired socket. */ ++static void qrtr_port_put(struct qrtr_sock *ipc) ++{ ++ sock_put(&ipc->sk); ++} ++ ++/* Remove port assignment. */ ++static void qrtr_port_remove(struct qrtr_sock *ipc) ++{ ++ int port = ipc->us.sq_port; ++ ++ if (port == QRTR_PORT_CTRL) ++ port = 0; ++ ++ __sock_put(&ipc->sk); ++ ++ mutex_lock(&qrtr_port_lock); ++ idr_remove(&qrtr_ports, port); ++ mutex_unlock(&qrtr_port_lock); ++} ++ ++/* Assign port number to socket. ++ * ++ * Specify port in the integer pointed to by port, and it will be adjusted ++ * on return as necessary. ++ * ++ * Port may be: ++ * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET] ++ * <QRTR_MIN_EPH_SOCKET: Specified; requires CAP_NET_ADMIN ++ * >QRTR_MIN_EPH_SOCKET: Specified; available to all ++ */ ++static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) ++{ ++ int rc; ++ ++ mutex_lock(&qrtr_port_lock); ++ if (!*port) { ++ rc = idr_alloc(&qrtr_ports, ipc, ++ QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1, ++ GFP_ATOMIC); ++ if (rc >= 0) ++ *port = rc; ++ } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { ++ rc = -EACCES; ++ } else if (*port == QRTR_PORT_CTRL) { ++ rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC); ++ } else { ++ rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC); ++ if (rc >= 0) ++ *port = rc; ++ } ++ mutex_unlock(&qrtr_port_lock); ++ ++ if (rc == -ENOSPC) ++ return -EADDRINUSE; ++ else if (rc < 0) ++ return rc; ++ ++ sock_hold(&ipc->sk); ++ ++ return 0; ++} ++ ++/* Bind socket to address. ++ * ++ * Socket should be locked upon call. ++ */ ++static int __qrtr_bind(struct socket *sock, ++ const struct sockaddr_qrtr *addr, int zapped) ++{ ++ struct qrtr_sock *ipc = qrtr_sk(sock->sk); ++ struct sock *sk = sock->sk; ++ int port; ++ int rc; ++ ++ /* rebinding ok */ ++ if (!zapped && addr->sq_port == ipc->us.sq_port) ++ return 0; ++ ++ port = addr->sq_port; ++ rc = qrtr_port_assign(ipc, &port); ++ if (rc) ++ return rc; ++ ++ /* unbind previous, if any */ ++ if (!zapped) ++ qrtr_port_remove(ipc); ++ ipc->us.sq_port = port; ++ ++ sock_reset_flag(sk, SOCK_ZAPPED); ++ ++ return 0; ++} ++ ++/* Auto bind to an ephemeral port. */ ++static int qrtr_autobind(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ struct sockaddr_qrtr addr; ++ ++ if (!sock_flag(sk, SOCK_ZAPPED)) ++ return 0; ++ ++ addr.sq_family = AF_QIPCRTR; ++ addr.sq_node = qrtr_local_nid; ++ addr.sq_port = 0; ++ ++ return __qrtr_bind(sock, &addr, 1); ++} ++ ++/* Bind socket to specified sockaddr. */ ++static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) ++{ ++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); ++ struct qrtr_sock *ipc = qrtr_sk(sock->sk); ++ struct sock *sk = sock->sk; ++ int rc; ++ ++ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) ++ return -EINVAL; ++ ++ if (addr->sq_node != ipc->us.sq_node) ++ return -EINVAL; ++ ++ lock_sock(sk); ++ rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED)); ++ release_sock(sk); ++ ++ return rc; ++} ++ ++/* Queue packet to local peer socket.
*/ ++static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) ++{ ++ const struct qrtr_hdr *phdr; ++ struct qrtr_sock *ipc; ++ ++ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); ++ ++ ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id)); ++ if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ ++ kfree_skb(skb); ++ return -ENODEV; ++ } ++ ++ if (sock_queue_rcv_skb(&ipc->sk, skb)) { ++ qrtr_port_put(ipc); ++ kfree_skb(skb); ++ return -ENOSPC; ++ } ++ ++ qrtr_port_put(ipc); ++ ++ return 0; ++} ++ ++/* Queue packet for broadcast. */ ++static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) ++{ ++ struct sk_buff *skbn; ++ ++ mutex_lock(&qrtr_node_lock); ++ list_for_each_entry(node, &qrtr_all_nodes, item) { ++ skbn = skb_clone(skb, GFP_KERNEL); ++ if (!skbn) ++ break; ++ skb_set_owner_w(skbn, skb->sk); ++ qrtr_node_enqueue(node, skbn); ++ } ++ mutex_unlock(&qrtr_node_lock); ++ ++ qrtr_local_enqueue(node, skb); ++ ++ return 0; ++} ++ ++static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ++{ ++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); ++ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); ++ struct qrtr_sock *ipc = qrtr_sk(sock->sk); ++ struct sock *sk = sock->sk; ++ struct qrtr_node *node; ++ struct qrtr_hdr *hdr; ++ struct sk_buff *skb; ++ size_t plen; ++ int rc; ++ ++ if (msg->msg_flags & ~(MSG_DONTWAIT)) ++ return -EINVAL; ++ ++ if (len > 65535) ++ return -EMSGSIZE; ++ ++ lock_sock(sk); ++ ++ if (addr) { ++ if (msg->msg_namelen < sizeof(*addr)) { ++ release_sock(sk); ++ return -EINVAL; ++ } ++ ++ if (addr->sq_family != AF_QIPCRTR) { ++ release_sock(sk); ++ return -EINVAL; ++ } ++ ++ rc = qrtr_autobind(sock); ++ if (rc) { ++ release_sock(sk); ++ return rc; ++ } ++ } else if (sk->sk_state == TCP_ESTABLISHED) { ++ addr = &ipc->peer; ++ } else { ++ release_sock(sk); ++ return -ENOTCONN; ++ } ++ ++ node = NULL; ++ if (addr->sq_node == QRTR_NODE_BCAST) { ++ enqueue_fn = qrtr_bcast_enqueue; ++ } else if (addr->sq_node == ipc->us.sq_node) { ++ enqueue_fn = qrtr_local_enqueue; ++ } else { ++ enqueue_fn = qrtr_node_enqueue; ++ node = qrtr_node_lookup(addr->sq_node); ++ if (!node) { ++ release_sock(sk); ++ return -ECONNRESET; ++ } ++ } ++ ++ plen = (len + 3) & ~3; ++ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, ++ msg->msg_flags & MSG_DONTWAIT, &rc); ++ if (!skb) ++ goto out_node; ++ ++ skb_reset_transport_header(skb); ++ skb_put(skb, len + QRTR_HDR_SIZE); ++ ++ hdr = (struct qrtr_hdr *)skb_transport_header(skb); ++ hdr->version = cpu_to_le32(QRTR_PROTO_VER); ++ hdr->src_node_id = cpu_to_le32(ipc->us.sq_node); ++ hdr->src_port_id = cpu_to_le32(ipc->us.sq_port); ++ hdr->confirm_rx = cpu_to_le32(0); ++ hdr->size = cpu_to_le32(len); ++ hdr->dst_node_id = cpu_to_le32(addr->sq_node); ++ hdr->dst_port_id = cpu_to_le32(addr->sq_port); ++ ++ rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, ++ &msg->msg_iter, len); ++ if (rc) { ++ kfree_skb(skb); ++ goto out_node; ++ } ++ ++ if (plen != len) { ++ skb_pad(skb, plen - len); ++ skb_put(skb, plen - len); ++ } ++ ++ if (ipc->us.sq_port == QRTR_PORT_CTRL) { ++ if (len < 4) { ++ rc = -EINVAL; ++ kfree_skb(skb); ++ goto out_node; ++ } ++ ++ /* control messages already require the type as 'command' */ ++ skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); ++ } else { ++ hdr->type = cpu_to_le32(QRTR_TYPE_DATA); ++ } ++ ++ rc = enqueue_fn(node, skb); ++ if (rc >= 0) ++ rc = len; ++ ++out_node: ++ qrtr_node_release(node); ++ release_sock(sk); ++ ++ return 
rc; ++} ++ ++static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, ++ size_t size, int flags) ++{ ++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); ++ const struct qrtr_hdr *phdr; ++ struct sock *sk = sock->sk; ++ struct sk_buff *skb; ++ int copied, rc; ++ ++ lock_sock(sk); ++ ++ if (sock_flag(sk, SOCK_ZAPPED)) { ++ release_sock(sk); ++ return -EADDRNOTAVAIL; ++ } ++ ++ skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, ++ flags & MSG_DONTWAIT, &rc); ++ if (!skb) { ++ release_sock(sk); ++ return rc; ++ } ++ ++ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); ++ copied = le32_to_cpu(phdr->size); ++ if (copied > size) { ++ copied = size; ++ msg->msg_flags |= MSG_TRUNC; ++ } ++ ++ rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); ++ if (rc < 0) ++ goto out; ++ rc = copied; ++ ++ if (addr) { ++ addr->sq_family = AF_QIPCRTR; ++ addr->sq_node = le32_to_cpu(phdr->src_node_id); ++ addr->sq_port = le32_to_cpu(phdr->src_port_id); ++ msg->msg_namelen = sizeof(*addr); ++ } ++ ++out: ++ skb_free_datagram(sk, skb); ++ release_sock(sk); ++ ++ return rc; ++} ++ ++static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, ++ int len, int flags) ++{ ++ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); ++ struct qrtr_sock *ipc = qrtr_sk(sock->sk); ++ struct sock *sk = sock->sk; ++ int rc; ++ ++ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) ++ return -EINVAL; ++ ++ lock_sock(sk); ++ ++ sk->sk_state = TCP_CLOSE; ++ sock->state = SS_UNCONNECTED; ++ ++ rc = qrtr_autobind(sock); ++ if (rc) { ++ release_sock(sk); ++ return rc; ++ } ++ ++ ipc->peer = *addr; ++ sock->state = SS_CONNECTED; ++ sk->sk_state = TCP_ESTABLISHED; ++ ++ release_sock(sk); ++ ++ return 0; ++} ++ ++static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, ++ int *len, int peer) ++{ ++ struct qrtr_sock *ipc = qrtr_sk(sock->sk); ++ struct sockaddr_qrtr qaddr; ++ struct sock *sk = sock->sk; ++ ++ lock_sock(sk); ++ if (peer) { ++ if (sk->sk_state != TCP_ESTABLISHED) { ++ release_sock(sk); ++ return -ENOTCONN; ++ } ++ ++ qaddr = ipc->peer; ++ } else { ++ qaddr = ipc->us; ++ } ++ release_sock(sk); ++ ++ *len = sizeof(qaddr); ++ qaddr.sq_family = AF_QIPCRTR; ++ ++ memcpy(saddr, &qaddr, sizeof(qaddr)); ++ ++ return 0; ++} ++ ++static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ struct qrtr_sock *ipc = qrtr_sk(sock->sk); ++ struct sock *sk = sock->sk; ++ struct sockaddr_qrtr *sq; ++ struct sk_buff *skb; ++ struct ifreq ifr; ++ long len = 0; ++ int rc = 0; ++ ++ lock_sock(sk); ++ ++ switch (cmd) { ++ case TIOCOUTQ: ++ len = sk->sk_sndbuf - sk_wmem_alloc_get(sk); ++ if (len < 0) ++ len = 0; ++ rc = put_user(len, (int __user *)argp); ++ break; ++ case TIOCINQ: ++ skb = skb_peek(&sk->sk_receive_queue); ++ if (skb) ++ len = skb->len - QRTR_HDR_SIZE; ++ rc = put_user(len, (int __user *)argp); ++ break; ++ case SIOCGIFADDR: ++ if (copy_from_user(&ifr, argp, sizeof(ifr))) { ++ rc = -EFAULT; ++ break; ++ } ++ ++ sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; ++ *sq = ipc->us; ++ if (copy_to_user(argp, &ifr, sizeof(ifr))) { ++ rc = -EFAULT; ++ break; ++ } ++ break; ++ case SIOCGSTAMP: ++ rc = sock_get_timestamp(sk, argp); ++ break; ++ case SIOCADDRT: ++ case SIOCDELRT: ++ case SIOCSIFADDR: ++ case SIOCGIFDSTADDR: ++ case SIOCSIFDSTADDR: ++ case SIOCGIFBRDADDR: ++ case SIOCSIFBRDADDR: ++ case SIOCGIFNETMASK: ++ case SIOCSIFNETMASK: ++ rc = -EINVAL; ++ break; ++ default: ++ rc = -ENOIOCTLCMD; ++ break; ++ 
} ++ ++ release_sock(sk); ++ ++ return rc; ++} ++ ++static int qrtr_release(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ struct qrtr_sock *ipc; ++ ++ if (!sk) ++ return 0; ++ ++ lock_sock(sk); ++ ++ ipc = qrtr_sk(sk); ++ sk->sk_shutdown = SHUTDOWN_MASK; ++ if (!sock_flag(sk, SOCK_DEAD)) ++ sk->sk_state_change(sk); ++ ++ sock_set_flag(sk, SOCK_DEAD); ++ sock->sk = NULL; ++ ++ if (!sock_flag(sk, SOCK_ZAPPED)) ++ qrtr_port_remove(ipc); ++ ++ skb_queue_purge(&sk->sk_receive_queue); ++ ++ release_sock(sk); ++ sock_put(sk); ++ ++ return 0; ++} ++ ++static const struct proto_ops qrtr_proto_ops = { ++ .owner = THIS_MODULE, ++ .family = AF_QIPCRTR, ++ .bind = qrtr_bind, ++ .connect = qrtr_connect, ++ .socketpair = sock_no_socketpair, ++ .accept = sock_no_accept, ++ .listen = sock_no_listen, ++ .sendmsg = qrtr_sendmsg, ++ .recvmsg = qrtr_recvmsg, ++ .getname = qrtr_getname, ++ .ioctl = qrtr_ioctl, ++ .poll = datagram_poll, ++ .shutdown = sock_no_shutdown, ++ .setsockopt = sock_no_setsockopt, ++ .getsockopt = sock_no_getsockopt, ++ .release = qrtr_release, ++ .mmap = sock_no_mmap, ++ .sendpage = sock_no_sendpage, ++}; ++ ++static struct proto qrtr_proto = { ++ .name = "QIPCRTR", ++ .owner = THIS_MODULE, ++ .obj_size = sizeof(struct qrtr_sock), ++}; ++ ++static int qrtr_create(struct net *net, struct socket *sock, ++ int protocol, int kern) ++{ ++ struct qrtr_sock *ipc; ++ struct sock *sk; ++ ++ if (sock->type != SOCK_DGRAM) ++ return -EPROTOTYPE; ++ ++ sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern); ++ if (!sk) ++ return -ENOMEM; ++ ++ sock_set_flag(sk, SOCK_ZAPPED); ++ ++ sock_init_data(sock, sk); ++ sock->ops = &qrtr_proto_ops; ++ ++ ipc = qrtr_sk(sk); ++ ipc->us.sq_family = AF_QIPCRTR; ++ ipc->us.sq_node = qrtr_local_nid; ++ ipc->us.sq_port = 0; ++ ++ return 0; ++} ++ ++static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { ++ [IFA_LOCAL] = { .type = NLA_U32 }, ++}; ++ ++static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ++{ ++ struct nlattr *tb[IFA_MAX + 1]; ++ struct ifaddrmsg *ifm; ++ int rc; ++ ++ if (!netlink_capable(skb, CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ if (!netlink_capable(skb, CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ ASSERT_RTNL(); ++ ++ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); ++ if (rc < 0) ++ return rc; ++ ++ ifm = nlmsg_data(nlh); ++ if (!tb[IFA_LOCAL]) ++ return -EINVAL; ++ ++ qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]); ++ return 0; ++} ++ ++static const struct net_proto_family qrtr_family = { ++ .owner = THIS_MODULE, ++ .family = AF_QIPCRTR, ++ .create = qrtr_create, ++}; ++ ++static int __init qrtr_proto_init(void) ++{ ++ int rc; ++ ++ rc = proto_register(&qrtr_proto, 1); ++ if (rc) ++ return rc; ++ ++ rc = sock_register(&qrtr_family); ++ if (rc) { ++ proto_unregister(&qrtr_proto); ++ return rc; ++ } ++ ++ rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); ++ ++ return 0; ++} ++module_init(qrtr_proto_init); ++ ++static void __exit qrtr_proto_fini(void) ++{ ++ rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR); ++ sock_unregister(qrtr_family.family); ++ proto_unregister(&qrtr_proto); ++} ++module_exit(qrtr_proto_fini); ++ ++MODULE_DESCRIPTION("Qualcomm IPC-router driver"); ++MODULE_LICENSE("GPL v2"); +diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h +new file mode 100644 +index 0000000..2b84871 +--- /dev/null ++++ b/net/qrtr/qrtr.h +@@ -0,0 +1,31 @@ ++#ifndef __QRTR_H_ ++#define __QRTR_H_ ++ ++#include <linux/types.h> ++ ++struct sk_buff; ++ ++/* endpoint node id auto assignment */ ++#define QRTR_EP_NID_AUTO (-1)
++ ++/** ++ * struct qrtr_endpoint - endpoint handle ++ * @xmit: Callback for outgoing packets ++ * ++ * The socket buffer passed to the xmit function becomes owned by the endpoint ++ * driver. As such, when the driver is done with the buffer, it should ++ * call kfree_skb() on failure, or consume_skb() on success. ++ */ ++struct qrtr_endpoint { ++ int (*xmit)(struct qrtr_endpoint *ep, struct sk_buff *skb); ++ /* private: not for endpoint use */ ++ struct qrtr_node *node; ++}; ++ ++int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid); ++ ++void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); ++ ++int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); ++ ++#endif +diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c +new file mode 100644 +index 0000000..84ebce7 +--- /dev/null ++++ b/net/qrtr/smd.c +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2015, Sony Mobile Communications Inc. ++ * Copyright (c) 2013, The Linux Foundation. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#include <linux/module.h> ++#include <linux/skbuff.h> ++#include <linux/soc/qcom/smd.h> ++ ++#include "qrtr.h" ++ ++struct qrtr_smd_dev { ++ struct qrtr_endpoint ep; ++ struct qcom_smd_channel *channel; ++}; ++ ++/* from smd to qrtr */ ++static int qcom_smd_qrtr_callback(struct qcom_smd_device *sdev, ++ const void *data, size_t len) ++{ ++ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); ++ int rc; ++ ++ if (!qdev) ++ return -EAGAIN; ++ ++ rc = qrtr_endpoint_post(&qdev->ep, data, len); ++ if (rc == -EINVAL) { ++ dev_err(&sdev->dev, "invalid ipcrouter packet\n"); ++ /* return 0 to let smd drop the packet */ ++ rc = 0; ++ } ++ ++ return rc; ++} ++ ++/* from qrtr to smd */ ++static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) ++{ ++ struct qrtr_smd_dev *qdev = container_of(ep, struct qrtr_smd_dev, ep); ++ int rc; ++ ++ rc = skb_linearize(skb); ++ if (rc) ++ goto out; ++ ++ rc = qcom_smd_send(qdev->channel, skb->data, skb->len); ++ ++out: ++ if (rc) ++ kfree_skb(skb); ++ else ++ consume_skb(skb); ++ return rc; ++} ++ ++static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) ++{ ++ struct qrtr_smd_dev *qdev; ++ int rc; ++ ++ qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); ++ if (!qdev) ++ return -ENOMEM; ++ ++ qdev->channel = sdev->channel; ++ qdev->ep.xmit = qcom_smd_qrtr_send; ++ ++ rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); ++ if (rc) ++ return rc; ++ ++ dev_set_drvdata(&sdev->dev, qdev); ++ ++ dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); ++ ++ return 0; ++} ++ ++static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) ++{ ++ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); ++ ++ qrtr_endpoint_unregister(&qdev->ep); ++ ++ dev_set_drvdata(&sdev->dev, NULL); ++} ++ ++static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { ++ { "IPCRTR" }, ++ {} ++}; ++ ++static struct qcom_smd_driver qcom_smd_qrtr_driver = { ++ .probe = qcom_smd_qrtr_probe, ++ .remove = qcom_smd_qrtr_remove, ++ .callback = qcom_smd_qrtr_callback, ++ .smd_match_table = qcom_smd_qrtr_smd_match, ++ .driver = { ++ .name =
"qcom_smd_qrtr", ++ .owner = THIS_MODULE, ++ }, ++}; ++ ++module_qcom_smd_driver(qcom_smd_qrtr_driver); ++ ++MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); ++MODULE_LICENSE("GPL v2"); +-- +2.10.0 + diff --git a/alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch b/alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch similarity index 81% rename from alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch rename to alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch index e554e2ae6..b1c07b141 100644 --- a/alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch +++ b/alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch @@ -1,7 +1,7 @@ -From e5597ae98118b800f9930606af65503f64944af2 Mon Sep 17 00:00:00 2001 +From 51293adacd73d7bc6baee18e87b0d17ad52a61d4 Mon Sep 17 00:00:00 2001 From: Dexuan Cui -Date: Sat, 23 Jul 2016 01:35:51 +0000 -Subject: [PATCH 3/4] hv_sock: introduce Hyper-V Sockets +Date: Sun, 15 May 2016 09:53:11 -0700 +Subject: [PATCH 39/42] hv_sock: introduce Hyper-V Sockets Hyper-V Sockets (hv_sock) supplies a byte-stream based communication mechanism between the host and the guest. It's somewhat like TCP over @@ -22,30 +22,29 @@ Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Vitaly Kuznetsov Cc: Cathy Avery -Cc: Olaf Hering -Origin: https://patchwork.kernel.org/patch/9244467/ +Origin: https://patchwork.ozlabs.org/patch/622404/ --- MAINTAINERS | 2 + - include/linux/hyperv.h | 13 + + include/linux/hyperv.h | 14 + include/linux/socket.h | 4 +- include/net/af_hvsock.h | 78 +++ - include/uapi/linux/hyperv.h | 23 + + include/uapi/linux/hyperv.h | 25 + net/Kconfig | 1 + net/Makefile | 1 + net/hv_sock/Kconfig | 10 + net/hv_sock/Makefile | 3 + - net/hv_sock/af_hvsock.c | 1507 +++++++++++++++++++++++++++++++++++++++++++ - 10 files changed, 1641 insertions(+), 1 deletion(-) + net/hv_sock/af_hvsock.c | 1520 +++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 1657 insertions(+), 1 deletion(-) create mode 100644 include/net/af_hvsock.h create mode 100644 net/hv_sock/Kconfig create mode 100644 net/hv_sock/Makefile create mode 100644 net/hv_sock/af_hvsock.c diff --git a/MAINTAINERS b/MAINTAINERS -index f593300..7432d79 100644 +index 12d49f5..fa87bdd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -5667,7 +5667,9 @@ F: drivers/pci/host/pci-hyperv.c +@@ -5123,7 +5123,9 @@ F: drivers/input/serio/hyperv-keyboard.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/video/fbdev/hyperv_fb.c @@ -54,15 +53,15 @@ index f593300..7432d79 100644 +F: include/net/af_hvsock.h F: tools/hv/ F: Documentation/ABI/stable/sysfs-bus-vmbus - + diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index b10954a..50f8976 100644 +index 6c9695e..187d4bd 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h -@@ -1505,5 +1505,18 @@ static inline void commit_rd_index(struct vmbus_channel *channel) - vmbus_set_event(channel); - } - +@@ -1349,4 +1349,18 @@ extern __u32 vmbus_proto_version; + + int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, + const uuid_le *shv_host_servie_id); +struct vmpipe_proto_header { + u32 pkt_type; + u32 data_size; @@ -73,13 +72,13 @@ index b10954a..50f8976 100644 + +/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */ +#define PREV_INDICES_LEN (sizeof(u64)) - ++ +#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ + ALIGN((payload_len), 8) + \ + PREV_INDICES_LEN) #endif /* _HYPERV_H */ diff --git a/include/linux/socket.h 
 diff --git a/include/linux/socket.h b/include/linux/socket.h
-index b5cc5a6..0b68b58 100644
+index dbd81e7..6634c47 100644
 --- a/include/linux/socket.h
 +++ b/include/linux/socket.h
 @@ -202,8 +202,9 @@ struct ucred {
  #define AF_KCM		41	/* Kernel Connection Multiplexor*/
  #define AF_QIPCRTR	42	/* Qualcomm IPC Router          */
 +#define AF_HYPERV	43	/* Hyper-V Sockets		*/
  
 -#define AF_MAX		43	/* For now.. */
 +#define AF_MAX		44	/* For now.. */
  
  /* Protocol families, same as address families. */
  #define PF_UNSPEC	AF_UNSPEC
 @@ -251,6 +252,7 @@ struct ucred {
  #define PF_KCM		AF_KCM
  #define PF_QIPCRTR	AF_QIPCRTR
 +#define PF_HYPERV	AF_HYPERV
  #define PF_MAX		AF_MAX
  
  /* Maximum queue length specifiable by listen. */
 diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h
 new file mode 100644
-index 0000000..e7a8a3a
+index 0000000..7c8c41e
 --- /dev/null
 +++ b/include/net/af_hvsock.h
 @@ -0,0 +1,78 @@
 +#ifndef __AF_HVSOCK_H__
 +#define __AF_HVSOCK_H__
 +
 +#include <linux/hyperv.h>
 +#include <net/sock.h>
 +
-+/* The host side's design of the feature requires 5 exact 4KB pages for
-+ * recv/send rings respectively -- this is suboptimal considering memory
-+ * consumption, however unluckily we have to live with it, before the
-+ * host comes up with a better design in the future.
++/* Note: 3-page is the minimal recv ringbuffer size by default:
++ *
++ * the 1st page is used as the shared read/write index etc, rather than data:
++ * see hv_ringbuffer_init();
++ *
++ * the payload length in the vmbus pipe message received from the host can
++ * be 4096 bytes, and considering the header of HVSOCK_HEADER_LEN bytes, we
++ * need at least 2 extra pages for ringbuffer data.
 + */
-+#define PAGE_SIZE_4K 4096
-+#define RINGBUFFER_HVSOCK_RCV_SIZE (PAGE_SIZE_4K * 5)
-+#define RINGBUFFER_HVSOCK_SND_SIZE (PAGE_SIZE_4K * 5)
++#define HVSOCK_RCV_BUF_SZ	PAGE_SIZE
++#define DEF_RINGBUFFER_PAGES_HVSOCK_RCV	3
 +
-+/* The MTU is 16KB per the host side's design.
-+ * In future, the buffer can be elimiated when we switch to use the coming
-+ * new VMBus ringbuffer "in-place consumption" APIs, by which we can
-+ * directly copy data from VMBus ringbuffer into the userspace buffer.
++/* As to send, here let's make sure the hvsock_send_buf struct can be held in 1
++ * page, and since we want to use 2 pages for the send ringbuffer size (this is
++ * the minimal size by default, because the 1st page of the two is used as the
++ * shared read/write index etc, rather than data), we only have 1 page for
++ * ringbuffer data, this means: the max payload length for hvsock data is
++ * PAGE_SIZE - HVSOCK_PKT_LEN(0). And, let's reduce the length by 8-bytes
++ * because the ringbuffer can't be 100% full: see hv_ringbuffer_write().
 + */
-+#define HVSOCK_MTU_SIZE (1024 * 16)
-+struct hvsock_recv_buf {
-+	unsigned int data_len;
-+	unsigned int data_offset;
-+
-+	struct vmpipe_proto_header hdr;
-+	u8 buf[HVSOCK_MTU_SIZE];
-+};
-+
-+/* In the VM, actually we can send up to HVSOCK_MTU_SIZE bytes of payload,
-+ * but for now let's use a smaller size to minimize the dynamically-allocated
-+ * buffer. Note: the buffer can be elimiated in future when we add new VMBus
-+ * ringbuffer APIs that allow us to directly copy data from userspace buf to
-+ * VMBus ringbuffer.
++#define HVSOCK_SND_BUF_SZ	(PAGE_SIZE - HVSOCK_PKT_LEN(0) - 8)
++#define DEF_RINGBUFFER_PAGES_HVSOCK_SND	2
 + */
-+#define HVSOCK_MAX_SND_SIZE_BY_VM (1024 * 4)
++
++/* We only send data when the available space is "big enough". This artificial
++ * value must be less than HVSOCK_SND_BUF_SZ.
++ *
++ */
++#define HVSOCK_SND_THRESHOLD	(PAGE_SIZE / 2)
++
++#define sk_to_hvsock(__sk)	((struct hvsock_sock *)(__sk))
++#define hvsock_to_sk(__hvsk)	((struct sock *)(__hvsk))
++
 +struct hvsock_send_buf {
 +	struct vmpipe_proto_header hdr;
-+	u8 buf[HVSOCK_MAX_SND_SIZE_BY_VM];
++	u8 buf[HVSOCK_SND_BUF_SZ];
 +};
++
++struct hvsock_recv_buf {
++	struct vmpipe_proto_header hdr;
++	u8 buf[HVSOCK_RCV_BUF_SZ];
++
++	unsigned int data_len;
++	unsigned int data_offset;
 +};
 +
 +struct hvsock_sock {
 +	/* sk must be the first member. */
 +	struct sock sk;
 +
 +	struct vmbus_channel *channel;
 +	struct sockaddr_hv local_addr;
 +	struct sockaddr_hv remote_addr;
 +
 +	/* protected by the global hvsock_mutex */
 +	struct list_head bound_list;
 +	struct list_head connected_list;
 +
 +	struct list_head accept_queue;
 +
 +	unsigned char peer_shutdown;
 +
 +	struct hvsock_send_buf *send;
 +	struct hvsock_recv_buf *recv;
 +};
 +
-+static inline struct hvsock_sock *sk_to_hvsock(struct sock *sk)
-+{
-+	return (struct hvsock_sock *)sk;
-+}
-+
-+static inline struct sock *hvsock_to_sk(struct hvsock_sock *hvsk)
-+{
-+	return (struct sock *)hvsk;
-+}
-+
 +#endif /* __AF_HVSOCK_H__ */
 diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
-index e347b24..eb3e44b 100644
+index e347b24..408b832 100644
 --- a/include/uapi/linux/hyperv.h
 +++ b/include/uapi/linux/hyperv.h
 @@ -26,6 +26,7 @@
  #define _UAPI_HYPERV_H
  
  #include <linux/types.h>
 +#include <linux/uuid.h>
  
  /*
   * Framework version for util services.
-@@ -396,4 +397,26 @@ struct hv_kvp_ip_msg {
+@@ -396,4 +397,28 @@ struct hv_kvp_ip_msg {
  	struct hv_kvp_ipaddr_value      kvp_ip_val;
  } __attribute__((packed));
  
-+/* This is the address format of Hyper-V Sockets.
++/*
++ * This is the address format of Hyper-V Sockets.
 + * Note: here we just borrow the kernel's built-in type uuid_le. When
 + * an application calls bind() or connect(), the 2 members of struct
 + * sockaddr_hv must be of GUID.
 + * the first 3 fields. Refer to:
 + * https://en.wikipedia.org/wiki/Globally_unique_identifier
 + */
++#define guid_t uuid_le
 +struct sockaddr_hv {
 +	__kernel_sa_family_t shv_family;	/* Address family */
-+	u16 reserved;				/* Must be Zero */
-+	uuid_le shv_vm_guid;			/* VM ID */
-+	uuid_le shv_service_guid;		/* Service ID */
++	__le16 reserved;			/* Must be Zero */
++	guid_t shv_vm_id;			/* VM ID */
++	guid_t shv_service_id;			/* Service ID */
 +};
 +
 +#define SHV_VMID_GUEST	NULL_UUID_LE
 +
  #endif /* _UAPI_HYPERV_H */
 diff --git a/net/Kconfig b/net/Kconfig
-index c2cdbce..921e86f 100644
+index 1c9fda1..9eeccb7 100644
 --- a/net/Kconfig
 +++ b/net/Kconfig
-@@ -231,6 +231,7 @@ source "net/dns_resolver/Kconfig"
+@@ -228,6 +228,7 @@ source "net/dns_resolver/Kconfig"
  source "net/batman-adv/Kconfig"
  source "net/openvswitch/Kconfig"
  source "net/vmw_vsock/Kconfig"
 +source "net/hv_sock/Kconfig"
  source "net/mpls/Kconfig"
  source "net/hsr/Kconfig"
 diff --git a/net/Makefile b/net/Makefile
-index 9bd20bb..b4d4e9a 100644
+index bdd1455..ec175dd 100644
 --- a/net/Makefile
 +++ b/net/Makefile
 @@ -70,6 +70,7 @@ obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
 +obj-$(CONFIG_HYPERV_SOCK)	+= hv_sock/
  ifneq ($(CONFIG_NET_SWITCHDEV),)
 diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig
 new file mode 100644
-index 0000000..ff84875
+index 0000000..1f41848
 --- /dev/null
 +++ b/net/hv_sock/Kconfig
 @@ -0,0 +1,10 @@
 +config HYPERV_SOCK
 +	tristate "Hyper-V Sockets"
 +	depends on HYPERV
 +	default m if HYPERV
 +	help
-+	  Hyper-V Sockets is a socket interface for high speed
-+	  communication between Linux guest and Hyper-V host over VMBus.
++	  Hyper-V Sockets is somewhat like TCP over VMBus, allowing
++	  communication between Linux guest and Hyper-V host without TCP/IP.
 +
 +	  To compile this driver as a module, choose M here: the module
 +	  will be called hv_sock.
 diff --git a/net/hv_sock/Makefile b/net/hv_sock/Makefile
 new file mode 100644
 index 0000000..716c012
 --- /dev/null
 +++ b/net/hv_sock/Makefile
 @@ -0,0 +1,3 @@
 +obj-$(CONFIG_HYPERV_SOCK) += hv_sock.o
 +hv_sock-y += af_hvsock.o
 diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c
 new file mode 100644
-index 0000000..331d375
+index 0000000..b91bd60
 --- /dev/null
 +++ b/net/hv_sock/af_hvsock.c
-@@ -0,0 +1,1507 @@
+@@ -0,0 +1,1520 @@
 +/*
 + * Hyper-V Sockets -- a socket-based communication channel between the
 + * Hyper-V host and the virtual machines running on it.
 + *
-+ * Copyright (c) 2016 Microsoft Corporation.
-+ *
-+ * All rights reserved.
++ * Copyright(c) 2016, Microsoft Corporation. All rights reserved.
 + *
 + * Redistribution and use in source and binary forms, with or without
 + * modification, are permitted provided that the following conditions
 + * products derived from this software without specific prior written
 + * permission.
 + *
-+ * Alternatively, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") version 2 as published by the Free
-+ * Software Foundation.
-+ *
 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 +
 +#include <linux/module.h>
 +#include <linux/hyperv.h>
-+#include <linux/vmalloc.h>
 +#include <net/af_hvsock.h>
 +
++static uint send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND;
++static uint recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV;
++static uint max_socket_number = 1024;
++
++static atomic_t total_num_hvsock = ATOMIC_INIT(0);
++
++module_param(send_ring_page, uint, 0444);
++MODULE_PARM_DESC(send_ring_page, "Send ring buffer size (# of pages)");
++
++module_param(recv_ring_page, uint, 0444);
++MODULE_PARM_DESC(recv_ring_page, "Receive ring buffer size (# of pages)");
++
++module_param(max_socket_number, uint, 0644);
++MODULE_PARM_DESC(max_socket_number, "The max number of created sockets");
++
 +static struct proto hvsock_proto = {
 +	.name		= "HV_SOCK",
 +	.owner		= THIS_MODULE,
 +	.obj_size	= sizeof(struct hvsock_sock),
 +};
 +
 +#define SS_LISTEN 255
 +
-+#define HVSOCK_CONNECT_TIMEOUT (30 * HZ)
-+
-+/* This is an artificial limit */
-+#define HVSOCK_MAX_BACKLOG 128
-+
 +static LIST_HEAD(hvsock_bound_list);
 +static LIST_HEAD(hvsock_connected_list);
 +static DEFINE_MUTEX(hvsock_mutex);
 +
++static bool uuid_equals(uuid_le u1, uuid_le u2)
++{
++	return !uuid_le_cmp(u1, u2);
++}
++
 +static struct sock *hvsock_find_bound_socket(const struct sockaddr_hv *addr)
 +{
 +	struct hvsock_sock *hvsk;
 +
 +	list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) {
-+		if (!uuid_le_cmp(addr->shv_service_guid,
-+				 hvsk->local_addr.shv_service_guid))
++		if (uuid_equals(addr->shv_service_id,
++				hvsk->local_addr.shv_service_id))
 +			return hvsock_to_sk(hvsk);
 +	}
 +	return NULL;
 +}
 +
 +	return NULL;
 +}
 +
-+static void hvsock_enqueue_accept(struct sock *listener,
-+				  struct sock *connected)
++static
++void hvsock_enqueue_accept(struct sock *listener, struct sock *connected)
 +{
-+	struct hvsock_sock *hvconnected;
 +	struct hvsock_sock *hvlistener;
++	struct hvsock_sock *hvconnected;
 +
 +	hvlistener = sk_to_hvsock(listener);
 +	hvconnected = sk_to_hvsock(connected);
 +}
 +
 +static struct sock *hvsock_dequeue_accept(struct sock *listener)
 +{
-+	struct hvsock_sock *hvconnected;
 +	struct hvsock_sock *hvlistener;
++	struct hvsock_sock *hvconnected;
 +
 +	hvlistener = sk_to_hvsock(listener);
 +
 +static void hvsock_addr_init(struct sockaddr_hv *addr, uuid_le service_id)
 +{
 +	memset(addr, 0, sizeof(*addr));
 +	addr->shv_family = AF_HYPERV;
-+	addr->shv_service_guid = service_id;
++	addr->shv_service_id = service_id;
 +}
 +
 +static int hvsock_addr_validate(const struct sockaddr_hv *addr)
 +
 +static bool hvsock_addr_bound(const struct sockaddr_hv *addr)
 +{
-+	return !!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY);
++	return !uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY);
 +}
 +
 +static int hvsock_addr_cast(const struct sockaddr *addr, size_t len,
 +	struct sockaddr_hv hv_addr;
 +	int ret = 0;
 +
-+	hvsock_addr_init(&hv_addr, addr->shv_service_guid);
++	hvsock_addr_init(&hv_addr, addr->shv_service_id);
 +
 +	mutex_lock(&hvsock_mutex);
 +
-+	if (!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY)) {
++	if (uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY)) {
 +		do {
-+			uuid_le_gen(&hv_addr.shv_service_guid);
++			uuid_le_gen(&hv_addr.shv_service_id);
 +		} while (hvsock_find_bound_socket(&hv_addr));
 +	} else {
 +		if (hvsock_find_bound_socket(&hv_addr)) {
 +		}
 +	}
 +
-+	hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_guid);
++	hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_id);
 +
 +	sock_hold(&hvsk->sk);
 +	list_add(&hvsk->bound_list, &hvsock_bound_list);
 +
 +static void hvsock_sk_destruct(struct sock *sk)
 +{
-+	struct vmbus_channel *channel;
-+	struct hvsock_sock *hvsk;
++	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
++	struct vmbus_channel *channel = hvsk->channel;
 +
-+	hvsk = sk_to_hvsock(sk);
-+	vfree(hvsk->send);
-+	vfree(hvsk->recv);
++	kfree(hvsk->send);
++	kfree(hvsk->recv);
++	atomic_dec(&total_num_hvsock);
 +
-+	channel = hvsk->channel;
 +	if (!channel)
 +		return;
 +
 +	return 0;
 +}
 +
-+static struct sock *hvsock_create(struct net *net, struct socket *sock,
-+				  gfp_t priority, unsigned short type)
++static int hvsock_create(struct net *net, struct socket *sock,
++			 gfp_t priority, unsigned short type,
++			 struct sock **sk)
 +{
++	struct hvsock_send_buf *send = NULL;
++	struct hvsock_recv_buf *recv = NULL;
 +	struct hvsock_sock *hvsk;
-+	struct sock *sk;
++	int ret = -EMFILE;
++	int num_hvsock;
 +
-+	sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0);
-+	if (!sk)
-+		return NULL;
++	num_hvsock = atomic_inc_return(&total_num_hvsock);
++	if (num_hvsock > max_socket_number)
++		goto err;
 +
-+	sock_init_data(sock, sk);
++	ret = -ENOMEM;
++	send = kmalloc(sizeof(*send), GFP_KERNEL);
++	recv = kmalloc(sizeof(*recv), GFP_KERNEL);
++	if (!send || !recv)
++		goto err;
 +
-+	/* sk->sk_type is normally set in sock_init_data, but only if sock
++	*sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0);
++	if (!*sk)
++		goto err;
++
++	sock_init_data(sock, *sk);
++
++	/* (*sk)->sk_type is normally set in sock_init_data, but only if sock
 +	 * is non-NULL. We make sure that our sockets always have a type by
 +	 * setting it here if needed.
 +	 */
 +	if (!sock)
-+		sk->sk_type = type;
++		(*sk)->sk_type = type;
 +
-+	sk->sk_destruct = hvsock_sk_destruct;
++	(*sk)->sk_destruct = hvsock_sk_destruct;
 +
 +	/* Looks stream-based socket doesn't need this. */
-+	sk->sk_backlog_rcv = NULL;
++	(*sk)->sk_backlog_rcv = NULL;
 +
-+	sk->sk_state = 0;
-+	sock_reset_flag(sk, SOCK_DONE);
++	(*sk)->sk_state = 0;
++	sock_reset_flag(*sk, SOCK_DONE);
 +
-+	hvsk = sk_to_hvsock(sk);
++	hvsk = sk_to_hvsock(*sk);
 +
-+	hvsk->send = NULL;
-+	hvsk->recv = NULL;
++	hvsk->send = send;
++	hvsk->recv = recv;
 +
 +	hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY);
 +	hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY);
 +
 +	hvsk->peer_shutdown = 0;
 +
-+	return sk;
++	hvsk->recv->data_len = 0;
++	hvsk->recv->data_offset = 0;
++
++	return 0;
++err:
++	atomic_dec(&total_num_hvsock);
++	kfree(send);
++	kfree(recv);
++	*sk = NULL;
++	return ret;
 +}
 +
 +static int hvsock_bind(struct socket *sock, struct sockaddr *addr,
 +	if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0)
 +		return -EINVAL;
 +
-+	if (uuid_le_cmp(hv_addr->shv_vm_guid, NULL_UUID_LE))
++	if (!uuid_equals(hv_addr->shv_vm_id, NULL_UUID_LE))
 +		return -EINVAL;
 +
 +	lock_sock(sk);
 +					&dummy,
 +					&avl_write_bytes);
 +
-+		/* We only write if there is enough space */
-+		*can_write = avl_write_bytes > HVSOCK_PKT_LEN(PAGE_SIZE_4K);
++		*can_write = avl_write_bytes >
++				HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD);
 +	}
 +}
 +
 +				&dummy,
 +				&avl_write_bytes);
 +
++	if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))
++		return 0;
++
 +	/* The ringbuffer mustn't be 100% full, and we should reserve a
 +	 * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
 +	 * and hvsock_shutdown().
 +	 */
-+	if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))
-+		return 0;
 +	ret = avl_write_bytes - HVSOCK_PKT_LEN(1) - HVSOCK_PKT_LEN(0);
 +
 +	return round_down(ret, 8);
 +}
 +
-+static int hvsock_get_send_buf(struct hvsock_sock *hvsk)
-+{
-+	hvsk->send = vmalloc(sizeof(*hvsk->send));
-+	return hvsk->send ? 0 : -ENOMEM;
-+}
-+
-+static void hvsock_put_send_buf(struct hvsock_sock *hvsk)
-+{
-+	vfree(hvsk->send);
-+	hvsk->send = NULL;
-+}
-+
 +static int hvsock_send_data(struct vmbus_channel *channel,
 +			    struct hvsock_sock *hvsk,
 +			    size_t to_write)
 +			    0, VM_PKT_DATA_INBAND, 0);
 +}
 +
-+static int hvsock_get_recv_buf(struct hvsock_sock *hvsk)
-+{
-+	hvsk->recv = vmalloc(sizeof(*hvsk->recv));
-+	return hvsk->recv ? 0 : -ENOMEM;
-+}
-+
-+static void hvsock_put_recv_buf(struct hvsock_sock *hvsk)
-+{
-+	vfree(hvsk->recv);
-+	hvsk->recv = NULL;
-+}
-+
 +static int hvsock_recv_data(struct vmbus_channel *channel,
 +			    struct hvsock_sock *hvsk,
 +			    size_t *payload_len)
 +{
 +	struct hvsock_sock *hvsk;
 +	struct sock *sk;
-+	int ret = 0;
 +
 +	if (mode < SHUT_RD || mode > SHUT_RDWR)
 +		return -EINVAL;
 +
 +	if (mode & SEND_SHUTDOWN) {
 +		hvsk = sk_to_hvsock(sk);
-+
-+		ret = hvsock_get_send_buf(hvsk);
-+		if (ret < 0)
-+			goto out;
-+
 +		/* It can't fail: see get_ringbuffer_writable_bytes(). */
 +		(void)hvsock_send_data(hvsk->channel, hvsk, 0);
-+
-+		hvsock_put_send_buf(hvsk);
 +	}
 +
-+out:
 +	release_sock(sk);
 +
-+	return ret;
++	return 0;
 +}
 +
 +static unsigned int hvsock_poll(struct file *file, struct socket *sock,
 +	struct vmbus_channel *channel;
 +	bool can_read, can_write;
 +	struct hvsock_sock *hvsk;
-+	unsigned int mask;
 +	struct sock *sk;
++	unsigned int mask;
 +
 +	sk = sock->sk;
 +	hvsk = sk_to_hvsock(sk);
 +		/* If there is something in the queue then we can read */
 +		get_ringbuffer_rw_status(channel, &can_read, &can_write);
 +
-+		if (!can_read && hvsk->recv)
++		if (!can_read && hvsk->recv->data_len > 0)
 +			can_read = true;
 +
 +		if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read)
 +			mask |= POLLIN | POLLRDNORM;
 +	} else {
++		can_read = false;
 +		can_write = false;
 +	}
 +
 +static void hvsock_on_channel_cb(void *ctx)
 +{
 +	struct sock *sk = (struct sock *)ctx;
-+	struct vmbus_channel *channel;
-+	struct hvsock_sock *hvsk;
++	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
++	struct vmbus_channel *channel = hvsk->channel;
 +	bool can_read, can_write;
 +
-+	hvsk = sk_to_hvsock(sk);
-+	channel = hvsk->channel;
-+	BUG_ON(!channel);
++	if (!channel) {
++		WARN_ONCE(1, "NULL channel! There is a programming bug.\n");
++		return;
++	}
 +
 +	get_ringbuffer_rw_status(channel, &can_read, &can_write);
 +
 +static int hvsock_open_connection(struct vmbus_channel *channel)
 +{
-+	struct hvsock_sock *hvsk = NULL, *new_hvsk = NULL;
-+	uuid_le *instance, *service_id;
-+	unsigned char conn_from_host;
++	struct hvsock_sock *hvsk, *new_hvsk;
 +	struct sockaddr_hv hv_addr;
-+	struct sock *sk, *new_sk = NULL;
++	struct sock *sk, *new_sk;
++	unsigned char conn_from_host;
++
++	uuid_le *instance, *service_id;
 +	int ret;
 +
 +	instance = &channel->offermsg.offer.if_instance;
 +
 +	if (conn_from_host) {
 +		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
-+			ret = -ECONNREFUSED;
++			ret = -EMFILE;
 +			goto out;
 +		}
 +
-+		new_sk = hvsock_create(sock_net(sk), NULL, GFP_KERNEL,
-+				       sk->sk_type);
-+		if (!new_sk) {
-+			ret = -ENOMEM;
++		ret = hvsock_create(sock_net(sk), NULL, GFP_KERNEL,
++				    sk->sk_type, &new_sk);
++		if (ret != 0)
 +			goto out;
-+		}
 +
 +		new_sk->sk_state = SS_CONNECTING;
 +		new_hvsk = sk_to_hvsock(new_sk);
 +	}
 +
 +	set_channel_read_state(channel, false);
-+	ret = vmbus_open(channel, RINGBUFFER_HVSOCK_SND_SIZE,
-+			 RINGBUFFER_HVSOCK_RCV_SIZE, NULL, 0,
++	ret = vmbus_open(channel, send_ring_page * PAGE_SIZE,
++			 recv_ring_page * PAGE_SIZE, NULL, 0,
 +			 hvsock_on_channel_cb, conn_from_host ? new_sk : sk);
 +	if (ret != 0) {
 +		if (conn_from_host) {
 +	}
 +
 +	vmbus_set_chn_rescind_callback(channel, hvsock_close_connection);
-+
-+	/* see get_ringbuffer_rw_status() */
 +	set_channel_pending_send_size(channel,
-+				      HVSOCK_PKT_LEN(PAGE_SIZE_4K) + 1);
++				      HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD));
 +
 +	if (conn_from_host) {
 +		new_sk->sk_state = SS_CONNECTED;
 +			       int flags, int current_ret)
 +{
 +	struct sock *sk = sock->sk;
-+	struct hvsock_sock *hvsk;
-+	int ret = current_ret;
-+	DEFINE_WAIT(wait);
-+	long timeout;
++	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
++
++	int ret = current_ret;
++
++	long timeout = 30 * HZ;
++	DEFINE_WAIT(wait);
 +
-+	hvsk = sk_to_hvsock(sk);
-+	timeout = HVSOCK_CONNECT_TIMEOUT;
 +	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 +
 +	while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
 +	sk->sk_state = SS_CONNECTING;
 +
 +	ret = vmbus_send_tl_connect_request(
-+					&hvsk->local_addr.shv_service_guid,
-+					&hvsk->remote_addr.shv_service_guid);
++					&hvsk->local_addr.shv_service_id,
++					&hvsk->remote_addr.shv_service_id);
 +	if (ret < 0)
 +		goto out;
 +
 +		lock_sock(connected);
 +		hvconnected = sk_to_hvsock(connected);
 +
-+		if (!ret) {
++		if (ret) {
++			release_sock(connected);
++			sock_put(connected);
++		} else {
 +			newsock->state = SS_CONNECTED;
 +			sock_graft(connected, newsock);
++			release_sock(connected);
++			sock_put(connected);
 +		}
-+		release_sock(connected);
-+		sock_put(connected);
 +	}
 +
 +out_wait:
 +	return ret;
 +}
 +
-+static int hvsock_accept(struct socket *sock, struct socket *newsock,
-+			 int flags)
++static
++int hvsock_accept(struct socket *sock, struct socket *newsock, int flags)
 +{
 +	struct sock *listener;
 +	int ret;
 +		ret = -EINVAL;
 +		goto out;
 +	}
-+	if (backlog > HVSOCK_MAX_BACKLOG)
-+		backlog = HVSOCK_MAX_BACKLOG;
++	/* This is an artificial limit */
++	if (backlog > 128)
++		backlog = 128;
 +
 +	hvsk = sk_to_hvsock(sk);
 +	if (!hvsock_addr_bound(&hvsk->local_addr)) {
 +	return ret;
 +}
 +
-+static int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg,
-+			       size_t len)
++static
++int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, size_t len)
 +{
 +	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
-+	struct vmbus_channel *channel;
++	struct vmbus_channel *channel = hvsk->channel;
++
 +	size_t total_to_write = len;
 +	size_t total_written = 0;
-+	DEFINE_WAIT(wait);
 +	bool can_write;
++
++	int ret = 0;
++
++	DEFINE_WAIT(wait);
 +	long timeout;
-+	int ret = -EIO;
 +
 +	timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 +	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-+	channel = hvsk->channel;
 +
 +	while (total_to_write > 0) {
 +		size_t to_write, max_writable;
 +		if (max_writable == 0)
 +			goto out_wait;
 +
-+		to_write = min_t(size_t, sizeof(hvsk->send->buf),
++		to_write = min_t(size_t, HVSOCK_SND_BUF_SZ,
 +				 total_to_write);
 +		if (to_write > max_writable)
 +			to_write = max_writable;
 +
-+		ret = hvsock_get_send_buf(hvsk);
-+		if (ret < 0)
-+			goto out_wait;
-+
 +		ret = memcpy_from_msg(hvsk->send->buf, msg, to_write);
-+		if (ret != 0) {
-+			hvsock_put_send_buf(hvsk);
++		if (ret != 0)
 +			goto out_wait;
-+		}
 +
 +		ret = hvsock_send_data(channel, hvsk, to_write);
-+		hvsock_put_send_buf(hvsk);
 +		if (ret != 0)
 +			goto out_wait;
 +
 +	return ret;
 +}
 +
-+static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg,
-+			  size_t len)
++static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 +{
 +	struct hvsock_sock *hvsk;
 +	struct sock *sk;
 +	if (len == 0)
 +		return -EINVAL;
 +
-+	if (msg->msg_flags & ~MSG_DONTWAIT)
++	if (msg->msg_flags & ~MSG_DONTWAIT) {
++		pr_err("%s: unsupported flags=0x%x\n", __func__,
++		       msg->msg_flags);
 +		return -EOPNOTSUPP;
++	}
 +
 +	sk = sock->sk;
 +	hvsk = sk_to_hvsock(sk);
 +out:
 +	release_sock(sk);
 +
-+	/* ret should be a bigger-than-0 total_written or a negative err
-+	 * code.
-+	 */
-+	BUG_ON(ret == 0);
++	/* ret is a bigger-than-0 total_written or a negative err code. */
++	if (ret == 0) {
++		WARN(1, "unexpected return value of 0\n");
++		ret = -EIO;
++	}
 +
 +	return ret;
 +}
 +			      size_t len, int flags)
 +{
 +	struct hvsock_sock *hvsk = sk_to_hvsock(sk);
++	struct vmbus_channel *channel = hvsk->channel;
++
 +	size_t to_read, total_to_read = len;
-+	struct vmbus_channel *channel;
-+	DEFINE_WAIT(wait);
 +	size_t copied = 0;
 +	bool can_read;
-+	long timeout;
++
 +	int ret = 0;
 +
++	DEFINE_WAIT(wait);
++	long timeout;
++
 +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 +	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-+	channel = hvsk->channel;
 +
 +	while (1) {
-+		bool need_refill = !hvsk->recv;
++		bool need_refill = hvsk->recv->data_len == 0;
 +
-+		if (need_refill) {
-+			if (hvsk->peer_shutdown & SEND_SHUTDOWN)
-+				can_read = false;
-+			else
-+				get_ringbuffer_rw_status(channel, &can_read,
-+							 NULL);
-+		} else {
++		if (need_refill)
++			get_ringbuffer_rw_status(channel, &can_read, NULL);
++		else
 +			can_read = true;
-+		}
 +
 +		if (can_read) {
 +			size_t payload_len;
 +
 +			if (need_refill) {
-+				ret = hvsock_get_recv_buf(hvsk);
-+				if (ret < 0) {
-+					if (copied > 0)
-+						ret = copied;
-+					goto out_wait;
-+				}
-+
 +				ret = hvsock_recv_data(channel, hvsk,
 +						       &payload_len);
 +				if (ret != 0 ||
-+				    payload_len > sizeof(hvsk->recv->buf)) {
++				    payload_len > HVSOCK_RCV_BUF_SZ) {
 +					ret = -EIO;
-+					hvsock_put_recv_buf(hvsk);
 +					goto out_wait;
 +				}
 +
 +				if (payload_len == 0) {
 +					ret = copied;
-+					hvsock_put_recv_buf(hvsk);
-+					hvsk->peer_shutdown |= SEND_SHUTDOWN;
-+					break;
++					goto out_wait;
 +				}
 +
 +				hvsk->recv->data_len = payload_len;
 +			hvsk->recv->data_len -= to_read;
 +
 +			if (hvsk->recv->data_len == 0)
-+				hvsock_put_recv_buf(hvsk);
++				hvsk->recv->data_offset = 0;
 +			else
 +				hvsk->recv->data_offset += to_read;
 +
 +		else if (sk->sk_shutdown & RCV_SHUTDOWN)
 +			ret = 0;
 +
-+	if (copied > 0)
++	if (copied > 0) {
 +		ret = copied;
++
++		/* If the other side has shutdown for sending and there
++		 * is nothing more to read, then we modify the socket
++		 * state.
++		 */
++		if ((hvsk->peer_shutdown & SEND_SHUTDOWN) &&
++		    hvsk->recv->data_len == 0) {
++			get_ringbuffer_rw_status(channel, &can_read, NULL);
++			if (!can_read) {
++				sk->sk_state = SS_UNCONNECTED;
++				sock_set_flag(sk, SOCK_DONE);
++				sk->sk_state_change(sk);
++			}
++		}
++	}
 +out_wait:
 +	finish_wait(sk_sleep(sk), &wait);
 +	return ret;
 +}
 +
 +	/* We ignore msg->addr_name/len. */
 +	if (flags & ~MSG_DONTWAIT) {
++		pr_err("%s: unsupported flags=0x%x\n", __func__, flags);
 +		ret = -EOPNOTSUPP;
 +		goto out;
 +	}
 +{
 +	struct sock *sk;
 +
++	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
++		return -EPERM;
++
 +	if (protocol != 0 && protocol != SHV_PROTO_RAW)
 +		return -EPROTONOSUPPORT;
 +
 +
 +	sock->state = SS_UNCONNECTED;
 +
-+	sk = hvsock_create(net, sock, GFP_KERNEL, 0);
-+	return sk ? 0 : -ENOMEM;
++	return hvsock_create(net, sock, GFP_KERNEL, 0, &sk);
 +}
 +
 +static const struct net_proto_family hvsock_family_ops = {
 +{
 +	int ret;
 +
-+	if (vmbus_proto_version < VERSION_WIN10)
++	if (send_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_SND)
++		send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND;
++
++	if (recv_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_RCV)
++		recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV;
++
++	/* Hyper-V Sockets requires at least VMBus 4.0 */
++	if ((vmbus_proto_version >> 16) < 4) {
++		pr_err("failed to load: VMBus 4 or later is required\n");
 +		return -ENODEV;
++	}
 +
 +	ret = vmbus_driver_register(&hvsock_drv);
 +	if (ret) {
 +
 +MODULE_DESCRIPTION("Hyper-V Sockets");
 +MODULE_LICENSE("Dual BSD/GPL");
--- 
-2.10.1
+-- 
+2.10.0
+
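As a companion to the uapi hunks above, here is a minimal sketch of how a
guest application would connect out to a host service over hv_sock. It is
illustrative only and not code from the series: AF_HYPERV (43) and the
sockaddr_hv layout mirror the hunks above, protocol 0 is used because
hvsock_create() accepts it, and the service GUID is a placeholder that a
real host administrator would have registered.

    /* Illustrative sketch, not from the series. Assumes AF_HYPERV == 43
     * and the sockaddr_hv layout from the uapi hunks above.
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef AF_HYPERV
    #define AF_HYPERV 43
    #endif

    struct hv_guid { uint8_t b[16]; };   /* stand-in for uuid_le */

    struct sockaddr_hv {                 /* layout per the uapi hunk */
            sa_family_t shv_family;
            uint16_t reserved;           /* must be zero */
            struct hv_guid shv_vm_id;    /* zero GUID = SHV_VMID_GUEST */
            struct hv_guid shv_service_id;
    };

    int main(void)
    {
            /* Placeholder service GUID; a real one is registered on
             * the host side. */
            static const struct hv_guid service = {
                    { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
                      0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 } };
            struct sockaddr_hv addr;
            int fd;

            /* Needs CAP_SYS_ADMIN or CAP_NET_ADMIN: see the capability
             * check added in the May version above. */
            fd = socket(AF_HYPERV, SOCK_STREAM, 0);
            if (fd < 0) {
                    perror("socket");
                    return 1;
            }

            memset(&addr, 0, sizeof(addr));
            addr.shv_family = AF_HYPERV;
            addr.shv_service_id = service;

            if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                    perror("connect");

            close(fd);
            return 0;
    }

Since the reverted-to May version requires a privileged caller, this must run
as root (or with the listed capabilities) inside the guest.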
Srinivasan" +Cc: Haiyang Zhang +Origin: https://patchwork.ozlabs.org/patch/600009 +--- + net/core/sock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index 925def4..323f7a3 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -264,7 +264,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , + "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , +- "sk_lock-AF_MAX" ++ "sk_lock-AF_HYPERV", "sk_lock-AF_MAX" + }; + static const char *const af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , +@@ -281,7 +281,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , + "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , +- "slock-AF_MAX" ++ "slock-AF_HYPERV", "slock-AF_MAX" + }; + static const char *const af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , +@@ -298,7 +298,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , + "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , +- "clock-AF_MAX" ++ "clock-AF_HYPERV", "clock-AF_MAX" + }; + + /* +-- +2.10.0 + diff --git a/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch b/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch similarity index 67% rename from alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch rename to alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch index aa2a432bb..8c3619340 100644 --- a/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch +++ b/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch @@ -1,8 +1,8 @@ -From b7c88e0b81e971a99a4213515ea3bce1c136a724 Mon Sep 17 00:00:00 2001 +From cd11346c60451032d97062e25ed025bf692dff91 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 21 May 2016 16:55:50 +0800 -Subject: [PATCH 2/4] Drivers: hv: vmbus: fix the race when querying & updating - the percpu list +Subject: [PATCH 41/42] Drivers: hv: vmbus: fix the race when querying & + updating the percpu list There is a rare race when we remove an entry from the global list hv_context.percpu_list[cpu] in hv_process_channel_removal() -> @@ -28,24 +28,24 @@ Origin: https://github.com/dcui/linux/commit/fbcca73228b9b90911ab30fdf75f532b2b7 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 56dd261..75343e0 100644 +index 57a1b65..da76a2e 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -592,6 +592,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - + out: - tasklet_enable(tasklet); + tasklet_enable(tasklet); + tasklet_schedule(tasklet); - - return ret; + + return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index b6c1211..8f4e6070 100644 +index c892db5..0a54317 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - + #include +#include #include @@ -53,80 +53,81 @@ 
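Patch 0040 is needed because these string tables are positional: each is
declared with AF_MAX + 1 entries and initialized in address-family order, so
adding AF_HYPERV to socket.h without the matching strings would shift the
"*-AF_MAX" sentinel onto the new family's index. A stand-alone illustration of
that invariant (not code from the series; the table here is trimmed to a few
hypothetical families):

    /* Illustrative only. Shows why the tables patched above must grow in
     * lockstep with the family count: without the new entry, the sentinel
     * would occupy the new family's slot and the final slot would be NULL.
     */
    #include <stdio.h>

    #define MY_AF_HYPERV 2   /* hypothetical newest family */
    #define MY_AF_MAX    3

    static const char *const key_strings[MY_AF_MAX + 1] = {
            "sk_lock-AF_UNSPEC",
            "sk_lock-AF_UNIX",
            "sk_lock-AF_HYPERV",  /* the kind of entry patch 0040 adds */
            "sk_lock-AF_MAX"      /* sentinel stays last */
    };

    int main(void)
    {
            printf("%s\n", key_strings[MY_AF_HYPERV]);
            return 0;
    }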
diff --git a/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch b/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch
similarity index 67%
rename from alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch
rename to alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch
index aa2a432bb..8c3619340 100644
--- a/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch
+++ b/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch
@@ -1,8 +1,8 @@
-From b7c88e0b81e971a99a4213515ea3bce1c136a724 Mon Sep 17 00:00:00 2001
+From cd11346c60451032d97062e25ed025bf692dff91 Mon Sep 17 00:00:00 2001
 From: Dexuan Cui
 Date: Sat, 21 May 2016 16:55:50 +0800
-Subject: [PATCH 2/4] Drivers: hv: vmbus: fix the race when querying & updating
- the percpu list
+Subject: [PATCH 41/42] Drivers: hv: vmbus: fix the race when querying &
+ updating the percpu list
 
 There is a rare race when we remove an entry from the global list
 hv_context.percpu_list[cpu] in hv_process_channel_removal() ->
 
 Origin: https://github.com/dcui/linux/commit/fbcca73228b9b90911ab30fdf75f532b2b7
  2 files changed, 17 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
-index 56dd261..75343e0 100644
+index 57a1b65..da76a2e 100644
 --- a/drivers/hv/channel.c
 +++ b/drivers/hv/channel.c
 @@ -592,6 +592,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
  
  out:
  	tasklet_enable(tasklet);
 +	tasklet_schedule(tasklet);
  
  	return ret;
  }
 diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
-index b6c1211..8f4e6070 100644
+index c892db5..0a54317 100644
 --- a/drivers/hv/channel_mgmt.c
 +++ b/drivers/hv/channel_mgmt.c
 @@ -21,6 +21,7 @@
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kernel.h>
 +#include <linux/interrupt.h>
  #include <linux/sched.h>
 @@ -307,12 +308,13 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
  {
  	unsigned long flags;
  	struct vmbus_channel *primary_channel;
-
-	vmbus_release_relid(relid);
 +	struct tasklet_struct *tasklet;
  
  	BUG_ON(!channel->rescind);
  	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
  
 +	tasklet = hv_context.event_dpc[channel->target_cpu];
 +	tasklet_disable(tasklet);
  	if (channel->target_cpu != get_cpu()) {
  		put_cpu();
  		smp_call_function_single(channel->target_cpu,
 @@ -321,6 +323,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
  		percpu_channel_deq(channel);
  		put_cpu();
  	}
 +	tasklet_enable(tasklet);
 +	tasklet_schedule(tasklet);
  
  	if (channel->primary_channel == NULL) {
  		list_del(&channel->listentry);
 @@ -342,6 +346,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
  		       &primary_channel->alloced_cpus_in_node);
  
  	free_channel(channel);
 +
 +	vmbus_release_relid(relid);
  }
  
  void vmbus_free_channels(void)
 @@ -363,6 +369,7 @@ void vmbus_free_channels(void)
   */
  static void vmbus_process_offer(struct vmbus_channel *newchannel)
  {
 +	struct tasklet_struct *tasklet;
  	struct vmbus_channel *channel;
  	bool fnew = true;
  	unsigned long flags;
 @@ -409,6 +416,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
  
  	init_vp_index(newchannel, dev_type);
  
 +	tasklet = hv_context.event_dpc[newchannel->target_cpu];
 +	tasklet_disable(tasklet);
  	if (newchannel->target_cpu != get_cpu()) {
  		put_cpu();
  		smp_call_function_single(newchannel->target_cpu,
 @@ -418,6 +427,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
  		percpu_channel_enq(newchannel);
  		put_cpu();
  	}
 +	tasklet_enable(tasklet);
 +	tasklet_schedule(tasklet);
  
  	/*
  	 * This state is used to indicate a successful open
 @@ -469,6 +480,7 @@ err_deq_chan:
  	list_del(&newchannel->listentry);
  	mutex_unlock(&vmbus_connection.channel_mutex);
  
 +	tasklet_disable(tasklet);
  	if (newchannel->target_cpu != get_cpu()) {
  		put_cpu();
  		smp_call_function_single(newchannel->target_cpu,
 @@ -477,6 +489,8 @@ err_deq_chan:
  		percpu_channel_deq(newchannel);
  		put_cpu();
  	}
 +	tasklet_enable(tasklet);
 +	tasklet_schedule(tasklet);
  
  err_free_chan:
  	free_channel(newchannel);
-- 
-2.10.1
+-- 
+2.10.0
+
diff --git a/alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch b/alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch
similarity index 74%
rename from alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch
rename to alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch
index 36f180fe9..d477b2713 100644
--- a/alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch
+++ b/alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch
@@ -1,7 +1,7 @@
-From 7b394fbb825d9367fa6433ff2382af2fc32fb1c6 Mon Sep 17 00:00:00 2001
+From 7abd92fd5987e1ad79f2272cbe544be0cfe84165 Mon Sep 17 00:00:00 2001
 From: Rolf Neugebauer
 Date: Mon, 23 May 2016 18:55:45 +0100
-Subject: [PATCH 4/4] vmbus: Don't spam the logs with unknown GUIDs
+Subject: [PATCH 42/42] vmbus: Don't spam the logs with unknown GUIDs
 
 With Hyper-V sockets device types are introduced on the fly. The pr_info()
 then prints a message on every connection, which is way too verbose. Since
 
 Signed-off-by: Rolf Neugebauer
  1 file changed, 1 deletion(-)
 
 diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
-index 8f4e6070..ef4a512 100644
+index 0a54317..120ee22 100644
 --- a/drivers/hv/channel_mgmt.c
 +++ b/drivers/hv/channel_mgmt.c
 @@ -147,7 +147,6 @@ static u16 hv_get_dev_type(const uuid_le *guid)
  		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
  			return i;
  	}
 -	pr_info("Unknown GUID: %pUl\n", guid);
  	return i;
  }
 
+
+-- 
+2.10.0
-- 
-2.10.1
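Finally, to round out the hv_sock picture from patch 0039, here is the
guest-side listening flow that pairs with the connect sketch earlier. As
before, this is an illustration rather than code from the series: AF_HYPERV
(43) and the sockaddr_hv layout come from the uapi hunks, the service GUID is
a placeholder, and the backlog behavior (silently capped at 128) matches the
hvsock_listen() hunk above.

    /* Illustrative guest-side listener, not from the series. Assumes
     * AF_HYPERV == 43 and the sockaddr_hv layout from the uapi hunks;
     * the service GUID is a placeholder the host would connect to.
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef AF_HYPERV
    #define AF_HYPERV 43
    #endif

    struct hv_guid { uint8_t b[16]; };   /* stand-in for uuid_le */

    struct sockaddr_hv {                 /* layout per the uapi hunk */
            sa_family_t shv_family;
            uint16_t reserved;           /* must be zero */
            struct hv_guid shv_vm_id;    /* zero GUID = this guest */
            struct hv_guid shv_service_id;
    };

    int main(void)
    {
            /* Placeholder GUID registered for this service on the host. */
            static const struct hv_guid service = {
                    { 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
                      0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22 } };
            struct sockaddr_hv addr;
            int fd, conn;

            fd = socket(AF_HYPERV, SOCK_STREAM, 0);
            if (fd < 0) {
                    perror("socket");
                    return 1;
            }

            memset(&addr, 0, sizeof(addr));
            addr.shv_family = AF_HYPERV;
            addr.shv_service_id = service;

            if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
                listen(fd, 8) < 0) {
                    perror("bind/listen");
                    close(fd);
                    return 1;
            }

            conn = accept(fd, NULL, NULL);
            if (conn >= 0)
                    close(conn);
            close(fd);
            return 0;
    }

Binding the all-zero wildcard GUID instead would make hvsock_auto_bind()
generate a fresh service GUID, which is only useful for outgoing connections.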