From 4b02dc7ada79c70b27994ca4e4d9465cf2328ad3 Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Wed, 16 Nov 2016 16:27:04 +0000 Subject: [PATCH] kernel: update to 4.8.8 This removes all the patches which have been upstreamed since 4.4.x and only leaves patches for a minor fix to AF_VSOCK, the Hyper-V socket patch and a new patch for fixing delays on creating netns with tunnel interfaces. The latter has been accepted into the upstream netdev branch and will likely appear in 4.9.0 and we can cherry pick from there then. Signed-off-by: Rolf Neugebauer --- alpine/kernel/Dockerfile | 8 +- ...host-network-namespace-to-use-AF_VS.patch} | 18 +- ...make-find_vqs-checkpatch.pl-friendly.patch | 219 -- ...fix-the-race-when-querying-updating.patch} | 125 +- ...vmci_transport_notify_ops-structures.patch | 77 - ...the-area-influenced-by-prepare_to_wa.patch | 336 --- ...3-hv_sock-introduce-Hyper-V-Sockets.patch} | 525 ++-- ...-t-spam-the-logs-with-unknown-GUIDs.patch} | 19 +- ...istener-child-lock-ordering-explicit.patch | 63 - ...t-specific-vsock_transport-functions.patch | 59 - ...api-struct-as-not-busy-poll-candidat.patch | 49 + ...OCK-defer-sock-removal-to-transports.patch | 83 - ...OCK-Introduce-virtio_vsock_common.ko.patch | 1496 ----------- ...-VSOCK-Introduce-virtio_transport.ko.patch | 663 ----- .../0009-VSOCK-Introduce-vhost_vsock.ko.patch | 777 ------ .../0010-VSOCK-Add-Makefile-and-Kconfig.patch | 106 - .../patches/0011-VSOCK-Use-kvfree.patch | 33 - ...vhost-virtio_vsock_pkt-use-after-fre.patch | 53 - ...-virtio-vsock-fix-include-guard-typo.patch | 28 - ...drop-space-available-check-for-TX-vq.patch | 61 - ...e-the-channel-type-for-Hyper-V-PCI-E.patch | 63 - ...-vmbus-Use-uuid_le-type-consistently.patch | 297 --- ...-Use-uuid_le_cmp-for-comparing-GUIDs.patch | 55 - ...-do-sanity-check-of-channel-state-in.patch | 42 - ...-release-relid-on-error-in-vmbus_pro.patch | 74 - ...-channge-vmbus_connection.channel_lo.patch | 116 - 
...e-code-duplication-between-vmbus_rec.patch | 126 - ...-fix-the-building-warning-with-hyper.patch | 72 - ...-Treat-Fibre-Channel-devices-as-perf.patch | 42 - ...us-Add-vendor-and-device-atttributes.patch | 355 --- ...-add-a-helper-function-to-set-a-chan.patch | 36 - ...-define-the-new-offer-type-for-Hyper.patch | 44 - ...-vmbus_sendpacket_ctl-hvsock-avoid-u.patch | 45 - ...-define-a-new-VMBus-message-type-for.patch | 101 - ...-add-a-hvsock-flag-in-struct-hv_driv.patch | 64 - ...s-add-a-per-channel-rescind-callback.patch | 72 - ...-add-an-API-vmbus_hvsock_device_unre.patch | 153 -- ...-Give-control-over-how-the-ring-acce.patch | 208 -- ...s-avoid-wait_for_completion-on-crash.patch | 100 - ...-avoid-unneeded-compiler-optimizatio.patch | 39 - ...Kernel-Connection-Multiplexor-module.patch | 2312 ----------------- ...AF_KCM-entries-to-family-name-tables.patch | 52 - .../0038-net-Add-Qualcomm-IPC-router.patch | 1307 ---------- ...HYPERV-entries-to-family-name-tables.patch | 49 - 44 files changed, 387 insertions(+), 10235 deletions(-) rename alpine/kernel/patches/{0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch => 0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch} (72%) delete mode 100644 alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch rename alpine/kernel/patches/{0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch => 0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch} (67%) delete mode 100644 alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch delete mode 100644 alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch rename alpine/kernel/patches/{0039-hv_sock-introduce-Hyper-V-Sockets.patch => 0003-hv_sock-introduce-Hyper-V-Sockets.patch} (81%) rename alpine/kernel/patches/{0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch => 0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch} (74%) delete mode 100644 
alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch delete mode 100644 alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch create mode 100644 alpine/kernel/patches/0005-gro_cells-mark-napi-struct-as-not-busy-poll-candidat.patch delete mode 100644 alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch delete mode 100644 alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch delete mode 100644 alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch delete mode 100644 alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch delete mode 100644 alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch delete mode 100644 alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch delete mode 100644 alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch delete mode 100644 alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch delete mode 100644 alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch delete mode 100644 alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch delete mode 100644 alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch delete mode 100644 alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch delete mode 100644 alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch delete mode 100644 alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch delete mode 100644 alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch delete mode 100644 alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch delete mode 100644 alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch delete mode 100644 
alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch delete mode 100644 alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch delete mode 100644 alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch delete mode 100644 alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch delete mode 100644 alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch delete mode 100644 alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch delete mode 100644 alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch delete mode 100644 alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch delete mode 100644 alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch delete mode 100644 alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch delete mode 100644 alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch delete mode 100644 alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch delete mode 100644 alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch delete mode 100644 alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch delete mode 100644 alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch delete mode 100644 alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch diff --git a/alpine/kernel/Dockerfile b/alpine/kernel/Dockerfile index 50db9d37c..d1efa016a 100644 --- a/alpine/kernel/Dockerfile +++ b/alpine/kernel/Dockerfile @@ -1,6 +1,6 @@ FROM mobylinux/alpine-build-c:7303e33e9dcd5276b8bb5269644a9bf3354008c8 -ARG KERNEL_VERSION=4.4.32 +ARG KERNEL_VERSION=4.8.8 ENV 
KERNEL_SOURCE=https://www.kernel.org/pub/linux/kernel/v4.x/linux-${KERNEL_VERSION}.tar.xz @@ -8,10 +8,10 @@ RUN curl -fsSL -o linux-${KERNEL_VERSION}.tar.xz ${KERNEL_SOURCE} RUN cat linux-${KERNEL_VERSION}.tar.xz | tar --absolute-names -xJ && mv /linux-${KERNEL_VERSION} /linux -# this is aufs4.4 20160912 +# this is aufs4.8 20161010 ENV AUFS_REPO https://github.com/sfjro/aufs4-standalone -ENV AUFS_BRANCH aufs4.4 -ENV AUFS_COMMIT 7d174ae40b4c9c876ee51aa50fa4ee1f3747de23 +ENV AUFS_BRANCH aufs4.8 +ENV AUFS_COMMIT e9fd128dcb16167417683e199a5feb14f3c9eca8 # Download AUFS RUN git clone -b "$AUFS_BRANCH" "$AUFS_REPO" /aufs && \ diff --git a/alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch b/alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch similarity index 72% rename from alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch rename to alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch index cc76287a8..83a00fd99 100644 --- a/alpine/kernel/patches/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch +++ b/alpine/kernel/patches/0001-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch @@ -1,8 +1,7 @@ -From 8719b508f509c06a7821d6f8e2fc1fcad84d6fbb Mon Sep 17 00:00:00 2001 +From 888876dd84da7cdcb7c2ce7568efb2a2adbc9031 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 4 Apr 2016 14:50:10 +0100 -Subject: [PATCH 15/42] VSOCK: Only allow host network namespace to use - AF_VSOCK. +Subject: [PATCH 1/5] VSOCK: Only allow host network namespace to use AF_VSOCK. The VSOCK addressing schema does not really lend itself to simply creating an alternative end point address within a namespace. 
@@ -13,19 +12,18 @@ Signed-off-by: Ian Campbell 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index 17dbbe6..1bb1b01 100644 +index 8a398b3..0edc54c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1852,6 +1852,9 @@ static const struct proto_ops vsock_stream_ops = { static int vsock_create(struct net *net, struct socket *sock, - int protocol, int kern) + int protocol, int kern) { + if (!net_eq(net, &init_net)) + return -EAFNOSUPPORT; + - if (!sock) - return -EINVAL; - --- -2.10.0 + if (!sock) + return -EINVAL; +-- +2.10.1 diff --git a/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch b/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch deleted file mode 100644 index 0fd255114..000000000 --- a/alpine/kernel/patches/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch +++ /dev/null @@ -1,219 +0,0 @@ -From 622883ec571c468f756195c13726740bdd33a0ee Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 17 Dec 2015 16:53:43 +0800 -Subject: [PATCH 01/42] virtio: make find_vqs() checkpatch.pl-friendly - -checkpatch.pl wants arrays of strings declared as follows: - - static const char * const names[] = { "vq-1", "vq-2", "vq-3" }; - -Currently the find_vqs() function takes a const char *names[] argument -so passing checkpatch.pl's const char * const names[] results in a -compiler error due to losing the second const. - -This patch adjusts the find_vqs() prototype and updates all virtio -transports. This makes it possible for virtio_balloon.c, virtio_input.c, -virtgpu_kms.c, and virtio_rpmsg_bus.c to use the checkpatch.pl-friendly -type. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Bjorn Andersson -(cherry picked from commit f7ad26ff952b3ca2702d7da03aad0ab1f6c01d7c) ---- - drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- - drivers/misc/mic/card/mic_virtio.c | 2 +- - drivers/remoteproc/remoteproc_virtio.c | 2 +- - drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- - drivers/s390/virtio/kvm_virtio.c | 2 +- - drivers/s390/virtio/virtio_ccw.c | 2 +- - drivers/virtio/virtio_balloon.c | 2 +- - drivers/virtio/virtio_input.c | 2 +- - drivers/virtio/virtio_mmio.c | 2 +- - drivers/virtio/virtio_pci_common.c | 4 ++-- - drivers/virtio/virtio_pci_common.h | 2 +- - drivers/virtio/virtio_pci_modern.c | 2 +- - include/linux/virtio_config.h | 2 +- - 13 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c -index 06496a1..4150873 100644 ---- a/drivers/gpu/drm/virtio/virtgpu_kms.c -+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c -@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) - static vq_callback_t *callbacks[] = { - virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack - }; -- static const char *names[] = { "control", "cursor" }; -+ static const char * const names[] = { "control", "cursor" }; - - struct virtio_gpu_device *vgdev; - /* this will expand later */ -diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c -index e486a0c..f6ed57d 100644 ---- a/drivers/misc/mic/card/mic_virtio.c -+++ b/drivers/misc/mic/card/mic_virtio.c -@@ -311,7 +311,7 @@ unmap: - static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct mic_vdev *mvdev = to_micvdev(vdev); - struct mic_device_ctrl __iomem *dc = mvdev->dc; -diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c -index e1a1023..e44872f 100644 ---- a/drivers/remoteproc/remoteproc_virtio.c -+++ 
b/drivers/remoteproc/remoteproc_virtio.c -@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) - static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct rproc *rproc = vdev_to_rproc(vdev); - int i, ret; -diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c -index 73354ee..1fcd27c 100644 ---- a/drivers/rpmsg/virtio_rpmsg_bus.c -+++ b/drivers/rpmsg/virtio_rpmsg_bus.c -@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len, - static int rpmsg_probe(struct virtio_device *vdev) - { - vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done }; -- const char *names[] = { "input", "output" }; -+ static const char * const names[] = { "input", "output" }; - struct virtqueue *vqs[2]; - struct virtproc_info *vrp; - void *bufs_va; -diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c -index 53fb975..1d060fd 100644 ---- a/drivers/s390/virtio/kvm_virtio.c -+++ b/drivers/s390/virtio/kvm_virtio.c -@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev) - static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct kvm_device *kdev = to_kvmdev(vdev); - int i; -diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c -index 1b83159..bf2d130 100644 ---- a/drivers/s390/virtio/virtio_ccw.c -+++ b/drivers/s390/virtio/virtio_ccw.c -@@ -635,7 +635,7 @@ out: - static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_ccw_device *vcdev = to_vc_device(vdev); - unsigned long *indicatorp = NULL; -diff --git 
a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c -index 56f7e25..66082c9 100644 ---- a/drivers/virtio/virtio_balloon.c -+++ b/drivers/virtio/virtio_balloon.c -@@ -394,7 +394,7 @@ static int init_vqs(struct virtio_balloon *vb) - { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; -- const char *names[] = { "inflate", "deflate", "stats" }; -+ static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; - - /* -diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c -index c96944b..350a2a5 100644 ---- a/drivers/virtio/virtio_input.c -+++ b/drivers/virtio/virtio_input.c -@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) - struct virtqueue *vqs[2]; - vq_callback_t *cbs[] = { virtinput_recv_events, - virtinput_recv_status }; -- static const char *names[] = { "events", "status" }; -+ static const char * const names[] = { "events", "status" }; - int err; - - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); -diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c -index f499d9d..745c6ee 100644 ---- a/drivers/virtio/virtio_mmio.c -+++ b/drivers/virtio/virtio_mmio.c -@@ -482,7 +482,7 @@ error_available: - static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - unsigned int irq = platform_get_irq(vm_dev->pdev, 0); -diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c -index 2046a68..f6bed86 100644 ---- a/drivers/virtio/virtio_pci_common.c -+++ b/drivers/virtio/virtio_pci_common.c -@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev) - static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const 
char *names[], -+ const char * const names[], - bool use_msix, - bool per_vq_vectors) - { -@@ -376,7 +376,7 @@ error_find: - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - int err; - -diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h -index b976d96..2cc2522 100644 ---- a/drivers/virtio/virtio_pci_common.h -+++ b/drivers/virtio/virtio_pci_common.h -@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev); - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]); -+ const char * const names[]); - const char *vp_bus_name(struct virtio_device *vdev); - - /* Setup the affinity for a virtqueue: -diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c -index 4469202..631021c 100644 ---- a/drivers/virtio/virtio_pci_modern.c -+++ b/drivers/virtio/virtio_pci_modern.c -@@ -423,7 +423,7 @@ err_new_queue: - static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtqueue *vq; -diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h -index e5ce8ab..6e6cb0c 100644 ---- a/include/linux/virtio_config.h -+++ b/include/linux/virtio_config.h -@@ -70,7 +70,7 @@ struct virtio_config_ops { - int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]); -+ const char * const names[]); - void (*del_vqs)(struct virtio_device *); - u64 (*get_features)(struct virtio_device *vdev); - int (*finalize_features)(struct virtio_device *vdev); --- -2.10.0 - diff --git 
a/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch b/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch similarity index 67% rename from alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch rename to alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch index 8c3619340..113d22564 100644 --- a/alpine/kernel/patches/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch +++ b/alpine/kernel/patches/0002-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch @@ -1,8 +1,8 @@ -From cd11346c60451032d97062e25ed025bf692dff91 Mon Sep 17 00:00:00 2001 +From 478faef05c2fd212054793b4f5fde3f7ad502e20 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 21 May 2016 16:55:50 +0800 -Subject: [PATCH 41/42] Drivers: hv: vmbus: fix the race when querying & - updating the percpu list +Subject: [PATCH 2/5] Drivers: hv: vmbus: fix the race when querying & updating + the percpu list There is a rare race when we remove an entry from the global list hv_context.percpu_list[cpu] in hv_process_channel_removal() -> @@ -28,24 +28,24 @@ Origin: https://github.com/dcui/linux/commit/fbcca73228b9b90911ab30fdf75f532b2b7 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 57a1b65..da76a2e 100644 +index 56dd261..75343e0 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -592,6 +592,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - + out: - tasklet_enable(tasklet); + tasklet_enable(tasklet); + tasklet_schedule(tasklet); - - return ret; + + return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index c892db5..0a54317 100644 +index b6c1211..8f4e6070 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - + #include +#include #include @@ -53,81 +53,80 @@ index 
c892db5..0a54317 100644 #include @@ -307,12 +308,13 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) { - unsigned long flags; - struct vmbus_channel *primary_channel; + unsigned long flags; + struct vmbus_channel *primary_channel; - - vmbus_release_relid(relid); + struct tasklet_struct *tasklet; - - BUG_ON(!channel->rescind); - BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - + + BUG_ON(!channel->rescind); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + + tasklet = hv_context.event_dpc[channel->target_cpu]; + tasklet_disable(tasklet); - if (channel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(channel->target_cpu, + if (channel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(channel->target_cpu, @@ -321,6 +323,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - percpu_channel_deq(channel); - put_cpu(); - } + percpu_channel_deq(channel); + put_cpu(); + } + tasklet_enable(tasklet); + tasklet_schedule(tasklet); - - if (channel->primary_channel == NULL) { - list_del(&channel->listentry); + + if (channel->primary_channel == NULL) { + list_del(&channel->listentry); @@ -342,6 +346,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - &primary_channel->alloced_cpus_in_node); - - free_channel(channel); + &primary_channel->alloced_cpus_in_node); + + free_channel(channel); + + vmbus_release_relid(relid); } - + void vmbus_free_channels(void) @@ -363,6 +369,7 @@ void vmbus_free_channels(void) */ static void vmbus_process_offer(struct vmbus_channel *newchannel) { + struct tasklet_struct *tasklet; - struct vmbus_channel *channel; - bool fnew = true; - unsigned long flags; + struct vmbus_channel *channel; + bool fnew = true; + unsigned long flags; @@ -409,6 +416,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - - init_vp_index(newchannel, dev_type); - + + init_vp_index(newchannel, dev_type); + + tasklet = 
hv_context.event_dpc[newchannel->target_cpu]; + tasklet_disable(tasklet); - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(newchannel->target_cpu, @@ -418,6 +427,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - percpu_channel_enq(newchannel); - put_cpu(); - } + percpu_channel_enq(newchannel); + put_cpu(); + } + tasklet_enable(tasklet); + tasklet_schedule(tasklet); - - /* - * This state is used to indicate a successful open -@@ -469,6 +480,7 @@ err_deq_chan: - list_del(&newchannel->listentry); - mutex_unlock(&vmbus_connection.channel_mutex); - -+ tasklet_disable(tasklet); - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, -@@ -477,6 +489,8 @@ err_deq_chan: - percpu_channel_deq(newchannel); - put_cpu(); - } -+ tasklet_enable(tasklet); -+ tasklet_schedule(tasklet); - - err_free_chan: - free_channel(newchannel); --- -2.10.0 + /* + * This state is used to indicate a successful open +@@ -469,6 +480,7 @@ err_deq_chan: + list_del(&newchannel->listentry); + mutex_unlock(&vmbus_connection.channel_mutex); + ++ tasklet_disable(tasklet); + if (newchannel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(newchannel->target_cpu, +@@ -477,6 +489,8 @@ err_deq_chan: + percpu_channel_deq(newchannel); + put_cpu(); + } ++ tasklet_enable(tasklet); ++ tasklet_schedule(tasklet); + + err_free_chan: + free_channel(newchannel); +-- +2.10.1 diff --git a/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch b/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch deleted file mode 100644 index ff2c9634d..000000000 --- a/alpine/kernel/patches/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 8dc15fd8fab55e076a640d1a5d6f34b77e196632 Mon Sep 17 
00:00:00 2001 -From: Julia Lawall -Date: Sat, 21 Nov 2015 18:39:17 +0100 -Subject: [PATCH 02/42] VSOCK: constify vmci_transport_notify_ops structures - -The vmci_transport_notify_ops structures are never modified, so declare -them as const. - -Done with the help of Coccinelle. - -Signed-off-by: Julia Lawall -Signed-off-by: David S. Miller -(cherry picked from commit 3b22dae38db1cea9ead3229f08cfb0b69aca5706) ---- - net/vmw_vsock/vmci_transport.h | 2 +- - net/vmw_vsock/vmci_transport_notify.c | 2 +- - net/vmw_vsock/vmci_transport_notify.h | 5 +++-- - net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +- - 4 files changed, 6 insertions(+), 5 deletions(-) - -diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h -index 2ad46f3..1820e74 100644 ---- a/net/vmw_vsock/vmci_transport.h -+++ b/net/vmw_vsock/vmci_transport.h -@@ -121,7 +121,7 @@ struct vmci_transport { - u64 queue_pair_max_size; - u32 detach_sub_id; - union vmci_transport_notify notify; -- struct vmci_transport_notify_ops *notify_ops; -+ const struct vmci_transport_notify_ops *notify_ops; - struct list_head elem; - struct sock *sk; - spinlock_t lock; /* protects sk. */ -diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c -index 9b7f207..fd8cf02 100644 ---- a/net/vmw_vsock/vmci_transport_notify.c -+++ b/net/vmw_vsock/vmci_transport_notify.c -@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) - } - - /* Socket control packet based operations. 
*/ --struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { -+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { - vmci_transport_notify_pkt_socket_init, - vmci_transport_notify_pkt_socket_destruct, - vmci_transport_notify_pkt_poll_in, -diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h -index 7df7932..3c464d3 100644 ---- a/net/vmw_vsock/vmci_transport_notify.h -+++ b/net/vmw_vsock/vmci_transport_notify.h -@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { - void (*process_negotiate) (struct sock *sk); - }; - --extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; --extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; -+extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; -+extern const -+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; - - #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ -diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c -index dc9c792..21e591d 100644 ---- a/net/vmw_vsock/vmci_transport_notify_qstate.c -+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c -@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( - } - - /* Socket always on control packet based operations. 
*/ --struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { -+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { - vmci_transport_notify_pkt_socket_init, - vmci_transport_notify_pkt_socket_destruct, - vmci_transport_notify_pkt_poll_in, --- -2.10.0 - diff --git a/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch b/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch deleted file mode 100644 index 8c0d62952..000000000 --- a/alpine/kernel/patches/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch +++ /dev/null @@ -1,336 +0,0 @@ -From 761aa629641afa804127aea0e3ce5c95dddfcb17 Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Tue, 22 Mar 2016 17:05:52 +0100 -Subject: [PATCH 03/42] AF_VSOCK: Shrink the area influenced by prepare_to_wait - -When a thread is prepared for waiting by calling prepare_to_wait, sleeping -is not allowed until either the wait has taken place or finish_wait has -been called. The existing code in af_vsock imposed unnecessary no-sleep -assumptions to a broad list of backend functions. -This patch shrinks the influence of prepare_to_wait to the area where it -is strictly needed, therefore relaxing the no-sleep restriction there. - -Signed-off-by: Claudio Imbrenda -Signed-off-by: David S. 
Miller -(cherry picked from commit f7f9b5e7f8eccfd68ffa7b8d74b07c478bb9e7f0) ---- - net/vmw_vsock/af_vsock.c | 158 +++++++++++++++++++++++++---------------------- - 1 file changed, 85 insertions(+), 73 deletions(-) - -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index 9b5bd6d..b5f1221 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait_error; -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ goto out_wait; - } else if (timeout == 0) { - err = -ETIMEDOUT; -- goto out_wait_error; -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ goto out_wait; - } - - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, - - if (sk->sk_err) { - err = -sk->sk_err; -- goto out_wait_error; -- } else -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ } else { - err = 0; -+ } - - out_wait: - finish_wait(sk_sleep(sk), &wait); - out: - release_sock(sk); - return err; -- --out_wait_error: -- sk->sk_state = SS_UNCONNECTED; -- sock->state = SS_UNCONNECTED; -- goto out_wait; - } - - static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) -@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - listener->sk_err == 0) { - release_sock(listener); - timeout = schedule_timeout(timeout); -+ finish_wait(sk_sleep(listener), &wait); - lock_sock(listener); - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait; -+ goto out; - } else if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ goto out; - } - - prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); - } -+ finish_wait(sk_sleep(listener), &wait); - 
- if (listener->sk_err) - err = -listener->sk_err; -@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - */ - if (err) { - vconnected->rejected = true; -- release_sock(connected); -- sock_put(connected); -- goto out_wait; -+ } else { -+ newsock->state = SS_CONNECTED; -+ sock_graft(connected, newsock); - } - -- newsock->state = SS_CONNECTED; -- sock_graft(connected, newsock); - release_sock(connected); - sock_put(connected); - } - --out_wait: -- finish_wait(sk_sleep(listener), &wait); - out: - release_sock(listener); - return err; -@@ -1557,11 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - if (err < 0) - goto out; - -- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - while (total_written < len) { - ssize_t written; - -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (vsock_stream_has_space(vsk) == 0 && - sk->sk_err == 0 && - !(sk->sk_shutdown & SEND_SHUTDOWN) && -@@ -1570,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - /* Don't wait for non-blocking sockets. 
*/ - if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } - - err = transport->notify_send_pre_block(vsk, &send_data); -- if (err < 0) -- goto out_wait; -+ if (err < 0) { -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; -+ } - - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } else if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - } -+ finish_wait(sk_sleep(sk), &wait); - - /* These checks occur both as part of and after the loop - * conditional since we need to check before and after -@@ -1598,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - */ - if (sk->sk_err) { - err = -sk->sk_err; -- goto out_wait; -+ goto out_err; - } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || - (vsk->peer_shutdown & RCV_SHUTDOWN)) { - err = -EPIPE; -- goto out_wait; -+ goto out_err; - } - - err = transport->notify_send_pre_enqueue(vsk, &send_data); - if (err < 0) -- goto out_wait; -+ goto out_err; - - /* Note that enqueue will only write as many bytes as are free - * in the produce queue, so we don't need to ensure len is -@@ -1620,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - len - total_written); - if (written < 0) { - err = -ENOMEM; -- goto out_wait; -+ goto out_err; - } - - total_written += written; -@@ -1628,14 +1633,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - err = transport->notify_send_post_enqueue( - vsk, written, &send_data); - if (err < 0) -- goto out_wait; -+ goto out_err; - - } - --out_wait: -+out_err: - if (total_written > 0) - err = total_written; -- finish_wait(sk_sleep(sk), &wait); - out: - 
release_sock(sk); - return err; -@@ -1716,21 +1720,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - if (err < 0) - goto out; - -- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - while (1) { -- s64 ready = vsock_stream_has_data(vsk); -+ s64 ready; - -- if (ready < 0) { -- /* Invalid queue pair content. XXX This should be -- * changed to a connection reset in a later change. -- */ -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -+ ready = vsock_stream_has_data(vsk); - -- err = -ENOMEM; -- goto out_wait; -- } else if (ready > 0) { -+ if (ready == 0) { -+ if (sk->sk_err != 0 || -+ (sk->sk_shutdown & RCV_SHUTDOWN) || -+ (vsk->peer_shutdown & SEND_SHUTDOWN)) { -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ /* Don't wait for non-blocking sockets. */ -+ if (timeout == 0) { -+ err = -EAGAIN; -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ -+ err = transport->notify_recv_pre_block( -+ vsk, target, &recv_data); -+ if (err < 0) { -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ release_sock(sk); -+ timeout = schedule_timeout(timeout); -+ lock_sock(sk); -+ -+ if (signal_pending(current)) { -+ err = sock_intr_errno(timeout); -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } else if (timeout == 0) { -+ err = -EAGAIN; -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ } else { - ssize_t read; - -+ finish_wait(sk_sleep(sk), &wait); -+ -+ if (ready < 0) { -+ /* Invalid queue pair content. XXX This should -+ * be changed to a connection reset in a later -+ * change. 
-+ */ -+ -+ err = -ENOMEM; -+ goto out; -+ } -+ - err = transport->notify_recv_pre_dequeue( - vsk, target, &recv_data); - if (err < 0) -@@ -1750,42 +1794,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - vsk, target, read, - !(flags & MSG_PEEK), &recv_data); - if (err < 0) -- goto out_wait; -+ goto out; - - if (read >= target || flags & MSG_PEEK) - break; - - target -= read; -- } else { -- if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) -- || (vsk->peer_shutdown & SEND_SHUTDOWN)) { -- break; -- } -- /* Don't wait for non-blocking sockets. */ -- if (timeout == 0) { -- err = -EAGAIN; -- break; -- } -- -- err = transport->notify_recv_pre_block( -- vsk, target, &recv_data); -- if (err < 0) -- break; -- -- release_sock(sk); -- timeout = schedule_timeout(timeout); -- lock_sock(sk); -- -- if (signal_pending(current)) { -- err = sock_intr_errno(timeout); -- break; -- } else if (timeout == 0) { -- err = -EAGAIN; -- break; -- } -- -- prepare_to_wait(sk_sleep(sk), &wait, -- TASK_INTERRUPTIBLE); - } - } - -@@ -1797,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - if (copied > 0) - err = copied; - --out_wait: -- finish_wait(sk_sleep(sk), &wait); - out: - release_sock(sk); - return err; --- -2.10.0 - diff --git a/alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch b/alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch similarity index 81% rename from alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch rename to alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch index b1c07b141..51e91680a 100644 --- a/alpine/kernel/patches/0039-hv_sock-introduce-Hyper-V-Sockets.patch +++ b/alpine/kernel/patches/0003-hv_sock-introduce-Hyper-V-Sockets.patch @@ -1,7 +1,7 @@ -From 51293adacd73d7bc6baee18e87b0d17ad52a61d4 Mon Sep 17 00:00:00 2001 +From ccd1d4920969d0dcba862dc98f9b4747a383bfe2 Mon Sep 17 00:00:00 2001 From: Dexuan Cui -Date: Sun, 15 May 2016 
09:53:11 -0700 -Subject: [PATCH 39/42] hv_sock: introduce Hyper-V Sockets +Date: Sat, 23 Jul 2016 01:35:51 +0000 +Subject: [PATCH 3/5] hv_sock: introduce Hyper-V Sockets Hyper-V Sockets (hv_sock) supplies a byte-stream based communication mechanism between the host and the guest. It's somewhat like TCP over @@ -22,29 +22,30 @@ Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Vitaly Kuznetsov Cc: Cathy Avery -Origin: https://patchwork.ozlabs.org/patch/622404/ +Cc: Olaf Hering +Origin: https://patchwork.kernel.org/patch/9244467/ --- MAINTAINERS | 2 + - include/linux/hyperv.h | 14 + + include/linux/hyperv.h | 13 + include/linux/socket.h | 4 +- include/net/af_hvsock.h | 78 +++ - include/uapi/linux/hyperv.h | 25 + + include/uapi/linux/hyperv.h | 23 + net/Kconfig | 1 + net/Makefile | 1 + net/hv_sock/Kconfig | 10 + net/hv_sock/Makefile | 3 + - net/hv_sock/af_hvsock.c | 1520 +++++++++++++++++++++++++++++++++++++++++++ - 10 files changed, 1657 insertions(+), 1 deletion(-) + net/hv_sock/af_hvsock.c | 1507 +++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 1641 insertions(+), 1 deletion(-) create mode 100644 include/net/af_hvsock.h create mode 100644 net/hv_sock/Kconfig create mode 100644 net/hv_sock/Makefile create mode 100644 net/hv_sock/af_hvsock.c diff --git a/MAINTAINERS b/MAINTAINERS -index 12d49f5..fa87bdd 100644 +index babaf82..6126545 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -5123,7 +5123,9 @@ F: drivers/input/serio/hyperv-keyboard.c +@@ -5667,7 +5667,9 @@ F: drivers/pci/host/pci-hyperv.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/video/fbdev/hyperv_fb.c @@ -53,15 +54,15 @@ index 12d49f5..fa87bdd 100644 +F: include/net/af_hvsock.h F: tools/hv/ F: Documentation/ABI/stable/sysfs-bus-vmbus - + diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 6c9695e..187d4bd 100644 +index b10954a..50f8976 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h -@@ -1349,4 +1349,18 @@ extern __u32 vmbus_proto_version; - - 
int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, - const uuid_le *shv_host_servie_id); +@@ -1505,5 +1505,18 @@ static inline void commit_rd_index(struct vmbus_channel *channel) + vmbus_set_event(channel); + } + +struct vmpipe_proto_header { + u32 pkt_type; + u32 data_size; @@ -72,13 +73,13 @@ index 6c9695e..187d4bd 100644 + +/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */ +#define PREV_INDICES_LEN (sizeof(u64)) -+ + +#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ + ALIGN((payload_len), 8) + \ + PREV_INDICES_LEN) #endif /* _HYPERV_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h -index dbd81e7..6634c47 100644 +index b5cc5a6..0b68b58 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -202,8 +202,9 @@ struct ucred { @@ -86,10 +87,10 @@ index dbd81e7..6634c47 100644 #define AF_KCM 41 /* Kernel Connection Multiplexor*/ #define AF_QIPCRTR 42 /* Qualcomm IPC Router */ +#define AF_HYPERV 43 /* Hyper-V Sockets */ - + -#define AF_MAX 43 /* For now.. */ +#define AF_MAX 44 /* For now.. */ - + /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -251,6 +252,7 @@ struct ucred { @@ -98,11 +99,11 @@ index dbd81e7..6634c47 100644 #define PF_QIPCRTR AF_QIPCRTR +#define PF_HYPERV AF_HYPERV #define PF_MAX AF_MAX - + /* Maximum queue length specifiable by listen. 
*/ diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h new file mode 100644 -index 0000000..7c8c41e +index 0000000..e7a8a3a --- /dev/null +++ b/include/net/af_hvsock.h @@ -0,0 +1,78 @@ @@ -113,49 +114,39 @@ index 0000000..7c8c41e +#include +#include + -+/* Note: 3-page is the minimal recv ringbuffer size by default: -+ * -+ * the 1st page is used as the shared read/write index etc, rather than data: -+ * see hv_ringbuffer_init(); -+ * -+ * the payload length in the vmbus pipe message received from the host can -+ * be 4096 bytes, and considing the header of HVSOCK_HEADER_LEN bytes, we -+ * need at least 2 extra pages for ringbuffer data. ++/* The host side's design of the feature requires 5 exact 4KB pages for ++ * recv/send rings respectively -- this is suboptimal considering memory ++ * consumption, however unluckily we have to live with it, before the ++ * host comes up with a better design in the future. + */ -+#define HVSOCK_RCV_BUF_SZ PAGE_SIZE -+#define DEF_RINGBUFFER_PAGES_HVSOCK_RCV 3 ++#define PAGE_SIZE_4K 4096 ++#define RINGBUFFER_HVSOCK_RCV_SIZE (PAGE_SIZE_4K * 5) ++#define RINGBUFFER_HVSOCK_SND_SIZE (PAGE_SIZE_4K * 5) + -+/* As to send, here let's make sure the hvsock_send_buf struct can be held in 1 -+ * page, and since we want to use 2 pages for the send ringbuffer size (this is -+ * the minimal size by default, because the 1st page of the two is used as the -+ * shared read/write index etc, rather than data), we only have 1 page for -+ * ringbuffer data, this means: the max payload length for hvsock data is -+ * PAGE_SIZE - HVSOCK_PKT_LEN(0). And, let's reduce the length by 8-bytes -+ * because the ringbuffer can't be 100% full: see hv_ringbuffer_write(). ++/* The MTU is 16KB per the host side's design. ++ * In future, the buffer can be elimiated when we switch to use the coming ++ * new VMBus ringbuffer "in-place consumption" APIs, by which we can ++ * directly copy data from VMBus ringbuffer into the userspace buffer. 
+ */ -+#define HVSOCK_SND_BUF_SZ (PAGE_SIZE - HVSOCK_PKT_LEN(0) - 8) -+#define DEF_RINGBUFFER_PAGES_HVSOCK_SND 2 -+ -+/* We only send data when the available space is "big enough". This artificial -+ * value must be less than HVSOCK_SND_BUF_SZ. -+ * -+ */ -+#define HVSOCK_SND_THRESHOLD (PAGE_SIZE / 2) -+ -+#define sk_to_hvsock(__sk) ((struct hvsock_sock *)(__sk)) -+#define hvsock_to_sk(__hvsk) ((struct sock *)(__hvsk)) -+ -+struct hvsock_send_buf { -+ struct vmpipe_proto_header hdr; -+ u8 buf[HVSOCK_SND_BUF_SZ]; -+}; -+ ++#define HVSOCK_MTU_SIZE (1024 * 16) +struct hvsock_recv_buf { -+ struct vmpipe_proto_header hdr; -+ u8 buf[HVSOCK_RCV_BUF_SZ]; -+ + unsigned int data_len; + unsigned int data_offset; ++ ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MTU_SIZE]; ++}; ++ ++/* In the VM, actually we can send up to HVSOCK_MTU_SIZE bytes of payload, ++ * but for now let's use a smaller size to minimize the dynamically-allocated ++ * buffer. Note: the buffer can be elimiated in future when we add new VMBus ++ * ringbuffer APIs that allow us to directly copy data from userspace buf to ++ * VMBus ringbuffer. ++ */ ++#define HVSOCK_MAX_SND_SIZE_BY_VM (1024 * 4) ++struct hvsock_send_buf { ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MAX_SND_SIZE_BY_VM]; +}; + +struct hvsock_sock { @@ -183,25 +174,34 @@ index 0000000..7c8c41e + struct hvsock_recv_buf *recv; +}; + ++static inline struct hvsock_sock *sk_to_hvsock(struct sock *sk) ++{ ++ return (struct hvsock_sock *)sk; ++} ++ ++static inline struct sock *hvsock_to_sk(struct hvsock_sock *hvsk) ++{ ++ return (struct sock *)hvsk; ++} ++ +#endif /* __AF_HVSOCK_H__ */ diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h -index e347b24..408b832 100644 +index e347b24..eb3e44b 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -26,6 +26,7 @@ #define _UAPI_HYPERV_H - + #include +#include - + /* * Framework version for util services. 
-@@ -396,4 +397,28 @@ struct hv_kvp_ip_msg { - struct hv_kvp_ipaddr_value kvp_ip_val; +@@ -396,4 +397,26 @@ struct hv_kvp_ip_msg { + struct hv_kvp_ipaddr_value kvp_ip_val; } __attribute__((packed)); - -+/* -+ * This is the address fromat of Hyper-V Sockets. + ++/* This is the address format of Hyper-V Sockets. + * Note: here we just borrow the kernel's built-in type uuid_le. When + * an application calls bind() or connect(), the 2 members of struct + * sockaddr_hv must be of GUID. @@ -209,12 +209,11 @@ index e347b24..408b832 100644 + * the first 3 fields. Refer to: + * https://en.wikipedia.org/wiki/Globally_unique_identifier + */ -+#define guid_t uuid_le +struct sockaddr_hv { + __kernel_sa_family_t shv_family; /* Address family */ -+ __le16 reserved; /* Must be Zero */ -+ guid_t shv_vm_id; /* VM ID */ -+ guid_t shv_service_id; /* Service ID */ ++ u16 reserved; /* Must be Zero */ ++ uuid_le shv_vm_guid; /* VM ID */ ++ uuid_le shv_service_guid; /* Service ID */ +}; + +#define SHV_VMID_GUEST NULL_UUID_LE @@ -226,10 +225,10 @@ index e347b24..408b832 100644 + #endif /* _UAPI_HYPERV_H */ diff --git a/net/Kconfig b/net/Kconfig -index 1c9fda1..9eeccb7 100644 +index c2cdbce..921e86f 100644 --- a/net/Kconfig +++ b/net/Kconfig -@@ -228,6 +228,7 @@ source "net/dns_resolver/Kconfig" +@@ -231,6 +231,7 @@ source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" @@ -238,7 +237,7 @@ index 1c9fda1..9eeccb7 100644 source "net/mpls/Kconfig" source "net/hsr/Kconfig" diff --git a/net/Makefile b/net/Makefile -index bdd1455..ec175dd 100644 +index 9bd20bb..b4d4e9a 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ @@ -251,7 +250,7 @@ index bdd1455..ec175dd 100644 ifneq ($(CONFIG_NET_SWITCHDEV),) diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig new file mode 100644 -index 0000000..1f41848 +index 0000000..ff84875 --- /dev/null +++ b/net/hv_sock/Kconfig @@ -0,0 
+1,10 @@ @@ -260,8 +259,8 @@ index 0000000..1f41848 + depends on HYPERV + default m if HYPERV + help -+ Hyper-V Sockets is somewhat like TCP over VMBus, allowing -+ communication between Linux guest and Hyper-V host without TCP/IP. ++ Hyper-V Sockets is a socket interface for high speed ++ communication between Linux guest and Hyper-V host over VMBus. + + To compile this driver as a module, choose M here: the module + will be called hv_sock. @@ -276,15 +275,17 @@ index 0000000..716c012 +hv_sock-y += af_hvsock.o diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c new file mode 100644 -index 0000000..b91bd60 +index 0000000..331d375 --- /dev/null +++ b/net/hv_sock/af_hvsock.c -@@ -0,0 +1,1520 @@ +@@ -0,0 +1,1507 @@ +/* + * Hyper-V Sockets -- a socket-based communication channel between the + * Hyper-V host and the virtual machines running on it. + * -+ * Copyright(c) 2016, Microsoft Corporation. All rights reserved. ++ * Copyright (c) 2016 Microsoft Corporation. ++ * ++ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions @@ -299,6 +300,10 @@ index 0000000..b91bd60 + * products derived from this software without specific prior written + * permission. + * ++ * Alternatively, this software may be distributed under the terms of the ++ * GNU General Public License ("GPL") version 2 as published by the Free ++ * Software Foundation. 
++ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -315,23 +320,9 @@ index 0000000..b91bd60 + +#include +#include ++#include +#include + -+static uint send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND; -+static uint recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV; -+static uint max_socket_number = 1024; -+ -+static atomic_t total_num_hvsock = ATOMIC_INIT(0); -+ -+module_param(send_ring_page, uint, 0444); -+MODULE_PARM_DESC(send_ring_page, "Send ring buffer size (# of pages)"); -+ -+module_param(recv_ring_page, uint, 0444); -+MODULE_PARM_DESC(recv_ring_page, "Receive ring buffer size (# of pages)"); -+ -+module_param(max_socket_number, uint, 0644); -+MODULE_PARM_DESC(max_socket_number, "The max number of created sockets"); -+ +static struct proto hvsock_proto = { + .name = "HV_SOCK", + .owner = THIS_MODULE, @@ -340,22 +331,22 @@ index 0000000..b91bd60 + +#define SS_LISTEN 255 + ++#define HVSOCK_CONNECT_TIMEOUT (30 * HZ) ++ ++/* This is an artificial limit */ ++#define HVSOCK_MAX_BACKLOG 128 ++ +static LIST_HEAD(hvsock_bound_list); +static LIST_HEAD(hvsock_connected_list); +static DEFINE_MUTEX(hvsock_mutex); + -+static bool uuid_equals(uuid_le u1, uuid_le u2) -+{ -+ return !uuid_le_cmp(u1, u2); -+} -+ +static struct sock *hvsock_find_bound_socket(const struct sockaddr_hv *addr) +{ + struct hvsock_sock *hvsk; + + list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) { -+ if (uuid_equals(addr->shv_service_id, -+ hvsk->local_addr.shv_service_id)) ++ if (!uuid_le_cmp(addr->shv_service_guid, ++ hvsk->local_addr.shv_service_guid)) + return hvsock_to_sk(hvsk); + } + return NULL; @@ -373,11 +364,11 @@ index 0000000..b91bd60 + return NULL; +} + -+static -+void hvsock_enqueue_accept(struct sock *listener, struct sock *connected) ++static void hvsock_enqueue_accept(struct sock *listener, ++ struct sock 
*connected) +{ -+ struct hvsock_sock *hvlistener; + struct hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; + + hvlistener = sk_to_hvsock(listener); + hvconnected = sk_to_hvsock(connected); @@ -393,8 +384,8 @@ index 0000000..b91bd60 + +static struct sock *hvsock_dequeue_accept(struct sock *listener) +{ -+ struct hvsock_sock *hvlistener; + struct hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; + + hvlistener = sk_to_hvsock(listener); + @@ -437,7 +428,7 @@ index 0000000..b91bd60 +{ + memset(addr, 0, sizeof(*addr)); + addr->shv_family = AF_HYPERV; -+ addr->shv_service_id = service_id; ++ addr->shv_service_guid = service_id; +} + +static int hvsock_addr_validate(const struct sockaddr_hv *addr) @@ -456,7 +447,7 @@ index 0000000..b91bd60 + +static bool hvsock_addr_bound(const struct sockaddr_hv *addr) +{ -+ return !uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY); ++ return !!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY); +} + +static int hvsock_addr_cast(const struct sockaddr *addr, size_t len, @@ -475,13 +466,13 @@ index 0000000..b91bd60 + struct sockaddr_hv hv_addr; + int ret = 0; + -+ hvsock_addr_init(&hv_addr, addr->shv_service_id); ++ hvsock_addr_init(&hv_addr, addr->shv_service_guid); + + mutex_lock(&hvsock_mutex); + -+ if (uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY)) { ++ if (!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY)) { + do { -+ uuid_le_gen(&hv_addr.shv_service_id); ++ uuid_le_gen(&hv_addr.shv_service_guid); + } while (hvsock_find_bound_socket(&hv_addr)); + } else { + if (hvsock_find_bound_socket(&hv_addr)) { @@ -490,7 +481,7 @@ index 0000000..b91bd60 + } + } + -+ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_id); ++ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_guid); + + sock_hold(&hvsk->sk); + list_add(&hvsk->bound_list, &hvsock_bound_list); @@ -535,13 +526,14 @@ index 0000000..b91bd60 + +static void hvsock_sk_destruct(struct sock *sk) +{ -+ struct hvsock_sock *hvsk = 
sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; + -+ kfree(hvsk->send); -+ kfree(hvsk->recv); -+ atomic_dec(&total_num_hvsock); ++ hvsk = sk_to_hvsock(sk); ++ vfree(hvsk->send); ++ vfree(hvsk->recv); + ++ channel = hvsk->channel; + if (!channel) + return; + @@ -597,51 +589,37 @@ index 0000000..b91bd60 + return 0; +} + -+static int hvsock_create(struct net *net, struct socket *sock, -+ gfp_t priority, unsigned short type, -+ struct sock **sk) ++static struct sock *hvsock_create(struct net *net, struct socket *sock, ++ gfp_t priority, unsigned short type) +{ -+ struct hvsock_send_buf *send = NULL; -+ struct hvsock_recv_buf *recv = NULL; + struct hvsock_sock *hvsk; -+ int ret = -EMFILE; -+ int num_hvsock; ++ struct sock *sk; + -+ num_hvsock = atomic_inc_return(&total_num_hvsock); -+ if (num_hvsock > max_socket_number) -+ goto err; ++ sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0); ++ if (!sk) ++ return NULL; + -+ ret = -ENOMEM; -+ send = kmalloc(sizeof(*send), GFP_KERNEL); -+ recv = kmalloc(sizeof(*recv), GFP_KERNEL); -+ if (!send || !recv) -+ goto err; ++ sock_init_data(sock, sk); + -+ *sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0); -+ if (!*sk) -+ goto err; -+ -+ sock_init_data(sock, *sk); -+ -+ /* (*sk)->sk_type is normally set in sock_init_data, but only if sock ++ /* sk->sk_type is normally set in sock_init_data, but only if sock + * is non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) -+ (*sk)->sk_type = type; ++ sk->sk_type = type; + -+ (*sk)->sk_destruct = hvsock_sk_destruct; ++ sk->sk_destruct = hvsock_sk_destruct; + + /* Looks stream-based socket doesn't need this. 
*/ -+ (*sk)->sk_backlog_rcv = NULL; ++ sk->sk_backlog_rcv = NULL; + -+ (*sk)->sk_state = 0; -+ sock_reset_flag(*sk, SOCK_DONE); ++ sk->sk_state = 0; ++ sock_reset_flag(sk, SOCK_DONE); + -+ hvsk = sk_to_hvsock(*sk); ++ hvsk = sk_to_hvsock(sk); + -+ hvsk->send = send; -+ hvsk->recv = recv; ++ hvsk->send = NULL; ++ hvsk->recv = NULL; + + hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY); + hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY); @@ -654,16 +632,7 @@ index 0000000..b91bd60 + + hvsk->peer_shutdown = 0; + -+ hvsk->recv->data_len = 0; -+ hvsk->recv->data_offset = 0; -+ -+ return 0; -+err: -+ atomic_dec(&total_num_hvsock); -+ kfree(send); -+ kfree(recv); -+ *sk = NULL; -+ return ret; ++ return sk; +} + +static int hvsock_bind(struct socket *sock, struct sockaddr *addr, @@ -678,7 +647,7 @@ index 0000000..b91bd60 + if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0) + return -EINVAL; + -+ if (!uuid_equals(hv_addr->shv_vm_id, NULL_UUID_LE)) ++ if (uuid_le_cmp(hv_addr->shv_vm_guid, NULL_UUID_LE)) + return -EINVAL; + + lock_sock(sk); @@ -740,8 +709,8 @@ index 0000000..b91bd60 + &dummy, + &avl_write_bytes); + -+ *can_write = avl_write_bytes > -+ HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD); ++ /* We only write if there is enough space */ ++ *can_write = avl_write_bytes > HVSOCK_PKT_LEN(PAGE_SIZE_4K); + } +} + @@ -754,18 +723,29 @@ index 0000000..b91bd60 + &dummy, + &avl_write_bytes); + -+ if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) -+ return 0; -+ + /* The ringbuffer mustn't be 100% full, and we should reserve a + * zero-length-payload packet for the FIN: see hv_ringbuffer_write() + * and hvsock_shutdown(). + */ ++ if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) ++ return 0; + ret = avl_write_bytes - HVSOCK_PKT_LEN(1) - HVSOCK_PKT_LEN(0); + + return round_down(ret, 8); +} + ++static int hvsock_get_send_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->send = vmalloc(sizeof(*hvsk->send)); ++ return hvsk->send ? 
0 : -ENOMEM; ++} ++ ++static void hvsock_put_send_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->send); ++ hvsk->send = NULL; ++} ++ +static int hvsock_send_data(struct vmbus_channel *channel, + struct hvsock_sock *hvsk, + size_t to_write) @@ -777,6 +757,18 @@ index 0000000..b91bd60 + 0, VM_PKT_DATA_INBAND, 0); +} + ++static int hvsock_get_recv_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->recv = vmalloc(sizeof(*hvsk->recv)); ++ return hvsk->recv ? 0 : -ENOMEM; ++} ++ ++static void hvsock_put_recv_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->recv); ++ hvsk->recv = NULL; ++} ++ +static int hvsock_recv_data(struct vmbus_channel *channel, + struct hvsock_sock *hvsk, + size_t *payload_len) @@ -801,6 +793,7 @@ index 0000000..b91bd60 +{ + struct hvsock_sock *hvsk; + struct sock *sk; ++ int ret = 0; + + if (mode < SHUT_RD || mode > SHUT_RDWR) + return -EINVAL; @@ -825,13 +818,21 @@ index 0000000..b91bd60 + + if (mode & SEND_SHUTDOWN) { + hvsk = sk_to_hvsock(sk); ++ ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) ++ goto out; ++ + /* It can't fail: see get_ringbuffer_writable_bytes(). 
*/ + (void)hvsock_send_data(hvsk->channel, hvsk, 0); ++ ++ hvsock_put_send_buf(hvsk); + } + ++out: + release_sock(sk); + -+ return 0; ++ return ret; +} + +static unsigned int hvsock_poll(struct file *file, struct socket *sock, @@ -840,8 +841,8 @@ index 0000000..b91bd60 + struct vmbus_channel *channel; + bool can_read, can_write; + struct hvsock_sock *hvsk; -+ struct sock *sk; + unsigned int mask; ++ struct sock *sk; + + sk = sock->sk; + hvsk = sk_to_hvsock(sk); @@ -883,13 +884,12 @@ index 0000000..b91bd60 + /* If there is something in the queue then we can read */ + get_ringbuffer_rw_status(channel, &can_read, &can_write); + -+ if (!can_read && hvsk->recv->data_len > 0) ++ if (!can_read && hvsk->recv) + can_read = true; + + if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read) + mask |= POLLIN | POLLRDNORM; + } else { -+ can_read = false; + can_write = false; + } + @@ -928,14 +928,13 @@ index 0000000..b91bd60 +static void hvsock_on_channel_cb(void *ctx) +{ + struct sock *sk = (struct sock *)ctx; -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; + bool can_read, can_write; + -+ if (!channel) { -+ WARN_ONCE(1, "NULL channel! 
There is a programming bug.\n"); -+ return; -+ } ++ hvsk = sk_to_hvsock(sk); ++ channel = hvsk->channel; ++ BUG_ON(!channel); + + get_ringbuffer_rw_status(channel, &can_read, &can_write); + @@ -972,12 +971,11 @@ index 0000000..b91bd60 + +static int hvsock_open_connection(struct vmbus_channel *channel) +{ -+ struct hvsock_sock *hvsk, *new_hvsk; -+ struct sockaddr_hv hv_addr; -+ struct sock *sk, *new_sk; -+ unsigned char conn_from_host; -+ ++ struct hvsock_sock *hvsk = NULL, *new_hvsk = NULL; + uuid_le *instance, *service_id; ++ unsigned char conn_from_host; ++ struct sockaddr_hv hv_addr; ++ struct sock *sk, *new_sk = NULL; + int ret; + + instance = &channel->offermsg.offer.if_instance; @@ -999,14 +997,16 @@ index 0000000..b91bd60 + + if (conn_from_host) { + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { -+ ret = -EMFILE; ++ ret = -ECONNREFUSED; + goto out; + } + -+ ret = hvsock_create(sock_net(sk), NULL, GFP_KERNEL, -+ sk->sk_type, &new_sk); -+ if (ret != 0) ++ new_sk = hvsock_create(sock_net(sk), NULL, GFP_KERNEL, ++ sk->sk_type); ++ if (!new_sk) { ++ ret = -ENOMEM; + goto out; ++ } + + new_sk->sk_state = SS_CONNECTING; + new_hvsk = sk_to_hvsock(new_sk); @@ -1019,8 +1019,8 @@ index 0000000..b91bd60 + } + + set_channel_read_state(channel, false); -+ ret = vmbus_open(channel, send_ring_page * PAGE_SIZE, -+ recv_ring_page * PAGE_SIZE, NULL, 0, ++ ret = vmbus_open(channel, RINGBUFFER_HVSOCK_SND_SIZE, ++ RINGBUFFER_HVSOCK_RCV_SIZE, NULL, 0, + hvsock_on_channel_cb, conn_from_host ? 
new_sk : sk); + if (ret != 0) { + if (conn_from_host) { @@ -1033,8 +1033,10 @@ index 0000000..b91bd60 + } + + vmbus_set_chn_rescind_callback(channel, hvsock_close_connection); ++ ++ /* see get_ringbuffer_rw_status() */ + set_channel_pending_send_size(channel, -+ HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD)); ++ HVSOCK_PKT_LEN(PAGE_SIZE_4K) + 1); + + if (conn_from_host) { + new_sk->sk_state = SS_CONNECTED; @@ -1081,13 +1083,13 @@ index 0000000..b91bd60 + int flags, int current_ret) +{ + struct sock *sk = sock->sk; -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ ++ struct hvsock_sock *hvsk; + int ret = current_ret; -+ -+ long timeout = 30 * HZ; + DEFINE_WAIT(wait); ++ long timeout; + ++ hvsk = sk_to_hvsock(sk); ++ timeout = HVSOCK_CONNECT_TIMEOUT; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { @@ -1181,8 +1183,8 @@ index 0000000..b91bd60 + sk->sk_state = SS_CONNECTING; + + ret = vmbus_send_tl_connect_request( -+ &hvsk->local_addr.shv_service_id, -+ &hvsk->remote_addr.shv_service_id); ++ &hvsk->local_addr.shv_service_guid, ++ &hvsk->remote_addr.shv_service_guid); + if (ret < 0) + goto out; + @@ -1240,15 +1242,12 @@ index 0000000..b91bd60 + lock_sock(connected); + hvconnected = sk_to_hvsock(connected); + -+ if (ret) { -+ release_sock(connected); -+ sock_put(connected); -+ } else { ++ if (!ret) { + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); -+ release_sock(connected); -+ sock_put(connected); + } ++ release_sock(connected); ++ sock_put(connected); + } + +out_wait: @@ -1256,8 +1255,8 @@ index 0000000..b91bd60 + return ret; +} + -+static -+int hvsock_accept(struct socket *sock, struct socket *newsock, int flags) ++static int hvsock_accept(struct socket *sock, struct socket *newsock, ++ int flags) +{ + struct sock *listener; + int ret; @@ -1305,9 +1304,8 @@ index 0000000..b91bd60 + ret = -EINVAL; + goto out; + } -+ /* This is an artificial limit */ -+ if (backlog > 128) -+ backlog = 
128; ++ if (backlog > HVSOCK_MAX_BACKLOG) ++ backlog = HVSOCK_MAX_BACKLOG; + + hvsk = sk_to_hvsock(sk); + if (!hvsock_addr_bound(&hvsk->local_addr)) { @@ -1323,23 +1321,21 @@ index 0000000..b91bd60 + return ret; +} + -+static -+int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, size_t len) ++static int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, ++ size_t len) +{ + struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ ++ struct vmbus_channel *channel; + size_t total_to_write = len; + size_t total_written = 0; -+ bool can_write; -+ -+ int ret = 0; -+ + DEFINE_WAIT(wait); ++ bool can_write; + long timeout; ++ int ret = -EIO; + + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; + + while (total_to_write > 0) { + size_t to_write, max_writable; @@ -1397,16 +1393,23 @@ index 0000000..b91bd60 + if (max_writable == 0) + goto out_wait; + -+ to_write = min_t(size_t, HVSOCK_SND_BUF_SZ, ++ to_write = min_t(size_t, sizeof(hvsk->send->buf), + total_to_write); + if (to_write > max_writable) + to_write = max_writable; + -+ ret = memcpy_from_msg(hvsk->send->buf, msg, to_write); -+ if (ret != 0) ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) + goto out_wait; + ++ ret = memcpy_from_msg(hvsk->send->buf, msg, to_write); ++ if (ret != 0) { ++ hvsock_put_send_buf(hvsk); ++ goto out_wait; ++ } ++ + ret = hvsock_send_data(channel, hvsk, to_write); ++ hvsock_put_send_buf(hvsk); + if (ret != 0) + goto out_wait; + @@ -1423,7 +1426,8 @@ index 0000000..b91bd60 + return ret; +} + -+static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ++static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, ++ size_t len) +{ + struct hvsock_sock *hvsk; + struct sock *sk; @@ -1432,11 +1436,8 @@ index 0000000..b91bd60 + if (len == 0) + return -EINVAL; + -+ if (msg->msg_flags & ~MSG_DONTWAIT) { -+ pr_err("%s: 
unsupported flags=0x%x\n", __func__, -+ msg->msg_flags); ++ if (msg->msg_flags & ~MSG_DONTWAIT) + return -EOPNOTSUPP; -+ } + + sk = sock->sk; + hvsk = sk_to_hvsock(sk); @@ -1471,11 +1472,10 @@ index 0000000..b91bd60 +out: + release_sock(sk); + -+ /* ret is a bigger-than-0 total_written or a negative err code. */ -+ if (ret == 0) { -+ WARN(1, "unexpected return value of 0\n"); -+ ret = -EIO; -+ } ++ /* ret should be a bigger-than-0 total_written or a negative err ++ * code. ++ */ ++ BUG_ON(ret == 0); + + return ret; +} @@ -1484,43 +1484,56 @@ index 0000000..b91bd60 + size_t len, int flags) +{ + struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ + size_t to_read, total_to_read = len; ++ struct vmbus_channel *channel; ++ DEFINE_WAIT(wait); + size_t copied = 0; + bool can_read; -+ -+ int ret = 0; -+ -+ DEFINE_WAIT(wait); + long timeout; ++ int ret = 0; + + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; + + while (1) { -+ bool need_refill = hvsk->recv->data_len == 0; ++ bool need_refill = !hvsk->recv; + -+ if (need_refill) -+ get_ringbuffer_rw_status(channel, &can_read, NULL); -+ else ++ if (need_refill) { ++ if (hvsk->peer_shutdown & SEND_SHUTDOWN) ++ can_read = false; ++ else ++ get_ringbuffer_rw_status(channel, &can_read, ++ NULL); ++ } else { + can_read = true; ++ } + + if (can_read) { + size_t payload_len; + + if (need_refill) { ++ ret = hvsock_get_recv_buf(hvsk); ++ if (ret < 0) { ++ if (copied > 0) ++ ret = copied; ++ goto out_wait; ++ } ++ + ret = hvsock_recv_data(channel, hvsk, + &payload_len); + if (ret != 0 || -+ payload_len > HVSOCK_RCV_BUF_SZ) { ++ payload_len > sizeof(hvsk->recv->buf)) { + ret = -EIO; ++ hvsock_put_recv_buf(hvsk); + goto out_wait; + } + + if (payload_len == 0) { + ret = copied; -+ goto out_wait; ++ hvsock_put_recv_buf(hvsk); ++ hvsk->peer_shutdown |= SEND_SHUTDOWN; ++ break; + } + + hvsk->recv->data_len 
= payload_len; @@ -1542,7 +1555,7 @@ index 0000000..b91bd60 + hvsk->recv->data_len -= to_read; + + if (hvsk->recv->data_len == 0) -+ hvsk->recv->data_offset = 0; ++ hvsock_put_recv_buf(hvsk); + else + hvsk->recv->data_offset += to_read; + @@ -1584,23 +1597,8 @@ index 0000000..b91bd60 + else if (sk->sk_shutdown & RCV_SHUTDOWN) + ret = 0; + -+ if (copied > 0) { ++ if (copied > 0) + ret = copied; -+ -+ /* If the other side has shutdown for sending and there -+ * is nothing more to read, then we modify the socket -+ * state. -+ */ -+ if ((hvsk->peer_shutdown & SEND_SHUTDOWN) && -+ hvsk->recv->data_len == 0) { -+ get_ringbuffer_rw_status(channel, &can_read, NULL); -+ if (!can_read) { -+ sk->sk_state = SS_UNCONNECTED; -+ sock_set_flag(sk, SOCK_DONE); -+ sk->sk_state_change(sk); -+ } -+ } -+ } +out_wait: + finish_wait(sk_sleep(sk), &wait); + return ret; @@ -1630,7 +1628,6 @@ index 0000000..b91bd60 + + /* We ignore msg->addr_name/len. */ + if (flags & ~MSG_DONTWAIT) { -+ pr_err("%s: unsupported flags=0x%x\n", __func__, flags); + ret = -EOPNOTSUPP; + goto out; + } @@ -1684,9 +1681,6 @@ index 0000000..b91bd60 +{ + struct sock *sk; + -+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ + if (protocol != 0 && protocol != SHV_PROTO_RAW) + return -EPROTONOSUPPORT; + @@ -1700,7 +1694,8 @@ index 0000000..b91bd60 + + sock->state = SS_UNCONNECTED; + -+ return hvsock_create(net, sock, GFP_KERNEL, 0, &sk); ++ sk = hvsock_create(net, sock, GFP_KERNEL, 0); ++ return sk ? 
0 : -ENOMEM; +} + +static const struct net_proto_family hvsock_family_ops = { @@ -1749,17 +1744,8 @@ index 0000000..b91bd60 +{ + int ret; + -+ if (send_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_SND) -+ send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND; -+ -+ if (recv_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_RCV) -+ recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV; -+ -+ /* Hyper-V Sockets requires at least VMBus 4.0 */ -+ if ((vmbus_proto_version >> 16) < 4) { -+ pr_err("failed to load: VMBus 4 or later is required\n"); ++ if (vmbus_proto_version < VERSION_WIN10) + return -ENODEV; -+ } + + ret = vmbus_driver_register(&hvsock_drv); + if (ret) { @@ -1800,6 +1786,5 @@ index 0000000..b91bd60 + +MODULE_DESCRIPTION("Hyper-V Sockets"); +MODULE_LICENSE("Dual BSD/GPL"); --- -2.10.0 - +-- +2.10.1 diff --git a/alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch b/alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch similarity index 74% rename from alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch rename to alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch index d477b2713..8a8e01cbd 100644 --- a/alpine/kernel/patches/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch +++ b/alpine/kernel/patches/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch @@ -1,7 +1,7 @@ -From 7abd92fd5987e1ad79f2272cbe544be0cfe84165 Mon Sep 17 00:00:00 2001 +From 59f4c8c2b14753db30aea235624a7d5212a0c715 Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Mon, 23 May 2016 18:55:45 +0100 -Subject: [PATCH 42/42] vmbus: Don't spam the logs with unknown GUIDs +Subject: [PATCH 4/5] vmbus: Don't spam the logs with unknown GUIDs With Hyper-V sockets device types are introduced on the fly. The pr_info() then prints a message on every connection, which is way too verbose. 
Since @@ -14,17 +14,16 @@ Signed-off-by: Rolf Neugebauer 1 file changed, 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 0a54317..120ee22 100644 +index 8f4e6070..ef4a512 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -147,7 +147,6 @@ static u16 hv_get_dev_type(const uuid_le *guid) - if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) - return i; - } + if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) + return i; + } - pr_info("Unknown GUID: %pUl\n", guid); - return i; + return i; } - --- -2.10.0 +-- +2.10.1 diff --git a/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch b/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch deleted file mode 100644 index ecaa8e3e0..000000000 --- a/alpine/kernel/patches/0004-vsock-make-listener-child-lock-ordering-explicit.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 8386f4e436f280cec08f95338ae5e44bc8aa5b5e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 23 Jun 2016 16:28:58 +0100 -Subject: [PATCH 04/42] vsock: make listener child lock ordering explicit - -There are several places where the listener and pending or accept queue -child sockets are accessed at the same time. Lockdep is unhappy that -two locks from the same class are held. - -Tell lockdep that it is safe and document the lock ordering. - -Originally Claudio Imbrenda sent a similar -patch asking whether this is safe. I have audited the code and also -covered the vsock_pending_work() function. - -Suggested-by: Claudio Imbrenda -Signed-off-by: Stefan Hajnoczi -Signed-off-by: David S. 
Miller -(cherry picked from commit 4192f672fae559f32d82de72a677701853cc98a7) ---- - net/vmw_vsock/af_vsock.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index b5f1221..b96ac91 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -61,6 +61,14 @@ - * function will also cleanup rejected sockets, those that reach the connected - * state but leave it before they have been accepted. - * -+ * - Lock ordering for pending or accept queue sockets is: -+ * -+ * lock_sock(listener); -+ * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); -+ * -+ * Using explicit nested locking keeps lockdep happy since normally only one -+ * lock of a given class may be taken at a time. -+ * - * - Sockets created by user action will be cleaned up when the user process - * calls close(2), causing our release implementation to be called. Our release - * implementation will perform some cleanup then drop the last reference so our -@@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) - cleanup = true; - - lock_sock(listener); -- lock_sock(sk); -+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (vsock_is_pending(sk)) { - vsock_remove_pending(listener, sk); -@@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - if (connected) { - listener->sk_ack_backlog--; - -- lock_sock(connected); -+ lock_sock_nested(connected, SINGLE_DEPTH_NESTING); - vconnected = vsock_sk(connected); - - /* If the listener socket has received an error, then we should --- -2.10.0 - diff --git a/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch b/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch deleted file mode 100644 index edf521c29..000000000 --- a/alpine/kernel/patches/0005-VSOCK-transport-specific-vsock_transport-functions.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 
ae6d39c3a4cd08ce37606ab36b202702a48f5440 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 28 Jul 2016 15:36:30 +0100 -Subject: [PATCH 05/42] VSOCK: transport-specific vsock_transport functions - -struct vsock_transport contains function pointers called by AF_VSOCK -core code. The transport may want its own transport-specific function -pointers and they can be added after struct vsock_transport. - -Allow the transport to fetch vsock_transport. It can downcast it to -access transport-specific function pointers. - -The virtio transport will use this. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0b01aeb3d2fbf16787f0c9629f4ca52ae792f732) ---- - include/net/af_vsock.h | 3 +++ - net/vmw_vsock/af_vsock.c | 9 +++++++++ - 2 files changed, 12 insertions(+) - -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index e9eb2d6..23f5525 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -165,6 +165,9 @@ static inline int vsock_core_init(const struct vsock_transport *t) - } - void vsock_core_exit(void); - -+/* The transport may downcast this to access transport-specific functions */ -+const struct vsock_transport *vsock_core_get_transport(void); -+ - /**** UTILS ****/ - - void vsock_release_pending(struct sock *pending); -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index b96ac91..e34d96f 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -1995,6 +1995,15 @@ void vsock_core_exit(void) - } - EXPORT_SYMBOL_GPL(vsock_core_exit); - -+const struct vsock_transport *vsock_core_get_transport(void) -+{ -+ /* vsock_register_mutex not taken since only the transport uses this -+ * function and only while registered. 
-+ */ -+ return transport; -+} -+EXPORT_SYMBOL_GPL(vsock_core_get_transport); -+ - MODULE_AUTHOR("VMware, Inc."); - MODULE_DESCRIPTION("VMware Virtual Socket Family"); - MODULE_VERSION("1.0.1.0-k"); --- -2.10.0 - diff --git a/alpine/kernel/patches/0005-gro_cells-mark-napi-struct-as-not-busy-poll-candidat.patch b/alpine/kernel/patches/0005-gro_cells-mark-napi-struct-as-not-busy-poll-candidat.patch new file mode 100644 index 000000000..53cc990a6 --- /dev/null +++ b/alpine/kernel/patches/0005-gro_cells-mark-napi-struct-as-not-busy-poll-candidat.patch @@ -0,0 +1,49 @@ +From 56b20d9d99f039745e3b9e9fe8c4243e90d13334 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 14 Nov 2016 16:28:42 -0800 +Subject: [PATCH 5/5] gro_cells: mark napi struct as not busy poll candidates + +Rolf Neugebauer reported very long delays at netns dismantle. + +Eric W. Biederman was kind enough to look at this problem +and noticed synchronize_net() occurring from netif_napi_del() that was +added in linux-4.5 + +Busy polling makes no sense for tunnels NAPI. +If busy poll is used for sessions over tunnels, the poller will need to +poll the physical device queue anyway. + +netif_tx_napi_add() could be used here, but function name is misleading, +and renaming it is not stable material, so set NAPI_STATE_NO_BUSY_POLL +bit directly. + +This will avoid inserting gro_cells napi structures in napi_hash[] +and avoid the problematic synchronize_net() (per possible cpu) that +Rolf reported. + +Fixes: 93d05d4a320c ("net: provide generic busy polling to all NAPI drivers") +Signed-off-by: Eric Dumazet +Reported-by: Rolf Neugebauer +Reported-by: Eric W. 
Biederman +Acked-by: Cong Wang +Origin: https://patchwork.ozlabs.org/patch/694780/ +--- + include/net/gro_cells.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h +index d15214d..2a1abbf 100644 +--- a/include/net/gro_cells.h ++++ b/include/net/gro_cells.h +@@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + + __skb_queue_head_init(&cell->napi_skbs); ++ ++ set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); ++ + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); + napi_enable(&cell->napi); + } +-- +2.10.1 diff --git a/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch b/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch deleted file mode 100644 index 36f65a70d..000000000 --- a/alpine/kernel/patches/0006-VSOCK-defer-sock-removal-to-transports.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 816c87fe6ecfa46981c0ca332d21f1e0d8bfd8a0 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 28 Jul 2016 15:36:31 +0100 -Subject: [PATCH 06/42] VSOCK: defer sock removal to transports - -The virtio transport will implement graceful shutdown and the related -SO_LINGER socket option. This requires orphaning the sock but keeping -it in the table of connections after .release(). - -This patch adds the vsock_remove_sock() function and leaves it up to the -transport when to remove the sock. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 6773b7dc39f165bd9d824b50ac52cbb3f87d53c8) ---- - include/net/af_vsock.h | 1 + - net/vmw_vsock/af_vsock.c | 16 ++++++++++------ - net/vmw_vsock/vmci_transport.c | 2 ++ - 3 files changed, 13 insertions(+), 6 deletions(-) - -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index 23f5525..3af0b22 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -180,6 +180,7 @@ void vsock_remove_connected(struct vsock_sock *vsk); - struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, - struct sockaddr_vm *dst); -+void vsock_remove_sock(struct vsock_sock *vsk); - void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); - - #endif /* __AF_VSOCK_H__ */ -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index e34d96f..17dbbe6 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -344,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk) - return ret; - } - -+void vsock_remove_sock(struct vsock_sock *vsk) -+{ -+ if (vsock_in_bound_table(vsk)) -+ vsock_remove_bound(vsk); -+ -+ if (vsock_in_connected_table(vsk)) -+ vsock_remove_connected(vsk); -+} -+EXPORT_SYMBOL_GPL(vsock_remove_sock); -+ - void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) - { - int i; -@@ -660,12 +670,6 @@ static void __vsock_release(struct sock *sk) - vsk = vsock_sk(sk); - pending = NULL; /* Compiler warning. 
*/ - -- if (vsock_in_bound_table(vsk)) -- vsock_remove_bound(vsk); -- -- if (vsock_in_connected_table(vsk)) -- vsock_remove_connected(vsk); -- - transport->release(vsk); - - lock_sock(sk); -diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c -index 0a369bb..706991e 100644 ---- a/net/vmw_vsock/vmci_transport.c -+++ b/net/vmw_vsock/vmci_transport.c -@@ -1644,6 +1644,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk) - - static void vmci_transport_release(struct vsock_sock *vsk) - { -+ vsock_remove_sock(vsk); -+ - if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { - vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); - vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; --- -2.10.0 - diff --git a/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch b/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch deleted file mode 100644 index aaa7a887a..000000000 --- a/alpine/kernel/patches/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch +++ /dev/null @@ -1,1496 +0,0 @@ -From fe9f8cb30a5c819adabb5b9b598f7776cbbdc4f0 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:32 +0100 -Subject: [PATCH 07/42] VSOCK: Introduce virtio_vsock_common.ko - -This module contains the common code and header files for the following -virtio_transporto and vhost_vsock kernel modules. - -Signed-off-by: Asias He -Signed-off-by: Claudio Imbrenda -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 06a8fc78367d070720af960dcecec917d3ae5f3b) ---- - MAINTAINERS | 10 + - include/linux/virtio_vsock.h | 154 ++++ - include/net/af_vsock.h | 2 + - .../trace/events/vsock_virtio_transport_common.h | 144 +++ - include/uapi/linux/Kbuild | 1 + - include/uapi/linux/virtio_ids.h | 1 + - include/uapi/linux/virtio_vsock.h | 94 ++ - net/vmw_vsock/virtio_transport_common.c | 992 +++++++++++++++++++++ - 8 files changed, 1398 insertions(+) - create mode 100644 include/linux/virtio_vsock.h - create mode 100644 include/trace/events/vsock_virtio_transport_common.h - create mode 100644 include/uapi/linux/virtio_vsock.h - create mode 100644 net/vmw_vsock/virtio_transport_common.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index ab65bbe..b93ba8b 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11382,6 +11382,16 @@ S: Maintained - F: drivers/media/v4l2-core/videobuf2-* - F: include/media/videobuf2-* - -+VIRTIO AND VHOST VSOCK DRIVER -+M: Stefan Hajnoczi -+L: kvm@vger.kernel.org -+L: virtualization@lists.linux-foundation.org -+L: netdev@vger.kernel.org -+S: Maintained -+F: include/linux/virtio_vsock.h -+F: include/uapi/linux/virtio_vsock.h -+F: net/vmw_vsock/virtio_transport_common.c -+ - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul - S: Maintained -diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h -new file mode 100644 -index 0000000..9638bfe ---- /dev/null -+++ b/include/linux/virtio_vsock.h -@@ -0,0 +1,154 @@ -+#ifndef _LINUX_VIRTIO_VSOCK_H -+#define _LINUX_VIRTIO_VSOCK_H -+ -+#include -+#include -+#include -+#include -+ -+#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 -+#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) -+#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) -+#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) -+#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL -+#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) -+ -+enum { -+ VSOCK_VQ_RX = 0, /* for host to guest data */ -+ VSOCK_VQ_TX = 1, /* for 
guest to host data */ -+ VSOCK_VQ_EVENT = 2, -+ VSOCK_VQ_MAX = 3, -+}; -+ -+/* Per-socket state (accessed via vsk->trans) */ -+struct virtio_vsock_sock { -+ struct vsock_sock *vsk; -+ -+ /* Protected by lock_sock(sk_vsock(trans->vsk)) */ -+ u32 buf_size; -+ u32 buf_size_min; -+ u32 buf_size_max; -+ -+ spinlock_t tx_lock; -+ spinlock_t rx_lock; -+ -+ /* Protected by tx_lock */ -+ u32 tx_cnt; -+ u32 buf_alloc; -+ u32 peer_fwd_cnt; -+ u32 peer_buf_alloc; -+ -+ /* Protected by rx_lock */ -+ u32 fwd_cnt; -+ u32 rx_bytes; -+ struct list_head rx_queue; -+}; -+ -+struct virtio_vsock_pkt { -+ struct virtio_vsock_hdr hdr; -+ struct work_struct work; -+ struct list_head list; -+ void *buf; -+ u32 len; -+ u32 off; -+ bool reply; -+}; -+ -+struct virtio_vsock_pkt_info { -+ u32 remote_cid, remote_port; -+ struct msghdr *msg; -+ u32 pkt_len; -+ u16 type; -+ u16 op; -+ u32 flags; -+ bool reply; -+}; -+ -+struct virtio_transport { -+ /* This must be the first field */ -+ struct vsock_transport transport; -+ -+ /* Takes ownership of the packet */ -+ int (*send_pkt)(struct virtio_vsock_pkt *pkt); -+}; -+ -+ssize_t -+virtio_transport_stream_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, -+ int type); -+int -+virtio_transport_dgram_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags); -+ -+s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); -+s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); -+ -+int virtio_transport_do_socket_init(struct vsock_sock *vsk, -+ struct vsock_sock *psk); -+u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); -+u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); -+u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); -+void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); -+void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); -+void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); -+int 
-+virtio_transport_notify_poll_in(struct vsock_sock *vsk, -+ size_t target, -+ bool *data_ready_now); -+int -+virtio_transport_notify_poll_out(struct vsock_sock *vsk, -+ size_t target, -+ bool *space_available_now); -+ -+int virtio_transport_notify_recv_init(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, -+ size_t target, ssize_t copied, bool data_read, -+ struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_send_init(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, -+ ssize_t written, struct vsock_transport_send_notify_data *data); -+ -+u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); -+bool virtio_transport_stream_is_active(struct vsock_sock *vsk); -+bool virtio_transport_stream_allow(u32 cid, u32 port); -+int virtio_transport_dgram_bind(struct vsock_sock *vsk, -+ struct sockaddr_vm *addr); -+bool virtio_transport_dgram_allow(u32 cid, u32 port); -+ -+int virtio_transport_connect(struct vsock_sock *vsk); -+ -+int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); -+ -+void virtio_transport_release(struct vsock_sock *vsk); -+ -+ssize_t -+virtio_transport_stream_enqueue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len); -+int -+virtio_transport_dgram_enqueue(struct vsock_sock *vsk, -+ struct 
sockaddr_vm *remote_addr, -+ struct msghdr *msg, -+ size_t len); -+ -+void virtio_transport_destruct(struct vsock_sock *vsk); -+ -+void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); -+void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); -+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); -+u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); -+void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); -+ -+#endif /* _LINUX_VIRTIO_VSOCK_H */ -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index 3af0b22..f275896 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -63,6 +63,8 @@ struct vsock_sock { - struct list_head accept_queue; - bool rejected; - struct delayed_work dwork; -+ struct delayed_work close_work; -+ bool close_work_scheduled; - u32 peer_shutdown; - bool sent_request; - bool ignore_connecting_rst; -diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h -new file mode 100644 -index 0000000..b7f1d62 ---- /dev/null -+++ b/include/trace/events/vsock_virtio_transport_common.h -@@ -0,0 +1,144 @@ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM vsock -+ -+#if !defined(_TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H) || \ -+ defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H -+ -+#include -+ -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM); -+ -+#define show_type(val) \ -+ __print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }) -+ -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RESPONSE); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RST); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST); -+ -+#define show_op(val) \ -+ 
__print_symbolic(val, \ -+ { VIRTIO_VSOCK_OP_INVALID, "INVALID" }, \ -+ { VIRTIO_VSOCK_OP_REQUEST, "REQUEST" }, \ -+ { VIRTIO_VSOCK_OP_RESPONSE, "RESPONSE" }, \ -+ { VIRTIO_VSOCK_OP_RST, "RST" }, \ -+ { VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \ -+ { VIRTIO_VSOCK_OP_RW, "RW" }, \ -+ { VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \ -+ { VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" }) -+ -+TRACE_EVENT(virtio_transport_alloc_pkt, -+ TP_PROTO( -+ __u32 src_cid, __u32 src_port, -+ __u32 dst_cid, __u32 dst_port, -+ __u32 len, -+ __u16 type, -+ __u16 op, -+ __u32 flags -+ ), -+ TP_ARGS( -+ src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ type, -+ op, -+ flags -+ ), -+ TP_STRUCT__entry( -+ __field(__u32, src_cid) -+ __field(__u32, src_port) -+ __field(__u32, dst_cid) -+ __field(__u32, dst_port) -+ __field(__u32, len) -+ __field(__u16, type) -+ __field(__u16, op) -+ __field(__u32, flags) -+ ), -+ TP_fast_assign( -+ __entry->src_cid = src_cid; -+ __entry->src_port = src_port; -+ __entry->dst_cid = dst_cid; -+ __entry->dst_port = dst_port; -+ __entry->len = len; -+ __entry->type = type; -+ __entry->op = op; -+ __entry->flags = flags; -+ ), -+ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x", -+ __entry->src_cid, __entry->src_port, -+ __entry->dst_cid, __entry->dst_port, -+ __entry->len, -+ show_type(__entry->type), -+ show_op(__entry->op), -+ __entry->flags) -+); -+ -+TRACE_EVENT(virtio_transport_recv_pkt, -+ TP_PROTO( -+ __u32 src_cid, __u32 src_port, -+ __u32 dst_cid, __u32 dst_port, -+ __u32 len, -+ __u16 type, -+ __u16 op, -+ __u32 flags, -+ __u32 buf_alloc, -+ __u32 fwd_cnt -+ ), -+ TP_ARGS( -+ src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ type, -+ op, -+ flags, -+ buf_alloc, -+ fwd_cnt -+ ), -+ TP_STRUCT__entry( -+ __field(__u32, src_cid) -+ __field(__u32, src_port) -+ __field(__u32, dst_cid) -+ __field(__u32, dst_port) -+ __field(__u32, len) -+ __field(__u16, type) -+ __field(__u16, op) -+ __field(__u32, flags) -+ __field(__u32, buf_alloc) -+ 
__field(__u32, fwd_cnt) -+ ), -+ TP_fast_assign( -+ __entry->src_cid = src_cid; -+ __entry->src_port = src_port; -+ __entry->dst_cid = dst_cid; -+ __entry->dst_port = dst_port; -+ __entry->len = len; -+ __entry->type = type; -+ __entry->op = op; -+ __entry->flags = flags; -+ __entry->buf_alloc = buf_alloc; -+ __entry->fwd_cnt = fwd_cnt; -+ ), -+ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x " -+ "buf_alloc=%u fwd_cnt=%u", -+ __entry->src_cid, __entry->src_port, -+ __entry->dst_cid, __entry->dst_port, -+ __entry->len, -+ show_type(__entry->type), -+ show_op(__entry->op), -+ __entry->flags, -+ __entry->buf_alloc, -+ __entry->fwd_cnt) -+); -+ -+#endif /* _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H */ -+ -+#undef TRACE_INCLUDE_FILE -+#define TRACE_INCLUDE_FILE vsock_virtio_transport_common -+ -+/* This part must be outside protection */ -+#include -diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild -index ebd10e6..6c51a4d 100644 ---- a/include/uapi/linux/Kbuild -+++ b/include/uapi/linux/Kbuild -@@ -447,6 +447,7 @@ header-y += virtio_ring.h - header-y += virtio_rng.h - header-y += virtio_scsi.h - header-y += virtio_types.h -+header-y += virtio_vsock.h - header-y += vm_sockets.h - header-y += vt.h - header-y += wait.h -diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h -index 77925f5..3228d58 100644 ---- a/include/uapi/linux/virtio_ids.h -+++ b/include/uapi/linux/virtio_ids.h -@@ -41,5 +41,6 @@ - #define VIRTIO_ID_CAIF 12 /* Virtio caif */ - #define VIRTIO_ID_GPU 16 /* virtio GPU */ - #define VIRTIO_ID_INPUT 18 /* virtio input */ -+#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ - - #endif /* _LINUX_VIRTIO_IDS_H */ -diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h -new file mode 100644 -index 0000000..6b011c1 ---- /dev/null -+++ b/include/uapi/linux/virtio_vsock.h -@@ -0,0 +1,94 @@ -+/* -+ * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so -+ * anyone can use 
the definitions to implement compatible drivers/servers: -+ * -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of IBM nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. 
-+ * -+ * Copyright (C) Red Hat, Inc., 2013-2015 -+ * Copyright (C) Asias He , 2013 -+ * Copyright (C) Stefan Hajnoczi , 2015 -+ */ -+ -+#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H -+#define _UAPI_LINUX_VIRTIO_VOSCK_H -+ -+#include -+#include -+#include -+ -+struct virtio_vsock_config { -+ __le64 guest_cid; -+} __attribute__((packed)); -+ -+enum virtio_vsock_event_id { -+ VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, -+}; -+ -+struct virtio_vsock_event { -+ __le32 id; -+} __attribute__((packed)); -+ -+struct virtio_vsock_hdr { -+ __le64 src_cid; -+ __le64 dst_cid; -+ __le32 src_port; -+ __le32 dst_port; -+ __le32 len; -+ __le16 type; /* enum virtio_vsock_type */ -+ __le16 op; /* enum virtio_vsock_op */ -+ __le32 flags; -+ __le32 buf_alloc; -+ __le32 fwd_cnt; -+} __attribute__((packed)); -+ -+enum virtio_vsock_type { -+ VIRTIO_VSOCK_TYPE_STREAM = 1, -+}; -+ -+enum virtio_vsock_op { -+ VIRTIO_VSOCK_OP_INVALID = 0, -+ -+ /* Connect operations */ -+ VIRTIO_VSOCK_OP_REQUEST = 1, -+ VIRTIO_VSOCK_OP_RESPONSE = 2, -+ VIRTIO_VSOCK_OP_RST = 3, -+ VIRTIO_VSOCK_OP_SHUTDOWN = 4, -+ -+ /* To send payload */ -+ VIRTIO_VSOCK_OP_RW = 5, -+ -+ /* Tell the peer our credit info */ -+ VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, -+ /* Request the peer to send the credit info to us */ -+ VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, -+}; -+ -+/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ -+enum virtio_vsock_shutdown { -+ VIRTIO_VSOCK_SHUTDOWN_RCV = 1, -+ VIRTIO_VSOCK_SHUTDOWN_SEND = 2, -+}; -+ -+#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c -new file mode 100644 -index 0000000..a53b3a1 ---- /dev/null -+++ b/net/vmw_vsock/virtio_transport_common.c -@@ -0,0 +1,992 @@ -+/* -+ * common code for virtio vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+/* How long to wait for graceful shutdown of a connection */ -+#define VSOCK_CLOSE_TIMEOUT (8 * HZ) -+ -+static const struct virtio_transport *virtio_transport_get_ops(void) -+{ -+ const struct vsock_transport *t = vsock_core_get_transport(); -+ -+ return container_of(t, struct virtio_transport, transport); -+} -+ -+struct virtio_vsock_pkt * -+virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, -+ size_t len, -+ u32 src_cid, -+ u32 src_port, -+ u32 dst_cid, -+ u32 dst_port) -+{ -+ struct virtio_vsock_pkt *pkt; -+ int err; -+ -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ return NULL; -+ -+ pkt->hdr.type = cpu_to_le16(info->type); -+ pkt->hdr.op = cpu_to_le16(info->op); -+ pkt->hdr.src_cid = cpu_to_le64(src_cid); -+ pkt->hdr.dst_cid = cpu_to_le64(dst_cid); -+ pkt->hdr.src_port = cpu_to_le32(src_port); -+ pkt->hdr.dst_port = cpu_to_le32(dst_port); -+ pkt->hdr.flags = cpu_to_le32(info->flags); -+ pkt->len = len; -+ pkt->hdr.len = cpu_to_le32(len); -+ pkt->reply = info->reply; -+ -+ if (info->msg && len > 0) { -+ pkt->buf = kmalloc(len, GFP_KERNEL); -+ if (!pkt->buf) -+ goto out_pkt; -+ err = memcpy_from_msg(pkt->buf, info->msg, len); -+ if (err) -+ goto out; -+ } -+ -+ trace_virtio_transport_alloc_pkt(src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ info->type, -+ info->op, -+ info->flags); -+ -+ return pkt; -+ -+out: -+ kfree(pkt->buf); -+out_pkt: -+ kfree(pkt); -+ return NULL; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); -+ -+static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt_info *info) -+{ -+ u32 src_cid, src_port, dst_cid, dst_port; -+ struct virtio_vsock_sock *vvs; -+ struct virtio_vsock_pkt *pkt; -+ u32 pkt_len = info->pkt_len; -+ -+ src_cid = vm_sockets_get_local_cid(); -+ src_port = vsk->local_addr.svm_port; -+ if 
(!info->remote_cid) { -+ dst_cid = vsk->remote_addr.svm_cid; -+ dst_port = vsk->remote_addr.svm_port; -+ } else { -+ dst_cid = info->remote_cid; -+ dst_port = info->remote_port; -+ } -+ -+ vvs = vsk->trans; -+ -+ /* we can send less than pkt_len bytes */ -+ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) -+ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; -+ -+ /* virtio_transport_get_credit might return less than pkt_len credit */ -+ pkt_len = virtio_transport_get_credit(vvs, pkt_len); -+ -+ /* Do not send zero length OP_RW pkt */ -+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) -+ return pkt_len; -+ -+ pkt = virtio_transport_alloc_pkt(info, pkt_len, -+ src_cid, src_port, -+ dst_cid, dst_port); -+ if (!pkt) { -+ virtio_transport_put_credit(vvs, pkt_len); -+ return -ENOMEM; -+ } -+ -+ virtio_transport_inc_tx_pkt(vvs, pkt); -+ -+ return virtio_transport_get_ops()->send_pkt(pkt); -+} -+ -+static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, -+ struct virtio_vsock_pkt *pkt) -+{ -+ vvs->rx_bytes += pkt->len; -+} -+ -+static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, -+ struct virtio_vsock_pkt *pkt) -+{ -+ vvs->rx_bytes -= pkt->len; -+ vvs->fwd_cnt += pkt->len; -+} -+ -+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) -+{ -+ spin_lock_bh(&vvs->tx_lock); -+ pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); -+ pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); -+ spin_unlock_bh(&vvs->tx_lock); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); -+ -+u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) -+{ -+ u32 ret; -+ -+ spin_lock_bh(&vvs->tx_lock); -+ ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); -+ if (ret > credit) -+ ret = credit; -+ vvs->tx_cnt += ret; -+ spin_unlock_bh(&vvs->tx_lock); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_credit); -+ -+void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) -+{ -+ 
spin_lock_bh(&vvs->tx_lock); -+ vvs->tx_cnt -= credit; -+ spin_unlock_bh(&vvs->tx_lock); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_put_credit); -+ -+static int virtio_transport_send_credit_update(struct vsock_sock *vsk, -+ int type, -+ struct virtio_vsock_hdr *hdr) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, -+ .type = type, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+static ssize_t -+virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ struct virtio_vsock_pkt *pkt; -+ size_t bytes, total = 0; -+ int err = -EFAULT; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ while (total < len && !list_empty(&vvs->rx_queue)) { -+ pkt = list_first_entry(&vvs->rx_queue, -+ struct virtio_vsock_pkt, list); -+ -+ bytes = len - total; -+ if (bytes > pkt->len - pkt->off) -+ bytes = pkt->len - pkt->off; -+ -+ /* sk_lock is held by caller so no one else can dequeue. -+ * Unlock rx_lock since memcpy_to_msg() may sleep. 
-+ */ -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); -+ if (err) -+ goto out; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ -+ total += bytes; -+ pkt->off += bytes; -+ if (pkt->off == pkt->len) { -+ virtio_transport_dec_rx_pkt(vvs, pkt); -+ list_del(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ } -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ /* Send a credit pkt to peer */ -+ virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, -+ NULL); -+ -+ return total; -+ -+out: -+ if (total) -+ err = total; -+ return err; -+} -+ -+ssize_t -+virtio_transport_stream_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags) -+{ -+ if (flags & MSG_PEEK) -+ return -EOPNOTSUPP; -+ -+ return virtio_transport_stream_do_dequeue(vsk, msg, len); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); -+ -+int -+virtio_transport_dgram_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); -+ -+s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ bytes = vvs->rx_bytes; -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ return bytes; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); -+ -+static s64 virtio_transport_has_space(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); -+ if (bytes < 0) -+ bytes = 0; -+ -+ return bytes; -+} -+ -+s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ spin_lock_bh(&vvs->tx_lock); -+ bytes = virtio_transport_has_space(vsk); -+ spin_unlock_bh(&vvs->tx_lock); -+ -+ return bytes; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); -+ -+int 
virtio_transport_do_socket_init(struct vsock_sock *vsk, -+ struct vsock_sock *psk) -+{ -+ struct virtio_vsock_sock *vvs; -+ -+ vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); -+ if (!vvs) -+ return -ENOMEM; -+ -+ vsk->trans = vvs; -+ vvs->vsk = vsk; -+ if (psk) { -+ struct virtio_vsock_sock *ptrans = psk->trans; -+ -+ vvs->buf_size = ptrans->buf_size; -+ vvs->buf_size_min = ptrans->buf_size_min; -+ vvs->buf_size_max = ptrans->buf_size_max; -+ vvs->peer_buf_alloc = ptrans->peer_buf_alloc; -+ } else { -+ vvs->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; -+ vvs->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; -+ vvs->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; -+ } -+ -+ vvs->buf_alloc = vvs->buf_size; -+ -+ spin_lock_init(&vvs->rx_lock); -+ spin_lock_init(&vvs->tx_lock); -+ INIT_LIST_HEAD(&vvs->rx_queue); -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); -+ -+u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); -+ -+u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size_min; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); -+ -+u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size_max; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); -+ -+void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val < vvs->buf_size_min) -+ vvs->buf_size_min = val; -+ if (val > vvs->buf_size_max) -+ vvs->buf_size_max = val; -+ vvs->buf_size = val; -+ vvs->buf_alloc = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); -+ -+void 
virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val > vvs->buf_size) -+ vvs->buf_size = val; -+ vvs->buf_size_min = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); -+ -+void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val < vvs->buf_size) -+ vvs->buf_size = val; -+ vvs->buf_size_max = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); -+ -+int -+virtio_transport_notify_poll_in(struct vsock_sock *vsk, -+ size_t target, -+ bool *data_ready_now) -+{ -+ if (vsock_stream_has_data(vsk)) -+ *data_ready_now = true; -+ else -+ *data_ready_now = false; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); -+ -+int -+virtio_transport_notify_poll_out(struct vsock_sock *vsk, -+ size_t target, -+ bool *space_avail_now) -+{ -+ s64 free_space; -+ -+ free_space = vsock_stream_has_space(vsk); -+ if (free_space > 0) -+ *space_avail_now = true; -+ else if (free_space == 0) -+ *space_avail_now = false; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); -+ -+int virtio_transport_notify_recv_init(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); -+ -+int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); -+ -+int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); 
-+ -+int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, -+ size_t target, ssize_t copied, bool data_read, -+ struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); -+ -+int virtio_transport_notify_send_init(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); -+ -+int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); -+ -+int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); -+ -+int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, -+ ssize_t written, struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); -+ -+u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); -+ -+bool virtio_transport_stream_is_active(struct vsock_sock *vsk) -+{ -+ return true; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); -+ -+bool virtio_transport_stream_allow(u32 cid, u32 port) -+{ -+ return true; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); -+ -+int virtio_transport_dgram_bind(struct vsock_sock *vsk, -+ struct sockaddr_vm *addr) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); -+ -+bool virtio_transport_dgram_allow(u32 cid, u32 port) -+{ -+ return false; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); -+ -+int virtio_transport_connect(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = 
VIRTIO_VSOCK_OP_REQUEST, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_connect); -+ -+int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_SHUTDOWN, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .flags = (mode & RCV_SHUTDOWN ? -+ VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | -+ (mode & SEND_SHUTDOWN ? -+ VIRTIO_VSOCK_SHUTDOWN_SEND : 0), -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_shutdown); -+ -+int -+virtio_transport_dgram_enqueue(struct vsock_sock *vsk, -+ struct sockaddr_vm *remote_addr, -+ struct msghdr *msg, -+ size_t dgram_len) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); -+ -+ssize_t -+virtio_transport_stream_enqueue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RW, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .msg = msg, -+ .pkt_len = len, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); -+ -+void virtio_transport_destruct(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ kfree(vvs); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_destruct); -+ -+static int virtio_transport_reset(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RST, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .reply = !!pkt, -+ }; -+ -+ /* Send RST only if the original pkt is not a RST pkt */ -+ if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ return 0; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+/* Normally packets are associated with a socket. There may be no socket if an -+ * attempt was made to connect to a socket that does not exist. 
-+ */ -+static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RST, -+ .type = le16_to_cpu(pkt->hdr.type), -+ .reply = true, -+ }; -+ -+ /* Send RST only if the original pkt is not a RST pkt */ -+ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ return 0; -+ -+ pkt = virtio_transport_alloc_pkt(&info, 0, -+ le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port), -+ le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ if (!pkt) -+ return -ENOMEM; -+ -+ return virtio_transport_get_ops()->send_pkt(pkt); -+} -+ -+static void virtio_transport_wait_close(struct sock *sk, long timeout) -+{ -+ if (timeout) { -+ DEFINE_WAIT(wait); -+ -+ do { -+ prepare_to_wait(sk_sleep(sk), &wait, -+ TASK_INTERRUPTIBLE); -+ if (sk_wait_event(sk, &timeout, -+ sock_flag(sk, SOCK_DONE))) -+ break; -+ } while (!signal_pending(current) && timeout); -+ -+ finish_wait(sk_sleep(sk), &wait); -+ } -+} -+ -+static void virtio_transport_do_close(struct vsock_sock *vsk, -+ bool cancel_timeout) -+{ -+ struct sock *sk = sk_vsock(vsk); -+ -+ sock_set_flag(sk, SOCK_DONE); -+ vsk->peer_shutdown = SHUTDOWN_MASK; -+ if (vsock_stream_has_data(vsk) <= 0) -+ sk->sk_state = SS_DISCONNECTING; -+ sk->sk_state_change(sk); -+ -+ if (vsk->close_work_scheduled && -+ (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { -+ vsk->close_work_scheduled = false; -+ -+ vsock_remove_sock(vsk); -+ -+ /* Release refcnt obtained when we scheduled the timeout */ -+ sock_put(sk); -+ } -+} -+ -+static void virtio_transport_close_timeout(struct work_struct *work) -+{ -+ struct vsock_sock *vsk = -+ container_of(work, struct vsock_sock, close_work.work); -+ struct sock *sk = sk_vsock(vsk); -+ -+ sock_hold(sk); -+ lock_sock(sk); -+ -+ if (!sock_flag(sk, SOCK_DONE)) { -+ (void)virtio_transport_reset(vsk, NULL); -+ -+ virtio_transport_do_close(vsk, false); -+ } -+ -+ vsk->close_work_scheduled = false; -+ -+ 
release_sock(sk); -+ sock_put(sk); -+} -+ -+/* User context, vsk->sk is locked */ -+static bool virtio_transport_close(struct vsock_sock *vsk) -+{ -+ struct sock *sk = &vsk->sk; -+ -+ if (!(sk->sk_state == SS_CONNECTED || -+ sk->sk_state == SS_DISCONNECTING)) -+ return true; -+ -+ /* Already received SHUTDOWN from peer, reply with RST */ -+ if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { -+ (void)virtio_transport_reset(vsk, NULL); -+ return true; -+ } -+ -+ if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) -+ (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); -+ -+ if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) -+ virtio_transport_wait_close(sk, sk->sk_lingertime); -+ -+ if (sock_flag(sk, SOCK_DONE)) { -+ return true; -+ } -+ -+ sock_hold(sk); -+ INIT_DELAYED_WORK(&vsk->close_work, -+ virtio_transport_close_timeout); -+ vsk->close_work_scheduled = true; -+ schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT); -+ return false; -+} -+ -+void virtio_transport_release(struct vsock_sock *vsk) -+{ -+ struct sock *sk = &vsk->sk; -+ bool remove_sock = true; -+ -+ lock_sock(sk); -+ if (sk->sk_type == SOCK_STREAM) -+ remove_sock = virtio_transport_close(vsk); -+ release_sock(sk); -+ -+ if (remove_sock) -+ vsock_remove_sock(vsk); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_release); -+ -+static int -+virtio_transport_recv_connecting(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ int err; -+ int skerr; -+ -+ switch (le16_to_cpu(pkt->hdr.op)) { -+ case VIRTIO_VSOCK_OP_RESPONSE: -+ sk->sk_state = SS_CONNECTED; -+ sk->sk_socket->state = SS_CONNECTED; -+ vsock_insert_connected(vsk); -+ sk->sk_state_change(sk); -+ break; -+ case VIRTIO_VSOCK_OP_INVALID: -+ break; -+ case VIRTIO_VSOCK_OP_RST: -+ skerr = ECONNRESET; -+ err = 0; -+ goto destroy; -+ default: -+ skerr = EPROTO; -+ err = -EINVAL; -+ goto destroy; -+ } -+ return 0; -+ -+destroy: -+ virtio_transport_reset(vsk, pkt); -+ 
sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = skerr; -+ sk->sk_error_report(sk); -+ return err; -+} -+ -+static int -+virtio_transport_recv_connected(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ int err = 0; -+ -+ switch (le16_to_cpu(pkt->hdr.op)) { -+ case VIRTIO_VSOCK_OP_RW: -+ pkt->len = le32_to_cpu(pkt->hdr.len); -+ pkt->off = 0; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ virtio_transport_inc_rx_pkt(vvs, pkt); -+ list_add_tail(&pkt->list, &vvs->rx_queue); -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ sk->sk_data_ready(sk); -+ return err; -+ case VIRTIO_VSOCK_OP_CREDIT_UPDATE: -+ sk->sk_write_space(sk); -+ break; -+ case VIRTIO_VSOCK_OP_SHUTDOWN: -+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) -+ vsk->peer_shutdown |= RCV_SHUTDOWN; -+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) -+ vsk->peer_shutdown |= SEND_SHUTDOWN; -+ if (vsk->peer_shutdown == SHUTDOWN_MASK && -+ vsock_stream_has_data(vsk) <= 0) -+ sk->sk_state = SS_DISCONNECTING; -+ if (le32_to_cpu(pkt->hdr.flags)) -+ sk->sk_state_change(sk); -+ break; -+ case VIRTIO_VSOCK_OP_RST: -+ virtio_transport_do_close(vsk, true); -+ break; -+ default: -+ err = -EINVAL; -+ break; -+ } -+ -+ virtio_transport_free_pkt(pkt); -+ return err; -+} -+ -+static void -+virtio_transport_recv_disconnecting(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ -+ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ virtio_transport_do_close(vsk, true); -+} -+ -+static int -+virtio_transport_send_response(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RESPONSE, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .remote_cid = le32_to_cpu(pkt->hdr.src_cid), -+ .remote_port = le32_to_cpu(pkt->hdr.src_port), -+ .reply = true, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+/* 
Handle server socket */ -+static int -+virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct vsock_sock *vchild; -+ struct sock *child; -+ -+ if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { -+ virtio_transport_reset(vsk, pkt); -+ return -EINVAL; -+ } -+ -+ if (sk_acceptq_is_full(sk)) { -+ virtio_transport_reset(vsk, pkt); -+ return -ENOMEM; -+ } -+ -+ child = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, -+ sk->sk_type, 0); -+ if (!child) { -+ virtio_transport_reset(vsk, pkt); -+ return -ENOMEM; -+ } -+ -+ sk->sk_ack_backlog++; -+ -+ lock_sock_nested(child, SINGLE_DEPTH_NESTING); -+ -+ child->sk_state = SS_CONNECTED; -+ -+ vchild = vsock_sk(child); -+ vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port)); -+ vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ -+ vsock_insert_connected(vchild); -+ vsock_enqueue_accept(sk, child); -+ virtio_transport_send_response(vchild, pkt); -+ -+ release_sock(child); -+ -+ sk->sk_data_ready(sk); -+ return 0; -+} -+ -+static bool virtio_transport_space_update(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ bool space_available; -+ -+ /* buf_alloc and fwd_cnt is always included in the hdr */ -+ spin_lock_bh(&vvs->tx_lock); -+ vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); -+ vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); -+ space_available = virtio_transport_has_space(vsk); -+ spin_unlock_bh(&vvs->tx_lock); -+ return space_available; -+} -+ -+/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex -+ * lock. 
-+ */ -+void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct sockaddr_vm src, dst; -+ struct vsock_sock *vsk; -+ struct sock *sk; -+ bool space_available; -+ -+ vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port)); -+ -+ trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, -+ dst.svm_cid, dst.svm_port, -+ le32_to_cpu(pkt->hdr.len), -+ le16_to_cpu(pkt->hdr.type), -+ le16_to_cpu(pkt->hdr.op), -+ le32_to_cpu(pkt->hdr.flags), -+ le32_to_cpu(pkt->hdr.buf_alloc), -+ le32_to_cpu(pkt->hdr.fwd_cnt)); -+ -+ if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { -+ (void)virtio_transport_reset_no_sock(pkt); -+ goto free_pkt; -+ } -+ -+ /* The socket must be in connected or bound table -+ * otherwise send reset back -+ */ -+ sk = vsock_find_connected_socket(&src, &dst); -+ if (!sk) { -+ sk = vsock_find_bound_socket(&dst); -+ if (!sk) { -+ (void)virtio_transport_reset_no_sock(pkt); -+ goto free_pkt; -+ } -+ } -+ -+ vsk = vsock_sk(sk); -+ -+ space_available = virtio_transport_space_update(sk, pkt); -+ -+ lock_sock(sk); -+ -+ /* Update CID in case it has changed after a transport reset event */ -+ vsk->local_addr.svm_cid = dst.svm_cid; -+ -+ if (space_available) -+ sk->sk_write_space(sk); -+ -+ switch (sk->sk_state) { -+ case VSOCK_SS_LISTEN: -+ virtio_transport_recv_listen(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ case SS_CONNECTING: -+ virtio_transport_recv_connecting(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ case SS_CONNECTED: -+ virtio_transport_recv_connected(sk, pkt); -+ break; -+ case SS_DISCONNECTING: -+ virtio_transport_recv_disconnecting(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ default: -+ virtio_transport_free_pkt(pkt); -+ break; -+ } -+ release_sock(sk); -+ -+ /* Release refcnt obtained when we fetched this socket out of the -+ * bound or connected list. 
-+ */ -+ sock_put(sk); -+ return; -+ -+free_pkt: -+ virtio_transport_free_pkt(pkt); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); -+ -+void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ kfree(pkt->buf); -+ kfree(pkt); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("common code for virtio vsock"); --- -2.10.0 - diff --git a/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch b/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch deleted file mode 100644 index 78931bc21..000000000 --- a/alpine/kernel/patches/0008-VSOCK-Introduce-virtio_transport.ko.patch +++ /dev/null @@ -1,663 +0,0 @@ -From c384834d9495c7b2a36b0054d08ddf3240687bdc Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:33 +0100 -Subject: [PATCH 08/42] VSOCK: Introduce virtio_transport.ko - -VM sockets virtio transport implementation. This driver runs in the -guest. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 0ea9e1d3a9e3ef7d2a1462d3de6b95131dc7d872) ---- - MAINTAINERS | 1 + - net/vmw_vsock/virtio_transport.c | 624 +++++++++++++++++++++++++++++++++++++++ - 2 files changed, 625 insertions(+) - create mode 100644 net/vmw_vsock/virtio_transport.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index b93ba8b..82d1123 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11391,6 +11391,7 @@ S: Maintained - F: include/linux/virtio_vsock.h - F: include/uapi/linux/virtio_vsock.h - F: net/vmw_vsock/virtio_transport_common.c -+F: net/vmw_vsock/virtio_transport.c - - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul -diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c -new file mode 100644 -index 0000000..699dfab ---- /dev/null -+++ b/net/vmw_vsock/virtio_transport.c -@@ -0,0 +1,624 @@ -+/* -+ * virtio transport for vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * Some of the code is take from Gerd Hoffmann 's -+ * early virtio-vsock proof-of-concept bits. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static struct workqueue_struct *virtio_vsock_workqueue; -+static struct virtio_vsock *the_virtio_vsock; -+static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ -+ -+struct virtio_vsock { -+ struct virtio_device *vdev; -+ struct virtqueue *vqs[VSOCK_VQ_MAX]; -+ -+ /* Virtqueue processing is deferred to a workqueue */ -+ struct work_struct tx_work; -+ struct work_struct rx_work; -+ struct work_struct event_work; -+ -+ /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX] -+ * must be accessed with tx_lock held. 
-+ */ -+ struct mutex tx_lock; -+ -+ struct work_struct send_pkt_work; -+ spinlock_t send_pkt_list_lock; -+ struct list_head send_pkt_list; -+ -+ atomic_t queued_replies; -+ -+ /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] -+ * must be accessed with rx_lock held. -+ */ -+ struct mutex rx_lock; -+ int rx_buf_nr; -+ int rx_buf_max_nr; -+ -+ /* The following fields are protected by event_lock. -+ * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held. -+ */ -+ struct mutex event_lock; -+ struct virtio_vsock_event event_list[8]; -+ -+ u32 guest_cid; -+}; -+ -+static struct virtio_vsock *virtio_vsock_get(void) -+{ -+ return the_virtio_vsock; -+} -+ -+static u32 virtio_transport_get_local_cid(void) -+{ -+ struct virtio_vsock *vsock = virtio_vsock_get(); -+ -+ return vsock->guest_cid; -+} -+ -+static void -+virtio_transport_send_pkt_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, send_pkt_work); -+ struct virtqueue *vq; -+ bool added = false; -+ bool restart_rx = false; -+ -+ mutex_lock(&vsock->tx_lock); -+ -+ vq = vsock->vqs[VSOCK_VQ_TX]; -+ -+ /* Avoid unnecessary interrupts while we're processing the ring */ -+ virtqueue_disable_cb(vq); -+ -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ struct scatterlist hdr, buf, *sgs[2]; -+ int ret, in_sg = 0, out_sg = 0; -+ bool reply; -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ if (list_empty(&vsock->send_pkt_list)) { -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ virtqueue_enable_cb(vq); -+ break; -+ } -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ reply = pkt->reply; -+ -+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); -+ sgs[out_sg++] = &hdr; -+ if (pkt->buf) { -+ sg_init_one(&buf, pkt->buf, pkt->len); -+ sgs[out_sg++] = &buf; -+ } -+ -+ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); -+ if 
(ret < 0) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) -+ continue; /* retry now that we have more space */ -+ break; -+ } -+ -+ if (reply) { -+ struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; -+ int val; -+ -+ val = atomic_dec_return(&vsock->queued_replies); -+ -+ /* Do we now have resources to resume rx processing? */ -+ if (val + 1 == virtqueue_get_vring_size(rx_vq)) -+ restart_rx = true; -+ } -+ -+ added = true; -+ } -+ -+ if (added) -+ virtqueue_kick(vq); -+ -+ mutex_unlock(&vsock->tx_lock); -+ -+ if (restart_rx) -+ queue_work(virtio_vsock_workqueue, &vsock->rx_work); -+} -+ -+static int -+virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock *vsock; -+ int len = pkt->len; -+ -+ vsock = virtio_vsock_get(); -+ if (!vsock) { -+ virtio_transport_free_pkt(pkt); -+ return -ENODEV; -+ } -+ -+ if (pkt->reply) -+ atomic_inc(&vsock->queued_replies); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add_tail(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); -+ return len; -+} -+ -+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) -+{ -+ int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; -+ struct virtio_vsock_pkt *pkt; -+ struct scatterlist hdr, buf, *sgs[2]; -+ struct virtqueue *vq; -+ int ret; -+ -+ vq = vsock->vqs[VSOCK_VQ_RX]; -+ -+ do { -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ break; -+ -+ pkt->buf = kmalloc(buf_len, GFP_KERNEL); -+ if (!pkt->buf) { -+ virtio_transport_free_pkt(pkt); -+ break; -+ } -+ -+ pkt->len = buf_len; -+ -+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); -+ sgs[0] = &hdr; -+ -+ sg_init_one(&buf, pkt->buf, buf_len); -+ sgs[1] = &buf; -+ ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); -+ if (ret) { -+ 
virtio_transport_free_pkt(pkt); -+ break; -+ } -+ vsock->rx_buf_nr++; -+ } while (vq->num_free); -+ if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) -+ vsock->rx_buf_max_nr = vsock->rx_buf_nr; -+ virtqueue_kick(vq); -+} -+ -+static void virtio_transport_tx_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, tx_work); -+ struct virtqueue *vq; -+ bool added = false; -+ -+ vq = vsock->vqs[VSOCK_VQ_TX]; -+ mutex_lock(&vsock->tx_lock); -+ do { -+ struct virtio_vsock_pkt *pkt; -+ unsigned int len; -+ -+ virtqueue_disable_cb(vq); -+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { -+ virtio_transport_free_pkt(pkt); -+ added = true; -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ mutex_unlock(&vsock->tx_lock); -+ -+ if (added) -+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); -+} -+ -+/* Is there space left for replies to rx packets? */ -+static bool virtio_transport_more_replies(struct virtio_vsock *vsock) -+{ -+ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX]; -+ int val; -+ -+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ -+ val = atomic_read(&vsock->queued_replies); -+ -+ return val < virtqueue_get_vring_size(vq); -+} -+ -+static void virtio_transport_rx_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, rx_work); -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_RX]; -+ -+ mutex_lock(&vsock->rx_lock); -+ -+ do { -+ virtqueue_disable_cb(vq); -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ unsigned int len; -+ -+ if (!virtio_transport_more_replies(vsock)) { -+ /* Stop rx until the device processes already -+ * pending replies. Leave rx virtqueue -+ * callbacks disabled. 
-+ */ -+ goto out; -+ } -+ -+ pkt = virtqueue_get_buf(vq, &len); -+ if (!pkt) { -+ break; -+ } -+ -+ vsock->rx_buf_nr--; -+ -+ /* Drop short/long packets */ -+ if (unlikely(len < sizeof(pkt->hdr) || -+ len > sizeof(pkt->hdr) + pkt->len)) { -+ virtio_transport_free_pkt(pkt); -+ continue; -+ } -+ -+ pkt->len = len - sizeof(pkt->hdr); -+ virtio_transport_recv_pkt(pkt); -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ -+out: -+ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) -+ virtio_vsock_rx_fill(vsock); -+ mutex_unlock(&vsock->rx_lock); -+} -+ -+/* event_lock must be held */ -+static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, -+ struct virtio_vsock_event *event) -+{ -+ struct scatterlist sg; -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_EVENT]; -+ -+ sg_init_one(&sg, event, sizeof(*event)); -+ -+ return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL); -+} -+ -+/* event_lock must be held */ -+static void virtio_vsock_event_fill(struct virtio_vsock *vsock) -+{ -+ size_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) { -+ struct virtio_vsock_event *event = &vsock->event_list[i]; -+ -+ virtio_vsock_event_fill_one(vsock, event); -+ } -+ -+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); -+} -+ -+static void virtio_vsock_reset_sock(struct sock *sk) -+{ -+ lock_sock(sk); -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = ECONNRESET; -+ sk->sk_error_report(sk); -+ release_sock(sk); -+} -+ -+static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) -+{ -+ struct virtio_device *vdev = vsock->vdev; -+ u64 guest_cid; -+ -+ vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), -+ &guest_cid, sizeof(guest_cid)); -+ vsock->guest_cid = le64_to_cpu(guest_cid); -+} -+ -+/* event_lock must be held */ -+static void virtio_vsock_event_handle(struct virtio_vsock *vsock, -+ struct virtio_vsock_event *event) -+{ -+ switch (le32_to_cpu(event->id)) { -+ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: -+ 
virtio_vsock_update_guest_cid(vsock); -+ vsock_for_each_connected_socket(virtio_vsock_reset_sock); -+ break; -+ } -+} -+ -+static void virtio_transport_event_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, event_work); -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_EVENT]; -+ -+ mutex_lock(&vsock->event_lock); -+ -+ do { -+ struct virtio_vsock_event *event; -+ unsigned int len; -+ -+ virtqueue_disable_cb(vq); -+ while ((event = virtqueue_get_buf(vq, &len)) != NULL) { -+ if (len == sizeof(*event)) -+ virtio_vsock_event_handle(vsock, event); -+ -+ virtio_vsock_event_fill_one(vsock, event); -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ -+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); -+ -+ mutex_unlock(&vsock->event_lock); -+} -+ -+static void virtio_vsock_event_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->event_work); -+} -+ -+static void virtio_vsock_tx_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->tx_work); -+} -+ -+static void virtio_vsock_rx_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->rx_work); -+} -+ -+static struct virtio_transport virtio_transport = { -+ .transport = { -+ .get_local_cid = virtio_transport_get_local_cid, -+ -+ .init = virtio_transport_do_socket_init, -+ .destruct = virtio_transport_destruct, -+ .release = virtio_transport_release, -+ .connect = virtio_transport_connect, -+ .shutdown = virtio_transport_shutdown, -+ -+ .dgram_bind = virtio_transport_dgram_bind, -+ .dgram_dequeue = virtio_transport_dgram_dequeue, -+ .dgram_enqueue = virtio_transport_dgram_enqueue, -+ .dgram_allow = virtio_transport_dgram_allow, -+ -+ .stream_dequeue = 
virtio_transport_stream_dequeue, -+ .stream_enqueue = virtio_transport_stream_enqueue, -+ .stream_has_data = virtio_transport_stream_has_data, -+ .stream_has_space = virtio_transport_stream_has_space, -+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, -+ .stream_is_active = virtio_transport_stream_is_active, -+ .stream_allow = virtio_transport_stream_allow, -+ -+ .notify_poll_in = virtio_transport_notify_poll_in, -+ .notify_poll_out = virtio_transport_notify_poll_out, -+ .notify_recv_init = virtio_transport_notify_recv_init, -+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, -+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, -+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, -+ .notify_send_init = virtio_transport_notify_send_init, -+ .notify_send_pre_block = virtio_transport_notify_send_pre_block, -+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, -+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, -+ -+ .set_buffer_size = virtio_transport_set_buffer_size, -+ .set_min_buffer_size = virtio_transport_set_min_buffer_size, -+ .set_max_buffer_size = virtio_transport_set_max_buffer_size, -+ .get_buffer_size = virtio_transport_get_buffer_size, -+ .get_min_buffer_size = virtio_transport_get_min_buffer_size, -+ .get_max_buffer_size = virtio_transport_get_max_buffer_size, -+ }, -+ -+ .send_pkt = virtio_transport_send_pkt, -+}; -+ -+static int virtio_vsock_probe(struct virtio_device *vdev) -+{ -+ vq_callback_t *callbacks[] = { -+ virtio_vsock_rx_done, -+ virtio_vsock_tx_done, -+ virtio_vsock_event_done, -+ }; -+ static const char * const names[] = { -+ "rx", -+ "tx", -+ "event", -+ }; -+ struct virtio_vsock *vsock = NULL; -+ int ret; -+ -+ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); -+ if (ret) -+ return ret; -+ -+ /* Only one virtio-vsock device per guest is supported */ -+ if (the_virtio_vsock) { -+ ret = -EBUSY; -+ goto out; -+ } -+ -+ vsock = 
kzalloc(sizeof(*vsock), GFP_KERNEL); -+ if (!vsock) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ vsock->vdev = vdev; -+ -+ ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, -+ vsock->vqs, callbacks, names); -+ if (ret < 0) -+ goto out; -+ -+ virtio_vsock_update_guest_cid(vsock); -+ -+ ret = vsock_core_init(&virtio_transport.transport); -+ if (ret < 0) -+ goto out_vqs; -+ -+ vsock->rx_buf_nr = 0; -+ vsock->rx_buf_max_nr = 0; -+ atomic_set(&vsock->queued_replies, 0); -+ -+ vdev->priv = vsock; -+ the_virtio_vsock = vsock; -+ mutex_init(&vsock->tx_lock); -+ mutex_init(&vsock->rx_lock); -+ mutex_init(&vsock->event_lock); -+ spin_lock_init(&vsock->send_pkt_list_lock); -+ INIT_LIST_HEAD(&vsock->send_pkt_list); -+ INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); -+ INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); -+ INIT_WORK(&vsock->event_work, virtio_transport_event_work); -+ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); -+ -+ mutex_lock(&vsock->rx_lock); -+ virtio_vsock_rx_fill(vsock); -+ mutex_unlock(&vsock->rx_lock); -+ -+ mutex_lock(&vsock->event_lock); -+ virtio_vsock_event_fill(vsock); -+ mutex_unlock(&vsock->event_lock); -+ -+ mutex_unlock(&the_virtio_vsock_mutex); -+ return 0; -+ -+out_vqs: -+ vsock->vdev->config->del_vqs(vsock->vdev); -+out: -+ kfree(vsock); -+ mutex_unlock(&the_virtio_vsock_mutex); -+ return ret; -+} -+ -+static void virtio_vsock_remove(struct virtio_device *vdev) -+{ -+ struct virtio_vsock *vsock = vdev->priv; -+ struct virtio_vsock_pkt *pkt; -+ -+ flush_work(&vsock->rx_work); -+ flush_work(&vsock->tx_work); -+ flush_work(&vsock->event_work); -+ flush_work(&vsock->send_pkt_work); -+ -+ vdev->config->reset(vdev); -+ -+ mutex_lock(&vsock->rx_lock); -+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) -+ virtio_transport_free_pkt(pkt); -+ mutex_unlock(&vsock->rx_lock); -+ -+ mutex_lock(&vsock->tx_lock); -+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) -+ 
virtio_transport_free_pkt(pkt); -+ mutex_unlock(&vsock->tx_lock); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ while (!list_empty(&vsock->send_pkt_list)) { -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ mutex_lock(&the_virtio_vsock_mutex); -+ the_virtio_vsock = NULL; -+ vsock_core_exit(); -+ mutex_unlock(&the_virtio_vsock_mutex); -+ -+ vdev->config->del_vqs(vdev); -+ -+ kfree(vsock); -+} -+ -+static struct virtio_device_id id_table[] = { -+ { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, -+ { 0 }, -+}; -+ -+static unsigned int features[] = { -+}; -+ -+static struct virtio_driver virtio_vsock_driver = { -+ .feature_table = features, -+ .feature_table_size = ARRAY_SIZE(features), -+ .driver.name = KBUILD_MODNAME, -+ .driver.owner = THIS_MODULE, -+ .id_table = id_table, -+ .probe = virtio_vsock_probe, -+ .remove = virtio_vsock_remove, -+}; -+ -+static int __init virtio_vsock_init(void) -+{ -+ int ret; -+ -+ virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); -+ if (!virtio_vsock_workqueue) -+ return -ENOMEM; -+ ret = register_virtio_driver(&virtio_vsock_driver); -+ if (ret) -+ destroy_workqueue(virtio_vsock_workqueue); -+ return ret; -+} -+ -+static void __exit virtio_vsock_exit(void) -+{ -+ unregister_virtio_driver(&virtio_vsock_driver); -+ destroy_workqueue(virtio_vsock_workqueue); -+} -+ -+module_init(virtio_vsock_init); -+module_exit(virtio_vsock_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("virtio transport for vsock"); -+MODULE_DEVICE_TABLE(virtio, id_table); --- -2.10.0 - diff --git a/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch b/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch deleted file mode 100644 index ea0d3196f..000000000 --- a/alpine/kernel/patches/0009-VSOCK-Introduce-vhost_vsock.ko.patch +++ /dev/null @@ -1,777 +0,0 
@@ -From a0af1060ea091348b94bd3780e5b92a3334e64b2 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:34 +0100 -Subject: [PATCH 09/42] VSOCK: Introduce vhost_vsock.ko - -VM sockets vhost transport implementation. This driver runs on the -host. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 433fc58e6bf2c8bd97e57153ed28e64fd78207b8) ---- - MAINTAINERS | 2 + - drivers/vhost/vsock.c | 722 +++++++++++++++++++++++++++++++++++++++++++++ - include/uapi/linux/vhost.h | 5 + - 3 files changed, 729 insertions(+) - create mode 100644 drivers/vhost/vsock.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index 82d1123..12d49f5 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11392,6 +11392,8 @@ F: include/linux/virtio_vsock.h - F: include/uapi/linux/virtio_vsock.h - F: net/vmw_vsock/virtio_transport_common.c - F: net/vmw_vsock/virtio_transport.c -+F: drivers/vhost/vsock.c -+F: drivers/vhost/vsock.h - - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -new file mode 100644 -index 0000000..028ca16 ---- /dev/null -+++ b/drivers/vhost/vsock.c -@@ -0,0 +1,722 @@ -+/* -+ * vhost transport for vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "vhost.h" -+ -+#define VHOST_VSOCK_DEFAULT_HOST_CID 2 -+ -+enum { -+ VHOST_VSOCK_FEATURES = VHOST_FEATURES, -+}; -+ -+/* Used to track all the vhost_vsock instances on the system. 
*/ -+static DEFINE_SPINLOCK(vhost_vsock_lock); -+static LIST_HEAD(vhost_vsock_list); -+ -+struct vhost_vsock { -+ struct vhost_dev dev; -+ struct vhost_virtqueue vqs[2]; -+ -+ /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ -+ struct list_head list; -+ -+ struct vhost_work send_pkt_work; -+ spinlock_t send_pkt_list_lock; -+ struct list_head send_pkt_list; /* host->guest pending packets */ -+ -+ atomic_t queued_replies; -+ -+ u32 guest_cid; -+}; -+ -+static u32 vhost_transport_get_local_cid(void) -+{ -+ return VHOST_VSOCK_DEFAULT_HOST_CID; -+} -+ -+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -+{ -+ struct vhost_vsock *vsock; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_for_each_entry(vsock, &vhost_vsock_list, list) { -+ u32 other_cid = vsock->guest_cid; -+ -+ /* Skip instances that have no CID yet */ -+ if (other_cid == 0) -+ continue; -+ -+ if (other_cid == guest_cid) { -+ spin_unlock_bh(&vhost_vsock_lock); -+ return vsock; -+ } -+ } -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ return NULL; -+} -+ -+static void -+vhost_transport_do_send_pkt(struct vhost_vsock *vsock, -+ struct vhost_virtqueue *vq) -+{ -+ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; -+ bool added = false; -+ bool restart_tx = false; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vq->private_data) -+ goto out; -+ -+ /* Avoid further vmexits, we're already processing the virtqueue */ -+ vhost_disable_notify(&vsock->dev, vq); -+ -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ struct iov_iter iov_iter; -+ unsigned out, in; -+ size_t nbytes; -+ size_t len; -+ int head; -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ if (list_empty(&vsock->send_pkt_list)) { -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ vhost_enable_notify(&vsock->dev, vq); -+ break; -+ } -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ head = vhost_get_vq_desc(vq, 
vq->iov, ARRAY_SIZE(vq->iov), -+ &out, &in, NULL, NULL); -+ if (head < 0) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ break; -+ } -+ -+ if (head == vq->num) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ /* We cannot finish yet if more buffers snuck in while -+ * re-enabling notify. -+ */ -+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { -+ vhost_disable_notify(&vsock->dev, vq); -+ continue; -+ } -+ break; -+ } -+ -+ if (out) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Expected 0 output buffers, got %u\n", out); -+ break; -+ } -+ -+ len = iov_length(&vq->iov[out], in); -+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); -+ -+ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); -+ if (nbytes != sizeof(pkt->hdr)) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Faulted on copying pkt hdr\n"); -+ break; -+ } -+ -+ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); -+ if (nbytes != pkt->len) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Faulted on copying pkt buf\n"); -+ break; -+ } -+ -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ added = true; -+ -+ if (pkt->reply) { -+ int val; -+ -+ val = atomic_dec_return(&vsock->queued_replies); -+ -+ /* Do we have resources to resume tx processing? 
*/ -+ if (val + 1 == tx_vq->num) -+ restart_tx = true; -+ } -+ -+ virtio_transport_free_pkt(pkt); -+ } -+ if (added) -+ vhost_signal(&vsock->dev, vq); -+ -+out: -+ mutex_unlock(&vq->mutex); -+ -+ if (restart_tx) -+ vhost_poll_queue(&tx_vq->poll); -+} -+ -+static void vhost_transport_send_pkt_work(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq; -+ struct vhost_vsock *vsock; -+ -+ vsock = container_of(work, struct vhost_vsock, send_pkt_work); -+ vq = &vsock->vqs[VSOCK_VQ_RX]; -+ -+ vhost_transport_do_send_pkt(vsock, vq); -+} -+ -+static int -+vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct vhost_vsock *vsock; -+ struct vhost_virtqueue *vq; -+ int len = pkt->len; -+ -+ /* Find the vhost_vsock according to guest context id */ -+ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); -+ if (!vsock) { -+ virtio_transport_free_pkt(pkt); -+ return -ENODEV; -+ } -+ -+ vq = &vsock->vqs[VSOCK_VQ_RX]; -+ -+ if (pkt->reply) -+ atomic_inc(&vsock->queued_replies); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add_tail(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); -+ return len; -+} -+ -+static struct virtio_vsock_pkt * -+vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, -+ unsigned int out, unsigned int in) -+{ -+ struct virtio_vsock_pkt *pkt; -+ struct iov_iter iov_iter; -+ size_t nbytes; -+ size_t len; -+ -+ if (in != 0) { -+ vq_err(vq, "Expected 0 input buffers, got %u\n", in); -+ return NULL; -+ } -+ -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ return NULL; -+ -+ len = iov_length(vq->iov, out); -+ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); -+ -+ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); -+ if (nbytes != sizeof(pkt->hdr)) { -+ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", -+ sizeof(pkt->hdr), nbytes); -+ kfree(pkt); -+ return NULL; -+ } -+ -+ if (le16_to_cpu(pkt->hdr.type) == 
VIRTIO_VSOCK_TYPE_STREAM) -+ pkt->len = le32_to_cpu(pkt->hdr.len); -+ -+ /* No payload */ -+ if (!pkt->len) -+ return pkt; -+ -+ /* The pkt is too big */ -+ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { -+ kfree(pkt); -+ return NULL; -+ } -+ -+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL); -+ if (!pkt->buf) { -+ kfree(pkt); -+ return NULL; -+ } -+ -+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); -+ if (nbytes != pkt->len) { -+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n", -+ pkt->len, nbytes); -+ virtio_transport_free_pkt(pkt); -+ return NULL; -+ } -+ -+ return pkt; -+} -+ -+/* Is there space left for replies to rx packets? */ -+static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) -+{ -+ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; -+ int val; -+ -+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ -+ val = atomic_read(&vsock->queued_replies); -+ -+ return val < vq->num; -+} -+ -+static void vhost_vsock_handle_tx_kick(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -+ poll.work); -+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, -+ dev); -+ struct virtio_vsock_pkt *pkt; -+ int head; -+ unsigned int out, in; -+ bool added = false; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vq->private_data) -+ goto out; -+ -+ vhost_disable_notify(&vsock->dev, vq); -+ for (;;) { -+ if (!vhost_vsock_more_replies(vsock)) { -+ /* Stop tx until the device processes already -+ * pending replies. Leave tx virtqueue -+ * callbacks disabled. 
-+ */ -+ goto no_more_replies; -+ } -+ -+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), -+ &out, &in, NULL, NULL); -+ if (head < 0) -+ break; -+ -+ if (head == vq->num) { -+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { -+ vhost_disable_notify(&vsock->dev, vq); -+ continue; -+ } -+ break; -+ } -+ -+ pkt = vhost_vsock_alloc_pkt(vq, out, in); -+ if (!pkt) { -+ vq_err(vq, "Faulted on pkt\n"); -+ continue; -+ } -+ -+ /* Only accept correctly addressed packets */ -+ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) -+ virtio_transport_recv_pkt(pkt); -+ else -+ virtio_transport_free_pkt(pkt); -+ -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ added = true; -+ } -+ -+no_more_replies: -+ if (added) -+ vhost_signal(&vsock->dev, vq); -+ -+out: -+ mutex_unlock(&vq->mutex); -+} -+ -+static void vhost_vsock_handle_rx_kick(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -+ poll.work); -+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, -+ dev); -+ -+ vhost_transport_do_send_pkt(vsock, vq); -+} -+ -+static int vhost_vsock_start(struct vhost_vsock *vsock) -+{ -+ size_t i; -+ int ret; -+ -+ mutex_lock(&vsock->dev.mutex); -+ -+ ret = vhost_dev_check_owner(&vsock->dev); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vhost_vq_access_ok(vq)) { -+ ret = -EFAULT; -+ mutex_unlock(&vq->mutex); -+ goto err_vq; -+ } -+ -+ if (!vq->private_data) { -+ vq->private_data = vsock; -+ vhost_vq_init_access(vq); -+ } -+ -+ mutex_unlock(&vq->mutex); -+ } -+ -+ mutex_unlock(&vsock->dev.mutex); -+ return 0; -+ -+err_vq: -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ vq->private_data = NULL; -+ mutex_unlock(&vq->mutex); -+ } -+err: -+ mutex_unlock(&vsock->dev.mutex); -+ return ret; -+} -+ 
-+static int vhost_vsock_stop(struct vhost_vsock *vsock) -+{ -+ size_t i; -+ int ret; -+ -+ mutex_lock(&vsock->dev.mutex); -+ -+ ret = vhost_dev_check_owner(&vsock->dev); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ vq->private_data = NULL; -+ mutex_unlock(&vq->mutex); -+ } -+ -+err: -+ mutex_unlock(&vsock->dev.mutex); -+ return ret; -+} -+ -+static void vhost_vsock_free(struct vhost_vsock *vsock) -+{ -+ if (is_vmalloc_addr(vsock)) -+ vfree(vsock); -+ else -+ kfree(vsock); -+} -+ -+static int vhost_vsock_dev_open(struct inode *inode, struct file *file) -+{ -+ struct vhost_virtqueue **vqs; -+ struct vhost_vsock *vsock; -+ int ret; -+ -+ /* This struct is large and allocation could fail, fall back to vmalloc -+ * if there is no other way. -+ */ -+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); -+ if (!vsock) { -+ vsock = vmalloc(sizeof(*vsock)); -+ if (!vsock) -+ return -ENOMEM; -+ } -+ -+ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); -+ if (!vqs) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ atomic_set(&vsock->queued_replies, 0); -+ -+ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; -+ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; -+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; -+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; -+ -+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); -+ -+ file->private_data = vsock; -+ spin_lock_init(&vsock->send_pkt_list_lock); -+ INIT_LIST_HEAD(&vsock->send_pkt_list); -+ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_add_tail(&vsock->list, &vhost_vsock_list); -+ spin_unlock_bh(&vhost_vsock_lock); -+ return 0; -+ -+out: -+ vhost_vsock_free(vsock); -+ return ret; -+} -+ -+static void vhost_vsock_flush(struct vhost_vsock *vsock) -+{ -+ int i; -+ -+ for (i = 0; 
i < ARRAY_SIZE(vsock->vqs); i++) -+ if (vsock->vqs[i].handle_kick) -+ vhost_poll_flush(&vsock->vqs[i].poll); -+ vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); -+} -+ -+static void vhost_vsock_reset_orphans(struct sock *sk) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ -+ /* vmci_transport.c doesn't take sk_lock here either. At least we're -+ * under vsock_table_lock so the sock cannot disappear while we're -+ * executing. -+ */ -+ -+ if (!vhost_vsock_get(vsk->local_addr.svm_cid)) { -+ sock_set_flag(sk, SOCK_DONE); -+ vsk->peer_shutdown = SHUTDOWN_MASK; -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = ECONNRESET; -+ sk->sk_error_report(sk); -+ } -+} -+ -+static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -+{ -+ struct vhost_vsock *vsock = file->private_data; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_del(&vsock->list); -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ /* Iterating over all connections for all CIDs to find orphans is -+ * inefficient. Room for improvement here. 
*/ -+ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); -+ -+ vhost_vsock_stop(vsock); -+ vhost_vsock_flush(vsock); -+ vhost_dev_stop(&vsock->dev); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ while (!list_empty(&vsock->send_pkt_list)) { -+ struct virtio_vsock_pkt *pkt; -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ vhost_dev_cleanup(&vsock->dev, false); -+ kfree(vsock->dev.vqs); -+ vhost_vsock_free(vsock); -+ return 0; -+} -+ -+static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) -+{ -+ struct vhost_vsock *other; -+ -+ /* Refuse reserved CIDs */ -+ if (guest_cid <= VMADDR_CID_HOST || -+ guest_cid == U32_MAX) -+ return -EINVAL; -+ -+ /* 64-bit CIDs are not yet supported */ -+ if (guest_cid > U32_MAX) -+ return -EINVAL; -+ -+ /* Refuse if CID is already in use */ -+ other = vhost_vsock_get(guest_cid); -+ if (other && other != vsock) -+ return -EADDRINUSE; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ vsock->guest_cid = guest_cid; -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ return 0; -+} -+ -+static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) -+{ -+ struct vhost_virtqueue *vq; -+ int i; -+ -+ if (features & ~VHOST_VSOCK_FEATURES) -+ return -EOPNOTSUPP; -+ -+ mutex_lock(&vsock->dev.mutex); -+ if ((features & (1 << VHOST_F_LOG_ALL)) && -+ !vhost_log_access_ok(&vsock->dev)) { -+ mutex_unlock(&vsock->dev.mutex); -+ return -EFAULT; -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ vq = &vsock->vqs[i]; -+ mutex_lock(&vq->mutex); -+ vq->acked_features = features; -+ mutex_unlock(&vq->mutex); -+ } -+ mutex_unlock(&vsock->dev.mutex); -+ return 0; -+} -+ -+static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, -+ unsigned long arg) -+{ -+ struct vhost_vsock *vsock = f->private_data; -+ void __user *argp = (void __user *)arg; -+ u64 
guest_cid; -+ u64 features; -+ int start; -+ int r; -+ -+ switch (ioctl) { -+ case VHOST_VSOCK_SET_GUEST_CID: -+ if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) -+ return -EFAULT; -+ return vhost_vsock_set_cid(vsock, guest_cid); -+ case VHOST_VSOCK_SET_RUNNING: -+ if (copy_from_user(&start, argp, sizeof(start))) -+ return -EFAULT; -+ if (start) -+ return vhost_vsock_start(vsock); -+ else -+ return vhost_vsock_stop(vsock); -+ case VHOST_GET_FEATURES: -+ features = VHOST_VSOCK_FEATURES; -+ if (copy_to_user(argp, &features, sizeof(features))) -+ return -EFAULT; -+ return 0; -+ case VHOST_SET_FEATURES: -+ if (copy_from_user(&features, argp, sizeof(features))) -+ return -EFAULT; -+ return vhost_vsock_set_features(vsock, features); -+ default: -+ mutex_lock(&vsock->dev.mutex); -+ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); -+ if (r == -ENOIOCTLCMD) -+ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); -+ else -+ vhost_vsock_flush(vsock); -+ mutex_unlock(&vsock->dev.mutex); -+ return r; -+ } -+} -+ -+static const struct file_operations vhost_vsock_fops = { -+ .owner = THIS_MODULE, -+ .open = vhost_vsock_dev_open, -+ .release = vhost_vsock_dev_release, -+ .llseek = noop_llseek, -+ .unlocked_ioctl = vhost_vsock_dev_ioctl, -+}; -+ -+static struct miscdevice vhost_vsock_misc = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "vhost-vsock", -+ .fops = &vhost_vsock_fops, -+}; -+ -+static struct virtio_transport vhost_transport = { -+ .transport = { -+ .get_local_cid = vhost_transport_get_local_cid, -+ -+ .init = virtio_transport_do_socket_init, -+ .destruct = virtio_transport_destruct, -+ .release = virtio_transport_release, -+ .connect = virtio_transport_connect, -+ .shutdown = virtio_transport_shutdown, -+ -+ .dgram_enqueue = virtio_transport_dgram_enqueue, -+ .dgram_dequeue = virtio_transport_dgram_dequeue, -+ .dgram_bind = virtio_transport_dgram_bind, -+ .dgram_allow = virtio_transport_dgram_allow, -+ -+ .stream_enqueue = virtio_transport_stream_enqueue, -+ 
.stream_dequeue = virtio_transport_stream_dequeue, -+ .stream_has_data = virtio_transport_stream_has_data, -+ .stream_has_space = virtio_transport_stream_has_space, -+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, -+ .stream_is_active = virtio_transport_stream_is_active, -+ .stream_allow = virtio_transport_stream_allow, -+ -+ .notify_poll_in = virtio_transport_notify_poll_in, -+ .notify_poll_out = virtio_transport_notify_poll_out, -+ .notify_recv_init = virtio_transport_notify_recv_init, -+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, -+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, -+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, -+ .notify_send_init = virtio_transport_notify_send_init, -+ .notify_send_pre_block = virtio_transport_notify_send_pre_block, -+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, -+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, -+ -+ .set_buffer_size = virtio_transport_set_buffer_size, -+ .set_min_buffer_size = virtio_transport_set_min_buffer_size, -+ .set_max_buffer_size = virtio_transport_set_max_buffer_size, -+ .get_buffer_size = virtio_transport_get_buffer_size, -+ .get_min_buffer_size = virtio_transport_get_min_buffer_size, -+ .get_max_buffer_size = virtio_transport_get_max_buffer_size, -+ }, -+ -+ .send_pkt = vhost_transport_send_pkt, -+}; -+ -+static int __init vhost_vsock_init(void) -+{ -+ int ret; -+ -+ ret = vsock_core_init(&vhost_transport.transport); -+ if (ret < 0) -+ return ret; -+ return misc_register(&vhost_vsock_misc); -+}; -+ -+static void __exit vhost_vsock_exit(void) -+{ -+ misc_deregister(&vhost_vsock_misc); -+ vsock_core_exit(); -+}; -+ -+module_init(vhost_vsock_init); -+module_exit(vhost_vsock_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("vhost transport for vsock "); -diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h -index 
ab373191..b306476 100644 ---- a/include/uapi/linux/vhost.h -+++ b/include/uapi/linux/vhost.h -@@ -169,4 +169,9 @@ struct vhost_scsi_target { - #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) - #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) - -+/* VHOST_VSOCK specific defines */ -+ -+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) -+#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) -+ - #endif --- -2.10.0 - diff --git a/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch b/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch deleted file mode 100644 index 8c84c7879..000000000 --- a/alpine/kernel/patches/0010-VSOCK-Add-Makefile-and-Kconfig.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 30e1801c9e9683512a0cd169edf015923497dd70 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:35 +0100 -Subject: [PATCH 10/42] VSOCK: Add Makefile and Kconfig - -Enable virtio-vsock and vhost-vsock. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 304ba62fd4e670c1a5784585da0fac9f7309ef6c) ---- - drivers/vhost/Kconfig | 14 ++++++++++++++ - drivers/vhost/Makefile | 4 ++++ - net/vmw_vsock/Kconfig | 20 ++++++++++++++++++++ - net/vmw_vsock/Makefile | 6 ++++++ - 4 files changed, 44 insertions(+) - -diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig -index 533eaf0..2b5f588 100644 ---- a/drivers/vhost/Kconfig -+++ b/drivers/vhost/Kconfig -@@ -21,6 +21,20 @@ config VHOST_SCSI - Say M here to enable the vhost_scsi TCM fabric module - for use with virtio-scsi guests - -+config VHOST_VSOCK -+ tristate "vhost virtio-vsock driver" -+ depends on VSOCKETS && EVENTFD -+ select VIRTIO_VSOCKETS_COMMON -+ select VHOST -+ default n -+ ---help--- -+ This kernel module can be loaded in the host kernel to provide AF_VSOCK -+ sockets for communicating with guests. 
The guests must have the -+ virtio_transport.ko driver loaded to use the virtio-vsock device. -+ -+ To compile this driver as a module, choose M here: the module will be called -+ vhost_vsock. -+ - config VHOST_RING - tristate - ---help--- -diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile -index e0441c3..6b012b9 100644 ---- a/drivers/vhost/Makefile -+++ b/drivers/vhost/Makefile -@@ -4,5 +4,9 @@ vhost_net-y := net.o - obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o - vhost_scsi-y := scsi.o - -+obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o -+vhost_vsock-y := vsock.o -+ - obj-$(CONFIG_VHOST_RING) += vringh.o -+ - obj-$(CONFIG_VHOST) += vhost.o -diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig -index 14810ab..8831e7c 100644 ---- a/net/vmw_vsock/Kconfig -+++ b/net/vmw_vsock/Kconfig -@@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS - - To compile this driver as a module, choose M here: the module - will be called vmw_vsock_vmci_transport. If unsure, say N. -+ -+config VIRTIO_VSOCKETS -+ tristate "virtio transport for Virtual Sockets" -+ depends on VSOCKETS && VIRTIO -+ select VIRTIO_VSOCKETS_COMMON -+ help -+ This module implements a virtio transport for Virtual Sockets. -+ -+ Enable this transport if your Virtual Machine host supports Virtual -+ Sockets over virtio. -+ -+ To compile this driver as a module, choose M here: the module will be -+ called vmw_vsock_virtio_transport. If unsure, say N. -+ -+config VIRTIO_VSOCKETS_COMMON -+ tristate -+ help -+ This option is selected by any driver which needs to access -+ the virtio_vsock. The module will be called -+ vmw_vsock_virtio_transport_common. 
-diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile -index 2ce52d7..bc27c70 100644 ---- a/net/vmw_vsock/Makefile -+++ b/net/vmw_vsock/Makefile -@@ -1,7 +1,13 @@ - obj-$(CONFIG_VSOCKETS) += vsock.o - obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o -+obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o -+obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o - - vsock-y += af_vsock.o vsock_addr.o - - vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ - vmci_transport_notify_qstate.o -+ -+vmw_vsock_virtio_transport-y += virtio_transport.o -+ -+vmw_vsock_virtio_transport_common-y += virtio_transport_common.o --- -2.10.0 - diff --git a/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch b/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch deleted file mode 100644 index e1e78d9c8..000000000 --- a/alpine/kernel/patches/0011-VSOCK-Use-kvfree.patch +++ /dev/null @@ -1,33 +0,0 @@ -From e9a09f08525c736a71d8331fd6412a0ad19ee428 Mon Sep 17 00:00:00 2001 -From: Wei Yongjun -Date: Tue, 2 Aug 2016 13:50:42 +0000 -Subject: [PATCH 11/42] VSOCK: Use kvfree() - -Use kvfree() instead of open-coding it. - -Signed-off-by: Wei Yongjun -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit b226acab2f6aaa45c2af27279b63f622b23a44bd) ---- - drivers/vhost/vsock.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -index 028ca16..0ddf3a2 100644 ---- a/drivers/vhost/vsock.c -+++ b/drivers/vhost/vsock.c -@@ -434,10 +434,7 @@ err: - - static void vhost_vsock_free(struct vhost_vsock *vsock) - { -- if (is_vmalloc_addr(vsock)) -- vfree(vsock); -- else -- kfree(vsock); -+ kvfree(vsock); - } - - static int vhost_vsock_dev_open(struct inode *inode, struct file *file) --- -2.10.0 - diff --git a/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch b/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch deleted file mode 100644 index c522808e7..000000000 --- a/alpine/kernel/patches/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch +++ /dev/null @@ -1,53 +0,0 @@ -From f886059ea8d0ac8ed981263d91d94275b85c50d5 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 4 Aug 2016 14:52:53 +0100 -Subject: [PATCH 12/42] vhost/vsock: fix vhost virtio_vsock_pkt use-after-free - -Stash the packet length in a local variable before handing over -ownership of the packet to virtio_transport_recv_pkt() or -virtio_transport_free_pkt(). - -This patch solves the use-after-free since pkt is no longer guaranteed -to be alive. - -Reported-by: Dan Carpenter -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 3fda5d6e580193fa005014355b3a61498f1b3ae0) ---- - drivers/vhost/vsock.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -index 0ddf3a2..e3b30ea 100644 ---- a/drivers/vhost/vsock.c -+++ b/drivers/vhost/vsock.c -@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) - - vhost_disable_notify(&vsock->dev, vq); - for (;;) { -+ u32 len; -+ - if (!vhost_vsock_more_replies(vsock)) { - /* Stop tx until the device processes already - * pending replies. Leave tx virtqueue -@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) - continue; - } - -+ len = pkt->len; -+ - /* Only accept correctly addressed packets */ - if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) - virtio_transport_recv_pkt(pkt); - else - virtio_transport_free_pkt(pkt); - -- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + len); - added = true; - } - --- -2.10.0 - diff --git a/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch b/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch deleted file mode 100644 index aff3fc5c6..000000000 --- a/alpine/kernel/patches/0013-virtio-vsock-fix-include-guard-typo.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 6ded3ac18eabf23a790d6b6876119d8cd0538964 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 5 Aug 2016 13:52:09 +0100 -Subject: [PATCH 13/42] virtio-vsock: fix include guard typo - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 28ad55578b8a76390d966b09da8c7fa3644f5140) ---- - include/uapi/linux/virtio_vsock.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h -index 6b011c1..1d57ed3 100644 ---- a/include/uapi/linux/virtio_vsock.h -+++ b/include/uapi/linux/virtio_vsock.h -@@ -32,7 +32,7 @@ - */ - - #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H --#define _UAPI_LINUX_VIRTIO_VOSCK_H -+#define _UAPI_LINUX_VIRTIO_VSOCK_H - - #include - #include --- -2.10.0 - diff --git a/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch b/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch deleted file mode 100644 index cb1f6e165..000000000 --- a/alpine/kernel/patches/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 5fcd2673fadd46b0d2d5f896281113cd67a2efa7 Mon Sep 17 00:00:00 2001 -From: Gerard Garcia -Date: Wed, 10 Aug 2016 17:24:34 +0200 -Subject: [PATCH 14/42] vhost/vsock: drop space available check for TX vq - -Remove unnecessary use of enable/disable callback notifications -and the incorrect more space available check. - -The virtio_transport_tx_work handles when the TX virtqueue -has more buffers available. - -Signed-off-by: Gerard Garcia -Acked-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 21bc54fc0cdc31de72b57d2b3c79cf9c2b83cf39) ---- - net/vmw_vsock/virtio_transport.c | 10 +++------- - 1 file changed, 3 insertions(+), 7 deletions(-) - -diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c -index 699dfab..936d7ee 100644 ---- a/net/vmw_vsock/virtio_transport.c -+++ b/net/vmw_vsock/virtio_transport.c -@@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) - - vq = vsock->vqs[VSOCK_VQ_TX]; - -- /* Avoid unnecessary interrupts while we're processing the ring */ -- virtqueue_disable_cb(vq); -- - for (;;) { - struct virtio_vsock_pkt *pkt; - struct scatterlist hdr, buf, *sgs[2]; -@@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) - spin_lock_bh(&vsock->send_pkt_list_lock); - if (list_empty(&vsock->send_pkt_list)) { - spin_unlock_bh(&vsock->send_pkt_list_lock); -- virtqueue_enable_cb(vq); - break; - } - -@@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work) - } - - ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); -+ /* Usually this means that there is no more space available in -+ * the vq -+ */ - if (ret < 0) { - spin_lock_bh(&vsock->send_pkt_list_lock); - list_add(&pkt->list, &vsock->send_pkt_list); - spin_unlock_bh(&vsock->send_pkt_list_lock); -- -- if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) -- continue; /* retry now that we have more space */ - break; - } - --- -2.10.0 - diff --git a/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch b/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch deleted file mode 100644 index 4b5bf9751..000000000 --- a/alpine/kernel/patches/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 84e1e7a4981f6ef926bb01481445def66e0982b2 Mon Sep 17 00:00:00 2001 -From: Jake Oshins -Date: Mon, 14 Dec 2015 16:01:41 -0800 -Subject: [PATCH 16/42] drivers:hv: Define the 
channel type for Hyper-V PCI - Express pass-through - -This defines the channel type for PCI front-ends in Hyper-V VMs. - -Signed-off-by: Jake Oshins -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 3053c762444a83ec6a8777f9476668b23b8ab180) ---- - drivers/hv/channel_mgmt.c | 3 +++ - include/linux/hyperv.h | 11 +++++++++++ - 2 files changed, 14 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 37238df..a562318 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -359,6 +359,7 @@ enum { - SCSI, - NIC, - ND_NIC, -+ PCIE, - MAX_PERF_CHN, - }; - -@@ -376,6 +377,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ - { HV_ND_GUID, }, -+ /* PCI Express Pass Through */ -+ { HV_PCIE_GUID, }, - }; - - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index ae6a711..10dda1e 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1156,6 +1156,17 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - } - - /* -+ * PCI Express Pass Through -+ * {44C4F61D-4444-4400-9D52-802E27EDE19F} -+ */ -+ -+#define HV_PCIE_GUID \ -+ .guid = { \ -+ 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ -+ 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ -+ } -+ -+/* - * Common header for Hyper-V ICs - */ - --- -2.10.0 - diff --git a/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch b/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch deleted file mode 100644 index 623cc37ff..000000000 --- a/alpine/kernel/patches/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch +++ /dev/null @@ -1,297 +0,0 @@ -From 12fbf6bcf859c7ce33766ae450dc291d0b857197 Mon Sep 17 00:00:00 2001 -From: "K. Y. 
Srinivasan" -Date: Mon, 14 Dec 2015 16:01:43 -0800 -Subject: [PATCH 17/42] Drivers: hv: vmbus: Use uuid_le type consistently - -Consistently use uuid_le type in the Hyper-V driver code. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit af3ff643ea91ba64dd8d0b1cbed54d44512f96cd) ---- - drivers/hv/channel_mgmt.c | 2 +- - drivers/hv/vmbus_drv.c | 10 ++--- - include/linux/hyperv.h | 92 ++++++++++++++--------------------------- - include/linux/mod_devicetable.h | 2 +- - scripts/mod/file2alias.c | 2 +- - 5 files changed, 40 insertions(+), 68 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index a562318..339277b 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -409,7 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui - struct cpumask *alloced_mask; - - for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!memcmp(type_guid->b, hp_devs[i].guid, -+ if (!memcmp(type_guid->b, &hp_devs[i].guid, - sizeof(uuid_le))) { - perf_chn = true; - break; -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 509ed97..6ce2bf8 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -533,7 +533,7 @@ static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) - - static const uuid_le null_guid; - --static inline bool is_null_guid(const __u8 *guid) -+static inline bool is_null_guid(const uuid_le *guid) - { - if (memcmp(guid, &null_guid, sizeof(uuid_le))) - return false; -@@ -546,9 +546,9 @@ static inline bool is_null_guid(const __u8 *guid) - */ - static const struct hv_vmbus_device_id *hv_vmbus_get_id( - const struct hv_vmbus_device_id *id, -- const __u8 *guid) -+ const uuid_le *guid) - { -- for (; !is_null_guid(id->guid); id++) -+ for (; !is_null_guid(&id->guid); id++) - if (!memcmp(&id->guid, guid, sizeof(uuid_le))) - return id; - -@@ -565,7 +565,7 @@ static int vmbus_match(struct device *device, struct 
device_driver *driver) - struct hv_driver *drv = drv_to_hv_drv(driver); - struct hv_device *hv_dev = device_to_hv_device(device); - -- if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b)) -+ if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) - return 1; - - return 0; -@@ -582,7 +582,7 @@ static int vmbus_probe(struct device *child_device) - struct hv_device *dev = device_to_hv_device(child_device); - const struct hv_vmbus_device_id *dev_id; - -- dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b); -+ dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type); - if (drv->probe) { - ret = drv->probe(dev, dev_id); - if (ret != 0) -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 10dda1e..4712d7d 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1012,6 +1012,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - .guid = { g0, g1, g2, g3, g4, g5, g6, g7, \ - g8, g9, ga, gb, gc, gd, ge, gf }, - -+ -+ - /* - * GUID definitions of various offer types - services offered to the guest. 
- */ -@@ -1021,118 +1023,94 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - * {f8615163-df3e-46c5-913f-f2d2f965ed0e} - */ - #define HV_NIC_GUID \ -- .guid = { \ -- 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, \ -- 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e \ -- } -+ .guid = UUID_LE(0xf8615163, 0xdf3e, 0x46c5, 0x91, 0x3f, \ -+ 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e) - - /* - * IDE GUID - * {32412632-86cb-44a2-9b5c-50d1417354f5} - */ - #define HV_IDE_GUID \ -- .guid = { \ -- 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ -- 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 \ -- } -+ .guid = UUID_LE(0x32412632, 0x86cb, 0x44a2, 0x9b, 0x5c, \ -+ 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5) - - /* - * SCSI GUID - * {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} - */ - #define HV_SCSI_GUID \ -- .guid = { \ -- 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ -- 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f \ -- } -+ .guid = UUID_LE(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \ -+ 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f) - - /* - * Shutdown GUID - * {0e0b6031-5213-4934-818b-38d90ced39db} - */ - #define HV_SHUTDOWN_GUID \ -- .guid = { \ -- 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49, \ -- 0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb \ -- } -+ .guid = UUID_LE(0x0e0b6031, 0x5213, 0x4934, 0x81, 0x8b, \ -+ 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb) - - /* - * Time Synch GUID - * {9527E630-D0AE-497b-ADCE-E80AB0175CAF} - */ - #define HV_TS_GUID \ -- .guid = { \ -- 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, \ -- 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf \ -- } -+ .guid = UUID_LE(0x9527e630, 0xd0ae, 0x497b, 0xad, 0xce, \ -+ 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf) - - /* - * Heartbeat GUID - * {57164f39-9115-4e78-ab55-382f3bd5422d} - */ - #define HV_HEART_BEAT_GUID \ -- .guid = { \ -- 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, \ -- 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d \ -- } -+ .guid = UUID_LE(0x57164f39, 0x9115, 0x4e78, 0xab, 0x55, \ -+ 0x38, 0x2f, 
0x3b, 0xd5, 0x42, 0x2d) - - /* - * KVP GUID - * {a9a0f4e7-5a45-4d96-b827-8a841e8c03e6} - */ - #define HV_KVP_GUID \ -- .guid = { \ -- 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, \ -- 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6 \ -- } -+ .guid = UUID_LE(0xa9a0f4e7, 0x5a45, 0x4d96, 0xb8, 0x27, \ -+ 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6) - - /* - * Dynamic memory GUID - * {525074dc-8985-46e2-8057-a307dc18a502} - */ - #define HV_DM_GUID \ -- .guid = { \ -- 0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46, \ -- 0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 \ -- } -+ .guid = UUID_LE(0x525074dc, 0x8985, 0x46e2, 0x80, 0x57, \ -+ 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02) - - /* - * Mouse GUID - * {cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a} - */ - #define HV_MOUSE_GUID \ -- .guid = { \ -- 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c, \ -- 0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \ -- } -+ .guid = UUID_LE(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \ -+ 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) - - /* - * VSS (Backup/Restore) GUID - */ - #define HV_VSS_GUID \ -- .guid = { \ -- 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, \ -- 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \ -- } -+ .guid = UUID_LE(0x35fa2e29, 0xea23, 0x4236, 0x96, 0xae, \ -+ 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40) - /* - * Synthetic Video GUID - * {DA0A7802-E377-4aac-8E77-0558EB1073F8} - */ - #define HV_SYNTHVID_GUID \ -- .guid = { \ -- 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a, \ -- 0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 \ -- } -+ .guid = UUID_LE(0xda0a7802, 0xe377, 0x4aac, 0x8e, 0x77, \ -+ 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8) - - /* - * Synthetic FC GUID - * {2f9bcc4a-0069-4af3-b76b-6fd0be528cda} - */ - #define HV_SYNTHFC_GUID \ -- .guid = { \ -- 0x4A, 0xCC, 0x9B, 0x2F, 0x69, 0x00, 0xF3, 0x4A, \ -- 0xB7, 0x6B, 0x6F, 0xD0, 0xBE, 0x52, 0x8C, 0xDA \ -- } -+ .guid = UUID_LE(0x2f9bcc4a, 0x0069, 0x4af3, 0xb7, 0x6b, \ -+ 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda) - - /* - * Guest File Copy Service -@@ -1140,20 +1118,16 
@@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - */ - - #define HV_FCOPY_GUID \ -- .guid = { \ -- 0xE3, 0x4B, 0xD1, 0x34, 0xE4, 0xDE, 0xC8, 0x41, \ -- 0x9A, 0xE7, 0x6B, 0x17, 0x49, 0x77, 0xC1, 0x92 \ -- } -+ .guid = UUID_LE(0x34d14be3, 0xdee4, 0x41c8, 0x9a, 0xe7, \ -+ 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92) - - /* - * NetworkDirect. This is the guest RDMA service. - * {8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501} - */ - #define HV_ND_GUID \ -- .guid = { \ -- 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b, \ -- 0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 \ -- } -+ .guid = UUID_LE(0x8c2eaf3d, 0x32a7, 0x4b09, 0xab, 0x99, \ -+ 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01) - - /* - * PCI Express Pass Through -@@ -1161,10 +1135,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - */ - - #define HV_PCIE_GUID \ -- .guid = { \ -- 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ -- 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ -- } -+ .guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \ -+ 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f) - - /* - * Common header for Hyper-V ICs -diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h -index 64f36e0..6e4c645 100644 ---- a/include/linux/mod_devicetable.h -+++ b/include/linux/mod_devicetable.h -@@ -404,7 +404,7 @@ struct virtio_device_id { - * For Hyper-V devices we use the device guid as the id. 
- */ - struct hv_vmbus_device_id { -- __u8 guid[16]; -+ uuid_le guid; - kernel_ulong_t driver_data; /* Data private to the driver */ - }; - -diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c -index 9f5cdd4..8e8c69b 100644 ---- a/scripts/mod/file2alias.c -+++ b/scripts/mod/file2alias.c -@@ -917,7 +917,7 @@ static int do_vmbus_entry(const char *filename, void *symval, - char guid_name[(sizeof(*guid) + 1) * 2]; - - for (i = 0; i < (sizeof(*guid) * 2); i += 2) -- sprintf(&guid_name[i], "%02x", TO_NATIVE((*guid)[i/2])); -+ sprintf(&guid_name[i], "%02x", TO_NATIVE((guid->b)[i/2])); - - strcpy(alias, "vmbus:"); - strcat(alias, guid_name); --- -2.10.0 - diff --git a/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch b/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch deleted file mode 100644 index fee2b87dd..000000000 --- a/alpine/kernel/patches/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 01a403dce6afb34dd0430e12d93b7acd5f384439 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Mon, 14 Dec 2015 16:01:44 -0800 -Subject: [PATCH 18/42] Drivers: hv: vmbus: Use uuid_le_cmp() for comparing - GUIDs - -Use uuid_le_cmp() for comparing GUIDs. - -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 4ae9250893485f380275e7d5cb291df87c4d9710) ---- - drivers/hv/channel_mgmt.c | 3 +-- - drivers/hv/vmbus_drv.c | 4 ++-- - 2 files changed, 3 insertions(+), 4 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 339277b..9b4525c 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -409,8 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui - struct cpumask *alloced_mask; - - for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!memcmp(type_guid->b, &hp_devs[i].guid, -- sizeof(uuid_le))) { -+ if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { - perf_chn = true; - break; - } -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 6ce2bf8..7973aa5 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -535,7 +535,7 @@ static const uuid_le null_guid; - - static inline bool is_null_guid(const uuid_le *guid) - { -- if (memcmp(guid, &null_guid, sizeof(uuid_le))) -+ if (uuid_le_cmp(*guid, null_guid)) - return false; - return true; - } -@@ -549,7 +549,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id( - const uuid_le *guid) - { - for (; !is_null_guid(&id->guid); id++) -- if (!memcmp(&id->guid, guid, sizeof(uuid_le))) -+ if (!uuid_le_cmp(id->guid, *guid)) - return id; - - return NULL; --- -2.10.0 - diff --git a/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch b/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch deleted file mode 100644 index 6b228bce5..000000000 --- a/alpine/kernel/patches/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch +++ /dev/null @@ -1,42 +0,0 @@ -From a9c4320f47b5a2d2ef7600c5f61f3d4256de2ba5 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:48 -0800 -Subject: [PATCH 19/42] Drivers: hv: vmbus: do sanity check of channel state in - 
vmbus_close_internal() - -This fixes an incorrect assumption of channel state in the function. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 64b7faf903dae2df94d89edf2c688b16751800e4) ---- - drivers/hv/channel.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 1ef37c7..2889d97 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -512,6 +512,18 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - tasklet = hv_context.event_dpc[channel->target_cpu]; - tasklet_disable(tasklet); - -+ /* -+ * In case a device driver's probe() fails (e.g., -+ * util_probe() -> vmbus_open() returns -ENOMEM) and the device is -+ * rescinded later (e.g., we dynamically disble an Integrated Service -+ * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): -+ * here we should skip most of the below cleanup work. -+ */ -+ if (channel->state != CHANNEL_OPENED_STATE) { -+ ret = -EINVAL; -+ goto out; -+ } -+ - channel->state = CHANNEL_OPEN_STATE; - channel->sc_creation_callback = NULL; - /* Stop callback and cancel the timer asap */ --- -2.10.0 - diff --git a/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch b/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch deleted file mode 100644 index 338568fb4..000000000 --- a/alpine/kernel/patches/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch +++ /dev/null @@ -1,74 +0,0 @@ -From b92976804d10f78b9a50f5d8f62f3663a44f32e6 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:50 -0800 -Subject: [PATCH 20/42] Drivers: hv: vmbus: release relid on error in - vmbus_process_offer() - -We want to simplify vmbus_onoffer_rescind() by not invoking -hv_process_channel_removal(NULL, ...). - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit f52078cf5711ce47c113a58702b35c8ff5f212f5) ---- - drivers/hv/channel_mgmt.c | 21 +++++++++++++++------ - 1 file changed, 15 insertions(+), 6 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 9b4525c..8529dd2 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -178,19 +178,22 @@ static void percpu_channel_deq(void *arg) - } - - --void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -+static void vmbus_release_relid(u32 relid) - { - struct vmbus_channel_relid_released msg; -- unsigned long flags; -- struct vmbus_channel *primary_channel; - - memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); - msg.child_relid = relid; - msg.header.msgtype = CHANNELMSG_RELID_RELEASED; - vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); -+} - -- if (channel == NULL) -- return; -+void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -+{ -+ unsigned long flags; -+ struct vmbus_channel *primary_channel; -+ -+ vmbus_release_relid(relid); - - BUG_ON(!channel->rescind); - -@@ -337,6 +340,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - return; - - err_deq_chan: -+ vmbus_release_relid(newchannel->offermsg.child_relid); -+ - spin_lock_irqsave(&vmbus_connection.channel_lock, flags); - list_del(&newchannel->listentry); - spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -@@ -640,7 +645,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - channel = relid2channel(rescind->child_relid); - - if (channel == NULL) { -- hv_process_channel_removal(NULL, rescind->child_relid); -+ /* -+ * This is very impossible, because in -+ * vmbus_process_offer(), we have already invoked -+ * vmbus_release_relid() on error. 
-+ */ - return; - } - --- -2.10.0 - diff --git a/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch b/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch deleted file mode 100644 index f2a28b416..000000000 --- a/alpine/kernel/patches/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch +++ /dev/null @@ -1,116 +0,0 @@ -From e34354b98924dba0128289e722bde4ca35eafa90 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:51 -0800 -Subject: [PATCH 21/42] Drivers: hv: vmbus: channge - vmbus_connection.channel_lock to mutex - -spinlock is unnecessary here. -mutex is enough. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit d6f591e339d23f434efda11917da511870891472) ---- - drivers/hv/channel_mgmt.c | 12 ++++++------ - drivers/hv/connection.c | 7 +++---- - drivers/hv/hyperv_vmbus.h | 2 +- - 3 files changed, 10 insertions(+), 11 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 8529dd2..306c7df 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -207,9 +207,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - } - - if (channel->primary_channel == NULL) { -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_del(&channel->listentry); -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - primary_channel = channel; - } else { -@@ -254,7 +254,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - unsigned long flags; - - /* Make sure this is a new offer */ -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if 
(!uuid_le_cmp(channel->offermsg.offer.if_type, -@@ -270,7 +270,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - list_add_tail(&newchannel->listentry, - &vmbus_connection.chn_list); - -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - if (!fnew) { - /* -@@ -342,9 +342,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - err_deq_chan: - vmbus_release_relid(newchannel->offermsg.child_relid); - -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_del(&newchannel->listentry); -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); -diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 4fc2e88..521f48e 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -146,7 +146,7 @@ int vmbus_connect(void) - spin_lock_init(&vmbus_connection.channelmsg_lock); - - INIT_LIST_HEAD(&vmbus_connection.chn_list); -- spin_lock_init(&vmbus_connection.channel_lock); -+ mutex_init(&vmbus_connection.channel_mutex); - - /* - * Setup the vmbus event connection for channel interrupt -@@ -282,11 +282,10 @@ struct vmbus_channel *relid2channel(u32 relid) - { - struct vmbus_channel *channel; - struct vmbus_channel *found_channel = NULL; -- unsigned long flags; - struct list_head *cur, *tmp; - struct vmbus_channel *cur_sc; - -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (channel->offermsg.child_relid == relid) { - found_channel = channel; -@@ -305,7 +304,7 @@ struct vmbus_channel *relid2channel(u32 relid) - } - } - } -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - 
return found_channel; - } -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 12156db..50b1de7 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -683,7 +683,7 @@ struct vmbus_connection { - - /* List of channels */ - struct list_head chn_list; -- spinlock_t channel_lock; -+ struct mutex channel_mutex; - - struct workqueue_struct *work_queue; - }; --- -2.10.0 - diff --git a/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch b/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch deleted file mode 100644 index c993989dc..000000000 --- a/alpine/kernel/patches/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 91a65c691fc22cc6bfb884dea29cc7c5c3e5f9a9 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Mon, 14 Dec 2015 19:02:00 -0800 -Subject: [PATCH 22/42] Drivers: hv: remove code duplication between - vmbus_recvpacket()/vmbus_recvpacket_raw() - -vmbus_recvpacket() and vmbus_recvpacket_raw() are almost identical but -there are two discrepancies: -1) vmbus_recvpacket() doesn't propagate errors from hv_ringbuffer_read() - which looks like it is not desired. -2) There is an error message printed in packetlen > bufferlen case in - vmbus_recvpacket(). I'm removing it as it is usless for users to see - such messages and /vmbus_recvpacket_raw() doesn't have it. - -Signed-off-by: Vitaly Kuznetsov -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 667d374064b0cc48b6122101b287908d1b392bdb) ---- - drivers/hv/channel.c | 65 ++++++++++++++++++---------------------------------- - 1 file changed, 22 insertions(+), 43 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 2889d97..dd6de7f 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -922,8 +922,10 @@ EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); - * - * Mainly used by Hyper-V drivers. - */ --int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -- u32 bufferlen, u32 *buffer_actual_len, u64 *requestid) -+static inline int -+__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -+ u32 bufferlen, u32 *buffer_actual_len, u64 *requestid, -+ bool raw) - { - struct vmpacket_descriptor desc; - u32 packetlen; -@@ -941,27 +943,34 @@ int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, - return 0; - - packetlen = desc.len8 << 3; -- userlen = packetlen - (desc.offset8 << 3); -+ if (!raw) -+ userlen = packetlen - (desc.offset8 << 3); -+ else -+ userlen = packetlen; - - *buffer_actual_len = userlen; - -- if (userlen > bufferlen) { -- -- pr_err("Buffer too small - got %d needs %d\n", -- bufferlen, userlen); -- return -ETOOSMALL; -- } -+ if (userlen > bufferlen) -+ return -ENOBUFS; - - *requestid = desc.trans_id; - - /* Copy over the packet to the user buffer */ - ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen, -- (desc.offset8 << 3), &signal); -+ raw ? 
0 : desc.offset8 << 3, &signal); - - if (signal) - vmbus_setevent(channel); - -- return 0; -+ return ret; -+} -+ -+int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -+ u32 bufferlen, u32 *buffer_actual_len, -+ u64 *requestid) -+{ -+ return __vmbus_recvpacket(channel, buffer, bufferlen, -+ buffer_actual_len, requestid, false); - } - EXPORT_SYMBOL(vmbus_recvpacket); - -@@ -972,37 +981,7 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, - u32 bufferlen, u32 *buffer_actual_len, - u64 *requestid) - { -- struct vmpacket_descriptor desc; -- u32 packetlen; -- int ret; -- bool signal = false; -- -- *buffer_actual_len = 0; -- *requestid = 0; -- -- -- ret = hv_ringbuffer_peek(&channel->inbound, &desc, -- sizeof(struct vmpacket_descriptor)); -- if (ret != 0) -- return 0; -- -- -- packetlen = desc.len8 << 3; -- -- *buffer_actual_len = packetlen; -- -- if (packetlen > bufferlen) -- return -ENOBUFS; -- -- *requestid = desc.trans_id; -- -- /* Copy over the entire packet to the user buffer */ -- ret = hv_ringbuffer_read(&channel->inbound, buffer, packetlen, 0, -- &signal); -- -- if (signal) -- vmbus_setevent(channel); -- -- return ret; -+ return __vmbus_recvpacket(channel, buffer, bufferlen, -+ buffer_actual_len, requestid, true); - } - EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); --- -2.10.0 - diff --git a/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch b/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch deleted file mode 100644 index a52582000..000000000 --- a/alpine/kernel/patches/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 4c754b011766c2d8a99424637656ea8096d55890 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Dec 2015 12:21:22 -0800 -Subject: [PATCH 23/42] Drivers: hv: vmbus: fix the building warning with - hyperv-keyboard - -With the recent change af3ff643ea91ba64dd8d0b1cbed54d44512f96cd -(Drivers: hv: 
vmbus: Use uuid_le type consistently), we always get this -warning: - - CC [M] drivers/input/serio/hyperv-keyboard.o -drivers/input/serio/hyperv-keyboard.c:427:2: warning: missing braces around - initializer [-Wmissing-braces] - { HV_KBD_GUID, }, - ^ -drivers/input/serio/hyperv-keyboard.c:427:2: warning: (near initialization - for .id_table[0].guid.b.) [-Wmissing-braces] - -The patch fixes the warning. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 2048157ad02e65f6327118dd4a7b9c9f1fd12f77) ---- - drivers/input/serio/hyperv-keyboard.c | 10 ---------- - include/linux/hyperv.h | 8 ++++++++ - 2 files changed, 8 insertions(+), 10 deletions(-) - -diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c -index e74e5d6..c948866 100644 ---- a/drivers/input/serio/hyperv-keyboard.c -+++ b/drivers/input/serio/hyperv-keyboard.c -@@ -412,16 +412,6 @@ static int hv_kbd_remove(struct hv_device *hv_dev) - return 0; - } - --/* -- * Keyboard GUID -- * {f912ad6d-2b17-48ea-bd65-f927a61c7684} -- */ --#define HV_KBD_GUID \ -- .guid = { \ -- 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48, \ -- 0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 \ -- } -- - static const struct hv_vmbus_device_id id_table[] = { - /* Keyboard guid */ - { HV_KBD_GUID, }, -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 4712d7d..9e2de6a 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1091,6 +1091,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) - - /* -+ * Keyboard GUID -+ * {f912ad6d-2b17-48ea-bd65-f927a61c7684} -+ */ -+#define HV_KBD_GUID \ -+ .guid = UUID_LE(0xf912ad6d, 0x2b17, 0x48ea, 0xbd, 0x65, \ -+ 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84) -+ -+/* - * VSS (Backup/Restore) GUID - */ - #define HV_VSS_GUID \ --- -2.10.0 - diff --git 
a/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch b/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch deleted file mode 100644 index 14c287bd4..000000000 --- a/alpine/kernel/patches/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 6cb1a2f24c7b049f8a0c259afa4f5de37ac84084 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Tue, 15 Dec 2015 16:27:27 -0800 -Subject: [PATCH 24/42] Drivers: hv: vmbus: Treat Fibre Channel devices as - performance critical - -For performance critical devices, we distribute the incoming -channel interrupt load across available CPUs in the guest. -Include Fibre channel devices in the set of devices for which -we would distribute the interrupt load. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 879a650a273bc3efb9d472886b8ced12630ea8ed) ---- - drivers/hv/channel_mgmt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 306c7df..763d0c1 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -362,6 +362,7 @@ err_free_chan: - enum { - IDE = 0, - SCSI, -+ FC, - NIC, - ND_NIC, - PCIE, -@@ -378,6 +379,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { - { HV_IDE_GUID, }, - /* Storage - SCSI */ - { HV_SCSI_GUID, }, -+ /* Storage - FC */ -+ { HV_SYNTHFC_GUID, }, - /* Network */ - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ --- -2.10.0 - diff --git a/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch b/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch deleted file mode 100644 index 2e89c4ea2..000000000 --- a/alpine/kernel/patches/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 69933a7f325a93afbb5ed819388b8b063d602066 Mon Sep 17 00:00:00 2001 -From: "K. 
Y. Srinivasan" -Date: Fri, 25 Dec 2015 20:00:30 -0800 -Subject: [PATCH 25/42] Drivers: hv: vmbus: Add vendor and device atttributes - -Add vendor and device attributes to VMBUS devices. These will be used -by Hyper-V tools as well user-level RDMA libraries that will use the -vendor/device tuple to discover the RDMA device. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 7047f17d70fc0599563d30d0791692cb5fe42ae6) ---- - Documentation/ABI/stable/sysfs-bus-vmbus | 14 +++ - drivers/hv/channel_mgmt.c | 166 +++++++++++++++++++++++-------- - drivers/hv/vmbus_drv.c | 21 ++++ - include/linux/hyperv.h | 28 ++++++ - 4 files changed, 186 insertions(+), 43 deletions(-) - -diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus -index 636e938..5d0125f 100644 ---- a/Documentation/ABI/stable/sysfs-bus-vmbus -+++ b/Documentation/ABI/stable/sysfs-bus-vmbus -@@ -27,3 +27,17 @@ Description: The mapping of which primary/sub channels are bound to which - Virtual Processors. - Format: - Users: tools/hv/lsvmbus -+ -+What: /sys/bus/vmbus/devices/vmbus_*/device -+Date: Dec. 2015 -+KernelVersion: 4.5 -+Contact: K. Y. Srinivasan -+Description: The 16 bit device ID of the device -+Users: tools/hv/lsvmbus and user level RDMA libraries -+ -+What: /sys/bus/vmbus/devices/vmbus_*/vendor -+Date: Dec. 2015 -+KernelVersion: 4.5 -+Contact: K. Y. 
Srinivasan -+Description: The 16 bit vendor ID of the device -+Users: tools/hv/lsvmbus and user level RDMA libraries -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 763d0c1..d6c6114 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -33,8 +33,122 @@ - - #include "hyperv_vmbus.h" - --static void init_vp_index(struct vmbus_channel *channel, -- const uuid_le *type_guid); -+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); -+ -+static const struct vmbus_device vmbus_devs[] = { -+ /* IDE */ -+ { .dev_type = HV_IDE, -+ HV_IDE_GUID, -+ .perf_device = true, -+ }, -+ -+ /* SCSI */ -+ { .dev_type = HV_SCSI, -+ HV_SCSI_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Fibre Channel */ -+ { .dev_type = HV_FC, -+ HV_SYNTHFC_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Synthetic NIC */ -+ { .dev_type = HV_NIC, -+ HV_NIC_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Network Direct */ -+ { .dev_type = HV_ND, -+ HV_ND_GUID, -+ .perf_device = true, -+ }, -+ -+ /* PCIE */ -+ { .dev_type = HV_PCIE, -+ HV_PCIE_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Synthetic Frame Buffer */ -+ { .dev_type = HV_FB, -+ HV_SYNTHVID_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Synthetic Keyboard */ -+ { .dev_type = HV_KBD, -+ HV_KBD_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Synthetic MOUSE */ -+ { .dev_type = HV_MOUSE, -+ HV_MOUSE_GUID, -+ .perf_device = false, -+ }, -+ -+ /* KVP */ -+ { .dev_type = HV_KVP, -+ HV_KVP_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Time Synch */ -+ { .dev_type = HV_TS, -+ HV_TS_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Heartbeat */ -+ { .dev_type = HV_HB, -+ HV_HEART_BEAT_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Shutdown */ -+ { .dev_type = HV_SHUTDOWN, -+ HV_SHUTDOWN_GUID, -+ .perf_device = false, -+ }, -+ -+ /* File copy */ -+ { .dev_type = HV_FCOPY, -+ HV_FCOPY_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Backup */ -+ { .dev_type = HV_BACKUP, -+ HV_VSS_GUID, -+ .perf_device = false, 
-+ }, -+ -+ /* Dynamic Memory */ -+ { .dev_type = HV_DM, -+ HV_DM_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Unknown GUID */ -+ { .dev_type = HV_UNKOWN, -+ .perf_device = false, -+ }, -+}; -+ -+static u16 hv_get_dev_type(const uuid_le *guid) -+{ -+ u16 i; -+ -+ for (i = HV_IDE; i < HV_UNKOWN; i++) { -+ if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) -+ return i; -+ } -+ pr_info("Unknown GUID: %pUl\n", guid); -+ return i; -+} - - /** - * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message -@@ -252,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - struct vmbus_channel *channel; - bool fnew = true; - unsigned long flags; -+ u16 dev_type; - - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); -@@ -289,7 +404,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - goto err_free_chan; - } - -- init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); -+ dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type); -+ -+ init_vp_index(newchannel, dev_type); - - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); -@@ -326,6 +443,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - if (!newchannel->device_obj) - goto err_deq_chan; - -+ newchannel->device_obj->device_id = dev_type; - /* - * Add the new device to the bus. This will kick off device-driver - * binding which eventually invokes the device driver's AddDevice() -@@ -359,37 +477,6 @@ err_free_chan: - free_channel(newchannel); - } - --enum { -- IDE = 0, -- SCSI, -- FC, -- NIC, -- ND_NIC, -- PCIE, -- MAX_PERF_CHN, --}; -- --/* -- * This is an array of device_ids (device types) that are performance critical. -- * We attempt to distribute the interrupt load for these devices across -- * all available CPUs. 
-- */ --static const struct hv_vmbus_device_id hp_devs[] = { -- /* IDE */ -- { HV_IDE_GUID, }, -- /* Storage - SCSI */ -- { HV_SCSI_GUID, }, -- /* Storage - FC */ -- { HV_SYNTHFC_GUID, }, -- /* Network */ -- { HV_NIC_GUID, }, -- /* NetworkDirect Guest RDMA */ -- { HV_ND_GUID, }, -- /* PCI Express Pass Through */ -- { HV_PCIE_GUID, }, --}; -- -- - /* - * We use this state to statically distribute the channel interrupt load. - */ -@@ -406,22 +493,15 @@ static int next_numa_node_id; - * For pre-win8 hosts or non-performance critical channels we assign the - * first CPU in the first NUMA node. - */ --static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) -+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) - { - u32 cur_cpu; -- int i; -- bool perf_chn = false; -+ bool perf_chn = vmbus_devs[dev_type].perf_device; - struct vmbus_channel *primary = channel->primary_channel; - int next_node; - struct cpumask available_mask; - struct cpumask *alloced_mask; - -- for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { -- perf_chn = true; -- break; -- } -- } - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { - /* -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 7973aa5..de7130c 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -480,6 +480,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev, - } - static DEVICE_ATTR_RO(channel_vp_mapping); - -+static ssize_t vendor_show(struct device *dev, -+ struct device_attribute *dev_attr, -+ char *buf) -+{ -+ struct hv_device *hv_dev = device_to_hv_device(dev); -+ return sprintf(buf, "0x%x\n", hv_dev->vendor_id); -+} -+static DEVICE_ATTR_RO(vendor); -+ -+static ssize_t device_show(struct device *dev, -+ struct device_attribute *dev_attr, -+ char *buf) -+{ -+ struct hv_device *hv_dev = device_to_hv_device(dev); -+ return sprintf(buf, "0x%x\n", 
hv_dev->device_id); -+} -+static DEVICE_ATTR_RO(device); -+ - /* Set up per device attributes in /sys/bus/vmbus/devices/ */ - static struct attribute *vmbus_attrs[] = { - &dev_attr_id.attr, -@@ -505,6 +523,8 @@ static struct attribute *vmbus_attrs[] = { - &dev_attr_in_read_bytes_avail.attr, - &dev_attr_in_write_bytes_avail.attr, - &dev_attr_channel_vp_mapping.attr, -+ &dev_attr_vendor.attr, -+ &dev_attr_device.attr, - NULL, - }; - ATTRIBUTE_GROUPS(vmbus); -@@ -963,6 +983,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type, - memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); - memcpy(&child_device_obj->dev_instance, instance, - sizeof(uuid_le)); -+ child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ - - - return child_device_obj; -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 9e2de6a..51c98fd 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -635,6 +635,32 @@ enum hv_signal_policy { - HV_SIGNAL_POLICY_EXPLICIT, - }; - -+enum vmbus_device_type { -+ HV_IDE = 0, -+ HV_SCSI, -+ HV_FC, -+ HV_NIC, -+ HV_ND, -+ HV_PCIE, -+ HV_FB, -+ HV_KBD, -+ HV_MOUSE, -+ HV_KVP, -+ HV_TS, -+ HV_HB, -+ HV_SHUTDOWN, -+ HV_FCOPY, -+ HV_BACKUP, -+ HV_DM, -+ HV_UNKOWN, -+}; -+ -+struct vmbus_device { -+ u16 dev_type; -+ uuid_le guid; -+ bool perf_device; -+}; -+ - struct vmbus_channel { - /* Unique channel id */ - int id; -@@ -961,6 +987,8 @@ struct hv_device { - - /* the device instance id of this device */ - uuid_le dev_instance; -+ u16 vendor_id; -+ u16 device_id; - - struct device device; - --- -2.10.0 - diff --git a/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch b/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch deleted file mode 100644 index 46095d238..000000000 --- a/alpine/kernel/patches/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 64f93cfc49018e7ffa772506cfe3631b3db530b9 Mon Sep 17 
00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:37 -0800 -Subject: [PATCH 26/42] Drivers: hv: vmbus: add a helper function to set a - channel's pending send size - -This will be used by the coming net/hvsock driver. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 3c75354d043ad546148d6992e40033ecaefc5ea5) ---- - include/linux/hyperv.h | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 51c98fd..934542a 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -818,6 +818,12 @@ static inline void *get_per_channel_state(struct vmbus_channel *c) - return c->per_channel_state; - } - -+static inline void set_channel_pending_send_size(struct vmbus_channel *c, -+ u32 size) -+{ -+ c->outbound.ring_buffer->pending_send_sz = size; -+} -+ - void vmbus_onmessage(void *context); - - int vmbus_request_offers(void); --- -2.10.0 - diff --git a/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch b/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch deleted file mode 100644 index 375a62ae1..000000000 --- a/alpine/kernel/patches/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 8658862991789c9dca080be3d35a7e72479b91e9 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:38 -0800 -Subject: [PATCH 27/42] Drivers: hv: vmbus: define the new offer type for - Hyper-V socket (hvsock) - -A helper function is also added. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit e8d6ca023efce3bd80050dcd9e708ee3cf8babd4) ---- - include/linux/hyperv.h | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 934542a..a4f105d 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -237,6 +237,7 @@ struct vmbus_channel_offer { - #define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100 - #define VMBUS_CHANNEL_PARENT_OFFER 0x200 - #define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400 -+#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000 - - struct vmpacket_descriptor { - u16 type; -@@ -797,6 +798,12 @@ struct vmbus_channel { - enum hv_signal_policy signal_policy; - }; - -+static inline bool is_hvsock_channel(const struct vmbus_channel *c) -+{ -+ return !!(c->offermsg.offer.chn_flags & -+ VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); -+} -+ - static inline void set_channel_signal_state(struct vmbus_channel *c, - enum hv_signal_policy policy) - { --- -2.10.0 - diff --git a/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch b/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch deleted file mode 100644 index 7c93e72a1..000000000 --- a/alpine/kernel/patches/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 60af2c3c5565e40ee66123edb9386ccaa1355dff Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:39 -0800 -Subject: [PATCH 28/42] Drivers: hv: vmbus: vmbus_sendpacket_ctl: hvsock: avoid - unnecessary signaling - -When the hvsock channel's outbound ringbuffer is full (i.e., -hv_ringbuffer_write() returns -EAGAIN), we should avoid the unnecessary -signaling the host. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 5f363bc38f810d238d1e8b19998625ddec3b8138) ---- - drivers/hv/channel.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index dd6de7f..128dcf2 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -659,6 +659,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - * If we cannot write to the ring-buffer; signal the host - * even if we may not have written anything. This is a rare - * enough condition that it should not matter. -+ * NOTE: in this case, the hvsock channel is an exception, because -+ * it looks the host side's hvsock implementation has a throttling -+ * mechanism which can hurt the performance otherwise. - */ - - if (channel->signal_policy) -@@ -666,7 +669,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - else - kick_q = true; - -- if (((ret == 0) && kick_q && signal) || (ret)) -+ if (((ret == 0) && kick_q && signal) || -+ (ret && !is_hvsock_channel(channel))) - vmbus_setevent(channel); - - return ret; --- -2.10.0 - diff --git a/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch b/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch deleted file mode 100644 index fa317eec0..000000000 --- a/alpine/kernel/patches/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 2c5183043209906ad0a41fb1a5b4d0c4c8a8e735 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:40 -0800 -Subject: [PATCH 29/42] Drivers: hv: vmbus: define a new VMBus message type for - hvsock - -A function to send the type of message is also added. - -The coming net/hvsock driver will use this function to proactively request -the host to offer a VMBus channel for a new hvsock connection. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 5c23a1a5c60b0f472cfa61cd7d8279f8aaeb5b64) ---- - drivers/hv/channel.c | 15 +++++++++++++++ - drivers/hv/channel_mgmt.c | 4 ++++ - include/linux/hyperv.h | 13 +++++++++++++ - 3 files changed, 32 insertions(+) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 128dcf2..415f6c7 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -219,6 +219,21 @@ error0: - } - EXPORT_SYMBOL_GPL(vmbus_open); - -+/* Used for Hyper-V Socket: a guest client's connect() to the host */ -+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, -+ const uuid_le *shv_host_servie_id) -+{ -+ struct vmbus_channel_tl_connect_request conn_msg; -+ -+ memset(&conn_msg, 0, sizeof(conn_msg)); -+ conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST; -+ conn_msg.guest_endpoint_id = *shv_guest_servie_id; -+ conn_msg.host_service_id = *shv_host_servie_id; -+ -+ return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); -+} -+EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); -+ - /* - * create_gpadl_header - Creates a gpadl for the specified buffer - */ -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index d6c6114..60ca25b 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -958,6 +958,10 @@ struct vmbus_channel_message_table_entry - {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, - {CHANNELMSG_UNLOAD, 0, NULL}, - {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, -+ {CHANNELMSG_18, 0, NULL}, -+ {CHANNELMSG_19, 0, NULL}, -+ {CHANNELMSG_20, 0, NULL}, -+ {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, - }; - - /* -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index a4f105d..191bc5d 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -394,6 +394,10 @@ enum vmbus_channel_message_type { - CHANNELMSG_VERSION_RESPONSE = 15, - CHANNELMSG_UNLOAD = 16, - CHANNELMSG_UNLOAD_RESPONSE = 17, -+ CHANNELMSG_18 = 18, -+ 
CHANNELMSG_19 = 19, -+ CHANNELMSG_20 = 20, -+ CHANNELMSG_TL_CONNECT_REQUEST = 21, - CHANNELMSG_COUNT - }; - -@@ -564,6 +568,13 @@ struct vmbus_channel_initiate_contact { - u64 monitor_page2; - } __packed; - -+/* Hyper-V socket: guest's connect()-ing to host */ -+struct vmbus_channel_tl_connect_request { -+ struct vmbus_channel_message_header header; -+ uuid_le guest_endpoint_id; -+ uuid_le host_service_id; -+} __packed; -+ - struct vmbus_channel_version_response { - struct vmbus_channel_message_header header; - u8 version_supported; -@@ -1295,4 +1306,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); - - extern __u32 vmbus_proto_version; - -+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, -+ const uuid_le *shv_host_servie_id); - #endif /* _HYPERV_H */ --- -2.10.0 - diff --git a/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch b/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch deleted file mode 100644 index f264f65dd..000000000 --- a/alpine/kernel/patches/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 58a10705d630bdcb5ea08c894d28851c73e9bd4f Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:41 -0800 -Subject: [PATCH 30/42] Drivers: hv: vmbus: add a hvsock flag in struct - hv_driver - -Only the coming hv_sock driver has a "true" value for this flag. - -We treat the hvsock offers/channels as special VMBus devices. -Since the hv_sock driver handles all the hvsock offers/channels, we need to -tweak vmbus_match() for hv_sock driver, so we introduce this flag. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 8981da320a11217589aa3c50f9e891bcdef07ece) ---- - drivers/hv/vmbus_drv.c | 4 ++++ - include/linux/hyperv.h | 14 ++++++++++++++ - 2 files changed, 18 insertions(+) - -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index de7130c..03fc5d3 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -585,6 +585,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver) - struct hv_driver *drv = drv_to_hv_drv(driver); - struct hv_device *hv_dev = device_to_hv_device(device); - -+ /* The hv_sock driver handles all hv_sock offers. */ -+ if (is_hvsock_channel(hv_dev->channel)) -+ return drv->hvsock; -+ - if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) - return 1; - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 191bc5d..05966e2 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -992,6 +992,20 @@ extern void vmbus_ontimer(unsigned long data); - struct hv_driver { - const char *name; - -+ /* -+ * A hvsock offer, which has a VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER -+ * channel flag, actually doesn't mean a synthetic device because the -+ * offer's if_type/if_instance can change for every new hvsock -+ * connection. -+ * -+ * However, to facilitate the notification of new-offer/rescind-offer -+ * from vmbus driver to hvsock driver, we can handle hvsock offer as -+ * a special vmbus device, and hence we need the below flag to -+ * indicate if the driver is the hvsock driver or not: we need to -+ * specially treat the hvosck offer & driver in vmbus_match(). 
-+ */ -+ bool hvsock; -+ - /* the device type supported by this driver */ - uuid_le dev_type; - const struct hv_vmbus_device_id *id_table; --- -2.10.0 - diff --git a/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch b/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch deleted file mode 100644 index 9768186ca..000000000 --- a/alpine/kernel/patches/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 6dd9db116b0985dfc56b3028205549f4c52d8be0 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:42 -0800 -Subject: [PATCH 31/42] Drivers: hv: vmbus: add a per-channel rescind callback - -This will be used by the coming hv_sock driver. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 499e8401a515d04daa986b995da710d2b9737764) ---- - drivers/hv/channel_mgmt.c | 11 +++++++++++ - include/linux/hyperv.h | 9 +++++++++ - 2 files changed, 20 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 60ca25b..76864c9 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -741,6 +741,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - spin_unlock_irqrestore(&channel->lock, flags); - - if (channel->device_obj) { -+ if (channel->chn_rescind_callback) { -+ channel->chn_rescind_callback(channel); -+ return; -+ } - /* - * We will have to unregister this device from the - * driver core. 
-@@ -1110,3 +1114,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary) - return ret; - } - EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); -+ -+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, -+ void (*chn_rescind_cb)(struct vmbus_channel *)) -+{ -+ channel->chn_rescind_callback = chn_rescind_cb; -+} -+EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 05966e2..ad04017 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -768,6 +768,12 @@ struct vmbus_channel { - void (*sc_creation_callback)(struct vmbus_channel *new_sc); - - /* -+ * Channel rescind callback. Some channels (the hvsock ones), need to -+ * register a callback which is invoked in vmbus_onoffer_rescind(). -+ */ -+ void (*chn_rescind_callback)(struct vmbus_channel *channel); -+ -+ /* - * The spinlock to protect the structure. It is being used to protect - * test-and-set access to various attributes of the structure as well - * as all sc_list operations. -@@ -853,6 +859,9 @@ int vmbus_request_offers(void); - void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, - void (*sc_cr_cb)(struct vmbus_channel *new_sc)); - -+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, -+ void (*chn_rescind_cb)(struct vmbus_channel *)); -+ - /* - * Retrieve the (sub) channel on which to send an outgoing request. 
- * When a primary channel has multiple sub-channels, we choose a --- -2.10.0 - diff --git a/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch b/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch deleted file mode 100644 index cb9a4f99c..000000000 --- a/alpine/kernel/patches/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 5e89daa5e8c0b5950b46ba77dd6248c5e61bc405 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:43 -0800 -Subject: [PATCH 32/42] Drivers: hv: vmbus: add an API - vmbus_hvsock_device_unregister() - -The hvsock driver needs this API to release all the resources related -to the channel. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 85d9aa705184a4504d0330017e3956fcdae8a9d6) ---- - drivers/hv/channel_mgmt.c | 33 ++++++++++++++++++++++++++++----- - drivers/hv/connection.c | 4 ++-- - include/linux/hyperv.h | 2 ++ - 3 files changed, 32 insertions(+), 7 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 76864c9..cf311be 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -310,6 +310,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - vmbus_release_relid(relid); - - BUG_ON(!channel->rescind); -+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - - if (channel->target_cpu != get_cpu()) { - put_cpu(); -@@ -321,9 +322,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - } - - if (channel->primary_channel == NULL) { -- mutex_lock(&vmbus_connection.channel_mutex); - list_del(&channel->listentry); -- mutex_unlock(&vmbus_connection.channel_mutex); - - primary_channel = channel; - } else { -@@ -367,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - bool fnew = true; - unsigned long flags; - 
u16 dev_type; -+ int ret; - - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); -@@ -449,7 +449,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - * binding which eventually invokes the device driver's AddDevice() - * method. - */ -- if (vmbus_device_register(newchannel->device_obj) != 0) { -+ mutex_lock(&vmbus_connection.channel_mutex); -+ ret = vmbus_device_register(newchannel->device_obj); -+ mutex_unlock(&vmbus_connection.channel_mutex); -+ -+ if (ret != 0) { - pr_err("unable to add child device object (relid %d)\n", - newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); -@@ -725,6 +729,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - struct device *dev; - - rescind = (struct vmbus_channel_rescind_offer *)hdr; -+ -+ mutex_lock(&vmbus_connection.channel_mutex); - channel = relid2channel(rescind->child_relid); - - if (channel == NULL) { -@@ -733,7 +739,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - * vmbus_process_offer(), we have already invoked - * vmbus_release_relid() on error. 
- */ -- return; -+ goto out; - } - - spin_lock_irqsave(&channel->lock, flags); -@@ -743,7 +749,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - if (channel->device_obj) { - if (channel->chn_rescind_callback) { - channel->chn_rescind_callback(channel); -- return; -+ goto out; - } - /* - * We will have to unregister this device from the -@@ -758,8 +764,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - hv_process_channel_removal(channel, - channel->offermsg.child_relid); - } -+ -+out: -+ mutex_unlock(&vmbus_connection.channel_mutex); - } - -+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) -+{ -+ mutex_lock(&vmbus_connection.channel_mutex); -+ -+ BUG_ON(!is_hvsock_channel(channel)); -+ -+ channel->rescind = true; -+ vmbus_device_unregister(channel->device_obj); -+ -+ mutex_unlock(&vmbus_connection.channel_mutex); -+} -+EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); -+ -+ - /* - * vmbus_onoffers_delivered - - * This is invoked when all offers have been delivered. 
-diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 521f48e..09c08b5 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -285,7 +285,8 @@ struct vmbus_channel *relid2channel(u32 relid) - struct list_head *cur, *tmp; - struct vmbus_channel *cur_sc; - -- mutex_lock(&vmbus_connection.channel_mutex); -+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); -+ - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (channel->offermsg.child_relid == relid) { - found_channel = channel; -@@ -304,7 +305,6 @@ struct vmbus_channel *relid2channel(u32 relid) - } - } - } -- mutex_unlock(&vmbus_connection.channel_mutex); - - return found_channel; - } -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index ad04017..993318a 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1071,6 +1071,8 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, - const char *mod_name); - void vmbus_driver_unregister(struct hv_driver *hv_driver); - -+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); -+ - int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - resource_size_t min, resource_size_t max, - resource_size_t size, resource_size_t align, --- -2.10.0 - diff --git a/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch b/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch deleted file mode 100644 index fa351c53c..000000000 --- a/alpine/kernel/patches/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch +++ /dev/null @@ -1,208 +0,0 @@ -From b7e3c4ad47b7fd47a79a723ac0c1823b6782d1ff Mon Sep 17 00:00:00 2001 -From: "K. Y. 
Srinivasan" -Date: Wed, 27 Jan 2016 22:29:45 -0800 -Subject: [PATCH 33/42] Drivers: hv: vmbus: Give control over how the ring - access is serialized - -On the channel send side, many of the VMBUS -device drivers explicity serialize access to the -outgoing ring buffer. Give more control to the -VMBUS device drivers in terms how to serialize -accesss to the outgoing ring buffer. -The default behavior will be to aquire the -ring lock to preserve the current behavior. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit fe760e4d64fe5c17c39e86c410d41f6587ee88bc) ---- - drivers/hv/channel.c | 15 +++++++++++---- - drivers/hv/channel_mgmt.c | 1 + - drivers/hv/hyperv_vmbus.h | 2 +- - drivers/hv/ring_buffer.c | 13 ++++++++----- - include/linux/hyperv.h | 16 ++++++++++++++++ - 5 files changed, 37 insertions(+), 10 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 415f6c7..57a1b65 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -639,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - u64 aligned_data = 0; - int ret; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - int num_vecs = ((bufferlen != 0) ? 
3 : 1); - - -@@ -658,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, -- &signal); -+ &signal, lock); - - /* - * Signalling the host is conditional on many factors: -@@ -738,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - - if (pagecount > MAX_PAGE_BUFFER_COUNT) - return -EINVAL; -@@ -774,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - /* - * Signalling the host is conditional on many factors: -@@ -837,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - - packetlen = desc_size + bufferlen; - packetlen_aligned = ALIGN(packetlen, sizeof(u64)); -@@ -856,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - if (ret == 0 && signal) - vmbus_setevent(channel); -@@ -881,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, - multi_pagebuffer->len); - -@@ 
-919,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - if (ret == 0 && signal) - vmbus_setevent(channel); -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index cf311be..b40f429 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -259,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void) - return NULL; - - channel->id = atomic_inc_return(&chan_num); -+ channel->acquire_ring_lock = true; - spin_lock_init(&channel->inbound_lock); - spin_lock_init(&channel->lock); - -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 50b1de7..89bb559 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -617,7 +617,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); - - int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, - struct kvec *kv_list, -- u32 kv_count, bool *signal); -+ u32 kv_count, bool *signal, bool lock); - - int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer, - u32 buflen); -diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c -index 70a1a9a..89a428f 100644 ---- a/drivers/hv/ring_buffer.c -+++ b/drivers/hv/ring_buffer.c -@@ -388,7 +388,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) - * - */ - int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, -- struct kvec *kv_list, u32 kv_count, bool *signal) -+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock) - { - int i = 0; - u32 bytes_avail_towrite; -@@ -398,14 +398,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - u32 next_write_location; - u32 old_write; - u64 prev_indices = 0; -- unsigned long flags; -+ 
unsigned long flags = 0; - - for (i = 0; i < kv_count; i++) - totalbytes_towrite += kv_list[i].iov_len; - - totalbytes_towrite += sizeof(u64); - -- spin_lock_irqsave(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_lock_irqsave(&outring_info->ring_lock, flags); - - hv_get_ringbuffer_availbytes(outring_info, - &bytes_avail_toread, -@@ -416,7 +417,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - /* Otherwise, the next time around, we think the ring buffer */ - /* is empty since the read index == write index */ - if (bytes_avail_towrite <= totalbytes_towrite) { -- spin_unlock_irqrestore(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_unlock_irqrestore(&outring_info->ring_lock, flags); - return -EAGAIN; - } - -@@ -447,7 +449,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - hv_set_next_write_location(outring_info, next_write_location); - - -- spin_unlock_irqrestore(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_unlock_irqrestore(&outring_info->ring_lock, flags); - - *signal = hv_need_to_signal(old_write, outring_info); - return 0; -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 993318a..6c9695e 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -813,8 +813,24 @@ struct vmbus_channel { - * signaling control. - */ - enum hv_signal_policy signal_policy; -+ /* -+ * On the channel send side, many of the VMBUS -+ * device drivers explicity serialize access to the -+ * outgoing ring buffer. Give more control to the -+ * VMBUS device drivers in terms how to serialize -+ * accesss to the outgoing ring buffer. -+ * The default behavior will be to aquire the -+ * ring lock to preserve the current behavior. 
-+ */ -+ bool acquire_ring_lock; -+ - }; - -+static inline void set_channel_lock_state(struct vmbus_channel *c, bool state) -+{ -+ c->acquire_ring_lock = state; -+} -+ - static inline bool is_hvsock_channel(const struct vmbus_channel *c) - { - return !!(c->offermsg.offer.chn_flags & --- -2.10.0 - diff --git a/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch b/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch deleted file mode 100644 index 06b9acdea..000000000 --- a/alpine/kernel/patches/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch +++ /dev/null @@ -1,100 +0,0 @@ -From af2dd29e3cf40c789045199893c232d57f0b7057 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 26 Feb 2016 15:13:16 -0800 -Subject: [PATCH 34/42] Drivers: hv: vmbus: avoid wait_for_completion() on - crash -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -wait_for_completion() may sleep, it enables interrupts and this -is something we really want to avoid on crashes because interrupt -handlers can cause other crashes. Switch to the recently introduced -vmbus_wait_for_unload() doing busy wait instead. - -Reported-by: Radim Krcmar -Signed-off-by: Vitaly Kuznetsov -Reviewed-by: Radim Kr.má -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 75ff3a8a9168df750b5bd0589e897a6c0517a9f1) ---- - drivers/hv/channel_mgmt.c | 4 ++-- - drivers/hv/connection.c | 2 +- - drivers/hv/hyperv_vmbus.h | 2 +- - drivers/hv/vmbus_drv.c | 4 ++-- - 4 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index b40f429..f70e352 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -641,7 +641,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) - complete(&vmbus_connection.unload_event); - } - --void vmbus_initiate_unload(void) -+void vmbus_initiate_unload(bool crash) - { - struct vmbus_channel_message_header hdr; - -@@ -658,7 +658,7 @@ void vmbus_initiate_unload(void) - * vmbus_initiate_unload() is also called on crash and the crash can be - * happening in an interrupt context, where scheduling is impossible. - */ -- if (!in_interrupt()) -+ if (!crash) - wait_for_completion(&vmbus_connection.unload_event); - else - vmbus_wait_for_unload(); -diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 09c08b5..78b8be8 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -233,7 +233,7 @@ void vmbus_disconnect(void) - /* - * First send the unload request to the host. 
- */ -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(false); - - if (vmbus_connection.work_queue) { - drain_workqueue(vmbus_connection.work_queue); -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 89bb559..f424c2d 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -756,7 +756,7 @@ void hv_vss_onchannelcallback(void *); - int hv_fcopy_init(struct hv_util_service *); - void hv_fcopy_deinit(void); - void hv_fcopy_onchannelcallback(void *); --void vmbus_initiate_unload(void); -+void vmbus_initiate_unload(bool crash); - - static inline void hv_poll_channel(struct vmbus_channel *channel, - void (*cb)(void *)) -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 03fc5d3..b0cc6fd 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -1276,7 +1276,7 @@ static void hv_kexec_handler(void) - int cpu; - - hv_synic_clockevents_cleanup(); -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(false); - for_each_online_cpu(cpu) - smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); - hv_cleanup(); -@@ -1284,7 +1284,7 @@ static void hv_kexec_handler(void) - - static void hv_crash_handler(struct pt_regs *regs) - { -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(true); - /* - * In crash handler we can't schedule synic cleanup for all CPUs, - * doing the cleanup for current CPU only. 
This should be sufficient --- -2.10.0 - diff --git a/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch b/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch deleted file mode 100644 index 58ba9c17c..000000000 --- a/alpine/kernel/patches/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch +++ /dev/null @@ -1,39 +0,0 @@ -From fa3647ae889af3cccaaee37ac0723fc1b74689e3 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 26 Feb 2016 15:13:18 -0800 -Subject: [PATCH 35/42] Drivers: hv: vmbus: avoid unneeded compiler - optimizations in vmbus_wait_for_unload() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Message header is modified by the hypervisor and we read it in a loop, -we need to prevent compilers from optimizing accesses. There are no such -optimizations at this moment, this is just a future proof. - -Suggested-by: Radim Krcmar -Signed-off-by: Vitaly Kuznetsov -Reviewed-by: Radim Kr.má -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit d452ab7b4c65dfcaee88a0d6866eeeb98a3d1884) ---- - drivers/hv/channel_mgmt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index f70e352..c892db5 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -605,7 +605,7 @@ static void vmbus_wait_for_unload(void) - bool unloaded = false; - - while (1) { -- if (msg->header.message_type == HVMSG_NONE) { -+ if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) { - mdelay(10); - continue; - } --- -2.10.0 - diff --git a/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch b/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch deleted file mode 100644 index 9353d1e53..000000000 --- a/alpine/kernel/patches/0036-kcm-Kernel-Connection-Multiplexor-module.patch +++ /dev/null @@ -1,2312 +0,0 @@ -From afde92b79d7bbdf25d3f583898cbee4773b07d41 Mon Sep 17 00:00:00 2001 -From: Tom Herbert -Date: Mon, 7 Mar 2016 14:11:06 -0800 -Subject: [PATCH 36/42] kcm: Kernel Connection Multiplexor module - -This module implements the Kernel Connection Multiplexor. - -Kernel Connection Multiplexor (KCM) is a facility that provides a -message based interface over TCP for generic application protocols. -With KCM an application can efficiently send and receive application -protocol messages over TCP using datagram sockets. - -For more information see the included Documentation/networking/kcm.txt - -Signed-off-by: Tom Herbert -Signed-off-by: David S. 
Miller -(cherry picked from commit ab7ac4eb9832e32a09f4e8042705484d2fb0aad3) ---- - include/linux/socket.h | 6 +- - include/net/kcm.h | 125 +++ - include/uapi/linux/kcm.h | 39 + - net/Kconfig | 1 + - net/Makefile | 1 + - net/kcm/Kconfig | 9 + - net/kcm/Makefile | 3 + - net/kcm/kcmsock.c | 2015 ++++++++++++++++++++++++++++++++++++++++++++++ - 8 files changed, 2198 insertions(+), 1 deletion(-) - create mode 100644 include/net/kcm.h - create mode 100644 include/uapi/linux/kcm.h - create mode 100644 net/kcm/Kconfig - create mode 100644 net/kcm/Makefile - create mode 100644 net/kcm/kcmsock.c - -diff --git a/include/linux/socket.h b/include/linux/socket.h -index 5bf59c8..4e1ea53 100644 ---- a/include/linux/socket.h -+++ b/include/linux/socket.h -@@ -200,7 +200,9 @@ struct ucred { - #define AF_ALG 38 /* Algorithm sockets */ - #define AF_NFC 39 /* NFC sockets */ - #define AF_VSOCK 40 /* vSockets */ --#define AF_MAX 41 /* For now.. */ -+#define AF_KCM 41 /* Kernel Connection Multiplexor*/ -+ -+#define AF_MAX 42 /* For now.. */ - - /* Protocol families, same as address families. */ - #define PF_UNSPEC AF_UNSPEC -@@ -246,6 +248,7 @@ struct ucred { - #define PF_ALG AF_ALG - #define PF_NFC AF_NFC - #define PF_VSOCK AF_VSOCK -+#define PF_KCM AF_KCM - #define PF_MAX AF_MAX - - /* Maximum queue length specifiable by listen. */ -@@ -322,6 +325,7 @@ struct ucred { - #define SOL_CAIF 278 - #define SOL_ALG 279 - #define SOL_NFC 280 -+#define SOL_KCM 281 - - /* IPX options */ - #define IPX_TYPE 1 -diff --git a/include/net/kcm.h b/include/net/kcm.h -new file mode 100644 -index 0000000..1bcae39 ---- /dev/null -+++ b/include/net/kcm.h -@@ -0,0 +1,125 @@ -+/* -+ * Kernel Connection Multiplexor -+ * -+ * Copyright (c) 2016 Tom Herbert -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation. 
-+ */ -+ -+#ifndef __NET_KCM_H_ -+#define __NET_KCM_H_ -+ -+#include -+#include -+#include -+ -+extern unsigned int kcm_net_id; -+ -+struct kcm_tx_msg { -+ unsigned int sent; -+ unsigned int fragidx; -+ unsigned int frag_offset; -+ unsigned int msg_flags; -+ struct sk_buff *frag_skb; -+ struct sk_buff *last_skb; -+}; -+ -+struct kcm_rx_msg { -+ int full_len; -+ int accum_len; -+ int offset; -+}; -+ -+/* Socket structure for KCM client sockets */ -+struct kcm_sock { -+ struct sock sk; -+ struct kcm_mux *mux; -+ struct list_head kcm_sock_list; -+ int index; -+ u32 done : 1; -+ struct work_struct done_work; -+ -+ /* Transmit */ -+ struct kcm_psock *tx_psock; -+ struct work_struct tx_work; -+ struct list_head wait_psock_list; -+ struct sk_buff *seq_skb; -+ -+ /* Don't use bit fields here, these are set under different locks */ -+ bool tx_wait; -+ bool tx_wait_more; -+ -+ /* Receive */ -+ struct kcm_psock *rx_psock; -+ struct list_head wait_rx_list; /* KCMs waiting for receiving */ -+ bool rx_wait; -+ u32 rx_disabled : 1; -+}; -+ -+struct bpf_prog; -+ -+/* Structure for an attached lower socket */ -+struct kcm_psock { -+ struct sock *sk; -+ struct kcm_mux *mux; -+ int index; -+ -+ u32 tx_stopped : 1; -+ u32 rx_stopped : 1; -+ u32 done : 1; -+ u32 unattaching : 1; -+ -+ void (*save_state_change)(struct sock *sk); -+ void (*save_data_ready)(struct sock *sk); -+ void (*save_write_space)(struct sock *sk); -+ -+ struct list_head psock_list; -+ -+ /* Receive */ -+ struct sk_buff *rx_skb_head; -+ struct sk_buff **rx_skb_nextp; -+ struct sk_buff *ready_rx_msg; -+ struct list_head psock_ready_list; -+ struct work_struct rx_work; -+ struct delayed_work rx_delayed_work; -+ struct bpf_prog *bpf_prog; -+ struct kcm_sock *rx_kcm; -+ -+ /* Transmit */ -+ struct kcm_sock *tx_kcm; -+ struct list_head psock_avail_list; -+}; -+ -+/* Per net MUX list */ -+struct kcm_net { -+ struct mutex mutex; -+ struct list_head mux_list; -+ int count; -+}; -+ -+/* Structure for a MUX */ -+struct kcm_mux 
{ -+ struct list_head kcm_mux_list; -+ struct rcu_head rcu; -+ struct kcm_net *knet; -+ -+ struct list_head kcm_socks; /* All KCM sockets on MUX */ -+ int kcm_socks_cnt; /* Total KCM socket count for MUX */ -+ struct list_head psocks; /* List of all psocks on MUX */ -+ int psocks_cnt; /* Total attached sockets */ -+ -+ /* Receive */ -+ spinlock_t rx_lock ____cacheline_aligned_in_smp; -+ struct list_head kcm_rx_waiters; /* KCMs waiting for receiving */ -+ struct list_head psocks_ready; /* List of psocks with a msg ready */ -+ struct sk_buff_head rx_hold_queue; -+ -+ /* Transmit */ -+ spinlock_t lock ____cacheline_aligned_in_smp; /* TX and mux locking */ -+ struct list_head psocks_avail; /* List of available psocks */ -+ struct list_head kcm_tx_waiters; /* KCMs waiting for a TX psock */ -+}; -+ -+#endif /* __NET_KCM_H_ */ -diff --git a/include/uapi/linux/kcm.h b/include/uapi/linux/kcm.h -new file mode 100644 -index 0000000..d72350f ---- /dev/null -+++ b/include/uapi/linux/kcm.h -@@ -0,0 +1,39 @@ -+/* -+ * Kernel Connection Multiplexor -+ * -+ * Copyright (c) 2016 Tom Herbert -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation. -+ * -+ * User API to clone KCM sockets and attach transport socket to a KCM -+ * multiplexor. 
-+ */ -+ -+#ifndef KCM_KERNEL_H -+#define KCM_KERNEL_H -+ -+struct kcm_attach { -+ int fd; -+ int bpf_fd; -+}; -+ -+struct kcm_unattach { -+ int fd; -+}; -+ -+struct kcm_clone { -+ int fd; -+}; -+ -+#define SIOCKCMATTACH (SIOCPROTOPRIVATE + 0) -+#define SIOCKCMUNATTACH (SIOCPROTOPRIVATE + 1) -+#define SIOCKCMCLONE (SIOCPROTOPRIVATE + 2) -+ -+#define KCMPROTO_CONNECTED 0 -+ -+/* Socket options */ -+#define KCM_RECV_DISABLE 1 -+ -+#endif -diff --git a/net/Kconfig b/net/Kconfig -index 127da94..b8439e6 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -351,6 +351,7 @@ source "net/can/Kconfig" - source "net/irda/Kconfig" - source "net/bluetooth/Kconfig" - source "net/rxrpc/Kconfig" -+source "net/kcm/Kconfig" - - config FIB_RULES - bool -diff --git a/net/Makefile b/net/Makefile -index a5d0409..81d1411 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/ - obj-$(CONFIG_BT) += bluetooth/ - obj-$(CONFIG_SUNRPC) += sunrpc/ - obj-$(CONFIG_AF_RXRPC) += rxrpc/ -+obj-$(CONFIG_AF_KCM) += kcm/ - obj-$(CONFIG_ATM) += atm/ - obj-$(CONFIG_L2TP) += l2tp/ - obj-$(CONFIG_DECNET) += decnet/ -diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig -new file mode 100644 -index 0000000..4f28332 ---- /dev/null -+++ b/net/kcm/Kconfig -@@ -0,0 +1,9 @@ -+ -+config AF_KCM -+ tristate "KCM sockets" -+ depends on INET -+ select BPF_SYSCALL -+ ---help--- -+ KCM (Kernel Connection Multiplexor) sockets provide a method -+ for multiplexing messages of a message based application -+ protocol over kernel connectons (e.g. TCP connections). 
-diff --git a/net/kcm/Makefile b/net/kcm/Makefile -new file mode 100644 -index 0000000..cb525f7 ---- /dev/null -+++ b/net/kcm/Makefile -@@ -0,0 +1,3 @@ -+obj-$(CONFIG_AF_KCM) += kcm.o -+ -+kcm-y := kcmsock.o -diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c -new file mode 100644 -index 0000000..649d246 ---- /dev/null -+++ b/net/kcm/kcmsock.c -@@ -0,0 +1,2015 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+unsigned int kcm_net_id; -+ -+static struct kmem_cache *kcm_psockp __read_mostly; -+static struct kmem_cache *kcm_muxp __read_mostly; -+static struct workqueue_struct *kcm_wq; -+ -+static inline struct kcm_sock *kcm_sk(const struct sock *sk) -+{ -+ return (struct kcm_sock *)sk; -+} -+ -+static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) -+{ -+ return (struct kcm_tx_msg *)skb->cb; -+} -+ -+static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -+{ -+ return (struct kcm_rx_msg *)((void *)skb->cb + -+ offsetof(struct qdisc_skb_cb, data)); -+} -+ -+static void report_csk_error(struct sock *csk, int err) -+{ -+ csk->sk_err = EPIPE; -+ csk->sk_error_report(csk); -+} -+ -+/* Callback lock held */ -+static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, -+ struct sk_buff *skb) -+{ -+ struct sock *csk = psock->sk; -+ -+ /* Unrecoverable error in receive */ -+ -+ if (psock->rx_stopped) -+ return; -+ -+ psock->rx_stopped = 1; -+ -+ /* Report an error on the lower socket */ -+ report_csk_error(csk, err); -+} -+ -+static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, -+ bool wakeup_kcm) -+{ -+ struct sock *csk = psock->sk; -+ struct kcm_mux *mux = psock->mux; -+ -+ /* Unrecoverable error in transmit */ -+ -+ spin_lock_bh(&mux->lock); -+ -+ if (psock->tx_stopped) { -+ spin_unlock_bh(&mux->lock); -+ return; -+ } -+ -+ psock->tx_stopped = 1; -+ -+ if 
(!psock->tx_kcm) { -+ /* Take off psocks_avail list */ -+ list_del(&psock->psock_avail_list); -+ } else if (wakeup_kcm) { -+ /* In this case psock is being aborted while outside of -+ * write_msgs and psock is reserved. Schedule tx_work -+ * to handle the failure there. Need to commit tx_stopped -+ * before queuing work. -+ */ -+ smp_mb(); -+ -+ queue_work(kcm_wq, &psock->tx_kcm->tx_work); -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+ /* Report error on lower socket */ -+ report_csk_error(csk, err); -+} -+ -+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -+ -+/* KCM is ready to receive messages on its queue-- either the KCM is new or -+ * has become unblocked after being blocked on full socket buffer. Queue any -+ * pending ready messages on a psock. RX mux lock held. -+ */ -+static void kcm_rcv_ready(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ struct sk_buff *skb; -+ -+ if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled)) -+ return; -+ -+ while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) { -+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { -+ /* Assuming buffer limit has been reached */ -+ skb_queue_head(&mux->rx_hold_queue, skb); -+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); -+ return; -+ } -+ } -+ -+ while (!list_empty(&mux->psocks_ready)) { -+ psock = list_first_entry(&mux->psocks_ready, struct kcm_psock, -+ psock_ready_list); -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) { -+ /* Assuming buffer limit has been reached */ -+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); -+ return; -+ } -+ -+ /* Consumed the ready message on the psock. Schedule rx_work to -+ * get more messages. 
-+ */ -+ list_del(&psock->psock_ready_list); -+ psock->ready_rx_msg = NULL; -+ -+ /* Commit clearing of ready_rx_msg for queuing work */ -+ smp_mb(); -+ -+ queue_work(kcm_wq, &psock->rx_work); -+ } -+ -+ /* Buffer limit is okay now, add to ready list */ -+ list_add_tail(&kcm->wait_rx_list, -+ &kcm->mux->kcm_rx_waiters); -+ kcm->rx_wait = true; -+} -+ -+static void kcm_rfree(struct sk_buff *skb) -+{ -+ struct sock *sk = skb->sk; -+ struct kcm_sock *kcm = kcm_sk(sk); -+ struct kcm_mux *mux = kcm->mux; -+ unsigned int len = skb->truesize; -+ -+ sk_mem_uncharge(sk, len); -+ atomic_sub(len, &sk->sk_rmem_alloc); -+ -+ /* For reading rx_wait and rx_psock without holding lock */ -+ smp_mb__after_atomic(); -+ -+ if (!kcm->rx_wait && !kcm->rx_psock && -+ sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { -+ spin_lock_bh(&mux->rx_lock); -+ kcm_rcv_ready(kcm); -+ spin_unlock_bh(&mux->rx_lock); -+ } -+} -+ -+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) -+{ -+ struct sk_buff_head *list = &sk->sk_receive_queue; -+ -+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) -+ return -ENOMEM; -+ -+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) -+ return -ENOBUFS; -+ -+ skb->dev = NULL; -+ -+ skb_orphan(skb); -+ skb->sk = sk; -+ skb->destructor = kcm_rfree; -+ atomic_add(skb->truesize, &sk->sk_rmem_alloc); -+ sk_mem_charge(sk, skb->truesize); -+ -+ skb_queue_tail(list, skb); -+ -+ if (!sock_flag(sk, SOCK_DEAD)) -+ sk->sk_data_ready(sk); -+ -+ return 0; -+} -+ -+/* Requeue received messages for a kcm socket to other kcm sockets. This is -+ * called with a kcm socket is receive disabled. -+ * RX mux lock held. 
-+ */ -+static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) -+{ -+ struct sk_buff *skb; -+ struct kcm_sock *kcm; -+ -+ while ((skb = __skb_dequeue(head))) { -+ /* Reset destructor to avoid calling kcm_rcv_ready */ -+ skb->destructor = sock_rfree; -+ skb_orphan(skb); -+try_again: -+ if (list_empty(&mux->kcm_rx_waiters)) { -+ skb_queue_tail(&mux->rx_hold_queue, skb); -+ continue; -+ } -+ -+ kcm = list_first_entry(&mux->kcm_rx_waiters, -+ struct kcm_sock, wait_rx_list); -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { -+ /* Should mean socket buffer full */ -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ -+ /* Commit rx_wait to read in kcm_free */ -+ smp_wmb(); -+ -+ goto try_again; -+ } -+ } -+} -+ -+/* Lower sock lock held */ -+static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, -+ struct sk_buff *head) -+{ -+ struct kcm_mux *mux = psock->mux; -+ struct kcm_sock *kcm; -+ -+ WARN_ON(psock->ready_rx_msg); -+ -+ if (psock->rx_kcm) -+ return psock->rx_kcm; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ if (psock->rx_kcm) { -+ spin_unlock_bh(&mux->rx_lock); -+ return psock->rx_kcm; -+ } -+ -+ if (list_empty(&mux->kcm_rx_waiters)) { -+ psock->ready_rx_msg = head; -+ list_add_tail(&psock->psock_ready_list, -+ &mux->psocks_ready); -+ spin_unlock_bh(&mux->rx_lock); -+ return NULL; -+ } -+ -+ kcm = list_first_entry(&mux->kcm_rx_waiters, -+ struct kcm_sock, wait_rx_list); -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ -+ psock->rx_kcm = kcm; -+ kcm->rx_psock = psock; -+ -+ spin_unlock_bh(&mux->rx_lock); -+ -+ return kcm; -+} -+ -+static void kcm_done(struct kcm_sock *kcm); -+ -+static void kcm_done_work(struct work_struct *w) -+{ -+ kcm_done(container_of(w, struct kcm_sock, done_work)); -+} -+ -+/* Lower sock held */ -+static void unreserve_rx_kcm(struct kcm_psock *psock, -+ bool rcv_ready) -+{ -+ struct kcm_sock *kcm = psock->rx_kcm; -+ struct kcm_mux *mux = psock->mux; -+ -+ if (!kcm) -+ return; -+ -+ 
spin_lock_bh(&mux->rx_lock); -+ -+ psock->rx_kcm = NULL; -+ kcm->rx_psock = NULL; -+ -+ /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with -+ * kcm_rfree -+ */ -+ smp_mb(); -+ -+ if (unlikely(kcm->done)) { -+ spin_unlock_bh(&mux->rx_lock); -+ -+ /* Need to run kcm_done in a task since we need to qcquire -+ * callback locks which may already be held here. -+ */ -+ INIT_WORK(&kcm->done_work, kcm_done_work); -+ schedule_work(&kcm->done_work); -+ return; -+ } -+ -+ if (unlikely(kcm->rx_disabled)) { -+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); -+ } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) { -+ /* Check for degenerative race with rx_wait that all -+ * data was dequeued (accounted for in kcm_rfree). -+ */ -+ kcm_rcv_ready(kcm); -+ } -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+/* Macro to invoke filter function. */ -+#define KCM_RUN_FILTER(prog, ctx) \ -+ (*prog->bpf_func)(ctx, prog->insnsi) -+ -+/* Lower socket lock held */ -+static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, -+ unsigned int orig_offset, size_t orig_len) -+{ -+ struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; -+ struct kcm_rx_msg *rxm; -+ struct kcm_sock *kcm; -+ struct sk_buff *head, *skb; -+ size_t eaten = 0, cand_len; -+ ssize_t extra; -+ int err; -+ bool cloned_orig = false; -+ -+ if (psock->ready_rx_msg) -+ return 0; -+ -+ head = psock->rx_skb_head; -+ if (head) { -+ /* Message already in progress */ -+ -+ if (unlikely(orig_offset)) { -+ /* Getting data with a non-zero offset when a message is -+ * in progress is not expected. If it does happen, we -+ * need to clone and pull since we can't deal with -+ * offsets in the skbs for a message expect in the head. 
-+ */ -+ orig_skb = skb_clone(orig_skb, GFP_ATOMIC); -+ if (!orig_skb) { -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ if (!pskb_pull(orig_skb, orig_offset)) { -+ kfree_skb(orig_skb); -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ cloned_orig = true; -+ orig_offset = 0; -+ } -+ -+ if (!psock->rx_skb_nextp) { -+ /* We are going to append to the frags_list of head. -+ * Need to unshare the frag_list. -+ */ -+ err = skb_unclone(head, GFP_ATOMIC); -+ if (err) { -+ desc->error = err; -+ return 0; -+ } -+ -+ if (unlikely(skb_shinfo(head)->frag_list)) { -+ /* We can't append to an sk_buff that already -+ * has a frag_list. We create a new head, point -+ * the frag_list of that to the old head, and -+ * then are able to use the old head->next for -+ * appending to the message. -+ */ -+ if (WARN_ON(head->next)) { -+ desc->error = -EINVAL; -+ return 0; -+ } -+ -+ skb = alloc_skb(0, GFP_ATOMIC); -+ if (!skb) { -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ skb->len = head->len; -+ skb->data_len = head->len; -+ skb->truesize = head->truesize; -+ *kcm_rx_msg(skb) = *kcm_rx_msg(head); -+ psock->rx_skb_nextp = &head->next; -+ skb_shinfo(skb)->frag_list = head; -+ psock->rx_skb_head = skb; -+ head = skb; -+ } else { -+ psock->rx_skb_nextp = -+ &skb_shinfo(head)->frag_list; -+ } -+ } -+ } -+ -+ while (eaten < orig_len) { -+ /* Always clone since we will consume something */ -+ skb = skb_clone(orig_skb, GFP_ATOMIC); -+ if (!skb) { -+ desc->error = -ENOMEM; -+ break; -+ } -+ -+ cand_len = orig_len - eaten; -+ -+ head = psock->rx_skb_head; -+ if (!head) { -+ head = skb; -+ psock->rx_skb_head = head; -+ /* Will set rx_skb_nextp on next packet if needed */ -+ psock->rx_skb_nextp = NULL; -+ rxm = kcm_rx_msg(head); -+ memset(rxm, 0, sizeof(*rxm)); -+ rxm->offset = orig_offset + eaten; -+ } else { -+ /* Unclone since we may be appending to an skb that we -+ * already share a frag_list with. 
-+ */ -+ err = skb_unclone(skb, GFP_ATOMIC); -+ if (err) { -+ desc->error = err; -+ break; -+ } -+ -+ rxm = kcm_rx_msg(head); -+ *psock->rx_skb_nextp = skb; -+ psock->rx_skb_nextp = &skb->next; -+ head->data_len += skb->len; -+ head->len += skb->len; -+ head->truesize += skb->truesize; -+ } -+ -+ if (!rxm->full_len) { -+ ssize_t len; -+ -+ len = KCM_RUN_FILTER(psock->bpf_prog, head); -+ -+ if (!len) { -+ /* Need more header to determine length */ -+ rxm->accum_len += cand_len; -+ eaten += cand_len; -+ WARN_ON(eaten != orig_len); -+ break; -+ } else if (len <= (ssize_t)head->len - -+ skb->len - rxm->offset) { -+ /* Length must be into new skb (and also -+ * greater than zero) -+ */ -+ desc->error = -EPROTO; -+ psock->rx_skb_head = NULL; -+ kcm_abort_rx_psock(psock, EPROTO, head); -+ break; -+ } -+ -+ rxm->full_len = len; -+ } -+ -+ extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; -+ -+ if (extra < 0) { -+ /* Message not complete yet. */ -+ rxm->accum_len += cand_len; -+ eaten += cand_len; -+ WARN_ON(eaten != orig_len); -+ break; -+ } -+ -+ /* Positive extra indicates ore bytes than needed for the -+ * message -+ */ -+ -+ WARN_ON(extra > cand_len); -+ -+ eaten += (cand_len - extra); -+ -+ /* Hurray, we have a new message! */ -+ psock->rx_skb_head = NULL; -+ -+try_queue: -+ kcm = reserve_rx_kcm(psock, head); -+ if (!kcm) { -+ /* Unable to reserve a KCM, message is held in psock. 
*/ -+ break; -+ } -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, head)) { -+ /* Should mean socket buffer full */ -+ unreserve_rx_kcm(psock, false); -+ goto try_queue; -+ } -+ } -+ -+ if (cloned_orig) -+ kfree_skb(orig_skb); -+ -+ return eaten; -+} -+ -+/* Called with lock held on lower socket */ -+static int psock_tcp_read_sock(struct kcm_psock *psock) -+{ -+ read_descriptor_t desc; -+ -+ desc.arg.data = psock; -+ desc.error = 0; -+ desc.count = 1; /* give more than one skb per call */ -+ -+ /* sk should be locked here, so okay to do tcp_read_sock */ -+ tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); -+ -+ unreserve_rx_kcm(psock, true); -+ -+ return desc.error; -+} -+ -+/* Lower sock lock held */ -+static void psock_tcp_data_ready(struct sock *sk) -+{ -+ struct kcm_psock *psock; -+ -+ read_lock_bh(&sk->sk_callback_lock); -+ -+ psock = (struct kcm_psock *)sk->sk_user_data; -+ if (unlikely(!psock || psock->rx_stopped)) -+ goto out; -+ -+ if (psock->ready_rx_msg) -+ goto out; -+ -+ if (psock_tcp_read_sock(psock) == -ENOMEM) -+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); -+ -+out: -+ read_unlock_bh(&sk->sk_callback_lock); -+} -+ -+static void do_psock_rx_work(struct kcm_psock *psock) -+{ -+ read_descriptor_t rd_desc; -+ struct sock *csk = psock->sk; -+ -+ /* We need the read lock to synchronize with psock_tcp_data_ready. We -+ * need the socket lock for calling tcp_read_sock. 
-+ */ -+ lock_sock(csk); -+ read_lock_bh(&csk->sk_callback_lock); -+ -+ if (unlikely(csk->sk_user_data != psock)) -+ goto out; -+ -+ if (unlikely(psock->rx_stopped)) -+ goto out; -+ -+ if (psock->ready_rx_msg) -+ goto out; -+ -+ rd_desc.arg.data = psock; -+ -+ if (psock_tcp_read_sock(psock) == -ENOMEM) -+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); -+ -+out: -+ read_unlock_bh(&csk->sk_callback_lock); -+ release_sock(csk); -+} -+ -+static void psock_rx_work(struct work_struct *w) -+{ -+ do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); -+} -+ -+static void psock_rx_delayed_work(struct work_struct *w) -+{ -+ do_psock_rx_work(container_of(w, struct kcm_psock, -+ rx_delayed_work.work)); -+} -+ -+static void psock_tcp_state_change(struct sock *sk) -+{ -+ /* TCP only does a POLLIN for a half close. Do a POLLHUP here -+ * since application will normally not poll with POLLIN -+ * on the TCP sockets. -+ */ -+ -+ report_csk_error(sk, EPIPE); -+} -+ -+static void psock_tcp_write_space(struct sock *sk) -+{ -+ struct kcm_psock *psock; -+ struct kcm_mux *mux; -+ struct kcm_sock *kcm; -+ -+ read_lock_bh(&sk->sk_callback_lock); -+ -+ psock = (struct kcm_psock *)sk->sk_user_data; -+ if (unlikely(!psock)) -+ goto out; -+ -+ mux = psock->mux; -+ -+ spin_lock_bh(&mux->lock); -+ -+ /* Check if the socket is reserved so someone is waiting for sending. */ -+ kcm = psock->tx_kcm; -+ if (kcm) -+ queue_work(kcm_wq, &kcm->tx_work); -+ -+ spin_unlock_bh(&mux->lock); -+out: -+ read_unlock_bh(&sk->sk_callback_lock); -+} -+ -+static void unreserve_psock(struct kcm_sock *kcm); -+ -+/* kcm sock is locked. 
*/ -+static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ -+ psock = kcm->tx_psock; -+ -+ smp_rmb(); /* Must read tx_psock before tx_wait */ -+ -+ if (psock) { -+ WARN_ON(kcm->tx_wait); -+ if (unlikely(psock->tx_stopped)) -+ unreserve_psock(kcm); -+ else -+ return kcm->tx_psock; -+ } -+ -+ spin_lock_bh(&mux->lock); -+ -+ /* Check again under lock to see if psock was reserved for this -+ * psock via psock_unreserve. -+ */ -+ psock = kcm->tx_psock; -+ if (unlikely(psock)) { -+ WARN_ON(kcm->tx_wait); -+ spin_unlock_bh(&mux->lock); -+ return kcm->tx_psock; -+ } -+ -+ if (!list_empty(&mux->psocks_avail)) { -+ psock = list_first_entry(&mux->psocks_avail, -+ struct kcm_psock, -+ psock_avail_list); -+ list_del(&psock->psock_avail_list); -+ if (kcm->tx_wait) { -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ } -+ kcm->tx_psock = psock; -+ psock->tx_kcm = kcm; -+ } else if (!kcm->tx_wait) { -+ list_add_tail(&kcm->wait_psock_list, -+ &mux->kcm_tx_waiters); -+ kcm->tx_wait = true; -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+ return psock; -+} -+ -+/* mux lock held */ -+static void psock_now_avail(struct kcm_psock *psock) -+{ -+ struct kcm_mux *mux = psock->mux; -+ struct kcm_sock *kcm; -+ -+ if (list_empty(&mux->kcm_tx_waiters)) { -+ list_add_tail(&psock->psock_avail_list, -+ &mux->psocks_avail); -+ } else { -+ kcm = list_first_entry(&mux->kcm_tx_waiters, -+ struct kcm_sock, -+ wait_psock_list); -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ psock->tx_kcm = kcm; -+ -+ /* Commit before changing tx_psock since that is read in -+ * reserve_psock before queuing work. -+ */ -+ smp_mb(); -+ -+ kcm->tx_psock = psock; -+ queue_work(kcm_wq, &kcm->tx_work); -+ } -+} -+ -+/* kcm sock is locked. 
*/ -+static void unreserve_psock(struct kcm_sock *kcm) -+{ -+ struct kcm_psock *psock; -+ struct kcm_mux *mux = kcm->mux; -+ -+ spin_lock_bh(&mux->lock); -+ -+ psock = kcm->tx_psock; -+ -+ if (WARN_ON(!psock)) { -+ spin_unlock_bh(&mux->lock); -+ return; -+ } -+ -+ smp_rmb(); /* Read tx_psock before tx_wait */ -+ -+ WARN_ON(kcm->tx_wait); -+ -+ kcm->tx_psock = NULL; -+ psock->tx_kcm = NULL; -+ -+ if (unlikely(psock->tx_stopped)) { -+ if (psock->done) { -+ /* Deferred free */ -+ list_del(&psock->psock_list); -+ mux->psocks_cnt--; -+ sock_put(psock->sk); -+ fput(psock->sk->sk_socket->file); -+ kmem_cache_free(kcm_psockp, psock); -+ } -+ -+ /* Don't put back on available list */ -+ -+ spin_unlock_bh(&mux->lock); -+ -+ return; -+ } -+ -+ psock_now_avail(psock); -+ -+ spin_unlock_bh(&mux->lock); -+} -+ -+/* Write any messages ready on the kcm socket. Called with kcm sock lock -+ * held. Return bytes actually sent or error. -+ */ -+static int kcm_write_msgs(struct kcm_sock *kcm) -+{ -+ struct sock *sk = &kcm->sk; -+ struct kcm_psock *psock; -+ struct sk_buff *skb, *head; -+ struct kcm_tx_msg *txm; -+ unsigned short fragidx, frag_offset; -+ unsigned int sent, total_sent = 0; -+ int ret = 0; -+ -+ kcm->tx_wait_more = false; -+ psock = kcm->tx_psock; -+ if (unlikely(psock && psock->tx_stopped)) { -+ /* A reserved psock was aborted asynchronously. Unreserve -+ * it and we'll retry the message. 
-+ */ -+ unreserve_psock(kcm); -+ if (skb_queue_empty(&sk->sk_write_queue)) -+ return 0; -+ -+ kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; -+ -+ } else if (skb_queue_empty(&sk->sk_write_queue)) { -+ return 0; -+ } -+ -+ head = skb_peek(&sk->sk_write_queue); -+ txm = kcm_tx_msg(head); -+ -+ if (txm->sent) { -+ /* Send of first skbuff in queue already in progress */ -+ if (WARN_ON(!psock)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ sent = txm->sent; -+ frag_offset = txm->frag_offset; -+ fragidx = txm->fragidx; -+ skb = txm->frag_skb; -+ -+ goto do_frag; -+ } -+ -+try_again: -+ psock = reserve_psock(kcm); -+ if (!psock) -+ goto out; -+ -+ do { -+ skb = head; -+ txm = kcm_tx_msg(head); -+ sent = 0; -+ -+do_frag_list: -+ if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; -+ fragidx++) { -+ skb_frag_t *frag; -+ -+ frag_offset = 0; -+do_frag: -+ frag = &skb_shinfo(skb)->frags[fragidx]; -+ if (WARN_ON(!frag->size)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ ret = kernel_sendpage(psock->sk->sk_socket, -+ frag->page.p, -+ frag->page_offset + frag_offset, -+ frag->size - frag_offset, -+ MSG_DONTWAIT); -+ if (ret <= 0) { -+ if (ret == -EAGAIN) { -+ /* Save state to try again when there's -+ * write space on the socket -+ */ -+ txm->sent = sent; -+ txm->frag_offset = frag_offset; -+ txm->fragidx = fragidx; -+ txm->frag_skb = skb; -+ -+ ret = 0; -+ goto out; -+ } -+ -+ /* Hard failure in sending message, abort this -+ * psock since it has lost framing -+ * synchonization and retry sending the -+ * message from the beginning. -+ */ -+ kcm_abort_tx_psock(psock, ret ? 
-ret : EPIPE, -+ true); -+ unreserve_psock(kcm); -+ -+ txm->sent = 0; -+ ret = 0; -+ -+ goto try_again; -+ } -+ -+ sent += ret; -+ frag_offset += ret; -+ if (frag_offset < frag->size) { -+ /* Not finished with this frag */ -+ goto do_frag; -+ } -+ } -+ -+ if (skb == head) { -+ if (skb_has_frag_list(skb)) { -+ skb = skb_shinfo(skb)->frag_list; -+ goto do_frag_list; -+ } -+ } else if (skb->next) { -+ skb = skb->next; -+ goto do_frag_list; -+ } -+ -+ /* Successfully sent the whole packet, account for it. */ -+ skb_dequeue(&sk->sk_write_queue); -+ kfree_skb(head); -+ sk->sk_wmem_queued -= sent; -+ total_sent += sent; -+ } while ((head = skb_peek(&sk->sk_write_queue))); -+out: -+ if (!head) { -+ /* Done with all queued messages. */ -+ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); -+ unreserve_psock(kcm); -+ } -+ -+ /* Check if write space is available */ -+ sk->sk_write_space(sk); -+ -+ return total_sent ? : ret; -+} -+ -+static void kcm_tx_work(struct work_struct *w) -+{ -+ struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work); -+ struct sock *sk = &kcm->sk; -+ int err; -+ -+ lock_sock(sk); -+ -+ /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx -+ * aborts -+ */ -+ err = kcm_write_msgs(kcm); -+ if (err < 0) { -+ /* Hard failure in write, report error on KCM socket */ -+ pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err); -+ report_csk_error(&kcm->sk, -err); -+ goto out; -+ } -+ -+ /* Primarily for SOCK_SEQPACKET sockets */ -+ if (likely(sk->sk_socket) && -+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { -+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -+ sk->sk_write_space(sk); -+ } -+ -+out: -+ release_sock(sk); -+} -+ -+static void kcm_push(struct kcm_sock *kcm) -+{ -+ if (kcm->tx_wait_more) -+ kcm_write_msgs(kcm); -+} -+ -+static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -+{ -+ struct sock *sk = sock->sk; -+ struct kcm_sock *kcm = kcm_sk(sk); -+ struct sk_buff *skb = NULL, *head = NULL; -+ size_t 
copy, copied = 0; -+ long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -+ int eor = (sock->type == SOCK_DGRAM) ? -+ !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); -+ int err = -EPIPE; -+ -+ lock_sock(sk); -+ -+ /* Per tcp_sendmsg this should be in poll */ -+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); -+ -+ if (sk->sk_err) -+ goto out_error; -+ -+ if (kcm->seq_skb) { -+ /* Previously opened message */ -+ head = kcm->seq_skb; -+ skb = kcm_tx_msg(head)->last_skb; -+ goto start; -+ } -+ -+ /* Call the sk_stream functions to manage the sndbuf mem. */ -+ if (!sk_stream_memory_free(sk)) { -+ kcm_push(kcm); -+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ } -+ -+ /* New message, alloc head skb */ -+ head = alloc_skb(0, sk->sk_allocation); -+ while (!head) { -+ kcm_push(kcm); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ -+ head = alloc_skb(0, sk->sk_allocation); -+ } -+ -+ skb = head; -+ -+ /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling -+ * csum_and_copy_from_iter from skb_do_copy_data_nocache. 
-+ */ -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ -+start: -+ while (msg_data_left(msg)) { -+ bool merge = true; -+ int i = skb_shinfo(skb)->nr_frags; -+ struct page_frag *pfrag = sk_page_frag(sk); -+ -+ if (!sk_page_frag_refill(sk, pfrag)) -+ goto wait_for_memory; -+ -+ if (!skb_can_coalesce(skb, i, pfrag->page, -+ pfrag->offset)) { -+ if (i == MAX_SKB_FRAGS) { -+ struct sk_buff *tskb; -+ -+ tskb = alloc_skb(0, sk->sk_allocation); -+ if (!tskb) -+ goto wait_for_memory; -+ -+ if (head == skb) -+ skb_shinfo(head)->frag_list = tskb; -+ else -+ skb->next = tskb; -+ -+ skb = tskb; -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ continue; -+ } -+ merge = false; -+ } -+ -+ copy = min_t(int, msg_data_left(msg), -+ pfrag->size - pfrag->offset); -+ -+ if (!sk_wmem_schedule(sk, copy)) -+ goto wait_for_memory; -+ -+ err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, -+ pfrag->page, -+ pfrag->offset, -+ copy); -+ if (err) -+ goto out_error; -+ -+ /* Update the skb. */ -+ if (merge) { -+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); -+ } else { -+ skb_fill_page_desc(skb, i, pfrag->page, -+ pfrag->offset, copy); -+ get_page(pfrag->page); -+ } -+ -+ pfrag->offset += copy; -+ copied += copy; -+ if (head != skb) { -+ head->len += copy; -+ head->data_len += copy; -+ } -+ -+ continue; -+ -+wait_for_memory: -+ kcm_push(kcm); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ } -+ -+ if (eor) { -+ bool not_busy = skb_queue_empty(&sk->sk_write_queue); -+ -+ /* Message complete, queue it on send buffer */ -+ __skb_queue_tail(&sk->sk_write_queue, head); -+ kcm->seq_skb = NULL; -+ -+ if (msg->msg_flags & MSG_BATCH) { -+ kcm->tx_wait_more = true; -+ } else if (kcm->tx_wait_more || not_busy) { -+ err = kcm_write_msgs(kcm); -+ if (err < 0) { -+ /* We got a hard error in write_msgs but have -+ * already queued this message. 
Report an error -+ * in the socket, but don't affect return value -+ * from sendmsg -+ */ -+ pr_warn("KCM: Hard failure on kcm_write_msgs\n"); -+ report_csk_error(&kcm->sk, -err); -+ } -+ } -+ } else { -+ /* Message not complete, save state */ -+partial_message: -+ kcm->seq_skb = head; -+ kcm_tx_msg(head)->last_skb = skb; -+ } -+ -+ release_sock(sk); -+ return copied; -+ -+out_error: -+ kcm_push(kcm); -+ -+ if (copied && sock->type == SOCK_SEQPACKET) { -+ /* Wrote some bytes before encountering an -+ * error, return partial success. -+ */ -+ goto partial_message; -+ } -+ -+ if (head != kcm->seq_skb) -+ kfree_skb(head); -+ -+ err = sk_stream_error(sk, msg->msg_flags, err); -+ -+ /* make sure we wake any epoll edge trigger waiter */ -+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) -+ sk->sk_write_space(sk); -+ -+ release_sock(sk); -+ return err; -+} -+ -+static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, -+ long timeo, int *err) -+{ -+ struct sk_buff *skb; -+ -+ while (!(skb = skb_peek(&sk->sk_receive_queue))) { -+ if (sk->sk_err) { -+ *err = sock_error(sk); -+ return NULL; -+ } -+ -+ if (sock_flag(sk, SOCK_DONE)) -+ return NULL; -+ -+ if ((flags & MSG_DONTWAIT) || !timeo) { -+ *err = -EAGAIN; -+ return NULL; -+ } -+ -+ sk_wait_data(sk, &timeo, NULL); -+ -+ /* Handle signals */ -+ if (signal_pending(current)) { -+ *err = sock_intr_errno(timeo); -+ return NULL; -+ } -+ } -+ -+ return skb; -+} -+ -+static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, -+ size_t len, int flags) -+{ -+ struct sock *sk = sock->sk; -+ int err = 0; -+ long timeo; -+ struct kcm_rx_msg *rxm; -+ int copied = 0; -+ struct sk_buff *skb; -+ -+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -+ -+ lock_sock(sk); -+ -+ skb = kcm_wait_data(sk, flags, timeo, &err); -+ if (!skb) -+ goto out; -+ -+ /* Okay, have a message on the receive queue */ -+ -+ rxm = kcm_rx_msg(skb); -+ -+ if (len > rxm->full_len) -+ len = rxm->full_len; -+ -+ err = 
skb_copy_datagram_msg(skb, rxm->offset, msg, len); -+ if (err < 0) -+ goto out; -+ -+ copied = len; -+ if (likely(!(flags & MSG_PEEK))) { -+ if (copied < rxm->full_len) { -+ if (sock->type == SOCK_DGRAM) { -+ /* Truncated message */ -+ msg->msg_flags |= MSG_TRUNC; -+ goto msg_finished; -+ } -+ rxm->offset += copied; -+ rxm->full_len -= copied; -+ } else { -+msg_finished: -+ /* Finished with message */ -+ msg->msg_flags |= MSG_EOR; -+ skb_unlink(skb, &sk->sk_receive_queue); -+ kfree_skb(skb); -+ } -+ } -+ -+out: -+ release_sock(sk); -+ -+ return copied ? : err; -+} -+ -+/* kcm sock lock held */ -+static void kcm_recv_disable(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ -+ if (kcm->rx_disabled) -+ return; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ kcm->rx_disabled = 1; -+ -+ /* If a psock is reserved we'll do cleanup in unreserve */ -+ if (!kcm->rx_psock) { -+ if (kcm->rx_wait) { -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ } -+ -+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); -+ } -+ -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+/* kcm sock lock held */ -+static void kcm_recv_enable(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ -+ if (!kcm->rx_disabled) -+ return; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ kcm->rx_disabled = 0; -+ kcm_rcv_ready(kcm); -+ -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+static int kcm_setsockopt(struct socket *sock, int level, int optname, -+ char __user *optval, unsigned int optlen) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ int val, valbool; -+ int err = 0; -+ -+ if (level != SOL_KCM) -+ return -ENOPROTOOPT; -+ -+ if (optlen < sizeof(int)) -+ return -EINVAL; -+ -+ if (get_user(val, (int __user *)optval)) -+ return -EINVAL; -+ -+ valbool = val ? 
1 : 0; -+ -+ switch (optname) { -+ case KCM_RECV_DISABLE: -+ lock_sock(&kcm->sk); -+ if (valbool) -+ kcm_recv_disable(kcm); -+ else -+ kcm_recv_enable(kcm); -+ release_sock(&kcm->sk); -+ break; -+ default: -+ err = -ENOPROTOOPT; -+ } -+ -+ return err; -+} -+ -+static int kcm_getsockopt(struct socket *sock, int level, int optname, -+ char __user *optval, int __user *optlen) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ int val, len; -+ -+ if (level != SOL_KCM) -+ return -ENOPROTOOPT; -+ -+ if (get_user(len, optlen)) -+ return -EFAULT; -+ -+ len = min_t(unsigned int, len, sizeof(int)); -+ if (len < 0) -+ return -EINVAL; -+ -+ switch (optname) { -+ case KCM_RECV_DISABLE: -+ val = kcm->rx_disabled; -+ break; -+ default: -+ return -ENOPROTOOPT; -+ } -+ -+ if (put_user(len, optlen)) -+ return -EFAULT; -+ if (copy_to_user(optval, &val, len)) -+ return -EFAULT; -+ return 0; -+} -+ -+static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) -+{ -+ struct kcm_sock *tkcm; -+ struct list_head *head; -+ int index = 0; -+ -+ /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so -+ * we set sk_state, otherwise epoll_wait always returns right away with -+ * POLLHUP -+ */ -+ kcm->sk.sk_state = TCP_ESTABLISHED; -+ -+ /* Add to mux's kcm sockets list */ -+ kcm->mux = mux; -+ spin_lock_bh(&mux->lock); -+ -+ head = &mux->kcm_socks; -+ list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) { -+ if (tkcm->index != index) -+ break; -+ head = &tkcm->kcm_sock_list; -+ index++; -+ } -+ -+ list_add(&kcm->kcm_sock_list, head); -+ kcm->index = index; -+ -+ mux->kcm_socks_cnt++; -+ spin_unlock_bh(&mux->lock); -+ -+ INIT_WORK(&kcm->tx_work, kcm_tx_work); -+ -+ spin_lock_bh(&mux->rx_lock); -+ kcm_rcv_ready(kcm); -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+static int kcm_attach(struct socket *sock, struct socket *csock, -+ struct bpf_prog *prog) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ struct kcm_mux *mux = kcm->mux; -+ struct sock *csk; -+ struct 
kcm_psock *psock = NULL, *tpsock; -+ struct list_head *head; -+ int index = 0; -+ -+ if (csock->ops->family != PF_INET && -+ csock->ops->family != PF_INET6) -+ return -EINVAL; -+ -+ csk = csock->sk; -+ if (!csk) -+ return -EINVAL; -+ -+ /* Only support TCP for now */ -+ if (csk->sk_protocol != IPPROTO_TCP) -+ return -EINVAL; -+ -+ psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); -+ if (!psock) -+ return -ENOMEM; -+ -+ psock->mux = mux; -+ psock->sk = csk; -+ psock->bpf_prog = prog; -+ INIT_WORK(&psock->rx_work, psock_rx_work); -+ INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); -+ -+ sock_hold(csk); -+ -+ write_lock_bh(&csk->sk_callback_lock); -+ psock->save_data_ready = csk->sk_data_ready; -+ psock->save_write_space = csk->sk_write_space; -+ psock->save_state_change = csk->sk_state_change; -+ csk->sk_user_data = psock; -+ csk->sk_data_ready = psock_tcp_data_ready; -+ csk->sk_write_space = psock_tcp_write_space; -+ csk->sk_state_change = psock_tcp_state_change; -+ write_unlock_bh(&csk->sk_callback_lock); -+ -+ /* Finished initialization, now add the psock to the MUX. 
*/ -+ spin_lock_bh(&mux->lock); -+ head = &mux->psocks; -+ list_for_each_entry(tpsock, &mux->psocks, psock_list) { -+ if (tpsock->index != index) -+ break; -+ head = &tpsock->psock_list; -+ index++; -+ } -+ -+ list_add(&psock->psock_list, head); -+ psock->index = index; -+ -+ mux->psocks_cnt++; -+ psock_now_avail(psock); -+ spin_unlock_bh(&mux->lock); -+ -+ /* Schedule RX work in case there are already bytes queued */ -+ queue_work(kcm_wq, &psock->rx_work); -+ -+ return 0; -+} -+ -+static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) -+{ -+ struct socket *csock; -+ struct bpf_prog *prog; -+ int err; -+ -+ csock = sockfd_lookup(info->fd, &err); -+ if (!csock) -+ return -ENOENT; -+ -+ prog = bpf_prog_get(info->bpf_fd); -+ if (IS_ERR(prog)) { -+ err = PTR_ERR(prog); -+ goto out; -+ } -+ -+ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { -+ bpf_prog_put(prog); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ err = kcm_attach(sock, csock, prog); -+ if (err) { -+ bpf_prog_put(prog); -+ goto out; -+ } -+ -+ /* Keep reference on file also */ -+ -+ return 0; -+out: -+ fput(csock->file); -+ return err; -+} -+ -+static void kcm_unattach(struct kcm_psock *psock) -+{ -+ struct sock *csk = psock->sk; -+ struct kcm_mux *mux = psock->mux; -+ -+ /* Stop getting callbacks from TCP socket. After this there should -+ * be no way to reserve a kcm for this psock. -+ */ -+ write_lock_bh(&csk->sk_callback_lock); -+ csk->sk_user_data = NULL; -+ csk->sk_data_ready = psock->save_data_ready; -+ csk->sk_write_space = psock->save_write_space; -+ csk->sk_state_change = psock->save_state_change; -+ psock->rx_stopped = 1; -+ -+ if (WARN_ON(psock->rx_kcm)) { -+ write_unlock_bh(&csk->sk_callback_lock); -+ return; -+ } -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ /* Stop receiver activities. After this point psock should not be -+ * able to get onto ready list either through callbacks or work. 
-+ */ -+ if (psock->ready_rx_msg) { -+ list_del(&psock->psock_ready_list); -+ kfree_skb(psock->ready_rx_msg); -+ psock->ready_rx_msg = NULL; -+ } -+ -+ spin_unlock_bh(&mux->rx_lock); -+ -+ write_unlock_bh(&csk->sk_callback_lock); -+ -+ cancel_work_sync(&psock->rx_work); -+ cancel_delayed_work_sync(&psock->rx_delayed_work); -+ -+ bpf_prog_put(psock->bpf_prog); -+ -+ kfree_skb(psock->rx_skb_head); -+ psock->rx_skb_head = NULL; -+ -+ spin_lock_bh(&mux->lock); -+ -+ if (psock->tx_kcm) { -+ /* psock was reserved. Just mark it finished and we will clean -+ * up in the kcm paths, we need kcm lock which can not be -+ * acquired here. -+ */ -+ spin_unlock_bh(&mux->lock); -+ -+ /* We are unattaching a socket that is reserved. Abort the -+ * socket since we may be out of sync in sending on it. We need -+ * to do this without the mux lock. -+ */ -+ kcm_abort_tx_psock(psock, EPIPE, false); -+ -+ spin_lock_bh(&mux->lock); -+ if (!psock->tx_kcm) { -+ /* psock now unreserved in window mux was unlocked */ -+ goto no_reserved; -+ } -+ psock->done = 1; -+ -+ /* Commit done before queuing work to process it */ -+ smp_mb(); -+ -+ /* Queue tx work to make sure psock->done is handled */ -+ queue_work(kcm_wq, &psock->tx_kcm->tx_work); -+ spin_unlock_bh(&mux->lock); -+ } else { -+no_reserved: -+ if (!psock->tx_stopped) -+ list_del(&psock->psock_avail_list); -+ list_del(&psock->psock_list); -+ mux->psocks_cnt--; -+ spin_unlock_bh(&mux->lock); -+ -+ sock_put(csk); -+ fput(csk->sk_socket->file); -+ kmem_cache_free(kcm_psockp, psock); -+ } -+} -+ -+static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ struct socket *csock; -+ struct sock *csk; -+ int err; -+ -+ csock = sockfd_lookup(info->fd, &err); -+ if (!csock) -+ return -ENOENT; -+ -+ csk = csock->sk; -+ if (!csk) { -+ err = -EINVAL; -+ goto out; -+ } -+ -+ err = -ENOENT; -+ -+ spin_lock_bh(&mux->lock); 
-+ -+ list_for_each_entry(psock, &mux->psocks, psock_list) { -+ if (psock->sk != csk) -+ continue; -+ -+ /* Found the matching psock */ -+ -+ if (psock->unattaching || WARN_ON(psock->done)) { -+ err = -EALREADY; -+ break; -+ } -+ -+ psock->unattaching = 1; -+ -+ spin_unlock_bh(&mux->lock); -+ -+ kcm_unattach(psock); -+ -+ err = 0; -+ goto out; -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+out: -+ fput(csock->file); -+ return err; -+} -+ -+static struct proto kcm_proto = { -+ .name = "KCM", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct kcm_sock), -+}; -+ -+/* Clone a kcm socket. */ -+static int kcm_clone(struct socket *osock, struct kcm_clone *info, -+ struct socket **newsockp) -+{ -+ struct socket *newsock; -+ struct sock *newsk; -+ struct file *newfile; -+ int err, newfd; -+ -+ err = -ENFILE; -+ newsock = sock_alloc(); -+ if (!newsock) -+ goto out; -+ -+ newsock->type = osock->type; -+ newsock->ops = osock->ops; -+ -+ __module_get(newsock->ops->owner); -+ -+ newfd = get_unused_fd_flags(0); -+ if (unlikely(newfd < 0)) { -+ err = newfd; -+ goto out_fd_fail; -+ } -+ -+ newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); -+ if (unlikely(IS_ERR(newfile))) { -+ err = PTR_ERR(newfile); -+ goto out_sock_alloc_fail; -+ } -+ -+ newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, -+ &kcm_proto, true); -+ if (!newsk) { -+ err = -ENOMEM; -+ goto out_sk_alloc_fail; -+ } -+ -+ sock_init_data(newsock, newsk); -+ init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); -+ -+ fd_install(newfd, newfile); -+ *newsockp = newsock; -+ info->fd = newfd; -+ -+ return 0; -+ -+out_sk_alloc_fail: -+ fput(newfile); -+out_sock_alloc_fail: -+ put_unused_fd(newfd); -+out_fd_fail: -+ sock_release(newsock); -+out: -+ return err; -+} -+ -+static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -+{ -+ int err; -+ -+ switch (cmd) { -+ case SIOCKCMATTACH: { -+ struct kcm_attach info; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) 
-+ err = -EFAULT; -+ -+ err = kcm_attach_ioctl(sock, &info); -+ -+ break; -+ } -+ case SIOCKCMUNATTACH: { -+ struct kcm_unattach info; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) -+ err = -EFAULT; -+ -+ err = kcm_unattach_ioctl(sock, &info); -+ -+ break; -+ } -+ case SIOCKCMCLONE: { -+ struct kcm_clone info; -+ struct socket *newsock = NULL; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) -+ err = -EFAULT; -+ -+ err = kcm_clone(sock, &info, &newsock); -+ -+ if (!err) { -+ if (copy_to_user((void __user *)arg, &info, -+ sizeof(info))) { -+ err = -EFAULT; -+ sock_release(newsock); -+ } -+ } -+ -+ break; -+ } -+ default: -+ err = -ENOIOCTLCMD; -+ break; -+ } -+ -+ return err; -+} -+ -+static void free_mux(struct rcu_head *rcu) -+{ -+ struct kcm_mux *mux = container_of(rcu, -+ struct kcm_mux, rcu); -+ -+ kmem_cache_free(kcm_muxp, mux); -+} -+ -+static void release_mux(struct kcm_mux *mux) -+{ -+ struct kcm_net *knet = mux->knet; -+ struct kcm_psock *psock, *tmp_psock; -+ -+ /* Release psocks */ -+ list_for_each_entry_safe(psock, tmp_psock, -+ &mux->psocks, psock_list) { -+ if (!WARN_ON(psock->unattaching)) -+ kcm_unattach(psock); -+ } -+ -+ if (WARN_ON(mux->psocks_cnt)) -+ return; -+ -+ __skb_queue_purge(&mux->rx_hold_queue); -+ -+ mutex_lock(&knet->mutex); -+ list_del_rcu(&mux->kcm_mux_list); -+ knet->count--; -+ mutex_unlock(&knet->mutex); -+ -+ call_rcu(&mux->rcu, free_mux); -+} -+ -+static void kcm_done(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct sock *sk = &kcm->sk; -+ int socks_cnt; -+ -+ spin_lock_bh(&mux->rx_lock); -+ if (kcm->rx_psock) { -+ /* Cleanup in unreserve_rx_kcm */ -+ WARN_ON(kcm->done); -+ kcm->rx_disabled = 1; -+ kcm->done = 1; -+ spin_unlock_bh(&mux->rx_lock); -+ return; -+ } -+ -+ if (kcm->rx_wait) { -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ } -+ /* Move any pending receive messages to other kcm sockets */ -+ requeue_rx_msgs(mux, &sk->sk_receive_queue); -+ -+ 
spin_unlock_bh(&mux->rx_lock); -+ -+ if (WARN_ON(sk_rmem_alloc_get(sk))) -+ return; -+ -+ /* Detach from MUX */ -+ spin_lock_bh(&mux->lock); -+ -+ list_del(&kcm->kcm_sock_list); -+ mux->kcm_socks_cnt--; -+ socks_cnt = mux->kcm_socks_cnt; -+ -+ spin_unlock_bh(&mux->lock); -+ -+ if (!socks_cnt) { -+ /* We are done with the mux now. */ -+ release_mux(mux); -+ } -+ -+ WARN_ON(kcm->rx_wait); -+ -+ sock_put(&kcm->sk); -+} -+ -+/* Called by kcm_release to close a KCM socket. -+ * If this is the last KCM socket on the MUX, destroy the MUX. -+ */ -+static int kcm_release(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct kcm_sock *kcm; -+ struct kcm_mux *mux; -+ struct kcm_psock *psock; -+ -+ if (!sk) -+ return 0; -+ -+ kcm = kcm_sk(sk); -+ mux = kcm->mux; -+ -+ sock_orphan(sk); -+ kfree_skb(kcm->seq_skb); -+ -+ lock_sock(sk); -+ /* Purge queue under lock to avoid race condition with tx_work trying -+ * to act when queue is nonempty. If tx_work runs after this point -+ * it will just return. -+ */ -+ __skb_queue_purge(&sk->sk_write_queue); -+ release_sock(sk); -+ -+ spin_lock_bh(&mux->lock); -+ if (kcm->tx_wait) { -+ /* Take of tx_wait list, after this point there should be no way -+ * that a psock will be assigned to this kcm. -+ */ -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ } -+ spin_unlock_bh(&mux->lock); -+ -+ /* Cancel work. After this point there should be no outside references -+ * to the kcm socket. -+ */ -+ cancel_work_sync(&kcm->tx_work); -+ -+ lock_sock(sk); -+ psock = kcm->tx_psock; -+ if (psock) { -+ /* A psock was reserved, so we need to kill it since it -+ * may already have some bytes queued from a message. We -+ * need to do this after removing kcm from tx_wait list. 
-+ */ -+ kcm_abort_tx_psock(psock, EPIPE, false); -+ unreserve_psock(kcm); -+ } -+ release_sock(sk); -+ -+ WARN_ON(kcm->tx_wait); -+ WARN_ON(kcm->tx_psock); -+ -+ sock->sk = NULL; -+ -+ kcm_done(kcm); -+ -+ return 0; -+} -+ -+static const struct proto_ops kcm_ops = { -+ .family = PF_KCM, -+ .owner = THIS_MODULE, -+ .release = kcm_release, -+ .bind = sock_no_bind, -+ .connect = sock_no_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = sock_no_accept, -+ .getname = sock_no_getname, -+ .poll = datagram_poll, -+ .ioctl = kcm_ioctl, -+ .listen = sock_no_listen, -+ .shutdown = sock_no_shutdown, -+ .setsockopt = kcm_setsockopt, -+ .getsockopt = kcm_getsockopt, -+ .sendmsg = kcm_sendmsg, -+ .recvmsg = kcm_recvmsg, -+ .mmap = sock_no_mmap, -+ .sendpage = sock_no_sendpage, -+}; -+ -+/* Create proto operation for kcm sockets */ -+static int kcm_create(struct net *net, struct socket *sock, -+ int protocol, int kern) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ struct sock *sk; -+ struct kcm_mux *mux; -+ -+ switch (sock->type) { -+ case SOCK_DGRAM: -+ case SOCK_SEQPACKET: -+ sock->ops = &kcm_ops; -+ break; -+ default: -+ return -ESOCKTNOSUPPORT; -+ } -+ -+ if (protocol != KCMPROTO_CONNECTED) -+ return -EPROTONOSUPPORT; -+ -+ sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern); -+ if (!sk) -+ return -ENOMEM; -+ -+ /* Allocate a kcm mux, shared between KCM sockets */ -+ mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL); -+ if (!mux) { -+ sk_free(sk); -+ return -ENOMEM; -+ } -+ -+ spin_lock_init(&mux->lock); -+ spin_lock_init(&mux->rx_lock); -+ INIT_LIST_HEAD(&mux->kcm_socks); -+ INIT_LIST_HEAD(&mux->kcm_rx_waiters); -+ INIT_LIST_HEAD(&mux->kcm_tx_waiters); -+ -+ INIT_LIST_HEAD(&mux->psocks); -+ INIT_LIST_HEAD(&mux->psocks_ready); -+ INIT_LIST_HEAD(&mux->psocks_avail); -+ -+ mux->knet = knet; -+ -+ /* Add new MUX to list */ -+ mutex_lock(&knet->mutex); -+ list_add_rcu(&mux->kcm_mux_list, &knet->mux_list); -+ knet->count++; -+ mutex_unlock(&knet->mutex); 
-+ -+ skb_queue_head_init(&mux->rx_hold_queue); -+ -+ /* Init KCM socket */ -+ sock_init_data(sock, sk); -+ init_kcm_sock(kcm_sk(sk), mux); -+ -+ return 0; -+} -+ -+static struct net_proto_family kcm_family_ops = { -+ .family = PF_KCM, -+ .create = kcm_create, -+ .owner = THIS_MODULE, -+}; -+ -+static __net_init int kcm_init_net(struct net *net) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ -+ INIT_LIST_HEAD_RCU(&knet->mux_list); -+ mutex_init(&knet->mutex); -+ -+ return 0; -+} -+ -+static __net_exit void kcm_exit_net(struct net *net) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ -+ /* All KCM sockets should be closed at this point, which should mean -+ * that all multiplexors and psocks have been destroyed. -+ */ -+ WARN_ON(!list_empty(&knet->mux_list)); -+} -+ -+static struct pernet_operations kcm_net_ops = { -+ .init = kcm_init_net, -+ .exit = kcm_exit_net, -+ .id = &kcm_net_id, -+ .size = sizeof(struct kcm_net), -+}; -+ -+static int __init kcm_init(void) -+{ -+ int err = -ENOMEM; -+ -+ kcm_muxp = kmem_cache_create("kcm_mux_cache", -+ sizeof(struct kcm_mux), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -+ if (!kcm_muxp) -+ goto fail; -+ -+ kcm_psockp = kmem_cache_create("kcm_psock_cache", -+ sizeof(struct kcm_psock), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -+ if (!kcm_psockp) -+ goto fail; -+ -+ kcm_wq = create_singlethread_workqueue("kkcmd"); -+ if (!kcm_wq) -+ goto fail; -+ -+ err = proto_register(&kcm_proto, 1); -+ if (err) -+ goto fail; -+ -+ err = sock_register(&kcm_family_ops); -+ if (err) -+ goto sock_register_fail; -+ -+ err = register_pernet_device(&kcm_net_ops); -+ if (err) -+ goto net_ops_fail; -+ -+ return 0; -+ -+net_ops_fail: -+ sock_unregister(PF_KCM); -+ -+sock_register_fail: -+ proto_unregister(&kcm_proto); -+ -+fail: -+ kmem_cache_destroy(kcm_muxp); -+ kmem_cache_destroy(kcm_psockp); -+ -+ if (kcm_wq) -+ destroy_workqueue(kcm_wq); -+ -+ return err; -+} -+ -+static void __exit kcm_exit(void) -+{ -+ 
unregister_pernet_device(&kcm_net_ops); -+ sock_unregister(PF_KCM); -+ proto_unregister(&kcm_proto); -+ destroy_workqueue(kcm_wq); -+ -+ kmem_cache_destroy(kcm_muxp); -+ kmem_cache_destroy(kcm_psockp); -+} -+ -+module_init(kcm_init); -+module_exit(kcm_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS_NETPROTO(PF_KCM); --- -2.10.0 - diff --git a/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch b/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch deleted file mode 100644 index 3e97c57c5..000000000 --- a/alpine/kernel/patches/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 2f2e6e31ed1b82f1658139e0abe7155ee3755da1 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Mar 2016 02:51:09 -0700 -Subject: [PATCH 37/42] net: add the AF_KCM entries to family name tables - -This is for the recent kcm driver, which introduces AF_KCM(41) in -b7ac4eb(kcm: Kernel Connection Multiplexor module). 
- -Signed-off-by: Dexuan Cui -Cc: Signed-off-by: Tom Herbert -Origin: https://patchwork.ozlabs.org/patch/600006 ---- - net/core/sock.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/net/core/sock.c b/net/core/sock.c -index 0d91f7d..925def4 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -263,7 +263,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , -- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" -+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , -+ "sk_lock-AF_MAX" - }; - static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , -@@ -279,7 +280,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , -- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" -+ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , -+ "slock-AF_MAX" - }; - static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , -@@ -295,7 +297,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , -- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" -+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , -+ "clock-AF_MAX" - }; - - /* --- -2.10.0 - diff --git a/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch b/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch deleted file mode 100644 index 
2d3461c20..000000000 --- a/alpine/kernel/patches/0038-net-Add-Qualcomm-IPC-router.patch +++ /dev/null @@ -1,1307 +0,0 @@ -From 9e184cb0991a4cc08cd7688f2d4e23740c60e382 Mon Sep 17 00:00:00 2001 -From: Courtney Cavin -Date: Wed, 27 Apr 2016 12:13:03 -0700 -Subject: [PATCH 38/42] net: Add Qualcomm IPC router - -Add an implementation of Qualcomm's IPC router protocol, used to -communicate with service providing remote processors. - -Signed-off-by: Courtney Cavin -Signed-off-by: Bjorn Andersson -[bjorn: Cope with 0 being a valid node id and implement RTM_NEWADDR] -Signed-off-by: Bjorn Andersson -Origin: https://patchwork.ozlabs.org/patch/615774/ ---- - include/linux/socket.h | 4 +- - include/uapi/linux/qrtr.h | 12 + - net/Kconfig | 1 + - net/Makefile | 1 + - net/qrtr/Kconfig | 24 ++ - net/qrtr/Makefile | 2 + - net/qrtr/qrtr.c | 1007 +++++++++++++++++++++++++++++++++++++++++++++ - net/qrtr/qrtr.h | 31 ++ - net/qrtr/smd.c | 117 ++++++ - 9 files changed, 1198 insertions(+), 1 deletion(-) - create mode 100644 include/uapi/linux/qrtr.h - create mode 100644 net/qrtr/Kconfig - create mode 100644 net/qrtr/Makefile - create mode 100644 net/qrtr/qrtr.c - create mode 100644 net/qrtr/qrtr.h - create mode 100644 net/qrtr/smd.c - -diff --git a/include/linux/socket.h b/include/linux/socket.h -index 4e1ea53..dbd81e7 100644 ---- a/include/linux/socket.h -+++ b/include/linux/socket.h -@@ -201,8 +201,9 @@ struct ucred { - #define AF_NFC 39 /* NFC sockets */ - #define AF_VSOCK 40 /* vSockets */ - #define AF_KCM 41 /* Kernel Connection Multiplexor*/ -+#define AF_QIPCRTR 42 /* Qualcomm IPC Router */ - --#define AF_MAX 42 /* For now.. */ -+#define AF_MAX 43 /* For now.. */ - - /* Protocol families, same as address families. */ - #define PF_UNSPEC AF_UNSPEC -@@ -249,6 +250,7 @@ struct ucred { - #define PF_NFC AF_NFC - #define PF_VSOCK AF_VSOCK - #define PF_KCM AF_KCM -+#define PF_QIPCRTR AF_QIPCRTR - #define PF_MAX AF_MAX - - /* Maximum queue length specifiable by listen. 
*/ -diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h -new file mode 100644 -index 0000000..66c0748 ---- /dev/null -+++ b/include/uapi/linux/qrtr.h -@@ -0,0 +1,12 @@ -+#ifndef _LINUX_QRTR_H -+#define _LINUX_QRTR_H -+ -+#include -+ -+struct sockaddr_qrtr { -+ __kernel_sa_family_t sq_family; -+ __u32 sq_node; -+ __u32 sq_port; -+}; -+ -+#endif /* _LINUX_QRTR_H */ -diff --git a/net/Kconfig b/net/Kconfig -index b8439e6..1c9fda1 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -233,6 +233,7 @@ source "net/mpls/Kconfig" - source "net/hsr/Kconfig" - source "net/switchdev/Kconfig" - source "net/l3mdev/Kconfig" -+source "net/qrtr/Kconfig" - - config RPS - bool -diff --git a/net/Makefile b/net/Makefile -index 81d1411..bdd1455 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -78,3 +78,4 @@ endif - ifneq ($(CONFIG_NET_L3_MASTER_DEV),) - obj-y += l3mdev/ - endif -+obj-$(CONFIG_QRTR) += qrtr/ -diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig -new file mode 100644 -index 0000000..0c2619d ---- /dev/null -+++ b/net/qrtr/Kconfig -@@ -0,0 +1,24 @@ -+# Qualcomm IPC Router configuration -+# -+ -+config QRTR -+ bool "Qualcomm IPC Router support" -+ depends on ARCH_QCOM || COMPILE_TEST -+ ---help--- -+ Say Y if you intend to use Qualcomm IPC router protocol. The -+ protocol is used to communicate with services provided by other -+ hardware blocks in the system. -+ -+ In order to do service lookups, a userspace daemon is required to -+ maintain a service listing. -+ -+if QRTR -+ -+config QRTR_SMD -+ tristate "SMD IPC Router channels" -+ depends on QCOM_SMD || COMPILE_TEST -+ ---help--- -+ Say Y here to support SMD based ipcrouter channels. SMD is the -+ most common transport for IPC Router. 
-+ -+endif # QRTR -diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile -new file mode 100644 -index 0000000..e282a84 ---- /dev/null -+++ b/net/qrtr/Makefile -@@ -0,0 +1,2 @@ -+obj-y := qrtr.o -+obj-$(CONFIG_QRTR_SMD) += smd.o -diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c -new file mode 100644 -index 0000000..c985ecb ---- /dev/null -+++ b/net/qrtr/qrtr.c -@@ -0,0 +1,1007 @@ -+/* -+ * Copyright (c) 2015, Sony Mobile Communications Inc. -+ * Copyright (c) 2013, The Linux Foundation. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+#include -+#include -+#include -+#include /* For TIOCINQ/OUTQ */ -+ -+#include -+ -+#include "qrtr.h" -+ -+#define QRTR_PROTO_VER 1 -+ -+/* auto-bind range */ -+#define QRTR_MIN_EPH_SOCKET 0x4000 -+#define QRTR_MAX_EPH_SOCKET 0x7fff -+ -+enum qrtr_pkt_type { -+ QRTR_TYPE_DATA = 1, -+ QRTR_TYPE_HELLO = 2, -+ QRTR_TYPE_BYE = 3, -+ QRTR_TYPE_NEW_SERVER = 4, -+ QRTR_TYPE_DEL_SERVER = 5, -+ QRTR_TYPE_DEL_CLIENT = 6, -+ QRTR_TYPE_RESUME_TX = 7, -+ QRTR_TYPE_EXIT = 8, -+ QRTR_TYPE_PING = 9, -+}; -+ -+/** -+ * struct qrtr_hdr - (I|R)PCrouter packet header -+ * @version: protocol version -+ * @type: packet type; one of QRTR_TYPE_* -+ * @src_node_id: source node -+ * @src_port_id: source port -+ * @confirm_rx: boolean; whether a resume-tx packet should be send in reply -+ * @size: length of packet, excluding this header -+ * @dst_node_id: destination node -+ * @dst_port_id: destination port -+ */ -+struct qrtr_hdr { -+ __le32 version; -+ __le32 type; -+ __le32 src_node_id; -+ __le32 src_port_id; -+ 
__le32 confirm_rx; -+ __le32 size; -+ __le32 dst_node_id; -+ __le32 dst_port_id; -+} __packed; -+ -+#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) -+#define QRTR_NODE_BCAST ((unsigned int)-1) -+#define QRTR_PORT_CTRL ((unsigned int)-2) -+ -+struct qrtr_sock { -+ /* WARNING: sk must be the first member */ -+ struct sock sk; -+ struct sockaddr_qrtr us; -+ struct sockaddr_qrtr peer; -+}; -+ -+static inline struct qrtr_sock *qrtr_sk(struct sock *sk) -+{ -+ BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0); -+ return container_of(sk, struct qrtr_sock, sk); -+} -+ -+static unsigned int qrtr_local_nid = -1; -+ -+/* for node ids */ -+static RADIX_TREE(qrtr_nodes, GFP_KERNEL); -+/* broadcast list */ -+static LIST_HEAD(qrtr_all_nodes); -+/* lock for qrtr_nodes, qrtr_all_nodes and node reference */ -+static DEFINE_MUTEX(qrtr_node_lock); -+ -+/* local port allocation management */ -+static DEFINE_IDR(qrtr_ports); -+static DEFINE_MUTEX(qrtr_port_lock); -+ -+/** -+ * struct qrtr_node - endpoint node -+ * @ep_lock: lock for endpoint management and callbacks -+ * @ep: endpoint -+ * @ref: reference count for node -+ * @nid: node id -+ * @rx_queue: receive queue -+ * @work: scheduled work struct for recv work -+ * @item: list item for broadcast list -+ */ -+struct qrtr_node { -+ struct mutex ep_lock; -+ struct qrtr_endpoint *ep; -+ struct kref ref; -+ unsigned int nid; -+ -+ struct sk_buff_head rx_queue; -+ struct work_struct work; -+ struct list_head item; -+}; -+ -+/* Release node resources and free the node. -+ * -+ * Do not call directly, use qrtr_node_release. To be used with -+ * kref_put_mutex. As such, the node mutex is expected to be locked on call. 
-+ */ -+static void __qrtr_node_release(struct kref *kref) -+{ -+ struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); -+ -+ if (node->nid != QRTR_EP_NID_AUTO) -+ radix_tree_delete(&qrtr_nodes, node->nid); -+ -+ list_del(&node->item); -+ mutex_unlock(&qrtr_node_lock); -+ -+ skb_queue_purge(&node->rx_queue); -+ kfree(node); -+} -+ -+/* Increment reference to node. */ -+static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node) -+{ -+ if (node) -+ kref_get(&node->ref); -+ return node; -+} -+ -+/* Decrement reference to node and release as necessary. */ -+static void qrtr_node_release(struct qrtr_node *node) -+{ -+ if (!node) -+ return; -+ kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock); -+} -+ -+/* Pass an outgoing packet socket buffer to the endpoint driver. */ -+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ int rc = -ENODEV; -+ -+ mutex_lock(&node->ep_lock); -+ if (node->ep) -+ rc = node->ep->xmit(node->ep, skb); -+ else -+ kfree_skb(skb); -+ mutex_unlock(&node->ep_lock); -+ -+ return rc; -+} -+ -+/* Lookup node by id. -+ * -+ * callers must release with qrtr_node_release() -+ */ -+static struct qrtr_node *qrtr_node_lookup(unsigned int nid) -+{ -+ struct qrtr_node *node; -+ -+ mutex_lock(&qrtr_node_lock); -+ node = radix_tree_lookup(&qrtr_nodes, nid); -+ node = qrtr_node_acquire(node); -+ mutex_unlock(&qrtr_node_lock); -+ -+ return node; -+} -+ -+/* Assign node id to node. -+ * -+ * This is mostly useful for automatic node id assignment, based on -+ * the source id in the incoming packet. 
-+ */ -+static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) -+{ -+ if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO) -+ return; -+ -+ mutex_lock(&qrtr_node_lock); -+ radix_tree_insert(&qrtr_nodes, nid, node); -+ node->nid = nid; -+ mutex_unlock(&qrtr_node_lock); -+} -+ -+/** -+ * qrtr_endpoint_post() - post incoming data -+ * @ep: endpoint handle -+ * @data: data pointer -+ * @len: size of data in bytes -+ * -+ * Return: 0 on success; negative error code on failure -+ */ -+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) -+{ -+ struct qrtr_node *node = ep->node; -+ const struct qrtr_hdr *phdr = data; -+ struct sk_buff *skb; -+ unsigned int psize; -+ unsigned int size; -+ unsigned int type; -+ unsigned int ver; -+ unsigned int dst; -+ -+ if (len < QRTR_HDR_SIZE || len & 3) -+ return -EINVAL; -+ -+ ver = le32_to_cpu(phdr->version); -+ size = le32_to_cpu(phdr->size); -+ type = le32_to_cpu(phdr->type); -+ dst = le32_to_cpu(phdr->dst_port_id); -+ -+ psize = (size + 3) & ~3; -+ -+ if (ver != QRTR_PROTO_VER) -+ return -EINVAL; -+ -+ if (len != psize + QRTR_HDR_SIZE) -+ return -EINVAL; -+ -+ if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA) -+ return -EINVAL; -+ -+ skb = netdev_alloc_skb(NULL, len); -+ if (!skb) -+ return -ENOMEM; -+ -+ skb_reset_transport_header(skb); -+ memcpy(skb_put(skb, len), data, len); -+ -+ skb_queue_tail(&node->rx_queue, skb); -+ schedule_work(&node->work); -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_post); -+ -+/* Allocate and construct a resume-tx packet. 
*/ -+static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, -+ u32 dst_node, u32 port) -+{ -+ const int pkt_len = 20; -+ struct qrtr_hdr *hdr; -+ struct sk_buff *skb; -+ u32 *buf; -+ -+ skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); -+ if (!skb) -+ return NULL; -+ skb_reset_transport_header(skb); -+ -+ hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE); -+ hdr->version = cpu_to_le32(QRTR_PROTO_VER); -+ hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX); -+ hdr->src_node_id = cpu_to_le32(src_node); -+ hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); -+ hdr->confirm_rx = cpu_to_le32(0); -+ hdr->size = cpu_to_le32(pkt_len); -+ hdr->dst_node_id = cpu_to_le32(dst_node); -+ hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); -+ -+ buf = (u32 *)skb_put(skb, pkt_len); -+ memset(buf, 0, pkt_len); -+ buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); -+ buf[1] = cpu_to_le32(src_node); -+ buf[2] = cpu_to_le32(port); -+ -+ return skb; -+} -+ -+static struct qrtr_sock *qrtr_port_lookup(int port); -+static void qrtr_port_put(struct qrtr_sock *ipc); -+ -+/* Handle and route a received packet. -+ * -+ * This will auto-reply with resume-tx packet as necessary. 
-+ */ -+static void qrtr_node_rx_work(struct work_struct *work) -+{ -+ struct qrtr_node *node = container_of(work, struct qrtr_node, work); -+ struct sk_buff *skb; -+ -+ while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { -+ const struct qrtr_hdr *phdr; -+ u32 dst_node, dst_port; -+ struct qrtr_sock *ipc; -+ u32 src_node; -+ int confirm; -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ src_node = le32_to_cpu(phdr->src_node_id); -+ dst_node = le32_to_cpu(phdr->dst_node_id); -+ dst_port = le32_to_cpu(phdr->dst_port_id); -+ confirm = !!phdr->confirm_rx; -+ -+ qrtr_node_assign(node, src_node); -+ -+ ipc = qrtr_port_lookup(dst_port); -+ if (!ipc) { -+ kfree_skb(skb); -+ } else { -+ if (sock_queue_rcv_skb(&ipc->sk, skb)) -+ kfree_skb(skb); -+ -+ qrtr_port_put(ipc); -+ } -+ -+ if (confirm) { -+ skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); -+ if (!skb) -+ break; -+ if (qrtr_node_enqueue(node, skb)) -+ break; -+ } -+ } -+} -+ -+/** -+ * qrtr_endpoint_register() - register a new endpoint -+ * @ep: endpoint to register -+ * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment -+ * Return: 0 on success; negative error code on failure -+ * -+ * The specified endpoint must have the xmit function pointer set on call. 
-+ */ -+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) -+{ -+ struct qrtr_node *node; -+ -+ if (!ep || !ep->xmit) -+ return -EINVAL; -+ -+ node = kzalloc(sizeof(*node), GFP_KERNEL); -+ if (!node) -+ return -ENOMEM; -+ -+ INIT_WORK(&node->work, qrtr_node_rx_work); -+ kref_init(&node->ref); -+ mutex_init(&node->ep_lock); -+ skb_queue_head_init(&node->rx_queue); -+ node->nid = QRTR_EP_NID_AUTO; -+ node->ep = ep; -+ -+ qrtr_node_assign(node, nid); -+ -+ mutex_lock(&qrtr_node_lock); -+ list_add(&node->item, &qrtr_all_nodes); -+ mutex_unlock(&qrtr_node_lock); -+ ep->node = node; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_register); -+ -+/** -+ * qrtr_endpoint_unregister - unregister endpoint -+ * @ep: endpoint to unregister -+ */ -+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) -+{ -+ struct qrtr_node *node = ep->node; -+ -+ mutex_lock(&node->ep_lock); -+ node->ep = NULL; -+ mutex_unlock(&node->ep_lock); -+ -+ qrtr_node_release(node); -+ ep->node = NULL; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister); -+ -+/* Lookup socket by port. -+ * -+ * Callers must release with qrtr_port_put() -+ */ -+static struct qrtr_sock *qrtr_port_lookup(int port) -+{ -+ struct qrtr_sock *ipc; -+ -+ if (port == QRTR_PORT_CTRL) -+ port = 0; -+ -+ mutex_lock(&qrtr_port_lock); -+ ipc = idr_find(&qrtr_ports, port); -+ if (ipc) -+ sock_hold(&ipc->sk); -+ mutex_unlock(&qrtr_port_lock); -+ -+ return ipc; -+} -+ -+/* Release acquired socket. */ -+static void qrtr_port_put(struct qrtr_sock *ipc) -+{ -+ sock_put(&ipc->sk); -+} -+ -+/* Remove port assignment. */ -+static void qrtr_port_remove(struct qrtr_sock *ipc) -+{ -+ int port = ipc->us.sq_port; -+ -+ if (port == QRTR_PORT_CTRL) -+ port = 0; -+ -+ __sock_put(&ipc->sk); -+ -+ mutex_lock(&qrtr_port_lock); -+ idr_remove(&qrtr_ports, port); -+ mutex_unlock(&qrtr_port_lock); -+} -+ -+/* Assign port number to socket. 
-+ * -+ * Specify port in the integer pointed to by port, and it will be adjusted -+ * on return as necesssary. -+ * -+ * Port may be: -+ * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET] -+ * QRTR_MIN_EPH_SOCKET: Specified; available to all -+ */ -+static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) -+{ -+ int rc; -+ -+ mutex_lock(&qrtr_port_lock); -+ if (!*port) { -+ rc = idr_alloc(&qrtr_ports, ipc, -+ QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1, -+ GFP_ATOMIC); -+ if (rc >= 0) -+ *port = rc; -+ } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { -+ rc = -EACCES; -+ } else if (*port == QRTR_PORT_CTRL) { -+ rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC); -+ } else { -+ rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC); -+ if (rc >= 0) -+ *port = rc; -+ } -+ mutex_unlock(&qrtr_port_lock); -+ -+ if (rc == -ENOSPC) -+ return -EADDRINUSE; -+ else if (rc < 0) -+ return rc; -+ -+ sock_hold(&ipc->sk); -+ -+ return 0; -+} -+ -+/* Bind socket to address. -+ * -+ * Socket should be locked upon call. -+ */ -+static int __qrtr_bind(struct socket *sock, -+ const struct sockaddr_qrtr *addr, int zapped) -+{ -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int port; -+ int rc; -+ -+ /* rebinding ok */ -+ if (!zapped && addr->sq_port == ipc->us.sq_port) -+ return 0; -+ -+ port = addr->sq_port; -+ rc = qrtr_port_assign(ipc, &port); -+ if (rc) -+ return rc; -+ -+ /* unbind previous, if any */ -+ if (!zapped) -+ qrtr_port_remove(ipc); -+ ipc->us.sq_port = port; -+ -+ sock_reset_flag(sk, SOCK_ZAPPED); -+ -+ return 0; -+} -+ -+/* Auto bind to an ephemeral port. 
*/ -+static int qrtr_autobind(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct sockaddr_qrtr addr; -+ -+ if (!sock_flag(sk, SOCK_ZAPPED)) -+ return 0; -+ -+ addr.sq_family = AF_QIPCRTR; -+ addr.sq_node = qrtr_local_nid; -+ addr.sq_port = 0; -+ -+ return __qrtr_bind(sock, &addr, 1); -+} -+ -+/* Bind socket to specified sockaddr. */ -+static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int rc; -+ -+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) -+ return -EINVAL; -+ -+ if (addr->sq_node != ipc->us.sq_node) -+ return -EINVAL; -+ -+ lock_sock(sk); -+ rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED)); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+/* Queue packet to local peer socket. */ -+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ const struct qrtr_hdr *phdr; -+ struct qrtr_sock *ipc; -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ -+ ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id)); -+ if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ -+ kfree_skb(skb); -+ return -ENODEV; -+ } -+ -+ if (sock_queue_rcv_skb(&ipc->sk, skb)) { -+ qrtr_port_put(ipc); -+ kfree_skb(skb); -+ return -ENOSPC; -+ } -+ -+ qrtr_port_put(ipc); -+ -+ return 0; -+} -+ -+/* Queue packet for broadcast. 
*/ -+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ struct sk_buff *skbn; -+ -+ mutex_lock(&qrtr_node_lock); -+ list_for_each_entry(node, &qrtr_all_nodes, item) { -+ skbn = skb_clone(skb, GFP_KERNEL); -+ if (!skbn) -+ break; -+ skb_set_owner_w(skbn, skb->sk); -+ qrtr_node_enqueue(node, skbn); -+ } -+ mutex_unlock(&qrtr_node_lock); -+ -+ qrtr_local_enqueue(node, skb); -+ -+ return 0; -+} -+ -+static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); -+ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ struct qrtr_node *node; -+ struct qrtr_hdr *hdr; -+ struct sk_buff *skb; -+ size_t plen; -+ int rc; -+ -+ if (msg->msg_flags & ~(MSG_DONTWAIT)) -+ return -EINVAL; -+ -+ if (len > 65535) -+ return -EMSGSIZE; -+ -+ lock_sock(sk); -+ -+ if (addr) { -+ if (msg->msg_namelen < sizeof(*addr)) { -+ release_sock(sk); -+ return -EINVAL; -+ } -+ -+ if (addr->sq_family != AF_QIPCRTR) { -+ release_sock(sk); -+ return -EINVAL; -+ } -+ -+ rc = qrtr_autobind(sock); -+ if (rc) { -+ release_sock(sk); -+ return rc; -+ } -+ } else if (sk->sk_state == TCP_ESTABLISHED) { -+ addr = &ipc->peer; -+ } else { -+ release_sock(sk); -+ return -ENOTCONN; -+ } -+ -+ node = NULL; -+ if (addr->sq_node == QRTR_NODE_BCAST) { -+ enqueue_fn = qrtr_bcast_enqueue; -+ } else if (addr->sq_node == ipc->us.sq_node) { -+ enqueue_fn = qrtr_local_enqueue; -+ } else { -+ enqueue_fn = qrtr_node_enqueue; -+ node = qrtr_node_lookup(addr->sq_node); -+ if (!node) { -+ release_sock(sk); -+ return -ECONNRESET; -+ } -+ } -+ -+ plen = (len + 3) & ~3; -+ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, -+ msg->msg_flags & MSG_DONTWAIT, &rc); -+ if (!skb) -+ goto out_node; -+ -+ skb_reset_transport_header(skb); -+ skb_put(skb, len + QRTR_HDR_SIZE); -+ -+ hdr = (struct qrtr_hdr *)skb_transport_header(skb); -+ 
hdr->version = cpu_to_le32(QRTR_PROTO_VER); -+ hdr->src_node_id = cpu_to_le32(ipc->us.sq_node); -+ hdr->src_port_id = cpu_to_le32(ipc->us.sq_port); -+ hdr->confirm_rx = cpu_to_le32(0); -+ hdr->size = cpu_to_le32(len); -+ hdr->dst_node_id = cpu_to_le32(addr->sq_node); -+ hdr->dst_port_id = cpu_to_le32(addr->sq_port); -+ -+ rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, -+ &msg->msg_iter, len); -+ if (rc) { -+ kfree_skb(skb); -+ goto out_node; -+ } -+ -+ if (plen != len) { -+ skb_pad(skb, plen - len); -+ skb_put(skb, plen - len); -+ } -+ -+ if (ipc->us.sq_port == QRTR_PORT_CTRL) { -+ if (len < 4) { -+ rc = -EINVAL; -+ kfree_skb(skb); -+ goto out_node; -+ } -+ -+ /* control messages already require the type as 'command' */ -+ skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); -+ } else { -+ hdr->type = cpu_to_le32(QRTR_TYPE_DATA); -+ } -+ -+ rc = enqueue_fn(node, skb); -+ if (rc >= 0) -+ rc = len; -+ -+out_node: -+ qrtr_node_release(node); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, -+ size_t size, int flags) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); -+ const struct qrtr_hdr *phdr; -+ struct sock *sk = sock->sk; -+ struct sk_buff *skb; -+ int copied, rc; -+ -+ lock_sock(sk); -+ -+ if (sock_flag(sk, SOCK_ZAPPED)) { -+ release_sock(sk); -+ return -EADDRNOTAVAIL; -+ } -+ -+ skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, -+ flags & MSG_DONTWAIT, &rc); -+ if (!skb) { -+ release_sock(sk); -+ return rc; -+ } -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ copied = le32_to_cpu(phdr->size); -+ if (copied > size) { -+ copied = size; -+ msg->msg_flags |= MSG_TRUNC; -+ } -+ -+ rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); -+ if (rc < 0) -+ goto out; -+ rc = copied; -+ -+ if (addr) { -+ addr->sq_family = AF_QIPCRTR; -+ addr->sq_node = le32_to_cpu(phdr->src_node_id); -+ addr->sq_port = le32_to_cpu(phdr->src_port_id); -+ msg->msg_namelen = 
sizeof(*addr); -+ } -+ -+out: -+ skb_free_datagram(sk, skb); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, -+ int len, int flags) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int rc; -+ -+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) -+ return -EINVAL; -+ -+ lock_sock(sk); -+ -+ sk->sk_state = TCP_CLOSE; -+ sock->state = SS_UNCONNECTED; -+ -+ rc = qrtr_autobind(sock); -+ if (rc) { -+ release_sock(sk); -+ return rc; -+ } -+ -+ ipc->peer = *addr; -+ sock->state = SS_CONNECTED; -+ sk->sk_state = TCP_ESTABLISHED; -+ -+ release_sock(sk); -+ -+ return 0; -+} -+ -+static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, -+ int *len, int peer) -+{ -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sockaddr_qrtr qaddr; -+ struct sock *sk = sock->sk; -+ -+ lock_sock(sk); -+ if (peer) { -+ if (sk->sk_state != TCP_ESTABLISHED) { -+ release_sock(sk); -+ return -ENOTCONN; -+ } -+ -+ qaddr = ipc->peer; -+ } else { -+ qaddr = ipc->us; -+ } -+ release_sock(sk); -+ -+ *len = sizeof(qaddr); -+ qaddr.sq_family = AF_QIPCRTR; -+ -+ memcpy(saddr, &qaddr, sizeof(qaddr)); -+ -+ return 0; -+} -+ -+static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -+{ -+ void __user *argp = (void __user *)arg; -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ struct sockaddr_qrtr *sq; -+ struct sk_buff *skb; -+ struct ifreq ifr; -+ long len = 0; -+ int rc = 0; -+ -+ lock_sock(sk); -+ -+ switch (cmd) { -+ case TIOCOUTQ: -+ len = sk->sk_sndbuf - sk_wmem_alloc_get(sk); -+ if (len < 0) -+ len = 0; -+ rc = put_user(len, (int __user *)argp); -+ break; -+ case TIOCINQ: -+ skb = skb_peek(&sk->sk_receive_queue); -+ if (skb) -+ len = skb->len - QRTR_HDR_SIZE; -+ rc = put_user(len, (int __user *)argp); -+ break; -+ case SIOCGIFADDR: -+ if (copy_from_user(&ifr, argp, 
sizeof(ifr))) { -+ rc = -EFAULT; -+ break; -+ } -+ -+ sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; -+ *sq = ipc->us; -+ if (copy_to_user(argp, &ifr, sizeof(ifr))) { -+ rc = -EFAULT; -+ break; -+ } -+ break; -+ case SIOCGSTAMP: -+ rc = sock_get_timestamp(sk, argp); -+ break; -+ case SIOCADDRT: -+ case SIOCDELRT: -+ case SIOCSIFADDR: -+ case SIOCGIFDSTADDR: -+ case SIOCSIFDSTADDR: -+ case SIOCGIFBRDADDR: -+ case SIOCSIFBRDADDR: -+ case SIOCGIFNETMASK: -+ case SIOCSIFNETMASK: -+ rc = -EINVAL; -+ break; -+ default: -+ rc = -ENOIOCTLCMD; -+ break; -+ } -+ -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_release(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct qrtr_sock *ipc; -+ -+ if (!sk) -+ return 0; -+ -+ lock_sock(sk); -+ -+ ipc = qrtr_sk(sk); -+ sk->sk_shutdown = SHUTDOWN_MASK; -+ if (!sock_flag(sk, SOCK_DEAD)) -+ sk->sk_state_change(sk); -+ -+ sock_set_flag(sk, SOCK_DEAD); -+ sock->sk = NULL; -+ -+ if (!sock_flag(sk, SOCK_ZAPPED)) -+ qrtr_port_remove(ipc); -+ -+ skb_queue_purge(&sk->sk_receive_queue); -+ -+ release_sock(sk); -+ sock_put(sk); -+ -+ return 0; -+} -+ -+static const struct proto_ops qrtr_proto_ops = { -+ .owner = THIS_MODULE, -+ .family = AF_QIPCRTR, -+ .bind = qrtr_bind, -+ .connect = qrtr_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = sock_no_accept, -+ .listen = sock_no_listen, -+ .sendmsg = qrtr_sendmsg, -+ .recvmsg = qrtr_recvmsg, -+ .getname = qrtr_getname, -+ .ioctl = qrtr_ioctl, -+ .poll = datagram_poll, -+ .shutdown = sock_no_shutdown, -+ .setsockopt = sock_no_setsockopt, -+ .getsockopt = sock_no_getsockopt, -+ .release = qrtr_release, -+ .mmap = sock_no_mmap, -+ .sendpage = sock_no_sendpage, -+}; -+ -+static struct proto qrtr_proto = { -+ .name = "QIPCRTR", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct qrtr_sock), -+}; -+ -+static int qrtr_create(struct net *net, struct socket *sock, -+ int protocol, int kern) -+{ -+ struct qrtr_sock *ipc; -+ struct sock *sk; -+ -+ if (sock->type != 
SOCK_DGRAM) -+ return -EPROTOTYPE; -+ -+ sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern); -+ if (!sk) -+ return -ENOMEM; -+ -+ sock_set_flag(sk, SOCK_ZAPPED); -+ -+ sock_init_data(sock, sk); -+ sock->ops = &qrtr_proto_ops; -+ -+ ipc = qrtr_sk(sk); -+ ipc->us.sq_family = AF_QIPCRTR; -+ ipc->us.sq_node = qrtr_local_nid; -+ ipc->us.sq_port = 0; -+ -+ return 0; -+} -+ -+static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { -+ [IFA_LOCAL] = { .type = NLA_U32 }, -+}; -+ -+static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) -+{ -+ struct nlattr *tb[IFA_MAX + 1]; -+ struct ifaddrmsg *ifm; -+ int rc; -+ -+ if (!netlink_capable(skb, CAP_NET_ADMIN)) -+ return -EPERM; -+ -+ if (!netlink_capable(skb, CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ ASSERT_RTNL(); -+ -+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); -+ if (rc < 0) -+ return rc; -+ -+ ifm = nlmsg_data(nlh); -+ if (!tb[IFA_LOCAL]) -+ return -EINVAL; -+ -+ qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]); -+ return 0; -+} -+ -+static const struct net_proto_family qrtr_family = { -+ .owner = THIS_MODULE, -+ .family = AF_QIPCRTR, -+ .create = qrtr_create, -+}; -+ -+static int __init qrtr_proto_init(void) -+{ -+ int rc; -+ -+ rc = proto_register(&qrtr_proto, 1); -+ if (rc) -+ return rc; -+ -+ rc = sock_register(&qrtr_family); -+ if (rc) { -+ proto_unregister(&qrtr_proto); -+ return rc; -+ } -+ -+ rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); -+ -+ return 0; -+} -+module_init(qrtr_proto_init); -+ -+static void __exit qrtr_proto_fini(void) -+{ -+ rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR); -+ sock_unregister(qrtr_family.family); -+ proto_unregister(&qrtr_proto); -+} -+module_exit(qrtr_proto_fini); -+ -+MODULE_DESCRIPTION("Qualcomm IPC-router driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h -new file mode 100644 -index 0000000..2b84871 ---- /dev/null -+++ b/net/qrtr/qrtr.h -@@ -0,0 +1,31 @@ -+#ifndef __QRTR_H_ -+#define 
__QRTR_H_ -+ -+#include -+ -+struct sk_buff; -+ -+/* endpoint node id auto assignment */ -+#define QRTR_EP_NID_AUTO (-1) -+ -+/** -+ * struct qrtr_endpoint - endpoint handle -+ * @xmit: Callback for outgoing packets -+ * -+ * The socket buffer passed to the xmit function becomes owned by the endpoint -+ * driver. As such, when the driver is done with the buffer, it should -+ * call kfree_skb() on failure, or consume_skb() on success. -+ */ -+struct qrtr_endpoint { -+ int (*xmit)(struct qrtr_endpoint *ep, struct sk_buff *skb); -+ /* private: not for endpoint use */ -+ struct qrtr_node *node; -+}; -+ -+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid); -+ -+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); -+ -+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); -+ -+#endif -diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c -new file mode 100644 -index 0000000..84ebce7 ---- /dev/null -+++ b/net/qrtr/smd.c -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2015, Sony Mobile Communications Inc. -+ * Copyright (c) 2013, The Linux Foundation. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include -+#include -+#include -+ -+#include "qrtr.h" -+ -+struct qrtr_smd_dev { -+ struct qrtr_endpoint ep; -+ struct qcom_smd_channel *channel; -+}; -+ -+/* from smd to qrtr */ -+static int qcom_smd_qrtr_callback(struct qcom_smd_device *sdev, -+ const void *data, size_t len) -+{ -+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); -+ int rc; -+ -+ if (!qdev) -+ return -EAGAIN; -+ -+ rc = qrtr_endpoint_post(&qdev->ep, data, len); -+ if (rc == -EINVAL) { -+ dev_err(&sdev->dev, "invalid ipcrouter packet\n"); -+ /* return 0 to let smd drop the packet */ -+ rc = 0; -+ } -+ -+ return rc; -+} -+ -+/* from qrtr to smd */ -+static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) -+{ -+ struct qrtr_smd_dev *qdev = container_of(ep, struct qrtr_smd_dev, ep); -+ int rc; -+ -+ rc = skb_linearize(skb); -+ if (rc) -+ goto out; -+ -+ rc = qcom_smd_send(qdev->channel, skb->data, skb->len); -+ -+out: -+ if (rc) -+ kfree_skb(skb); -+ else -+ consume_skb(skb); -+ return rc; -+} -+ -+static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) -+{ -+ struct qrtr_smd_dev *qdev; -+ int rc; -+ -+ qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); -+ if (!qdev) -+ return -ENOMEM; -+ -+ qdev->channel = sdev->channel; -+ qdev->ep.xmit = qcom_smd_qrtr_send; -+ -+ rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); -+ if (rc) -+ return rc; -+ -+ dev_set_drvdata(&sdev->dev, qdev); -+ -+ dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); -+ -+ return 0; -+} -+ -+static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) -+{ -+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); -+ -+ qrtr_endpoint_unregister(&qdev->ep); -+ -+ dev_set_drvdata(&sdev->dev, NULL); -+} -+ -+static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { -+ { "IPCRTR" }, -+ {} -+}; -+ -+static struct qcom_smd_driver qcom_smd_qrtr_driver = { -+ .probe = qcom_smd_qrtr_probe, -+ .remove = qcom_smd_qrtr_remove, -+ .callback = qcom_smd_qrtr_callback, -+ 
.smd_match_table = qcom_smd_qrtr_smd_match, -+ .driver = { -+ .name = "qcom_smd_qrtr", -+ .owner = THIS_MODULE, -+ }, -+}; -+ -+module_qcom_smd_driver(qcom_smd_qrtr_driver); -+ -+MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); -+MODULE_LICENSE("GPL v2"); --- -2.10.0 - diff --git a/alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch b/alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch deleted file mode 100644 index ce4bee972..000000000 --- a/alpine/kernel/patches/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch +++ /dev/null @@ -1,49 +0,0 @@ -From b7da2c01ddbb00ed9ccdd3d646f6129f07016cf8 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Mar 2016 02:53:08 -0700 -Subject: [PATCH 40/42] net: add the AF_HYPERV entries to family name tables - -This is for the hv_sock driver, which introduces AF_HYPERV(42). - -Signed-off-by: Dexuan Cui -Cc: "K. Y. Srinivasan" -Cc: Haiyang Zhang -Origin: https://patchwork.ozlabs.org/patch/600009 ---- - net/core/sock.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/core/sock.c b/net/core/sock.c -index 925def4..323f7a3 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -264,7 +264,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , -- "sk_lock-AF_MAX" -+ "sk_lock-AF_HYPERV", "sk_lock-AF_MAX" - }; - static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , -@@ -281,7 +281,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , -- "slock-AF_MAX" -+ 
"slock-AF_HYPERV", "slock-AF_MAX" - }; - static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , -@@ -298,7 +298,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , -- "clock-AF_MAX" -+ "clock-AF_HYPERV", "clock-AF_MAX" - }; - - /* --- -2.10.0 -