diff --git a/.gitattributes b/.gitattributes index fcb9fc4df..be9a73c19 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,2 @@ **/vendor/** linguist-vendored -kernel/*.x linguist-language=text +kernel/config-* linguist-language=text diff --git a/blueprints/docker-for-mac/base.yml b/blueprints/docker-for-mac/base.yml index 969774449..3963c91c7 100644 --- a/blueprints/docker-for-mac/base.yml +++ b/blueprints/docker-for-mac/base.yml @@ -1,6 +1,6 @@ # This is a blueprint for building the open source components of Docker for Mac kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/vpnkit-expose-port:0832f0cfdfc02214680588a5018619cd1eb4b93f # install vpnkit-expose-port and vpnkit-iptables-wrapper on host diff --git a/docs/kernels.md b/docs/kernels.md index 405ee4ac1..7c8108c03 100644 --- a/docs/kernels.md +++ b/docs/kernels.md @@ -87,7 +87,7 @@ contains an example for the configuration. Each series of kernels has a config file dedicated to it in [../kernel/](../kernel), e.g. -[kernel.config-4.9.x-x86_64](../kernel/kernel_config-4.9.x-x86_64), +[config-4.9.x-x86_64](../kernel/config-4.9.x-x86_64), which is applied during the kernel build process. If you need to modify the kernel config, `make kconfig` in @@ -122,7 +122,7 @@ architectures to a minimum, so if you make changes to one configuration also try to apply it to the others. The script [kconfig-split.py](../scripts/kconfig-split.py) can be used to compare kernel config files. For example: ```sh -../scripts/kconfig-split.py kernel_config-4.9.x-aarch64 kernel_config-4.9.x-x86_64 +../scripts/kconfig-split.py config-4.9.x-aarch64 config-4.9.x-x86_64 ``` creates a file with the common and the x86_64 and arm64 specific @@ -165,7 +165,7 @@ The kernel build system has some provision to allow local customisation to the build. 
If you want to override/add some kernel config options, you can add a -file called `kernel_config-foo` and then invoke the build with `make +file called `config-foo` and then invoke the build with `make EXTRA=-foo build_4.9.x-foo` and this will build an image with the additional kernel config options enabled. @@ -339,7 +339,7 @@ file: ``` kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" init: - /zfs-kmod:4.9.47 diff --git a/examples/aws.yml b/examples/aws.yml index 4e58a40ae..6ee1bcea9 100644 --- a/examples/aws.yml +++ b/examples/aws.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/azure.yml b/examples/azure.yml index e3517aa43..ff3659912 100644 --- a/examples/azure.yml +++ b/examples/azure.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/docker.yml b/examples/docker.yml index 753fe97cc..244f26929 100644 --- a/examples/docker.yml +++ b/examples/docker.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/gcp.yml b/examples/gcp.yml index 618e691fb..dc8b7aa31 100644 --- a/examples/gcp.yml +++ b/examples/gcp.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/getty.yml b/examples/getty.yml index 2533f696f..ced2c2417 100644 --- a/examples/getty.yml +++ b/examples/getty.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: 
"console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/minimal.yml b/examples/minimal.yml index f3cb9837b..d93bd829e 100644 --- a/examples/minimal.yml +++ b/examples/minimal.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/node_exporter.yml b/examples/node_exporter.yml index 2d45255f9..32014827b 100644 --- a/examples/node_exporter.yml +++ b/examples/node_exporter.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/openstack.yml b/examples/openstack.yml index d9c7f76ee..52b9ba891 100644 --- a/examples/openstack.yml +++ b/examples/openstack.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/packet.yml b/examples/packet.yml index c3639c2da..eada09e60 100644 --- a/examples/packet.yml +++ b/examples/packet.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/redis-os.yml b/examples/redis-os.yml index 051883439..3ca0e0161 100644 --- a/examples/redis-os.yml +++ b/examples/redis-os.yml @@ -1,7 +1,7 @@ # Minimal YAML to run a redis server (used at DockerCon'17) # connect: nc localhost 6379 kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/sshd.yml b/examples/sshd.yml index 9d40b1c03..93f1bf15e 100644 --- 
a/examples/sshd.yml +++ b/examples/sshd.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/swap.yml b/examples/swap.yml index 90e01649c..930af92b3 100644 --- a/examples/swap.yml +++ b/examples/swap.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/vmware.yml b/examples/vmware.yml index 3d81d1f35..ac5d262ee 100644 --- a/examples/vmware.yml +++ b/examples/vmware.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/vpnkit-forwarder.yml b/examples/vpnkit-forwarder.yml index 79af93e4b..7a83af823 100644 --- a/examples/vpnkit-forwarder.yml +++ b/examples/vpnkit-forwarder.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/vsudd.yml b/examples/vsudd.yml index 5bf1cf879..d482e8071 100644 --- a/examples/vsudd.yml +++ b/examples/vsudd.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/vultr.yml b/examples/vultr.yml index cf6817111..189f27d4d 100644 --- a/examples/vultr.yml +++ b/examples/vultr.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/examples/wireguard.yml b/examples/wireguard.yml index c67ce320e..cd98769af 100644 --- 
a/examples/wireguard.yml +++ b/examples/wireguard.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/kernel/Dockerfile b/kernel/Dockerfile index c4bc1e8b8..a27e6b598 100644 --- a/kernel/Dockerfile +++ b/kernel/Dockerfile @@ -41,11 +41,13 @@ ENV WIREGUARD_VERSION=0.0.20171017 ENV WIREGUARD_SHA256=57b79a62874d9b99659a744513d4f6f9d88cb772deaa99e485b6fed3004a35cd ENV WIREGUARD_URL=https://git.zx2c4.com/WireGuard/snapshot/WireGuard-${WIREGUARD_VERSION}.tar.xz -# PGP keys: 589DA6B1 (greg@kroah.com) & 6092693E (autosigner@kernel.org) & 00411886 (torvalds@linux-foundation.org) -COPY keys.asc keys.asc +# We copy the entire directory. This copies some unneeded files, but +# allows us to check for the existence of /patches-${KERNEL_SERIES} to +# build kernels without patches. +COPY / / # Download and verify kernel -COPY sources/ / +# PGP keys: 589DA6B1 (greg@kroah.com) & 6092693E (autosigner@kernel.org) & 00411886 (torvalds@linux-foundation.org) RUN curl -fsSLO ${KERNEL_SHA256_SUMS} && \ gpg2 -q --import keys.asc && \ gpg2 --verify sha256sums.asc && \ @@ -57,17 +59,17 @@ RUN curl -fsSLO ${KERNEL_SHA256_SUMS} && \ gpg2 --verify linux-${KERNEL_VERSION}.tar.sign linux-${KERNEL_VERSION}.tar && \ cat linux-${KERNEL_VERSION}.tar | tar --absolute-names -x && mv /linux-${KERNEL_VERSION} /linux -# Apply local patches -COPY patches-${KERNEL_SERIES} /patches +# Apply local patches if present WORKDIR /linux -RUN set -e && for patch in /patches/*.patch; do \ - echo "Applying $patch"; \ - patch -p1 < "$patch"; \ - done +RUN set -e && \ + if [ -d /patches-${KERNEL_SERIES} ]; then \ + for patch in /patches-${KERNEL_SERIES}/*.patch; do \ + echo "Applying $patch"; \ + patch -p1 < "$patch"; \ + done; \ + fi # Kernel config -COPY kernel_config* /linux/ - RUN case $(uname -m) in \ x86_64) \ 
KERNEL_DEF_CONF=/linux/arch/x86/configs/x86_64_defconfig; \ @@ -76,15 +78,14 @@ RUN case $(uname -m) in \ KERNEL_DEF_CONF=/linux/arch/arm64/configs/defconfig; \ ;; \ esac && \ - cp /linux/kernel_config-${KERNEL_SERIES}-$(uname -m) ${KERNEL_DEF_CONF}; \ + cp /config-${KERNEL_SERIES}-$(uname -m) ${KERNEL_DEF_CONF}; \ if [ -n "${EXTRA}" ]; then \ sed -i "s/CONFIG_LOCALVERSION=\"-linuxkit\"/CONFIG_LOCALVERSION=\"-linuxkit${EXTRA}\"/" ${KERNEL_DEF_CONF}; \ if [ "${EXTRA}" = "-dbg" ]; then \ sed -i 's/CONFIG_PANIC_ON_OOPS=y/# CONFIG_PANIC_ON_OOPS is not set/' ${KERNEL_DEF_CONF}; \ fi && \ - cat /linux/kernel_config${EXTRA} >> ${KERNEL_DEF_CONF}; \ + cat /config${EXTRA} >> ${KERNEL_DEF_CONF}; \ fi && \ - rm /linux/kernel_config* && \ make defconfig && \ make oldconfig && \ if [ -z "${EXTRA}" ]; then diff .config ${KERNEL_DEF_CONF}; fi diff --git a/kernel/Dockerfile.kconfig b/kernel/Dockerfile.kconfig index 1aa108ead..10459f4ae 100644 --- a/kernel/Dockerfile.kconfig +++ b/kernel/Dockerfile.kconfig @@ -10,8 +10,6 @@ RUN apk add \ ARG KERNEL_VERSIONS -# There is no simple way to copy directories with wild cards as needed -# for patches-*. Copy the entire dir instead. 
COPY / / # Unpack kernels (download if not present) @@ -27,12 +25,15 @@ RUN for VERSION in ${KERNEL_VERSIONS}; do \ SERIES=${VERSION%.*}.x && \ echo "Patching $VERSION" && \ cd /linux-${VERSION} && \ - set -e && for patch in /patches-${SERIES}/*.patch; do \ - echo "Applying $patch" && \ - patch -p1 < "$patch"; \ - done && \ - mv /kernel_config-${SERIES}-x86_64 arch/x86/configs/x86_64_defconfig && \ - mv /kernel_config-${SERIES}-aarch64 arch/arm64/configs/defconfig; \ + set -e && \ + if [ -d /patches-${SERIES} ]; then \ + for patch in /patches-${SERIES}/*.patch; do \ + echo "Applying $patch" && \ + patch -p1 < "$patch"; \ + done; \ + fi && \ + mv /config-${SERIES}-x86_64 arch/x86/configs/x86_64_defconfig && \ + mv /config-${SERIES}-aarch64 arch/arm64/configs/defconfig; \ done ENTRYPOINT ["/bin/sh"] diff --git a/kernel/Makefile b/kernel/Makefile index 70c245cc5..15bb75cab 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -19,7 +19,7 @@ IMAGE_PERF:=kernel-perf IMAGE_ZFS:=zfs-kmod # You can specify an extra options for the Makefile. 
This will: -# - append a kernel_config$(EXTRA) to the kernel config for your kernel/arch +# - append a config$(EXTRA) to the kernel config for your kernel/arch # - append $(EXTRA) to the CONFIG_LOCALVERSION of your kernel EXTRA?= @@ -93,7 +93,7 @@ sources/linux-$(1).tar.xz: Makefile | sources KERNEL_VERSIONS+=$(1) endif -build_$(2)$(3): Dockerfile Makefile $(wildcard patches-$(2)/*) $(wildcard kernel_config-$(2)*) kernel_config-dbg | sources +build_$(2)$(3): Dockerfile Makefile $(wildcard patches-$(2)/*) $(wildcard config-$(2)*) config-dbg | sources docker pull $(ORG)/$(IMAGE):$(1)$(3)-$(TAG)$(SUFFIX) || \ docker build \ --build-arg KERNEL_VERSION=$(1) \ @@ -172,11 +172,11 @@ endef # Build Targets # Debug targets only for latest stable and LTS stable # -$(eval $(call kernel,4.13.6,4.13.x,$(EXTRA))) -$(eval $(call kernel,4.13.6,4.13.x,-dbg)) -$(eval $(call kernel,4.9.56,4.9.x,$(EXTRA))) -$(eval $(call kernel,4.9.56,4.9.x,-dbg)) -$(eval $(call kernel,4.4.92,4.4.x,$(EXTRA))) +$(eval $(call kernel,4.13.9,4.13.x,$(EXTRA))) +$(eval $(call kernel,4.13.9,4.13.x,-dbg)) +$(eval $(call kernel,4.9.58,4.9.x,$(EXTRA))) +$(eval $(call kernel,4.9.58,4.9.x,-dbg)) +$(eval $(call kernel,4.4.94,4.4.x,$(EXTRA))) # Target for kernel config kconfig: | sources diff --git a/kernel/kernel_config-4.13.x-aarch64 b/kernel/config-4.13.x-aarch64 similarity index 99% rename from kernel/kernel_config-4.13.x-aarch64 rename to kernel/config-4.13.x-aarch64 index 56e7ba396..91cb3ca3f 100644 --- a/kernel/kernel_config-4.13.x-aarch64 +++ b/kernel/config-4.13.x-aarch64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/arm64 4.13.6 Kernel Configuration +# Linux/arm64 4.13.9 Kernel Configuration # CONFIG_ARM64=y CONFIG_64BIT=y diff --git a/kernel/kernel_config-4.13.x-x86_64 b/kernel/config-4.13.x-x86_64 similarity index 99% rename from kernel/kernel_config-4.13.x-x86_64 rename to kernel/config-4.13.x-x86_64 index 658af6a18..95bc2fc07 100644 --- a/kernel/kernel_config-4.13.x-x86_64 +++ b/kernel/config-4.13.x-x86_64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 4.13.6 Kernel Configuration +# Linux/x86 4.13.9 Kernel Configuration # CONFIG_64BIT=y CONFIG_X86_64=y diff --git a/kernel/kernel_config-4.4.x-aarch64 b/kernel/config-4.4.x-aarch64 similarity index 99% rename from kernel/kernel_config-4.4.x-aarch64 rename to kernel/config-4.4.x-aarch64 index 542756a71..a608e7542 100644 --- a/kernel/kernel_config-4.4.x-aarch64 +++ b/kernel/config-4.4.x-aarch64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/arm64 4.4.92 Kernel Configuration +# Linux/arm64 4.4.94 Kernel Configuration # CONFIG_ARM64=y CONFIG_64BIT=y @@ -1056,8 +1056,6 @@ CONFIG_OPENVSWITCH_GRE=m CONFIG_OPENVSWITCH_VXLAN=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_VSOCKETS=y -CONFIG_VIRTIO_VSOCKETS=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_NETLINK_DIAG=y CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -1065,7 +1063,6 @@ CONFIG_NET_MPLS_GSO=m # CONFIG_HSR is not set CONFIG_NET_SWITCHDEV=y CONFIG_NET_L3_MASTER_DEV=y -# CONFIG_QRTR is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_XPS=y @@ -1086,7 +1083,6 @@ CONFIG_NET_FLOW_LIMIT=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set -# CONFIG_AF_KCM is not set CONFIG_FIB_RULES=y # CONFIG_WIRELESS is not set # CONFIG_WIMAX is not set @@ -1532,7 +1528,6 @@ CONFIG_NLMON=y # CAIF transport drivers # CONFIG_VHOST_NET=m -# CONFIG_VHOST_VSOCK is not set CONFIG_VHOST_RING=m CONFIG_VHOST=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set diff --git a/kernel/kernel_config-4.4.x-x86_64 b/kernel/config-4.4.x-x86_64 similarity 
index 99% rename from kernel/kernel_config-4.4.x-x86_64 rename to kernel/config-4.4.x-x86_64 index 945d3fcfc..3804cf1dd 100644 --- a/kernel/kernel_config-4.4.x-x86_64 +++ b/kernel/config-4.4.x-x86_64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 4.4.92 Kernel Configuration +# Linux/x86 4.4.94 Kernel Configuration # CONFIG_64BIT=y CONFIG_X86_64=y @@ -1228,9 +1228,6 @@ CONFIG_OPENVSWITCH_GRE=m CONFIG_OPENVSWITCH_VXLAN=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_VSOCKETS=y -CONFIG_VIRTIO_VSOCKETS=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y -CONFIG_HYPERV_SOCK=y CONFIG_NETLINK_DIAG=y CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -1259,7 +1256,6 @@ CONFIG_NET_FLOW_LIMIT=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set -# CONFIG_AF_KCM is not set CONFIG_FIB_RULES=y # CONFIG_WIRELESS is not set # CONFIG_WIMAX is not set @@ -1677,7 +1673,6 @@ CONFIG_NLMON=y # CAIF transport drivers # CONFIG_VHOST_NET=m -# CONFIG_VHOST_VSOCK is not set CONFIG_VHOST_RING=m CONFIG_VHOST=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set diff --git a/kernel/kernel_config-4.9.x-aarch64 b/kernel/config-4.9.x-aarch64 similarity index 99% rename from kernel/kernel_config-4.9.x-aarch64 rename to kernel/config-4.9.x-aarch64 index c1cae68ed..4cea2636f 100644 --- a/kernel/kernel_config-4.9.x-aarch64 +++ b/kernel/config-4.9.x-aarch64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/arm64 4.9.56 Kernel Configuration +# Linux/arm64 4.9.58 Kernel Configuration # CONFIG_ARM64=y CONFIG_64BIT=y diff --git a/kernel/kernel_config-4.9.x-x86_64 b/kernel/config-4.9.x-x86_64 similarity index 99% rename from kernel/kernel_config-4.9.x-x86_64 rename to kernel/config-4.9.x-x86_64 index ea821667f..56cab1be7 100644 --- a/kernel/kernel_config-4.9.x-x86_64 +++ b/kernel/config-4.9.x-x86_64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/x86 4.9.56 Kernel Configuration +# Linux/x86 4.9.58 Kernel Configuration # CONFIG_64BIT=y CONFIG_X86_64=y diff --git a/kernel/kernel_config-dbg b/kernel/config-dbg similarity index 100% rename from kernel/kernel_config-dbg rename to kernel/config-dbg diff --git a/kernel/patches-4.13.x/0001-vmbus-vmbus_open-reset-onchannel_callback-on-error.patch b/kernel/patches-4.13.x/0001-vmbus-vmbus_open-reset-onchannel_callback-on-error.patch index ceb11b8cc..263beffd4 100644 --- a/kernel/patches-4.13.x/0001-vmbus-vmbus_open-reset-onchannel_callback-on-error.patch +++ b/kernel/patches-4.13.x/0001-vmbus-vmbus_open-reset-onchannel_callback-on-error.patch @@ -1,7 +1,7 @@ -From 88649f9b578f9d4a00f624e18c0524440ce97a0b Mon Sep 17 00:00:00 2001 +From 255a4eb6b920f42018f05235f525b8c72624df62 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 5 May 2017 16:57:12 -0600 -Subject: [PATCH 01/14] vmbus: vmbus_open(): reset onchannel_callback on error +Subject: [PATCH 01/12] vmbus: vmbus_open(): reset onchannel_callback on error No real issue is observed without the patch, but let's add this just in case. 
@@ -17,10 +17,10 @@ Origin: git@github.com:dcui/linux.git 1 file changed, 2 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index e57cc40cb768..6e172cdf42f7 100644 +index be3fccab07fe..4775616ec87f 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c -@@ -220,6 +220,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, +@@ -225,6 +225,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, get_order(send_ringbuffer_size + recv_ringbuffer_size)); error_set_chnstate: newchannel->state = CHANNEL_OPEN_STATE; @@ -30,5 +30,5 @@ index e57cc40cb768..6e172cdf42f7 100644 } EXPORT_SYMBOL_GPL(vmbus_open); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0002-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch b/kernel/patches-4.13.x/0002-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch index be5de35e8..13ea2f370 100644 --- a/kernel/patches-4.13.x/0002-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch +++ b/kernel/patches-4.13.x/0002-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch @@ -1,7 +1,7 @@ -From 443c87acb6c5a538e3e3065fced1e3e5a5c0aae3 Mon Sep 17 00:00:00 2001 +From 9d372844cc64ec8921879c81beab83eeb4f8a05f Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 5 May 2017 16:57:20 -0600 -Subject: [PATCH 02/14] vmbus: remove "goto error_clean_msglist" in +Subject: [PATCH 02/12] vmbus: remove "goto error_clean_msglist" in vmbus_open() This is just a cleanup patch to simplify the code a little. 
@@ -18,10 +18,10 @@ Origin: git@github.com:dcui/linux.git 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 6e172cdf42f7..42498ecd0f02 100644 +index 4775616ec87f..033100a1cd59 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c -@@ -180,17 +180,18 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, +@@ -185,17 +185,18 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, ret = vmbus_post_msg(open_msg, sizeof(struct vmbus_channel_open_channel), true); @@ -46,7 +46,7 @@ index 6e172cdf42f7..42498ecd0f02 100644 if (newchannel->rescind) { err = -ENODEV; goto error_free_gpadl; -@@ -205,11 +206,6 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, +@@ -210,11 +211,6 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, kfree(open_info); return 0; @@ -59,5 +59,5 @@ index 6e172cdf42f7..42498ecd0f02 100644 vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); kfree(open_info); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0004-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch b/kernel/patches-4.13.x/0003-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch similarity index 99% rename from kernel/patches-4.13.x/0004-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch rename to kernel/patches-4.13.x/0003-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch index 21152092b..f6c4f3b80 100644 --- a/kernel/patches-4.13.x/0004-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch +++ b/kernel/patches-4.13.x/0003-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch @@ -1,7 +1,7 @@ -From fcb3e7f94e485a5bcd730784f056d6650093149c Mon Sep 17 00:00:00 2001 +From 2d199886c8f5b1aaaf0beef6e4f14a871f98d834 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 5 May 2017 16:57:26 -0600 -Subject: [PATCH 04/14] hv_sock: implements Hyper-V transport for Virtual 
+Subject: [PATCH 03/12] hv_sock: implements Hyper-V transport for Virtual Sockets (AF_VSOCK) Hyper-V Sockets (hv_sock) supplies a byte-stream based communication @@ -930,5 +930,5 @@ index 000000000000..fd89bf357617 +MODULE_VERSION("1.0.0"); +MODULE_LICENSE("GPL"); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0003-vmbus-dynamically-enqueue-dequeue-a-channel-on-vmbus.patch b/kernel/patches-4.13.x/0003-vmbus-dynamically-enqueue-dequeue-a-channel-on-vmbus.patch deleted file mode 100644 index fc87f5877..000000000 --- a/kernel/patches-4.13.x/0003-vmbus-dynamically-enqueue-dequeue-a-channel-on-vmbus.patch +++ /dev/null @@ -1,189 +0,0 @@ -From dc99fa02f640b5f7719727e62d2f81f0d1c93868 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Fri, 5 May 2017 16:57:23 -0600 -Subject: [PATCH 03/14] vmbus: dynamically enqueue/dequeue a channel on - vmbus_open/close -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -A just-closed channel may have a pending interrupt, and later when a new -channel with the same channel ID is not being fully initialized, the -pending interrupt of the previous channel with the same channel ID can run -the channel callback on the new channel data structure, causing a crash -of NULL pointer dereferencing. - -Normally it’s pretty hard to reproduce the race condition, but it can -indeed happen with specially-designed hv_sock stress test cases. - -Signed-off-by: Dexuan Cui -Reported-by: Rolf Neugebauer -Tested-by: Rolf Neugebauer -Cc: K. Y. 
Srinivasan -Cc: Haiyang Zhang -Cc: Stephen Hemminger -Origin: git@github.com:dcui/linux.git -(cherry picked from commit fdd8e16c855a6c7238c654d7217dcf51c5533307) ---- - drivers/hv/channel.c | 12 +++++++++--- - drivers/hv/channel_mgmt.c | 50 +++++++++++++++++++++-------------------------- - include/linux/hyperv.h | 3 +++ - 3 files changed, 34 insertions(+), 31 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 42498ecd0f02..d4243b5c39b7 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -177,6 +177,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, - &vmbus_connection.chn_msg_list); - spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); - -+ hv_percpu_channel_enq(newchannel); -+ - ret = vmbus_post_msg(open_msg, - sizeof(struct vmbus_channel_open_channel), true); - -@@ -189,23 +191,25 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, - - if (ret != 0) { - err = ret; -- goto error_free_gpadl; -+ goto error_deq_channel; - } - - if (newchannel->rescind) { - err = -ENODEV; -- goto error_free_gpadl; -+ goto error_deq_channel; - } - - if (open_info->response.open_result.status) { - err = -EAGAIN; -- goto error_free_gpadl; -+ goto error_deq_channel; - } - - newchannel->state = CHANNEL_OPENED_STATE; - kfree(open_info); - return 0; - -+error_deq_channel: -+ hv_percpu_channel_deq(newchannel); - error_free_gpadl: - vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); - kfree(open_info); -@@ -551,6 +555,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - goto out; - } - -+ hv_percpu_channel_deq(channel); -+ - channel->state = CHANNEL_OPEN_STATE; - channel->sc_creation_callback = NULL; - /* Stop callback and cancel the timer asap */ -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 037361158074..415c69aeb32c 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -362,6 +362,17 @@ static 
void percpu_channel_enq(void *arg) - list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list); - } - -+void hv_percpu_channel_enq(struct vmbus_channel *channel) -+{ -+ if (channel->target_cpu != get_cpu()) -+ smp_call_function_single(channel->target_cpu, -+ percpu_channel_enq, channel, true); -+ else -+ percpu_channel_enq(channel); -+ -+ put_cpu(); -+} -+ - static void percpu_channel_deq(void *arg) - { - struct vmbus_channel *channel = arg; -@@ -369,6 +380,17 @@ static void percpu_channel_deq(void *arg) - list_del_rcu(&channel->percpu_list); - } - -+void hv_percpu_channel_deq(struct vmbus_channel *channel) -+{ -+ if (channel->target_cpu != get_cpu()) -+ smp_call_function_single(channel->target_cpu, -+ percpu_channel_deq, channel, true); -+ else -+ percpu_channel_deq(channel); -+ -+ put_cpu(); -+} -+ - - static void vmbus_release_relid(u32 relid) - { -@@ -389,15 +411,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - BUG_ON(!channel->rescind); - BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - -- if (channel->target_cpu != get_cpu()) { -- put_cpu(); -- smp_call_function_single(channel->target_cpu, -- percpu_channel_deq, channel, true); -- } else { -- percpu_channel_deq(channel); -- put_cpu(); -- } -- - if (channel->primary_channel == NULL) { - list_del(&channel->listentry); - -@@ -490,16 +503,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - - init_vp_index(newchannel, dev_type); - -- if (newchannel->target_cpu != get_cpu()) { -- put_cpu(); -- smp_call_function_single(newchannel->target_cpu, -- percpu_channel_enq, -- newchannel, true); -- } else { -- percpu_channel_enq(newchannel); -- put_cpu(); -- } -- - /* - * This state is used to indicate a successful open - * so that when we do close the channel normally, we -@@ -549,15 +552,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - list_del(&newchannel->listentry); - mutex_unlock(&vmbus_connection.channel_mutex); - -- if 
(newchannel->target_cpu != get_cpu()) { -- put_cpu(); -- smp_call_function_single(newchannel->target_cpu, -- percpu_channel_deq, newchannel, true); -- } else { -- percpu_channel_deq(newchannel); -- put_cpu(); -- } -- - vmbus_release_relid(newchannel->offermsg.child_relid); - - err_free_chan: -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index b7d7bbec74e0..f5d3e8c01401 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1453,6 +1453,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, - const int *srv_version, int srv_vercnt, - int *nego_fw_version, int *nego_srv_version); - -+void hv_percpu_channel_enq(struct vmbus_channel *channel); -+void hv_percpu_channel_deq(struct vmbus_channel *channel); -+ - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); - - void vmbus_setevent(struct vmbus_channel *channel); --- -2.14.1 - diff --git a/kernel/patches-4.13.x/0005-VMCI-only-try-to-load-on-VMware-hypervisor.patch b/kernel/patches-4.13.x/0004-VMCI-only-try-to-load-on-VMware-hypervisor.patch similarity index 94% rename from kernel/patches-4.13.x/0005-VMCI-only-try-to-load-on-VMware-hypervisor.patch rename to kernel/patches-4.13.x/0004-VMCI-only-try-to-load-on-VMware-hypervisor.patch index 969cc73b7..b7fc1f829 100644 --- a/kernel/patches-4.13.x/0005-VMCI-only-try-to-load-on-VMware-hypervisor.patch +++ b/kernel/patches-4.13.x/0004-VMCI-only-try-to-load-on-VMware-hypervisor.patch @@ -1,7 +1,7 @@ -From 215417da38c0a88d9b2468304b414c8faa88cd10 Mon Sep 17 00:00:00 2001 +From fe24104d463f6c66ba20d6637bcd8e64ea3d91f4 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 5 May 2017 16:57:29 -0600 -Subject: [PATCH 05/14] VMCI: only try to load on VMware hypervisor +Subject: [PATCH 04/12] VMCI: only try to load on VMware hypervisor Without the patch, vmw_vsock_vmci_transport.ko and vmw_vmci.ko can automatically load when an application creates an AF_VSOCK socket. 
@@ -60,5 +60,5 @@ index d7eaf1eb11e7..1789ea71ff5d 100644 if (vmci_err < VMCI_SUCCESS) { pr_err("Failed to initialize VMCIEvent (result=%d)\n", -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0006-hv_sock-add-the-support-of-auto-loading.patch b/kernel/patches-4.13.x/0005-hv_sock-add-the-support-of-auto-loading.patch similarity index 87% rename from kernel/patches-4.13.x/0006-hv_sock-add-the-support-of-auto-loading.patch rename to kernel/patches-4.13.x/0005-hv_sock-add-the-support-of-auto-loading.patch index d3ed7f584..18b62e335 100644 --- a/kernel/patches-4.13.x/0006-hv_sock-add-the-support-of-auto-loading.patch +++ b/kernel/patches-4.13.x/0005-hv_sock-add-the-support-of-auto-loading.patch @@ -1,7 +1,7 @@ -From 31695355b51ff9351e1beb1d334a3eeb996896f4 Mon Sep 17 00:00:00 2001 +From 3846240ba100e7178bfffe1d7a20382167764a2a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 5 May 2017 16:57:35 -0600 -Subject: [PATCH 06/14] hv_sock: add the support of auto-loading +Subject: [PATCH 05/12] hv_sock: add the support of auto-loading After we disable VMWare virtual sockets driver's auto-loading on Hyper-V, we can enable hv_sock's auto-loading now. 
@@ -26,5 +26,5 @@ index fd89bf357617..f465b0b662df 100644 MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0007-tools-hv_sock-2-simple-test-cases.patch b/kernel/patches-4.13.x/0006-tools-hv_sock-2-simple-test-cases.patch similarity index 98% rename from kernel/patches-4.13.x/0007-tools-hv_sock-2-simple-test-cases.patch rename to kernel/patches-4.13.x/0006-tools-hv_sock-2-simple-test-cases.patch index 996eb5f89..f02bdf6b8 100644 --- a/kernel/patches-4.13.x/0007-tools-hv_sock-2-simple-test-cases.patch +++ b/kernel/patches-4.13.x/0006-tools-hv_sock-2-simple-test-cases.patch @@ -1,7 +1,7 @@ -From d351b5ed3eafffe7dc31c2c75180782ca455ae31 Mon Sep 17 00:00:00 2001 +From 0e575b9966945fb1132bd13c7a3c69330c14166e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 5 May 2017 18:52:02 -0600 -Subject: [PATCH 07/14] tools: hv_sock: 2 simple test cases. +Subject: [PATCH 06/12] tools: hv_sock: 2 simple test cases. Please read this document first: https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-guide/make-integration-service @@ -426,5 +426,5 @@ index 000000000000..fb4370c53152 + return -1; +} -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0008-hvsock-fix-a-race-in-hvs_stream_dequeue.patch b/kernel/patches-4.13.x/0007-hvsock-fix-a-race-in-hvs_stream_dequeue.patch similarity index 95% rename from kernel/patches-4.13.x/0008-hvsock-fix-a-race-in-hvs_stream_dequeue.patch rename to kernel/patches-4.13.x/0007-hvsock-fix-a-race-in-hvs_stream_dequeue.patch index addd92cb3..3aef08c6e 100644 --- a/kernel/patches-4.13.x/0008-hvsock-fix-a-race-in-hvs_stream_dequeue.patch +++ b/kernel/patches-4.13.x/0007-hvsock-fix-a-race-in-hvs_stream_dequeue.patch @@ -1,7 +1,7 @@ -From 0fe8cb7ac4a6fb02e267b4e87b0e9102aa7bb92f Mon Sep 17 00:00:00 2001 +From b5b86d3b2b25b051b4cef6d2a3be970726111da1 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 16 May 2017 22:14:03 +0800 -Subject: [PATCH 08/14] hvsock: fix a race 
in hvs_stream_dequeue() +Subject: [PATCH 07/12] hvsock: fix a race in hvs_stream_dequeue() If hv_pkt_iter_next() returns a non-NULL pointer, we must update the recv_data_len/data_off info, otherwise the received data will @@ -109,5 +109,5 @@ index f465b0b662df..30154836acd0 100644 case 1: ret = 1; -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0009-hvsock-fix-vsock_dequeue-enqueue_accept-race.patch b/kernel/patches-4.13.x/0008-hvsock-fix-vsock_dequeue-enqueue_accept-race.patch similarity index 91% rename from kernel/patches-4.13.x/0009-hvsock-fix-vsock_dequeue-enqueue_accept-race.patch rename to kernel/patches-4.13.x/0008-hvsock-fix-vsock_dequeue-enqueue_accept-race.patch index b11743534..f951c2202 100644 --- a/kernel/patches-4.13.x/0009-hvsock-fix-vsock_dequeue-enqueue_accept-race.patch +++ b/kernel/patches-4.13.x/0008-hvsock-fix-vsock_dequeue-enqueue_accept-race.patch @@ -1,7 +1,7 @@ -From 8b8cd3d7e407fcd39648cea2fe030f4c2e505855 Mon Sep 17 00:00:00 2001 +From 2cf5773ab1091bc4e575ba1e8d861ae89bdf1f9e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 19 May 2017 21:49:59 +0800 -Subject: [PATCH 09/14] hvsock: fix vsock_dequeue/enqueue_accept race +Subject: [PATCH 08/12] hvsock: fix vsock_dequeue/enqueue_accept race Signed-off-by: Dexuan Cui Origin: git@github.com:dcui/linux.git @@ -45,5 +45,5 @@ index dfc8c51e4d74..b7b2c66d91fd 100644 /* The caller will need a reference on the connected socket so we let * it call sock_put(). 
-- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0010-hv-sock-a-temporary-workaround-for-the-pending_send_.patch b/kernel/patches-4.13.x/0009-hv-sock-a-temporary-workaround-for-the-pending_send_.patch similarity index 97% rename from kernel/patches-4.13.x/0010-hv-sock-a-temporary-workaround-for-the-pending_send_.patch rename to kernel/patches-4.13.x/0009-hv-sock-a-temporary-workaround-for-the-pending_send_.patch index 7d25d8f87..feee46e85 100644 --- a/kernel/patches-4.13.x/0010-hv-sock-a-temporary-workaround-for-the-pending_send_.patch +++ b/kernel/patches-4.13.x/0009-hv-sock-a-temporary-workaround-for-the-pending_send_.patch @@ -1,7 +1,7 @@ -From aaa233991131d32aaa04bb4bc5a2ebd6516fa992 Mon Sep 17 00:00:00 2001 +From d449661db47a2dc22437d30bbf5e9354344a0dd3 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 21 Jun 2017 22:30:42 +0800 -Subject: [PATCH 10/14] hv-sock: a temporary workaround for the +Subject: [PATCH 09/12] hv-sock: a temporary workaround for the pending_send_size issue While I'm trying to find out the root cause, I believe this can work @@ -129,5 +129,5 @@ index 30154836acd0..b8bf1446ae13 100644 return 0; } -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0012-hv-sock-avoid-double-FINs-if-shutdown-is-called.patch b/kernel/patches-4.13.x/0010-hv-sock-avoid-double-FINs-if-shutdown-is-called.patch similarity index 89% rename from kernel/patches-4.13.x/0012-hv-sock-avoid-double-FINs-if-shutdown-is-called.patch rename to kernel/patches-4.13.x/0010-hv-sock-avoid-double-FINs-if-shutdown-is-called.patch index df836b6a0..bd2bc5f4f 100644 --- a/kernel/patches-4.13.x/0012-hv-sock-avoid-double-FINs-if-shutdown-is-called.patch +++ b/kernel/patches-4.13.x/0010-hv-sock-avoid-double-FINs-if-shutdown-is-called.patch @@ -1,7 +1,7 @@ -From d8592f2252894b71e4494f0ff88e44b6bcea3dca Mon Sep 17 00:00:00 2001 +From a0be18ac0d4976c54d4c4359a4d0289cf88bbf4c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 7 Jul 2017 09:15:29 +0800 -Subject: [PATCH 12/14] 
hv-sock: avoid double FINs if shutdown() is called +Subject: [PATCH 10/12] hv-sock: avoid double FINs if shutdown() is called The host expects a single FIN. @@ -37,5 +37,5 @@ index b8bf1446ae13..50e4bc822c69 100644 /* It can't fail: see hvs_channel_writable_bytes(). */ -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0013-ext4-fix-fault-handling-when-mounted-with-o-dax-ro.patch b/kernel/patches-4.13.x/0011-ext4-fix-fault-handling-when-mounted-with-o-dax-ro.patch similarity index 94% rename from kernel/patches-4.13.x/0013-ext4-fix-fault-handling-when-mounted-with-o-dax-ro.patch rename to kernel/patches-4.13.x/0011-ext4-fix-fault-handling-when-mounted-with-o-dax-ro.patch index 893fa464d..4d1768e4e 100644 --- a/kernel/patches-4.13.x/0013-ext4-fix-fault-handling-when-mounted-with-o-dax-ro.patch +++ b/kernel/patches-4.13.x/0011-ext4-fix-fault-handling-when-mounted-with-o-dax-ro.patch @@ -1,7 +1,7 @@ -From 1f090eed1aaf9f1181e45a338b99878c6e494996 Mon Sep 17 00:00:00 2001 +From 22b335999bba926ec0c168a4b8ebd9ab388853e4 Mon Sep 17 00:00:00 2001 From: Randy Dodgen Date: Thu, 24 Aug 2017 15:26:01 -0400 -Subject: [PATCH 13/14] ext4: fix fault handling when mounted with -o dax,ro +Subject: [PATCH 11/12] ext4: fix fault handling when mounted with -o dax,ro If an ext4 filesystem is mounted with both the DAX and read-only options, executables on that filesystem will fail to start (claiming @@ -57,5 +57,5 @@ index 86ea1d92839a..197653ea6041 100644 if (write) { sb_start_pagefault(sb); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.13.x/0011-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch b/kernel/patches-4.13.x/0011-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch deleted file mode 100644 index 46578e1fe..000000000 --- a/kernel/patches-4.13.x/0011-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 463ca8fab7225ac87f3b20915e45e44dfb5af592 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 28 Jun 2017 23:50:38 
+0800 -Subject: [PATCH 11/14] vmbus: fix the missed signaling in hv_signal_on_read() - -There is an off-by-one bug here, which can cause host-to-guest write to stall. - -When cur_write_sz == pending_sz, we shouldn't signal the host because it's -meaningless: the ring mustn't be 100% full. - -But when cached_write_sz == pending_sz, we must signal the host. - -Signed-off-by: John Starks -Signed-off-by: Dexuan Cui -Origin: git@github.com:dcui/linux.git -(cherry picked from commit 02d07a9dcdb042f33248fd3aeb1e5c2eca6d3d49) ---- - include/linux/hyperv.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index f5d3e8c01401..cc28cd20256f 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1517,11 +1517,11 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) - - cur_write_sz = hv_get_bytes_to_write(rbi); - -- if (cur_write_sz < pending_sz) -+ if (cur_write_sz <= pending_sz) - return; - - cached_write_sz = hv_get_cached_bytes_to_write(rbi); -- if (cached_write_sz < pending_sz) -+ if (cached_write_sz <= pending_sz) - vmbus_setevent(channel); - } - --- -2.14.1 - diff --git a/kernel/patches-4.13.x/0014-NVDIMM-reducded-ND_MIN_NAMESPACE_SIZE-from-4MB-to-4K.patch b/kernel/patches-4.13.x/0012-NVDIMM-reducded-ND_MIN_NAMESPACE_SIZE-from-4MB-to-4K.patch similarity index 84% rename from kernel/patches-4.13.x/0014-NVDIMM-reducded-ND_MIN_NAMESPACE_SIZE-from-4MB-to-4K.patch rename to kernel/patches-4.13.x/0012-NVDIMM-reducded-ND_MIN_NAMESPACE_SIZE-from-4MB-to-4K.patch index 51298c9a6..79fd0458a 100644 --- a/kernel/patches-4.13.x/0014-NVDIMM-reducded-ND_MIN_NAMESPACE_SIZE-from-4MB-to-4K.patch +++ b/kernel/patches-4.13.x/0012-NVDIMM-reducded-ND_MIN_NAMESPACE_SIZE-from-4MB-to-4K.patch @@ -1,7 +1,7 @@ -From f0591ec41208c6e0d2a12334a6c846f01f2f4007 Mon Sep 17 00:00:00 2001 +From a6fc1687fbd218542285c2d77a49ff88a3cb100c Mon Sep 17 00:00:00 2001 From: Cheng-mean Liu Date: Tue, 11 Jul 
2017 16:58:26 -0700 -Subject: [PATCH 14/14] NVDIMM: reducded ND_MIN_NAMESPACE_SIZE from 4MB to 4KB +Subject: [PATCH 12/12] NVDIMM: reducded ND_MIN_NAMESPACE_SIZE from 4MB to 4KB (page size) Signed-off-by: Cheng-mean Liu @@ -24,5 +24,5 @@ index 3f03567631cb..e63c201ed1ef 100644 enum ars_masks { -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.4.x/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch b/kernel/patches-4.4.x/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch deleted file mode 100644 index ca79f346f..000000000 --- a/kernel/patches-4.4.x/0001-virtio-make-find_vqs-checkpatch.pl-friendly.patch +++ /dev/null @@ -1,219 +0,0 @@ -From 856bd4391b1d1ef570c41a04af1e6569b0e6ae56 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 17 Dec 2015 16:53:43 +0800 -Subject: [PATCH 01/44] virtio: make find_vqs() checkpatch.pl-friendly - -checkpatch.pl wants arrays of strings declared as follows: - - static const char * const names[] = { "vq-1", "vq-2", "vq-3" }; - -Currently the find_vqs() function takes a const char *names[] argument -so passing checkpatch.pl's const char * const names[] results in a -compiler error due to losing the second const. - -This patch adjusts the find_vqs() prototype and updates all virtio -transports. This makes it possible for virtio_balloon.c, virtio_input.c, -virtgpu_kms.c, and virtio_rpmsg_bus.c to use the checkpatch.pl-friendly -type. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Bjorn Andersson -(cherry picked from commit f7ad26ff952b3ca2702d7da03aad0ab1f6c01d7c) ---- - drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- - drivers/misc/mic/card/mic_virtio.c | 2 +- - drivers/remoteproc/remoteproc_virtio.c | 2 +- - drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- - drivers/s390/virtio/kvm_virtio.c | 2 +- - drivers/s390/virtio/virtio_ccw.c | 2 +- - drivers/virtio/virtio_balloon.c | 2 +- - drivers/virtio/virtio_input.c | 2 +- - drivers/virtio/virtio_mmio.c | 2 +- - drivers/virtio/virtio_pci_common.c | 4 ++-- - drivers/virtio/virtio_pci_common.h | 2 +- - drivers/virtio/virtio_pci_modern.c | 2 +- - include/linux/virtio_config.h | 2 +- - 13 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c -index 06496a128162..4150873d432e 100644 ---- a/drivers/gpu/drm/virtio/virtgpu_kms.c -+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c -@@ -130,7 +130,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) - static vq_callback_t *callbacks[] = { - virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack - }; -- static const char *names[] = { "control", "cursor" }; -+ static const char * const names[] = { "control", "cursor" }; - - struct virtio_gpu_device *vgdev; - /* this will expand later */ -diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c -index e486a0c26267..f6ed57d3125c 100644 ---- a/drivers/misc/mic/card/mic_virtio.c -+++ b/drivers/misc/mic/card/mic_virtio.c -@@ -311,7 +311,7 @@ unmap: - static int mic_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct mic_vdev *mvdev = to_micvdev(vdev); - struct mic_device_ctrl __iomem *dc = mvdev->dc; -diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c -index e1a10232a943..e44872fb9e5e 100644 ---- 
a/drivers/remoteproc/remoteproc_virtio.c -+++ b/drivers/remoteproc/remoteproc_virtio.c -@@ -147,7 +147,7 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) - static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct rproc *rproc = vdev_to_rproc(vdev); - int i, ret; -diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c -index 73354ee27877..1fcd27c1f183 100644 ---- a/drivers/rpmsg/virtio_rpmsg_bus.c -+++ b/drivers/rpmsg/virtio_rpmsg_bus.c -@@ -945,7 +945,7 @@ static void rpmsg_ns_cb(struct rpmsg_channel *rpdev, void *data, int len, - static int rpmsg_probe(struct virtio_device *vdev) - { - vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done }; -- const char *names[] = { "input", "output" }; -+ static const char * const names[] = { "input", "output" }; - struct virtqueue *vqs[2]; - struct virtproc_info *vrp; - void *bufs_va; -diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c -index 53fb975c404b..1d060fd293a3 100644 ---- a/drivers/s390/virtio/kvm_virtio.c -+++ b/drivers/s390/virtio/kvm_virtio.c -@@ -255,7 +255,7 @@ static void kvm_del_vqs(struct virtio_device *vdev) - static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct kvm_device *kdev = to_kvmdev(vdev); - int i; -diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c -index 1b831598df7c..bf2d1300a957 100644 ---- a/drivers/s390/virtio/virtio_ccw.c -+++ b/drivers/s390/virtio/virtio_ccw.c -@@ -635,7 +635,7 @@ out: - static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_ccw_device *vcdev = 
to_vc_device(vdev); - unsigned long *indicatorp = NULL; -diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c -index 01d15dca940e..0c5533813cde 100644 ---- a/drivers/virtio/virtio_balloon.c -+++ b/drivers/virtio/virtio_balloon.c -@@ -394,7 +394,7 @@ static int init_vqs(struct virtio_balloon *vb) - { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; -- const char *names[] = { "inflate", "deflate", "stats" }; -+ static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; - - /* -diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c -index c96944b59856..350a2a5a49db 100644 ---- a/drivers/virtio/virtio_input.c -+++ b/drivers/virtio/virtio_input.c -@@ -170,7 +170,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) - struct virtqueue *vqs[2]; - vq_callback_t *cbs[] = { virtinput_recv_events, - virtinput_recv_status }; -- static const char *names[] = { "events", "status" }; -+ static const char * const names[] = { "events", "status" }; - int err; - - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); -diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c -index f499d9da7237..745c6ee1bb3e 100644 ---- a/drivers/virtio/virtio_mmio.c -+++ b/drivers/virtio/virtio_mmio.c -@@ -482,7 +482,7 @@ error_available: - static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - unsigned int irq = platform_get_irq(vm_dev->pdev, 0); -diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c -index 2046a68ad0ba..f6bed86c17f9 100644 ---- a/drivers/virtio/virtio_pci_common.c -+++ b/drivers/virtio/virtio_pci_common.c -@@ -296,7 +296,7 @@ void vp_del_vqs(struct virtio_device *vdev) - static int 
vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[], -+ const char * const names[], - bool use_msix, - bool per_vq_vectors) - { -@@ -376,7 +376,7 @@ error_find: - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - int err; - -diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h -index b976d968e793..2cc252270b2d 100644 ---- a/drivers/virtio/virtio_pci_common.h -+++ b/drivers/virtio/virtio_pci_common.h -@@ -139,7 +139,7 @@ void vp_del_vqs(struct virtio_device *vdev); - int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]); -+ const char * const names[]); - const char *vp_bus_name(struct virtio_device *vdev); - - /* Setup the affinity for a virtqueue: -diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c -index 4469202eaa8e..631021cfc740 100644 ---- a/drivers/virtio/virtio_pci_modern.c -+++ b/drivers/virtio/virtio_pci_modern.c -@@ -423,7 +423,7 @@ err_new_queue: - static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]) -+ const char * const names[]) - { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtqueue *vq; -diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h -index e5ce8ab0b8b0..6e6cb0c9d7cb 100644 ---- a/include/linux/virtio_config.h -+++ b/include/linux/virtio_config.h -@@ -70,7 +70,7 @@ struct virtio_config_ops { - int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], -- const char *names[]); -+ const char * const names[]); - void (*del_vqs)(struct virtio_device *); - u64 (*get_features)(struct 
virtio_device *vdev); - int (*finalize_features)(struct virtio_device *vdev); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch b/kernel/patches-4.4.x/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch deleted file mode 100644 index cfd881a4c..000000000 --- a/kernel/patches-4.4.x/0002-VSOCK-constify-vmci_transport_notify_ops-structures.patch +++ /dev/null @@ -1,77 +0,0 @@ -From e5e81c9b625e8bef7d9f568c11a183a0d943d616 Mon Sep 17 00:00:00 2001 -From: Julia Lawall -Date: Sat, 21 Nov 2015 18:39:17 +0100 -Subject: [PATCH 02/44] VSOCK: constify vmci_transport_notify_ops structures - -The vmci_transport_notify_ops structures are never modified, so declare -them as const. - -Done with the help of Coccinelle. - -Signed-off-by: Julia Lawall -Signed-off-by: David S. Miller -(cherry picked from commit 3b22dae38db1cea9ead3229f08cfb0b69aca5706) ---- - net/vmw_vsock/vmci_transport.h | 2 +- - net/vmw_vsock/vmci_transport_notify.c | 2 +- - net/vmw_vsock/vmci_transport_notify.h | 5 +++-- - net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +- - 4 files changed, 6 insertions(+), 5 deletions(-) - -diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h -index 2ad46f39649f..1820e74a5752 100644 ---- a/net/vmw_vsock/vmci_transport.h -+++ b/net/vmw_vsock/vmci_transport.h -@@ -121,7 +121,7 @@ struct vmci_transport { - u64 queue_pair_max_size; - u32 detach_sub_id; - union vmci_transport_notify notify; -- struct vmci_transport_notify_ops *notify_ops; -+ const struct vmci_transport_notify_ops *notify_ops; - struct list_head elem; - struct sock *sk; - spinlock_t lock; /* protects sk. 
*/ -diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c -index 9b7f207f2bee..fd8cf0214d51 100644 ---- a/net/vmw_vsock/vmci_transport_notify.c -+++ b/net/vmw_vsock/vmci_transport_notify.c -@@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) - } - - /* Socket control packet based operations. */ --struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { -+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { - vmci_transport_notify_pkt_socket_init, - vmci_transport_notify_pkt_socket_destruct, - vmci_transport_notify_pkt_poll_in, -diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h -index 7df793249b6c..3c464d394a8f 100644 ---- a/net/vmw_vsock/vmci_transport_notify.h -+++ b/net/vmw_vsock/vmci_transport_notify.h -@@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { - void (*process_negotiate) (struct sock *sk); - }; - --extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; --extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; -+extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; -+extern const -+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; - - #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ -diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c -index dc9c7929a2f9..21e591dafb03 100644 ---- a/net/vmw_vsock/vmci_transport_notify_qstate.c -+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c -@@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( - } - - /* Socket always on control packet based operations. 
*/ --struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { -+const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { - vmci_transport_notify_pkt_socket_init, - vmci_transport_notify_pkt_socket_destruct, - vmci_transport_notify_pkt_poll_in, --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch b/kernel/patches-4.4.x/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch deleted file mode 100644 index b1f0a2bfd..000000000 --- a/kernel/patches-4.4.x/0003-AF_VSOCK-Shrink-the-area-influenced-by-prepare_to_wa.patch +++ /dev/null @@ -1,336 +0,0 @@ -From cdfe5217991da9a36667d8bd2035dac485323f32 Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Tue, 22 Mar 2016 17:05:52 +0100 -Subject: [PATCH 03/44] AF_VSOCK: Shrink the area influenced by prepare_to_wait - -When a thread is prepared for waiting by calling prepare_to_wait, sleeping -is not allowed until either the wait has taken place or finish_wait has -been called. The existing code in af_vsock imposed unnecessary no-sleep -assumptions to a broad list of backend functions. -This patch shrinks the influence of prepare_to_wait to the area where it -is strictly needed, therefore relaxing the no-sleep restriction there. - -Signed-off-by: Claudio Imbrenda -Signed-off-by: David S. 
Miller -(cherry picked from commit f7f9b5e7f8eccfd68ffa7b8d74b07c478bb9e7f0) ---- - net/vmw_vsock/af_vsock.c | 158 +++++++++++++++++++++++++---------------------- - 1 file changed, 85 insertions(+), 73 deletions(-) - -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index 9b5bd6d142dc..b5f1221f48d4 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait_error; -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ goto out_wait; - } else if (timeout == 0) { - err = -ETIMEDOUT; -- goto out_wait_error; -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ goto out_wait; - } - - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, - - if (sk->sk_err) { - err = -sk->sk_err; -- goto out_wait_error; -- } else -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ } else { - err = 0; -+ } - - out_wait: - finish_wait(sk_sleep(sk), &wait); - out: - release_sock(sk); - return err; -- --out_wait_error: -- sk->sk_state = SS_UNCONNECTED; -- sock->state = SS_UNCONNECTED; -- goto out_wait; - } - - static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) -@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - listener->sk_err == 0) { - release_sock(listener); - timeout = schedule_timeout(timeout); -+ finish_wait(sk_sleep(listener), &wait); - lock_sock(listener); - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait; -+ goto out; - } else if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ goto out; - } - - prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); - } -+ finish_wait(sk_sleep(listener), 
&wait); - - if (listener->sk_err) - err = -listener->sk_err; -@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - */ - if (err) { - vconnected->rejected = true; -- release_sock(connected); -- sock_put(connected); -- goto out_wait; -+ } else { -+ newsock->state = SS_CONNECTED; -+ sock_graft(connected, newsock); - } - -- newsock->state = SS_CONNECTED; -- sock_graft(connected, newsock); - release_sock(connected); - sock_put(connected); - } - --out_wait: -- finish_wait(sk_sleep(listener), &wait); - out: - release_sock(listener); - return err; -@@ -1557,11 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - if (err < 0) - goto out; - -- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - while (total_written < len) { - ssize_t written; - -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (vsock_stream_has_space(vsk) == 0 && - sk->sk_err == 0 && - !(sk->sk_shutdown & SEND_SHUTDOWN) && -@@ -1570,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - /* Don't wait for non-blocking sockets. 
*/ - if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } - - err = transport->notify_send_pre_block(vsk, &send_data); -- if (err < 0) -- goto out_wait; -+ if (err < 0) { -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; -+ } - - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - if (signal_pending(current)) { - err = sock_intr_errno(timeout); -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } else if (timeout == 0) { - err = -EAGAIN; -- goto out_wait; -+ finish_wait(sk_sleep(sk), &wait); -+ goto out_err; - } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - } -+ finish_wait(sk_sleep(sk), &wait); - - /* These checks occur both as part of and after the loop - * conditional since we need to check before and after -@@ -1598,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - */ - if (sk->sk_err) { - err = -sk->sk_err; -- goto out_wait; -+ goto out_err; - } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || - (vsk->peer_shutdown & RCV_SHUTDOWN)) { - err = -EPIPE; -- goto out_wait; -+ goto out_err; - } - - err = transport->notify_send_pre_enqueue(vsk, &send_data); - if (err < 0) -- goto out_wait; -+ goto out_err; - - /* Note that enqueue will only write as many bytes as are free - * in the produce queue, so we don't need to ensure len is -@@ -1620,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - len - total_written); - if (written < 0) { - err = -ENOMEM; -- goto out_wait; -+ goto out_err; - } - - total_written += written; -@@ -1628,14 +1633,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, - err = transport->notify_send_post_enqueue( - vsk, written, &send_data); - if (err < 0) -- goto out_wait; -+ goto out_err; - - } - --out_wait: -+out_err: - if (total_written > 0) - err = total_written; -- finish_wait(sk_sleep(sk), &wait); - out: - 
release_sock(sk); - return err; -@@ -1716,21 +1720,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - if (err < 0) - goto out; - -- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - while (1) { -- s64 ready = vsock_stream_has_data(vsk); -+ s64 ready; - -- if (ready < 0) { -- /* Invalid queue pair content. XXX This should be -- * changed to a connection reset in a later change. -- */ -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -+ ready = vsock_stream_has_data(vsk); - -- err = -ENOMEM; -- goto out_wait; -- } else if (ready > 0) { -+ if (ready == 0) { -+ if (sk->sk_err != 0 || -+ (sk->sk_shutdown & RCV_SHUTDOWN) || -+ (vsk->peer_shutdown & SEND_SHUTDOWN)) { -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ /* Don't wait for non-blocking sockets. */ -+ if (timeout == 0) { -+ err = -EAGAIN; -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ -+ err = transport->notify_recv_pre_block( -+ vsk, target, &recv_data); -+ if (err < 0) { -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ release_sock(sk); -+ timeout = schedule_timeout(timeout); -+ lock_sock(sk); -+ -+ if (signal_pending(current)) { -+ err = sock_intr_errno(timeout); -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } else if (timeout == 0) { -+ err = -EAGAIN; -+ finish_wait(sk_sleep(sk), &wait); -+ break; -+ } -+ } else { - ssize_t read; - -+ finish_wait(sk_sleep(sk), &wait); -+ -+ if (ready < 0) { -+ /* Invalid queue pair content. XXX This should -+ * be changed to a connection reset in a later -+ * change. 
-+ */ -+ -+ err = -ENOMEM; -+ goto out; -+ } -+ - err = transport->notify_recv_pre_dequeue( - vsk, target, &recv_data); - if (err < 0) -@@ -1750,42 +1794,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - vsk, target, read, - !(flags & MSG_PEEK), &recv_data); - if (err < 0) -- goto out_wait; -+ goto out; - - if (read >= target || flags & MSG_PEEK) - break; - - target -= read; -- } else { -- if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) -- || (vsk->peer_shutdown & SEND_SHUTDOWN)) { -- break; -- } -- /* Don't wait for non-blocking sockets. */ -- if (timeout == 0) { -- err = -EAGAIN; -- break; -- } -- -- err = transport->notify_recv_pre_block( -- vsk, target, &recv_data); -- if (err < 0) -- break; -- -- release_sock(sk); -- timeout = schedule_timeout(timeout); -- lock_sock(sk); -- -- if (signal_pending(current)) { -- err = sock_intr_errno(timeout); -- break; -- } else if (timeout == 0) { -- err = -EAGAIN; -- break; -- } -- -- prepare_to_wait(sk_sleep(sk), &wait, -- TASK_INTERRUPTIBLE); - } - } - -@@ -1797,8 +1811,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - if (copied > 0) - err = copied; - --out_wait: -- finish_wait(sk_sleep(sk), &wait); - out: - release_sock(sk); - return err; --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0004-vsock-make-listener-child-lock-ordering-explicit.patch b/kernel/patches-4.4.x/0004-vsock-make-listener-child-lock-ordering-explicit.patch deleted file mode 100644 index e510ae282..000000000 --- a/kernel/patches-4.4.x/0004-vsock-make-listener-child-lock-ordering-explicit.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 5629f181052f3b9f81e6a5cb3e27909ec1f06fa2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 23 Jun 2016 16:28:58 +0100 -Subject: [PATCH 04/44] vsock: make listener child lock ordering explicit - -There are several places where the listener and pending or accept queue -child sockets are accessed at the same time. 
Lockdep is unhappy that -two locks from the same class are held. - -Tell lockdep that it is safe and document the lock ordering. - -Originally Claudio Imbrenda sent a similar -patch asking whether this is safe. I have audited the code and also -covered the vsock_pending_work() function. - -Suggested-by: Claudio Imbrenda -Signed-off-by: Stefan Hajnoczi -Signed-off-by: David S. Miller -(cherry picked from commit 4192f672fae559f32d82de72a677701853cc98a7) ---- - net/vmw_vsock/af_vsock.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index b5f1221f48d4..b96ac918e0ba 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -61,6 +61,14 @@ - * function will also cleanup rejected sockets, those that reach the connected - * state but leave it before they have been accepted. - * -+ * - Lock ordering for pending or accept queue sockets is: -+ * -+ * lock_sock(listener); -+ * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); -+ * -+ * Using explicit nested locking keeps lockdep happy since normally only one -+ * lock of a given class may be taken at a time. -+ * - * - Sockets created by user action will be cleaned up when the user process - * calls close(2), causing our release implementation to be called. 
Our release - * implementation will perform some cleanup then drop the last reference so our -@@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) - cleanup = true; - - lock_sock(listener); -- lock_sock(sk); -+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (vsock_is_pending(sk)) { - vsock_remove_pending(listener, sk); -@@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) - if (connected) { - listener->sk_ack_backlog--; - -- lock_sock(connected); -+ lock_sock_nested(connected, SINGLE_DEPTH_NESTING); - vconnected = vsock_sk(connected); - - /* If the listener socket has received an error, then we should --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0005-VSOCK-transport-specific-vsock_transport-functions.patch b/kernel/patches-4.4.x/0005-VSOCK-transport-specific-vsock_transport-functions.patch deleted file mode 100644 index 0c25533cf..000000000 --- a/kernel/patches-4.4.x/0005-VSOCK-transport-specific-vsock_transport-functions.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 219cd6dc1429ef0805b586a7b9f274d274214f64 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 28 Jul 2016 15:36:30 +0100 -Subject: [PATCH 05/44] VSOCK: transport-specific vsock_transport functions - -struct vsock_transport contains function pointers called by AF_VSOCK -core code. The transport may want its own transport-specific function -pointers and they can be added after struct vsock_transport. - -Allow the transport to fetch vsock_transport. It can downcast it to -access transport-specific function pointers. - -The virtio transport will use this. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 0b01aeb3d2fbf16787f0c9629f4ca52ae792f732) ---- - include/net/af_vsock.h | 3 +++ - net/vmw_vsock/af_vsock.c | 9 +++++++++ - 2 files changed, 12 insertions(+) - -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index e9eb2d6791b3..23f55259b60d 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -165,6 +165,9 @@ static inline int vsock_core_init(const struct vsock_transport *t) - } - void vsock_core_exit(void); - -+/* The transport may downcast this to access transport-specific functions */ -+const struct vsock_transport *vsock_core_get_transport(void); -+ - /**** UTILS ****/ - - void vsock_release_pending(struct sock *pending); -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index b96ac918e0ba..e34d96f8bde2 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -1995,6 +1995,15 @@ void vsock_core_exit(void) - } - EXPORT_SYMBOL_GPL(vsock_core_exit); - -+const struct vsock_transport *vsock_core_get_transport(void) -+{ -+ /* vsock_register_mutex not taken since only the transport uses this -+ * function and only while registered. -+ */ -+ return transport; -+} -+EXPORT_SYMBOL_GPL(vsock_core_get_transport); -+ - MODULE_AUTHOR("VMware, Inc."); - MODULE_DESCRIPTION("VMware Virtual Socket Family"); - MODULE_VERSION("1.0.1.0-k"); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0006-VSOCK-defer-sock-removal-to-transports.patch b/kernel/patches-4.4.x/0006-VSOCK-defer-sock-removal-to-transports.patch deleted file mode 100644 index c5114b112..000000000 --- a/kernel/patches-4.4.x/0006-VSOCK-defer-sock-removal-to-transports.patch +++ /dev/null @@ -1,83 +0,0 @@ -From d3cc1a7d00e67576dbccc862e2f6a4093fb1619e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 28 Jul 2016 15:36:31 +0100 -Subject: [PATCH 06/44] VSOCK: defer sock removal to transports - -The virtio transport will implement graceful shutdown and the related -SO_LINGER socket option. 
This requires orphaning the sock but keeping -it in the table of connections after .release(). - -This patch adds the vsock_remove_sock() function and leaves it up to the -transport when to remove the sock. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 6773b7dc39f165bd9d824b50ac52cbb3f87d53c8) ---- - include/net/af_vsock.h | 1 + - net/vmw_vsock/af_vsock.c | 16 ++++++++++------ - net/vmw_vsock/vmci_transport.c | 2 ++ - 3 files changed, 13 insertions(+), 6 deletions(-) - -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index 23f55259b60d..3af0b224f754 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -180,6 +180,7 @@ void vsock_remove_connected(struct vsock_sock *vsk); - struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, - struct sockaddr_vm *dst); -+void vsock_remove_sock(struct vsock_sock *vsk); - void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); - - #endif /* __AF_VSOCK_H__ */ -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index e34d96f8bde2..17dbbe64cd73 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -344,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk) - return ret; - } - -+void vsock_remove_sock(struct vsock_sock *vsk) -+{ -+ if (vsock_in_bound_table(vsk)) -+ vsock_remove_bound(vsk); -+ -+ if (vsock_in_connected_table(vsk)) -+ vsock_remove_connected(vsk); -+} -+EXPORT_SYMBOL_GPL(vsock_remove_sock); -+ - void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) - { - int i; -@@ -660,12 +670,6 @@ static void __vsock_release(struct sock *sk) - vsk = vsock_sk(sk); - pending = NULL; /* Compiler warning. 
*/ - -- if (vsock_in_bound_table(vsk)) -- vsock_remove_bound(vsk); -- -- if (vsock_in_connected_table(vsk)) -- vsock_remove_connected(vsk); -- - transport->release(vsk); - - lock_sock(sk); -diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c -index 662bdd20a748..5f8c99eb104c 100644 ---- a/net/vmw_vsock/vmci_transport.c -+++ b/net/vmw_vsock/vmci_transport.c -@@ -1644,6 +1644,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk) - - static void vmci_transport_release(struct vsock_sock *vsk) - { -+ vsock_remove_sock(vsk); -+ - if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { - vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); - vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch b/kernel/patches-4.4.x/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch deleted file mode 100644 index ee30eb2e5..000000000 --- a/kernel/patches-4.4.x/0007-VSOCK-Introduce-virtio_vsock_common.ko.patch +++ /dev/null @@ -1,1496 +0,0 @@ -From cc3ba5ac51c7f21272f81b04c857e81e0385ce92 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:32 +0100 -Subject: [PATCH 07/44] VSOCK: Introduce virtio_vsock_common.ko - -This module contains the common code and header files for the following -virtio_transporto and vhost_vsock kernel modules. - -Signed-off-by: Asias He -Signed-off-by: Claudio Imbrenda -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 06a8fc78367d070720af960dcecec917d3ae5f3b) ---- - MAINTAINERS | 10 + - include/linux/virtio_vsock.h | 154 ++++ - include/net/af_vsock.h | 2 + - .../trace/events/vsock_virtio_transport_common.h | 144 +++ - include/uapi/linux/Kbuild | 1 + - include/uapi/linux/virtio_ids.h | 1 + - include/uapi/linux/virtio_vsock.h | 94 ++ - net/vmw_vsock/virtio_transport_common.c | 992 +++++++++++++++++++++ - 8 files changed, 1398 insertions(+) - create mode 100644 include/linux/virtio_vsock.h - create mode 100644 include/trace/events/vsock_virtio_transport_common.h - create mode 100644 include/uapi/linux/virtio_vsock.h - create mode 100644 net/vmw_vsock/virtio_transport_common.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index ab65bbecb159..b93ba8b21be7 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11382,6 +11382,16 @@ S: Maintained - F: drivers/media/v4l2-core/videobuf2-* - F: include/media/videobuf2-* - -+VIRTIO AND VHOST VSOCK DRIVER -+M: Stefan Hajnoczi -+L: kvm@vger.kernel.org -+L: virtualization@lists.linux-foundation.org -+L: netdev@vger.kernel.org -+S: Maintained -+F: include/linux/virtio_vsock.h -+F: include/uapi/linux/virtio_vsock.h -+F: net/vmw_vsock/virtio_transport_common.c -+ - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul - S: Maintained -diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h -new file mode 100644 -index 000000000000..9638bfeb0d1f ---- /dev/null -+++ b/include/linux/virtio_vsock.h -@@ -0,0 +1,154 @@ -+#ifndef _LINUX_VIRTIO_VSOCK_H -+#define _LINUX_VIRTIO_VSOCK_H -+ -+#include -+#include -+#include -+#include -+ -+#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 -+#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) -+#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) -+#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) -+#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL -+#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) -+ -+enum { -+ VSOCK_VQ_RX = 0, /* for host to guest data */ -+ 
VSOCK_VQ_TX = 1, /* for guest to host data */ -+ VSOCK_VQ_EVENT = 2, -+ VSOCK_VQ_MAX = 3, -+}; -+ -+/* Per-socket state (accessed via vsk->trans) */ -+struct virtio_vsock_sock { -+ struct vsock_sock *vsk; -+ -+ /* Protected by lock_sock(sk_vsock(trans->vsk)) */ -+ u32 buf_size; -+ u32 buf_size_min; -+ u32 buf_size_max; -+ -+ spinlock_t tx_lock; -+ spinlock_t rx_lock; -+ -+ /* Protected by tx_lock */ -+ u32 tx_cnt; -+ u32 buf_alloc; -+ u32 peer_fwd_cnt; -+ u32 peer_buf_alloc; -+ -+ /* Protected by rx_lock */ -+ u32 fwd_cnt; -+ u32 rx_bytes; -+ struct list_head rx_queue; -+}; -+ -+struct virtio_vsock_pkt { -+ struct virtio_vsock_hdr hdr; -+ struct work_struct work; -+ struct list_head list; -+ void *buf; -+ u32 len; -+ u32 off; -+ bool reply; -+}; -+ -+struct virtio_vsock_pkt_info { -+ u32 remote_cid, remote_port; -+ struct msghdr *msg; -+ u32 pkt_len; -+ u16 type; -+ u16 op; -+ u32 flags; -+ bool reply; -+}; -+ -+struct virtio_transport { -+ /* This must be the first field */ -+ struct vsock_transport transport; -+ -+ /* Takes ownership of the packet */ -+ int (*send_pkt)(struct virtio_vsock_pkt *pkt); -+}; -+ -+ssize_t -+virtio_transport_stream_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, -+ int type); -+int -+virtio_transport_dgram_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags); -+ -+s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); -+s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); -+ -+int virtio_transport_do_socket_init(struct vsock_sock *vsk, -+ struct vsock_sock *psk); -+u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); -+u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); -+u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); -+void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); -+void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); -+void virtio_transport_set_max_buffer_size(struct vsock_sock 
*vs, u64 val); -+int -+virtio_transport_notify_poll_in(struct vsock_sock *vsk, -+ size_t target, -+ bool *data_ready_now); -+int -+virtio_transport_notify_poll_out(struct vsock_sock *vsk, -+ size_t target, -+ bool *space_available_now); -+ -+int virtio_transport_notify_recv_init(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, -+ size_t target, ssize_t copied, bool data_read, -+ struct vsock_transport_recv_notify_data *data); -+int virtio_transport_notify_send_init(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data); -+int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, -+ ssize_t written, struct vsock_transport_send_notify_data *data); -+ -+u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); -+bool virtio_transport_stream_is_active(struct vsock_sock *vsk); -+bool virtio_transport_stream_allow(u32 cid, u32 port); -+int virtio_transport_dgram_bind(struct vsock_sock *vsk, -+ struct sockaddr_vm *addr); -+bool virtio_transport_dgram_allow(u32 cid, u32 port); -+ -+int virtio_transport_connect(struct vsock_sock *vsk); -+ -+int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); -+ -+void virtio_transport_release(struct vsock_sock *vsk); -+ -+ssize_t -+virtio_transport_stream_enqueue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len); -+int -+virtio_transport_dgram_enqueue(struct 
vsock_sock *vsk, -+ struct sockaddr_vm *remote_addr, -+ struct msghdr *msg, -+ size_t len); -+ -+void virtio_transport_destruct(struct vsock_sock *vsk); -+ -+void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); -+void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); -+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); -+u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); -+void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); -+ -+#endif /* _LINUX_VIRTIO_VSOCK_H */ -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index 3af0b224f754..f2758964ce6f 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -63,6 +63,8 @@ struct vsock_sock { - struct list_head accept_queue; - bool rejected; - struct delayed_work dwork; -+ struct delayed_work close_work; -+ bool close_work_scheduled; - u32 peer_shutdown; - bool sent_request; - bool ignore_connecting_rst; -diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h -new file mode 100644 -index 000000000000..b7f1d6278280 ---- /dev/null -+++ b/include/trace/events/vsock_virtio_transport_common.h -@@ -0,0 +1,144 @@ -+#undef TRACE_SYSTEM -+#define TRACE_SYSTEM vsock -+ -+#if !defined(_TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H) || \ -+ defined(TRACE_HEADER_MULTI_READ) -+#define _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H -+ -+#include -+ -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM); -+ -+#define show_type(val) \ -+ __print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }) -+ -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RESPONSE); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RST); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW); -+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE); 
-+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST); -+ -+#define show_op(val) \ -+ __print_symbolic(val, \ -+ { VIRTIO_VSOCK_OP_INVALID, "INVALID" }, \ -+ { VIRTIO_VSOCK_OP_REQUEST, "REQUEST" }, \ -+ { VIRTIO_VSOCK_OP_RESPONSE, "RESPONSE" }, \ -+ { VIRTIO_VSOCK_OP_RST, "RST" }, \ -+ { VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \ -+ { VIRTIO_VSOCK_OP_RW, "RW" }, \ -+ { VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \ -+ { VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" }) -+ -+TRACE_EVENT(virtio_transport_alloc_pkt, -+ TP_PROTO( -+ __u32 src_cid, __u32 src_port, -+ __u32 dst_cid, __u32 dst_port, -+ __u32 len, -+ __u16 type, -+ __u16 op, -+ __u32 flags -+ ), -+ TP_ARGS( -+ src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ type, -+ op, -+ flags -+ ), -+ TP_STRUCT__entry( -+ __field(__u32, src_cid) -+ __field(__u32, src_port) -+ __field(__u32, dst_cid) -+ __field(__u32, dst_port) -+ __field(__u32, len) -+ __field(__u16, type) -+ __field(__u16, op) -+ __field(__u32, flags) -+ ), -+ TP_fast_assign( -+ __entry->src_cid = src_cid; -+ __entry->src_port = src_port; -+ __entry->dst_cid = dst_cid; -+ __entry->dst_port = dst_port; -+ __entry->len = len; -+ __entry->type = type; -+ __entry->op = op; -+ __entry->flags = flags; -+ ), -+ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x", -+ __entry->src_cid, __entry->src_port, -+ __entry->dst_cid, __entry->dst_port, -+ __entry->len, -+ show_type(__entry->type), -+ show_op(__entry->op), -+ __entry->flags) -+); -+ -+TRACE_EVENT(virtio_transport_recv_pkt, -+ TP_PROTO( -+ __u32 src_cid, __u32 src_port, -+ __u32 dst_cid, __u32 dst_port, -+ __u32 len, -+ __u16 type, -+ __u16 op, -+ __u32 flags, -+ __u32 buf_alloc, -+ __u32 fwd_cnt -+ ), -+ TP_ARGS( -+ src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ type, -+ op, -+ flags, -+ buf_alloc, -+ fwd_cnt -+ ), -+ TP_STRUCT__entry( -+ __field(__u32, src_cid) -+ __field(__u32, src_port) -+ __field(__u32, dst_cid) -+ __field(__u32, dst_port) -+ __field(__u32, len) -+ __field(__u16, 
type) -+ __field(__u16, op) -+ __field(__u32, flags) -+ __field(__u32, buf_alloc) -+ __field(__u32, fwd_cnt) -+ ), -+ TP_fast_assign( -+ __entry->src_cid = src_cid; -+ __entry->src_port = src_port; -+ __entry->dst_cid = dst_cid; -+ __entry->dst_port = dst_port; -+ __entry->len = len; -+ __entry->type = type; -+ __entry->op = op; -+ __entry->flags = flags; -+ __entry->buf_alloc = buf_alloc; -+ __entry->fwd_cnt = fwd_cnt; -+ ), -+ TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x " -+ "buf_alloc=%u fwd_cnt=%u", -+ __entry->src_cid, __entry->src_port, -+ __entry->dst_cid, __entry->dst_port, -+ __entry->len, -+ show_type(__entry->type), -+ show_op(__entry->op), -+ __entry->flags, -+ __entry->buf_alloc, -+ __entry->fwd_cnt) -+); -+ -+#endif /* _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H */ -+ -+#undef TRACE_INCLUDE_FILE -+#define TRACE_INCLUDE_FILE vsock_virtio_transport_common -+ -+/* This part must be outside protection */ -+#include -diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild -index ebd10e624598..6c51a4d8bb62 100644 ---- a/include/uapi/linux/Kbuild -+++ b/include/uapi/linux/Kbuild -@@ -447,6 +447,7 @@ header-y += virtio_ring.h - header-y += virtio_rng.h - header-y += virtio_scsi.h - header-y += virtio_types.h -+header-y += virtio_vsock.h - header-y += vm_sockets.h - header-y += vt.h - header-y += wait.h -diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h -index 77925f587b15..3228d582234a 100644 ---- a/include/uapi/linux/virtio_ids.h -+++ b/include/uapi/linux/virtio_ids.h -@@ -41,5 +41,6 @@ - #define VIRTIO_ID_CAIF 12 /* Virtio caif */ - #define VIRTIO_ID_GPU 16 /* virtio GPU */ - #define VIRTIO_ID_INPUT 18 /* virtio input */ -+#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ - - #endif /* _LINUX_VIRTIO_IDS_H */ -diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h -new file mode 100644 -index 000000000000..6b011c19b50f ---- /dev/null -+++ b/include/uapi/linux/virtio_vsock.h -@@ 
-0,0 +1,94 @@ -+/* -+ * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so -+ * anyone can use the definitions to implement compatible drivers/servers: -+ * -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of IBM nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. 
-+ * -+ * Copyright (C) Red Hat, Inc., 2013-2015 -+ * Copyright (C) Asias He , 2013 -+ * Copyright (C) Stefan Hajnoczi , 2015 -+ */ -+ -+#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H -+#define _UAPI_LINUX_VIRTIO_VOSCK_H -+ -+#include -+#include -+#include -+ -+struct virtio_vsock_config { -+ __le64 guest_cid; -+} __attribute__((packed)); -+ -+enum virtio_vsock_event_id { -+ VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, -+}; -+ -+struct virtio_vsock_event { -+ __le32 id; -+} __attribute__((packed)); -+ -+struct virtio_vsock_hdr { -+ __le64 src_cid; -+ __le64 dst_cid; -+ __le32 src_port; -+ __le32 dst_port; -+ __le32 len; -+ __le16 type; /* enum virtio_vsock_type */ -+ __le16 op; /* enum virtio_vsock_op */ -+ __le32 flags; -+ __le32 buf_alloc; -+ __le32 fwd_cnt; -+} __attribute__((packed)); -+ -+enum virtio_vsock_type { -+ VIRTIO_VSOCK_TYPE_STREAM = 1, -+}; -+ -+enum virtio_vsock_op { -+ VIRTIO_VSOCK_OP_INVALID = 0, -+ -+ /* Connect operations */ -+ VIRTIO_VSOCK_OP_REQUEST = 1, -+ VIRTIO_VSOCK_OP_RESPONSE = 2, -+ VIRTIO_VSOCK_OP_RST = 3, -+ VIRTIO_VSOCK_OP_SHUTDOWN = 4, -+ -+ /* To send payload */ -+ VIRTIO_VSOCK_OP_RW = 5, -+ -+ /* Tell the peer our credit info */ -+ VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, -+ /* Request the peer to send the credit info to us */ -+ VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, -+}; -+ -+/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ -+enum virtio_vsock_shutdown { -+ VIRTIO_VSOCK_SHUTDOWN_RCV = 1, -+ VIRTIO_VSOCK_SHUTDOWN_SEND = 2, -+}; -+ -+#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ -diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c -new file mode 100644 -index 000000000000..a53b3a16b4f1 ---- /dev/null -+++ b/net/vmw_vsock/virtio_transport_common.c -@@ -0,0 +1,992 @@ -+/* -+ * common code for virtio vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#define CREATE_TRACE_POINTS -+#include -+ -+/* How long to wait for graceful shutdown of a connection */ -+#define VSOCK_CLOSE_TIMEOUT (8 * HZ) -+ -+static const struct virtio_transport *virtio_transport_get_ops(void) -+{ -+ const struct vsock_transport *t = vsock_core_get_transport(); -+ -+ return container_of(t, struct virtio_transport, transport); -+} -+ -+struct virtio_vsock_pkt * -+virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, -+ size_t len, -+ u32 src_cid, -+ u32 src_port, -+ u32 dst_cid, -+ u32 dst_port) -+{ -+ struct virtio_vsock_pkt *pkt; -+ int err; -+ -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ return NULL; -+ -+ pkt->hdr.type = cpu_to_le16(info->type); -+ pkt->hdr.op = cpu_to_le16(info->op); -+ pkt->hdr.src_cid = cpu_to_le64(src_cid); -+ pkt->hdr.dst_cid = cpu_to_le64(dst_cid); -+ pkt->hdr.src_port = cpu_to_le32(src_port); -+ pkt->hdr.dst_port = cpu_to_le32(dst_port); -+ pkt->hdr.flags = cpu_to_le32(info->flags); -+ pkt->len = len; -+ pkt->hdr.len = cpu_to_le32(len); -+ pkt->reply = info->reply; -+ -+ if (info->msg && len > 0) { -+ pkt->buf = kmalloc(len, GFP_KERNEL); -+ if (!pkt->buf) -+ goto out_pkt; -+ err = memcpy_from_msg(pkt->buf, info->msg, len); -+ if (err) -+ goto out; -+ } -+ -+ trace_virtio_transport_alloc_pkt(src_cid, src_port, -+ dst_cid, dst_port, -+ len, -+ info->type, -+ info->op, -+ info->flags); -+ -+ return pkt; -+ -+out: -+ kfree(pkt->buf); -+out_pkt: -+ kfree(pkt); -+ return NULL; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); -+ -+static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt_info *info) -+{ -+ u32 src_cid, src_port, dst_cid, dst_port; -+ struct virtio_vsock_sock *vvs; -+ struct virtio_vsock_pkt *pkt; -+ u32 pkt_len = info->pkt_len; -+ -+ src_cid = vm_sockets_get_local_cid(); -+ src_port = vsk->local_addr.svm_port; -+ if 
(!info->remote_cid) { -+ dst_cid = vsk->remote_addr.svm_cid; -+ dst_port = vsk->remote_addr.svm_port; -+ } else { -+ dst_cid = info->remote_cid; -+ dst_port = info->remote_port; -+ } -+ -+ vvs = vsk->trans; -+ -+ /* we can send less than pkt_len bytes */ -+ if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) -+ pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; -+ -+ /* virtio_transport_get_credit might return less than pkt_len credit */ -+ pkt_len = virtio_transport_get_credit(vvs, pkt_len); -+ -+ /* Do not send zero length OP_RW pkt */ -+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) -+ return pkt_len; -+ -+ pkt = virtio_transport_alloc_pkt(info, pkt_len, -+ src_cid, src_port, -+ dst_cid, dst_port); -+ if (!pkt) { -+ virtio_transport_put_credit(vvs, pkt_len); -+ return -ENOMEM; -+ } -+ -+ virtio_transport_inc_tx_pkt(vvs, pkt); -+ -+ return virtio_transport_get_ops()->send_pkt(pkt); -+} -+ -+static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, -+ struct virtio_vsock_pkt *pkt) -+{ -+ vvs->rx_bytes += pkt->len; -+} -+ -+static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, -+ struct virtio_vsock_pkt *pkt) -+{ -+ vvs->rx_bytes -= pkt->len; -+ vvs->fwd_cnt += pkt->len; -+} -+ -+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) -+{ -+ spin_lock_bh(&vvs->tx_lock); -+ pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); -+ pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); -+ spin_unlock_bh(&vvs->tx_lock); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); -+ -+u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) -+{ -+ u32 ret; -+ -+ spin_lock_bh(&vvs->tx_lock); -+ ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); -+ if (ret > credit) -+ ret = credit; -+ vvs->tx_cnt += ret; -+ spin_unlock_bh(&vvs->tx_lock); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_credit); -+ -+void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) -+{ -+ 
spin_lock_bh(&vvs->tx_lock); -+ vvs->tx_cnt -= credit; -+ spin_unlock_bh(&vvs->tx_lock); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_put_credit); -+ -+static int virtio_transport_send_credit_update(struct vsock_sock *vsk, -+ int type, -+ struct virtio_vsock_hdr *hdr) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, -+ .type = type, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+static ssize_t -+virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ struct virtio_vsock_pkt *pkt; -+ size_t bytes, total = 0; -+ int err = -EFAULT; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ while (total < len && !list_empty(&vvs->rx_queue)) { -+ pkt = list_first_entry(&vvs->rx_queue, -+ struct virtio_vsock_pkt, list); -+ -+ bytes = len - total; -+ if (bytes > pkt->len - pkt->off) -+ bytes = pkt->len - pkt->off; -+ -+ /* sk_lock is held by caller so no one else can dequeue. -+ * Unlock rx_lock since memcpy_to_msg() may sleep. 
-+ */ -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); -+ if (err) -+ goto out; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ -+ total += bytes; -+ pkt->off += bytes; -+ if (pkt->off == pkt->len) { -+ virtio_transport_dec_rx_pkt(vvs, pkt); -+ list_del(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ } -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ /* Send a credit pkt to peer */ -+ virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, -+ NULL); -+ -+ return total; -+ -+out: -+ if (total) -+ err = total; -+ return err; -+} -+ -+ssize_t -+virtio_transport_stream_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags) -+{ -+ if (flags & MSG_PEEK) -+ return -EOPNOTSUPP; -+ -+ return virtio_transport_stream_do_dequeue(vsk, msg, len); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); -+ -+int -+virtio_transport_dgram_dequeue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); -+ -+s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ bytes = vvs->rx_bytes; -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ return bytes; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); -+ -+static s64 virtio_transport_has_space(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); -+ if (bytes < 0) -+ bytes = 0; -+ -+ return bytes; -+} -+ -+s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ s64 bytes; -+ -+ spin_lock_bh(&vvs->tx_lock); -+ bytes = virtio_transport_has_space(vsk); -+ spin_unlock_bh(&vvs->tx_lock); -+ -+ return bytes; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); -+ -+int 
virtio_transport_do_socket_init(struct vsock_sock *vsk, -+ struct vsock_sock *psk) -+{ -+ struct virtio_vsock_sock *vvs; -+ -+ vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); -+ if (!vvs) -+ return -ENOMEM; -+ -+ vsk->trans = vvs; -+ vvs->vsk = vsk; -+ if (psk) { -+ struct virtio_vsock_sock *ptrans = psk->trans; -+ -+ vvs->buf_size = ptrans->buf_size; -+ vvs->buf_size_min = ptrans->buf_size_min; -+ vvs->buf_size_max = ptrans->buf_size_max; -+ vvs->peer_buf_alloc = ptrans->peer_buf_alloc; -+ } else { -+ vvs->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; -+ vvs->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; -+ vvs->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; -+ } -+ -+ vvs->buf_alloc = vvs->buf_size; -+ -+ spin_lock_init(&vvs->rx_lock); -+ spin_lock_init(&vvs->tx_lock); -+ INIT_LIST_HEAD(&vvs->rx_queue); -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); -+ -+u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); -+ -+u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size_min; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); -+ -+u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size_max; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); -+ -+void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val < vvs->buf_size_min) -+ vvs->buf_size_min = val; -+ if (val > vvs->buf_size_max) -+ vvs->buf_size_max = val; -+ vvs->buf_size = val; -+ vvs->buf_alloc = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); -+ -+void 
virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val > vvs->buf_size) -+ vvs->buf_size = val; -+ vvs->buf_size_min = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); -+ -+void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) -+ val = VIRTIO_VSOCK_MAX_BUF_SIZE; -+ if (val < vvs->buf_size) -+ vvs->buf_size = val; -+ vvs->buf_size_max = val; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); -+ -+int -+virtio_transport_notify_poll_in(struct vsock_sock *vsk, -+ size_t target, -+ bool *data_ready_now) -+{ -+ if (vsock_stream_has_data(vsk)) -+ *data_ready_now = true; -+ else -+ *data_ready_now = false; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); -+ -+int -+virtio_transport_notify_poll_out(struct vsock_sock *vsk, -+ size_t target, -+ bool *space_avail_now) -+{ -+ s64 free_space; -+ -+ free_space = vsock_stream_has_space(vsk); -+ if (free_space > 0) -+ *space_avail_now = true; -+ else if (free_space == 0) -+ *space_avail_now = false; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); -+ -+int virtio_transport_notify_recv_init(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); -+ -+int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); -+ -+int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, -+ size_t target, struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); 
-+ -+int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, -+ size_t target, ssize_t copied, bool data_read, -+ struct vsock_transport_recv_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); -+ -+int virtio_transport_notify_send_init(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); -+ -+int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); -+ -+int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, -+ struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); -+ -+int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, -+ ssize_t written, struct vsock_transport_send_notify_data *data) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); -+ -+u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ return vvs->buf_size; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); -+ -+bool virtio_transport_stream_is_active(struct vsock_sock *vsk) -+{ -+ return true; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); -+ -+bool virtio_transport_stream_allow(u32 cid, u32 port) -+{ -+ return true; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); -+ -+int virtio_transport_dgram_bind(struct vsock_sock *vsk, -+ struct sockaddr_vm *addr) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); -+ -+bool virtio_transport_dgram_allow(u32 cid, u32 port) -+{ -+ return false; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); -+ -+int virtio_transport_connect(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = 
VIRTIO_VSOCK_OP_REQUEST, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_connect); -+ -+int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_SHUTDOWN, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .flags = (mode & RCV_SHUTDOWN ? -+ VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | -+ (mode & SEND_SHUTDOWN ? -+ VIRTIO_VSOCK_SHUTDOWN_SEND : 0), -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_shutdown); -+ -+int -+virtio_transport_dgram_enqueue(struct vsock_sock *vsk, -+ struct sockaddr_vm *remote_addr, -+ struct msghdr *msg, -+ size_t dgram_len) -+{ -+ return -EOPNOTSUPP; -+} -+EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); -+ -+ssize_t -+virtio_transport_stream_enqueue(struct vsock_sock *vsk, -+ struct msghdr *msg, -+ size_t len) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RW, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .msg = msg, -+ .pkt_len = len, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); -+ -+void virtio_transport_destruct(struct vsock_sock *vsk) -+{ -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ -+ kfree(vvs); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_destruct); -+ -+static int virtio_transport_reset(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RST, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .reply = !!pkt, -+ }; -+ -+ /* Send RST only if the original pkt is not a RST pkt */ -+ if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ return 0; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+/* Normally packets are associated with a socket. There may be no socket if an -+ * attempt was made to connect to a socket that does not exist. 
-+ */ -+static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RST, -+ .type = le16_to_cpu(pkt->hdr.type), -+ .reply = true, -+ }; -+ -+ /* Send RST only if the original pkt is not a RST pkt */ -+ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ return 0; -+ -+ pkt = virtio_transport_alloc_pkt(&info, 0, -+ le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port), -+ le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ if (!pkt) -+ return -ENOMEM; -+ -+ return virtio_transport_get_ops()->send_pkt(pkt); -+} -+ -+static void virtio_transport_wait_close(struct sock *sk, long timeout) -+{ -+ if (timeout) { -+ DEFINE_WAIT(wait); -+ -+ do { -+ prepare_to_wait(sk_sleep(sk), &wait, -+ TASK_INTERRUPTIBLE); -+ if (sk_wait_event(sk, &timeout, -+ sock_flag(sk, SOCK_DONE))) -+ break; -+ } while (!signal_pending(current) && timeout); -+ -+ finish_wait(sk_sleep(sk), &wait); -+ } -+} -+ -+static void virtio_transport_do_close(struct vsock_sock *vsk, -+ bool cancel_timeout) -+{ -+ struct sock *sk = sk_vsock(vsk); -+ -+ sock_set_flag(sk, SOCK_DONE); -+ vsk->peer_shutdown = SHUTDOWN_MASK; -+ if (vsock_stream_has_data(vsk) <= 0) -+ sk->sk_state = SS_DISCONNECTING; -+ sk->sk_state_change(sk); -+ -+ if (vsk->close_work_scheduled && -+ (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { -+ vsk->close_work_scheduled = false; -+ -+ vsock_remove_sock(vsk); -+ -+ /* Release refcnt obtained when we scheduled the timeout */ -+ sock_put(sk); -+ } -+} -+ -+static void virtio_transport_close_timeout(struct work_struct *work) -+{ -+ struct vsock_sock *vsk = -+ container_of(work, struct vsock_sock, close_work.work); -+ struct sock *sk = sk_vsock(vsk); -+ -+ sock_hold(sk); -+ lock_sock(sk); -+ -+ if (!sock_flag(sk, SOCK_DONE)) { -+ (void)virtio_transport_reset(vsk, NULL); -+ -+ virtio_transport_do_close(vsk, false); -+ } -+ -+ vsk->close_work_scheduled = false; -+ -+ 
release_sock(sk); -+ sock_put(sk); -+} -+ -+/* User context, vsk->sk is locked */ -+static bool virtio_transport_close(struct vsock_sock *vsk) -+{ -+ struct sock *sk = &vsk->sk; -+ -+ if (!(sk->sk_state == SS_CONNECTED || -+ sk->sk_state == SS_DISCONNECTING)) -+ return true; -+ -+ /* Already received SHUTDOWN from peer, reply with RST */ -+ if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { -+ (void)virtio_transport_reset(vsk, NULL); -+ return true; -+ } -+ -+ if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) -+ (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); -+ -+ if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) -+ virtio_transport_wait_close(sk, sk->sk_lingertime); -+ -+ if (sock_flag(sk, SOCK_DONE)) { -+ return true; -+ } -+ -+ sock_hold(sk); -+ INIT_DELAYED_WORK(&vsk->close_work, -+ virtio_transport_close_timeout); -+ vsk->close_work_scheduled = true; -+ schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT); -+ return false; -+} -+ -+void virtio_transport_release(struct vsock_sock *vsk) -+{ -+ struct sock *sk = &vsk->sk; -+ bool remove_sock = true; -+ -+ lock_sock(sk); -+ if (sk->sk_type == SOCK_STREAM) -+ remove_sock = virtio_transport_close(vsk); -+ release_sock(sk); -+ -+ if (remove_sock) -+ vsock_remove_sock(vsk); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_release); -+ -+static int -+virtio_transport_recv_connecting(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ int err; -+ int skerr; -+ -+ switch (le16_to_cpu(pkt->hdr.op)) { -+ case VIRTIO_VSOCK_OP_RESPONSE: -+ sk->sk_state = SS_CONNECTED; -+ sk->sk_socket->state = SS_CONNECTED; -+ vsock_insert_connected(vsk); -+ sk->sk_state_change(sk); -+ break; -+ case VIRTIO_VSOCK_OP_INVALID: -+ break; -+ case VIRTIO_VSOCK_OP_RST: -+ skerr = ECONNRESET; -+ err = 0; -+ goto destroy; -+ default: -+ skerr = EPROTO; -+ err = -EINVAL; -+ goto destroy; -+ } -+ return 0; -+ -+destroy: -+ virtio_transport_reset(vsk, pkt); -+ 
sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = skerr; -+ sk->sk_error_report(sk); -+ return err; -+} -+ -+static int -+virtio_transport_recv_connected(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ int err = 0; -+ -+ switch (le16_to_cpu(pkt->hdr.op)) { -+ case VIRTIO_VSOCK_OP_RW: -+ pkt->len = le32_to_cpu(pkt->hdr.len); -+ pkt->off = 0; -+ -+ spin_lock_bh(&vvs->rx_lock); -+ virtio_transport_inc_rx_pkt(vvs, pkt); -+ list_add_tail(&pkt->list, &vvs->rx_queue); -+ spin_unlock_bh(&vvs->rx_lock); -+ -+ sk->sk_data_ready(sk); -+ return err; -+ case VIRTIO_VSOCK_OP_CREDIT_UPDATE: -+ sk->sk_write_space(sk); -+ break; -+ case VIRTIO_VSOCK_OP_SHUTDOWN: -+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) -+ vsk->peer_shutdown |= RCV_SHUTDOWN; -+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) -+ vsk->peer_shutdown |= SEND_SHUTDOWN; -+ if (vsk->peer_shutdown == SHUTDOWN_MASK && -+ vsock_stream_has_data(vsk) <= 0) -+ sk->sk_state = SS_DISCONNECTING; -+ if (le32_to_cpu(pkt->hdr.flags)) -+ sk->sk_state_change(sk); -+ break; -+ case VIRTIO_VSOCK_OP_RST: -+ virtio_transport_do_close(vsk, true); -+ break; -+ default: -+ err = -EINVAL; -+ break; -+ } -+ -+ virtio_transport_free_pkt(pkt); -+ return err; -+} -+ -+static void -+virtio_transport_recv_disconnecting(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ -+ if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) -+ virtio_transport_do_close(vsk, true); -+} -+ -+static int -+virtio_transport_send_response(struct vsock_sock *vsk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock_pkt_info info = { -+ .op = VIRTIO_VSOCK_OP_RESPONSE, -+ .type = VIRTIO_VSOCK_TYPE_STREAM, -+ .remote_cid = le32_to_cpu(pkt->hdr.src_cid), -+ .remote_port = le32_to_cpu(pkt->hdr.src_port), -+ .reply = true, -+ }; -+ -+ return virtio_transport_send_pkt_info(vsk, &info); -+} -+ -+/* 
Handle server socket */ -+static int -+virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct vsock_sock *vchild; -+ struct sock *child; -+ -+ if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { -+ virtio_transport_reset(vsk, pkt); -+ return -EINVAL; -+ } -+ -+ if (sk_acceptq_is_full(sk)) { -+ virtio_transport_reset(vsk, pkt); -+ return -ENOMEM; -+ } -+ -+ child = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, -+ sk->sk_type, 0); -+ if (!child) { -+ virtio_transport_reset(vsk, pkt); -+ return -ENOMEM; -+ } -+ -+ sk->sk_ack_backlog++; -+ -+ lock_sock_nested(child, SINGLE_DEPTH_NESTING); -+ -+ child->sk_state = SS_CONNECTED; -+ -+ vchild = vsock_sk(child); -+ vsock_addr_init(&vchild->local_addr, le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port)); -+ vsock_addr_init(&vchild->remote_addr, le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ -+ vsock_insert_connected(vchild); -+ vsock_enqueue_accept(sk, child); -+ virtio_transport_send_response(vchild, pkt); -+ -+ release_sock(child); -+ -+ sk->sk_data_ready(sk); -+ return 0; -+} -+ -+static bool virtio_transport_space_update(struct sock *sk, -+ struct virtio_vsock_pkt *pkt) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ struct virtio_vsock_sock *vvs = vsk->trans; -+ bool space_available; -+ -+ /* buf_alloc and fwd_cnt is always included in the hdr */ -+ spin_lock_bh(&vvs->tx_lock); -+ vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); -+ vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); -+ space_available = virtio_transport_has_space(vsk); -+ spin_unlock_bh(&vvs->tx_lock); -+ return space_available; -+} -+ -+/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex -+ * lock. 
-+ */ -+void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct sockaddr_vm src, dst; -+ struct vsock_sock *vsk; -+ struct sock *sk; -+ bool space_available; -+ -+ vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), -+ le32_to_cpu(pkt->hdr.src_port)); -+ vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), -+ le32_to_cpu(pkt->hdr.dst_port)); -+ -+ trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, -+ dst.svm_cid, dst.svm_port, -+ le32_to_cpu(pkt->hdr.len), -+ le16_to_cpu(pkt->hdr.type), -+ le16_to_cpu(pkt->hdr.op), -+ le32_to_cpu(pkt->hdr.flags), -+ le32_to_cpu(pkt->hdr.buf_alloc), -+ le32_to_cpu(pkt->hdr.fwd_cnt)); -+ -+ if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { -+ (void)virtio_transport_reset_no_sock(pkt); -+ goto free_pkt; -+ } -+ -+ /* The socket must be in connected or bound table -+ * otherwise send reset back -+ */ -+ sk = vsock_find_connected_socket(&src, &dst); -+ if (!sk) { -+ sk = vsock_find_bound_socket(&dst); -+ if (!sk) { -+ (void)virtio_transport_reset_no_sock(pkt); -+ goto free_pkt; -+ } -+ } -+ -+ vsk = vsock_sk(sk); -+ -+ space_available = virtio_transport_space_update(sk, pkt); -+ -+ lock_sock(sk); -+ -+ /* Update CID in case it has changed after a transport reset event */ -+ vsk->local_addr.svm_cid = dst.svm_cid; -+ -+ if (space_available) -+ sk->sk_write_space(sk); -+ -+ switch (sk->sk_state) { -+ case VSOCK_SS_LISTEN: -+ virtio_transport_recv_listen(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ case SS_CONNECTING: -+ virtio_transport_recv_connecting(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ case SS_CONNECTED: -+ virtio_transport_recv_connected(sk, pkt); -+ break; -+ case SS_DISCONNECTING: -+ virtio_transport_recv_disconnecting(sk, pkt); -+ virtio_transport_free_pkt(pkt); -+ break; -+ default: -+ virtio_transport_free_pkt(pkt); -+ break; -+ } -+ release_sock(sk); -+ -+ /* Release refcnt obtained when we fetched this socket out of the -+ * bound or connected list. 
-+ */ -+ sock_put(sk); -+ return; -+ -+free_pkt: -+ virtio_transport_free_pkt(pkt); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); -+ -+void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ kfree(pkt->buf); -+ kfree(pkt); -+} -+EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("common code for virtio vsock"); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0008-VSOCK-Introduce-virtio_transport.ko.patch b/kernel/patches-4.4.x/0008-VSOCK-Introduce-virtio_transport.ko.patch deleted file mode 100644 index 54b890f6c..000000000 --- a/kernel/patches-4.4.x/0008-VSOCK-Introduce-virtio_transport.ko.patch +++ /dev/null @@ -1,663 +0,0 @@ -From 5493c8c9039db93d887149fe390c97c62b7de3e9 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:33 +0100 -Subject: [PATCH 08/44] VSOCK: Introduce virtio_transport.ko - -VM sockets virtio transport implementation. This driver runs in the -guest. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 0ea9e1d3a9e3ef7d2a1462d3de6b95131dc7d872) ---- - MAINTAINERS | 1 + - net/vmw_vsock/virtio_transport.c | 624 +++++++++++++++++++++++++++++++++++++++ - 2 files changed, 625 insertions(+) - create mode 100644 net/vmw_vsock/virtio_transport.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index b93ba8b21be7..82d11235cacb 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11391,6 +11391,7 @@ S: Maintained - F: include/linux/virtio_vsock.h - F: include/uapi/linux/virtio_vsock.h - F: net/vmw_vsock/virtio_transport_common.c -+F: net/vmw_vsock/virtio_transport.c - - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul -diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c -new file mode 100644 -index 000000000000..699dfabdbccd ---- /dev/null -+++ b/net/vmw_vsock/virtio_transport.c -@@ -0,0 +1,624 @@ -+/* -+ * virtio transport for vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * Some of the code is take from Gerd Hoffmann 's -+ * early virtio-vsock proof-of-concept bits. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static struct workqueue_struct *virtio_vsock_workqueue; -+static struct virtio_vsock *the_virtio_vsock; -+static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ -+ -+struct virtio_vsock { -+ struct virtio_device *vdev; -+ struct virtqueue *vqs[VSOCK_VQ_MAX]; -+ -+ /* Virtqueue processing is deferred to a workqueue */ -+ struct work_struct tx_work; -+ struct work_struct rx_work; -+ struct work_struct event_work; -+ -+ /* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX] -+ * must be accessed with tx_lock held. 
-+ */ -+ struct mutex tx_lock; -+ -+ struct work_struct send_pkt_work; -+ spinlock_t send_pkt_list_lock; -+ struct list_head send_pkt_list; -+ -+ atomic_t queued_replies; -+ -+ /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] -+ * must be accessed with rx_lock held. -+ */ -+ struct mutex rx_lock; -+ int rx_buf_nr; -+ int rx_buf_max_nr; -+ -+ /* The following fields are protected by event_lock. -+ * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held. -+ */ -+ struct mutex event_lock; -+ struct virtio_vsock_event event_list[8]; -+ -+ u32 guest_cid; -+}; -+ -+static struct virtio_vsock *virtio_vsock_get(void) -+{ -+ return the_virtio_vsock; -+} -+ -+static u32 virtio_transport_get_local_cid(void) -+{ -+ struct virtio_vsock *vsock = virtio_vsock_get(); -+ -+ return vsock->guest_cid; -+} -+ -+static void -+virtio_transport_send_pkt_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, send_pkt_work); -+ struct virtqueue *vq; -+ bool added = false; -+ bool restart_rx = false; -+ -+ mutex_lock(&vsock->tx_lock); -+ -+ vq = vsock->vqs[VSOCK_VQ_TX]; -+ -+ /* Avoid unnecessary interrupts while we're processing the ring */ -+ virtqueue_disable_cb(vq); -+ -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ struct scatterlist hdr, buf, *sgs[2]; -+ int ret, in_sg = 0, out_sg = 0; -+ bool reply; -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ if (list_empty(&vsock->send_pkt_list)) { -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ virtqueue_enable_cb(vq); -+ break; -+ } -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ reply = pkt->reply; -+ -+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); -+ sgs[out_sg++] = &hdr; -+ if (pkt->buf) { -+ sg_init_one(&buf, pkt->buf, pkt->len); -+ sgs[out_sg++] = &buf; -+ } -+ -+ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); -+ if 
(ret < 0) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) -+ continue; /* retry now that we have more space */ -+ break; -+ } -+ -+ if (reply) { -+ struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; -+ int val; -+ -+ val = atomic_dec_return(&vsock->queued_replies); -+ -+ /* Do we now have resources to resume rx processing? */ -+ if (val + 1 == virtqueue_get_vring_size(rx_vq)) -+ restart_rx = true; -+ } -+ -+ added = true; -+ } -+ -+ if (added) -+ virtqueue_kick(vq); -+ -+ mutex_unlock(&vsock->tx_lock); -+ -+ if (restart_rx) -+ queue_work(virtio_vsock_workqueue, &vsock->rx_work); -+} -+ -+static int -+virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct virtio_vsock *vsock; -+ int len = pkt->len; -+ -+ vsock = virtio_vsock_get(); -+ if (!vsock) { -+ virtio_transport_free_pkt(pkt); -+ return -ENODEV; -+ } -+ -+ if (pkt->reply) -+ atomic_inc(&vsock->queued_replies); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add_tail(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); -+ return len; -+} -+ -+static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) -+{ -+ int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; -+ struct virtio_vsock_pkt *pkt; -+ struct scatterlist hdr, buf, *sgs[2]; -+ struct virtqueue *vq; -+ int ret; -+ -+ vq = vsock->vqs[VSOCK_VQ_RX]; -+ -+ do { -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ break; -+ -+ pkt->buf = kmalloc(buf_len, GFP_KERNEL); -+ if (!pkt->buf) { -+ virtio_transport_free_pkt(pkt); -+ break; -+ } -+ -+ pkt->len = buf_len; -+ -+ sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); -+ sgs[0] = &hdr; -+ -+ sg_init_one(&buf, pkt->buf, buf_len); -+ sgs[1] = &buf; -+ ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); -+ if (ret) { -+ 
virtio_transport_free_pkt(pkt); -+ break; -+ } -+ vsock->rx_buf_nr++; -+ } while (vq->num_free); -+ if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) -+ vsock->rx_buf_max_nr = vsock->rx_buf_nr; -+ virtqueue_kick(vq); -+} -+ -+static void virtio_transport_tx_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, tx_work); -+ struct virtqueue *vq; -+ bool added = false; -+ -+ vq = vsock->vqs[VSOCK_VQ_TX]; -+ mutex_lock(&vsock->tx_lock); -+ do { -+ struct virtio_vsock_pkt *pkt; -+ unsigned int len; -+ -+ virtqueue_disable_cb(vq); -+ while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { -+ virtio_transport_free_pkt(pkt); -+ added = true; -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ mutex_unlock(&vsock->tx_lock); -+ -+ if (added) -+ queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); -+} -+ -+/* Is there space left for replies to rx packets? */ -+static bool virtio_transport_more_replies(struct virtio_vsock *vsock) -+{ -+ struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX]; -+ int val; -+ -+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ -+ val = atomic_read(&vsock->queued_replies); -+ -+ return val < virtqueue_get_vring_size(vq); -+} -+ -+static void virtio_transport_rx_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, rx_work); -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_RX]; -+ -+ mutex_lock(&vsock->rx_lock); -+ -+ do { -+ virtqueue_disable_cb(vq); -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ unsigned int len; -+ -+ if (!virtio_transport_more_replies(vsock)) { -+ /* Stop rx until the device processes already -+ * pending replies. Leave rx virtqueue -+ * callbacks disabled. 
-+ */ -+ goto out; -+ } -+ -+ pkt = virtqueue_get_buf(vq, &len); -+ if (!pkt) { -+ break; -+ } -+ -+ vsock->rx_buf_nr--; -+ -+ /* Drop short/long packets */ -+ if (unlikely(len < sizeof(pkt->hdr) || -+ len > sizeof(pkt->hdr) + pkt->len)) { -+ virtio_transport_free_pkt(pkt); -+ continue; -+ } -+ -+ pkt->len = len - sizeof(pkt->hdr); -+ virtio_transport_recv_pkt(pkt); -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ -+out: -+ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) -+ virtio_vsock_rx_fill(vsock); -+ mutex_unlock(&vsock->rx_lock); -+} -+ -+/* event_lock must be held */ -+static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, -+ struct virtio_vsock_event *event) -+{ -+ struct scatterlist sg; -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_EVENT]; -+ -+ sg_init_one(&sg, event, sizeof(*event)); -+ -+ return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL); -+} -+ -+/* event_lock must be held */ -+static void virtio_vsock_event_fill(struct virtio_vsock *vsock) -+{ -+ size_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) { -+ struct virtio_vsock_event *event = &vsock->event_list[i]; -+ -+ virtio_vsock_event_fill_one(vsock, event); -+ } -+ -+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); -+} -+ -+static void virtio_vsock_reset_sock(struct sock *sk) -+{ -+ lock_sock(sk); -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = ECONNRESET; -+ sk->sk_error_report(sk); -+ release_sock(sk); -+} -+ -+static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) -+{ -+ struct virtio_device *vdev = vsock->vdev; -+ u64 guest_cid; -+ -+ vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), -+ &guest_cid, sizeof(guest_cid)); -+ vsock->guest_cid = le64_to_cpu(guest_cid); -+} -+ -+/* event_lock must be held */ -+static void virtio_vsock_event_handle(struct virtio_vsock *vsock, -+ struct virtio_vsock_event *event) -+{ -+ switch (le32_to_cpu(event->id)) { -+ case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: -+ 
virtio_vsock_update_guest_cid(vsock); -+ vsock_for_each_connected_socket(virtio_vsock_reset_sock); -+ break; -+ } -+} -+ -+static void virtio_transport_event_work(struct work_struct *work) -+{ -+ struct virtio_vsock *vsock = -+ container_of(work, struct virtio_vsock, event_work); -+ struct virtqueue *vq; -+ -+ vq = vsock->vqs[VSOCK_VQ_EVENT]; -+ -+ mutex_lock(&vsock->event_lock); -+ -+ do { -+ struct virtio_vsock_event *event; -+ unsigned int len; -+ -+ virtqueue_disable_cb(vq); -+ while ((event = virtqueue_get_buf(vq, &len)) != NULL) { -+ if (len == sizeof(*event)) -+ virtio_vsock_event_handle(vsock, event); -+ -+ virtio_vsock_event_fill_one(vsock, event); -+ } -+ } while (!virtqueue_enable_cb(vq)); -+ -+ virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]); -+ -+ mutex_unlock(&vsock->event_lock); -+} -+ -+static void virtio_vsock_event_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->event_work); -+} -+ -+static void virtio_vsock_tx_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->tx_work); -+} -+ -+static void virtio_vsock_rx_done(struct virtqueue *vq) -+{ -+ struct virtio_vsock *vsock = vq->vdev->priv; -+ -+ if (!vsock) -+ return; -+ queue_work(virtio_vsock_workqueue, &vsock->rx_work); -+} -+ -+static struct virtio_transport virtio_transport = { -+ .transport = { -+ .get_local_cid = virtio_transport_get_local_cid, -+ -+ .init = virtio_transport_do_socket_init, -+ .destruct = virtio_transport_destruct, -+ .release = virtio_transport_release, -+ .connect = virtio_transport_connect, -+ .shutdown = virtio_transport_shutdown, -+ -+ .dgram_bind = virtio_transport_dgram_bind, -+ .dgram_dequeue = virtio_transport_dgram_dequeue, -+ .dgram_enqueue = virtio_transport_dgram_enqueue, -+ .dgram_allow = virtio_transport_dgram_allow, -+ -+ .stream_dequeue = 
virtio_transport_stream_dequeue, -+ .stream_enqueue = virtio_transport_stream_enqueue, -+ .stream_has_data = virtio_transport_stream_has_data, -+ .stream_has_space = virtio_transport_stream_has_space, -+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, -+ .stream_is_active = virtio_transport_stream_is_active, -+ .stream_allow = virtio_transport_stream_allow, -+ -+ .notify_poll_in = virtio_transport_notify_poll_in, -+ .notify_poll_out = virtio_transport_notify_poll_out, -+ .notify_recv_init = virtio_transport_notify_recv_init, -+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, -+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, -+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, -+ .notify_send_init = virtio_transport_notify_send_init, -+ .notify_send_pre_block = virtio_transport_notify_send_pre_block, -+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, -+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, -+ -+ .set_buffer_size = virtio_transport_set_buffer_size, -+ .set_min_buffer_size = virtio_transport_set_min_buffer_size, -+ .set_max_buffer_size = virtio_transport_set_max_buffer_size, -+ .get_buffer_size = virtio_transport_get_buffer_size, -+ .get_min_buffer_size = virtio_transport_get_min_buffer_size, -+ .get_max_buffer_size = virtio_transport_get_max_buffer_size, -+ }, -+ -+ .send_pkt = virtio_transport_send_pkt, -+}; -+ -+static int virtio_vsock_probe(struct virtio_device *vdev) -+{ -+ vq_callback_t *callbacks[] = { -+ virtio_vsock_rx_done, -+ virtio_vsock_tx_done, -+ virtio_vsock_event_done, -+ }; -+ static const char * const names[] = { -+ "rx", -+ "tx", -+ "event", -+ }; -+ struct virtio_vsock *vsock = NULL; -+ int ret; -+ -+ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); -+ if (ret) -+ return ret; -+ -+ /* Only one virtio-vsock device per guest is supported */ -+ if (the_virtio_vsock) { -+ ret = -EBUSY; -+ goto out; -+ } -+ -+ vsock = 
kzalloc(sizeof(*vsock), GFP_KERNEL); -+ if (!vsock) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ vsock->vdev = vdev; -+ -+ ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, -+ vsock->vqs, callbacks, names); -+ if (ret < 0) -+ goto out; -+ -+ virtio_vsock_update_guest_cid(vsock); -+ -+ ret = vsock_core_init(&virtio_transport.transport); -+ if (ret < 0) -+ goto out_vqs; -+ -+ vsock->rx_buf_nr = 0; -+ vsock->rx_buf_max_nr = 0; -+ atomic_set(&vsock->queued_replies, 0); -+ -+ vdev->priv = vsock; -+ the_virtio_vsock = vsock; -+ mutex_init(&vsock->tx_lock); -+ mutex_init(&vsock->rx_lock); -+ mutex_init(&vsock->event_lock); -+ spin_lock_init(&vsock->send_pkt_list_lock); -+ INIT_LIST_HEAD(&vsock->send_pkt_list); -+ INIT_WORK(&vsock->rx_work, virtio_transport_rx_work); -+ INIT_WORK(&vsock->tx_work, virtio_transport_tx_work); -+ INIT_WORK(&vsock->event_work, virtio_transport_event_work); -+ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work); -+ -+ mutex_lock(&vsock->rx_lock); -+ virtio_vsock_rx_fill(vsock); -+ mutex_unlock(&vsock->rx_lock); -+ -+ mutex_lock(&vsock->event_lock); -+ virtio_vsock_event_fill(vsock); -+ mutex_unlock(&vsock->event_lock); -+ -+ mutex_unlock(&the_virtio_vsock_mutex); -+ return 0; -+ -+out_vqs: -+ vsock->vdev->config->del_vqs(vsock->vdev); -+out: -+ kfree(vsock); -+ mutex_unlock(&the_virtio_vsock_mutex); -+ return ret; -+} -+ -+static void virtio_vsock_remove(struct virtio_device *vdev) -+{ -+ struct virtio_vsock *vsock = vdev->priv; -+ struct virtio_vsock_pkt *pkt; -+ -+ flush_work(&vsock->rx_work); -+ flush_work(&vsock->tx_work); -+ flush_work(&vsock->event_work); -+ flush_work(&vsock->send_pkt_work); -+ -+ vdev->config->reset(vdev); -+ -+ mutex_lock(&vsock->rx_lock); -+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX]))) -+ virtio_transport_free_pkt(pkt); -+ mutex_unlock(&vsock->rx_lock); -+ -+ mutex_lock(&vsock->tx_lock); -+ while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX]))) -+ 
virtio_transport_free_pkt(pkt); -+ mutex_unlock(&vsock->tx_lock); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ while (!list_empty(&vsock->send_pkt_list)) { -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ mutex_lock(&the_virtio_vsock_mutex); -+ the_virtio_vsock = NULL; -+ vsock_core_exit(); -+ mutex_unlock(&the_virtio_vsock_mutex); -+ -+ vdev->config->del_vqs(vdev); -+ -+ kfree(vsock); -+} -+ -+static struct virtio_device_id id_table[] = { -+ { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, -+ { 0 }, -+}; -+ -+static unsigned int features[] = { -+}; -+ -+static struct virtio_driver virtio_vsock_driver = { -+ .feature_table = features, -+ .feature_table_size = ARRAY_SIZE(features), -+ .driver.name = KBUILD_MODNAME, -+ .driver.owner = THIS_MODULE, -+ .id_table = id_table, -+ .probe = virtio_vsock_probe, -+ .remove = virtio_vsock_remove, -+}; -+ -+static int __init virtio_vsock_init(void) -+{ -+ int ret; -+ -+ virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); -+ if (!virtio_vsock_workqueue) -+ return -ENOMEM; -+ ret = register_virtio_driver(&virtio_vsock_driver); -+ if (ret) -+ destroy_workqueue(virtio_vsock_workqueue); -+ return ret; -+} -+ -+static void __exit virtio_vsock_exit(void) -+{ -+ unregister_virtio_driver(&virtio_vsock_driver); -+ destroy_workqueue(virtio_vsock_workqueue); -+} -+ -+module_init(virtio_vsock_init); -+module_exit(virtio_vsock_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("virtio transport for vsock"); -+MODULE_DEVICE_TABLE(virtio, id_table); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0009-VSOCK-Introduce-vhost_vsock.ko.patch b/kernel/patches-4.4.x/0009-VSOCK-Introduce-vhost_vsock.ko.patch deleted file mode 100644 index f9f7ecedf..000000000 --- a/kernel/patches-4.4.x/0009-VSOCK-Introduce-vhost_vsock.ko.patch +++ /dev/null @@ -1,777 +0,0 @@ 
-From 9d6607037faab1c8a53001d8b259ac4a80b427e8 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:34 +0100 -Subject: [PATCH 09/44] VSOCK: Introduce vhost_vsock.ko - -VM sockets vhost transport implementation. This driver runs on the -host. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 433fc58e6bf2c8bd97e57153ed28e64fd78207b8) ---- - MAINTAINERS | 2 + - drivers/vhost/vsock.c | 722 +++++++++++++++++++++++++++++++++++++++++++++ - include/uapi/linux/vhost.h | 5 + - 3 files changed, 729 insertions(+) - create mode 100644 drivers/vhost/vsock.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index 82d11235cacb..12d49f58c4e0 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -11392,6 +11392,8 @@ F: include/linux/virtio_vsock.h - F: include/uapi/linux/virtio_vsock.h - F: net/vmw_vsock/virtio_transport_common.c - F: net/vmw_vsock/virtio_transport.c -+F: drivers/vhost/vsock.c -+F: drivers/vhost/vsock.h - - VIRTUAL SERIO DEVICE DRIVER - M: Stephen Chandler Paul -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -new file mode 100644 -index 000000000000..028ca16c2d36 ---- /dev/null -+++ b/drivers/vhost/vsock.c -@@ -0,0 +1,722 @@ -+/* -+ * vhost transport for vsock -+ * -+ * Copyright (C) 2013-2015 Red Hat, Inc. -+ * Author: Asias He -+ * Stefan Hajnoczi -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "vhost.h" -+ -+#define VHOST_VSOCK_DEFAULT_HOST_CID 2 -+ -+enum { -+ VHOST_VSOCK_FEATURES = VHOST_FEATURES, -+}; -+ -+/* Used to track all the vhost_vsock instances on the system. 
*/ -+static DEFINE_SPINLOCK(vhost_vsock_lock); -+static LIST_HEAD(vhost_vsock_list); -+ -+struct vhost_vsock { -+ struct vhost_dev dev; -+ struct vhost_virtqueue vqs[2]; -+ -+ /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */ -+ struct list_head list; -+ -+ struct vhost_work send_pkt_work; -+ spinlock_t send_pkt_list_lock; -+ struct list_head send_pkt_list; /* host->guest pending packets */ -+ -+ atomic_t queued_replies; -+ -+ u32 guest_cid; -+}; -+ -+static u32 vhost_transport_get_local_cid(void) -+{ -+ return VHOST_VSOCK_DEFAULT_HOST_CID; -+} -+ -+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -+{ -+ struct vhost_vsock *vsock; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_for_each_entry(vsock, &vhost_vsock_list, list) { -+ u32 other_cid = vsock->guest_cid; -+ -+ /* Skip instances that have no CID yet */ -+ if (other_cid == 0) -+ continue; -+ -+ if (other_cid == guest_cid) { -+ spin_unlock_bh(&vhost_vsock_lock); -+ return vsock; -+ } -+ } -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ return NULL; -+} -+ -+static void -+vhost_transport_do_send_pkt(struct vhost_vsock *vsock, -+ struct vhost_virtqueue *vq) -+{ -+ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; -+ bool added = false; -+ bool restart_tx = false; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vq->private_data) -+ goto out; -+ -+ /* Avoid further vmexits, we're already processing the virtqueue */ -+ vhost_disable_notify(&vsock->dev, vq); -+ -+ for (;;) { -+ struct virtio_vsock_pkt *pkt; -+ struct iov_iter iov_iter; -+ unsigned out, in; -+ size_t nbytes; -+ size_t len; -+ int head; -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ if (list_empty(&vsock->send_pkt_list)) { -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ vhost_enable_notify(&vsock->dev, vq); -+ break; -+ } -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ head = vhost_get_vq_desc(vq, 
vq->iov, ARRAY_SIZE(vq->iov), -+ &out, &in, NULL, NULL); -+ if (head < 0) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ break; -+ } -+ -+ if (head == vq->num) { -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ /* We cannot finish yet if more buffers snuck in while -+ * re-enabling notify. -+ */ -+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { -+ vhost_disable_notify(&vsock->dev, vq); -+ continue; -+ } -+ break; -+ } -+ -+ if (out) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Expected 0 output buffers, got %u\n", out); -+ break; -+ } -+ -+ len = iov_length(&vq->iov[out], in); -+ iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); -+ -+ nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); -+ if (nbytes != sizeof(pkt->hdr)) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Faulted on copying pkt hdr\n"); -+ break; -+ } -+ -+ nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); -+ if (nbytes != pkt->len) { -+ virtio_transport_free_pkt(pkt); -+ vq_err(vq, "Faulted on copying pkt buf\n"); -+ break; -+ } -+ -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ added = true; -+ -+ if (pkt->reply) { -+ int val; -+ -+ val = atomic_dec_return(&vsock->queued_replies); -+ -+ /* Do we have resources to resume tx processing? 
*/ -+ if (val + 1 == tx_vq->num) -+ restart_tx = true; -+ } -+ -+ virtio_transport_free_pkt(pkt); -+ } -+ if (added) -+ vhost_signal(&vsock->dev, vq); -+ -+out: -+ mutex_unlock(&vq->mutex); -+ -+ if (restart_tx) -+ vhost_poll_queue(&tx_vq->poll); -+} -+ -+static void vhost_transport_send_pkt_work(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq; -+ struct vhost_vsock *vsock; -+ -+ vsock = container_of(work, struct vhost_vsock, send_pkt_work); -+ vq = &vsock->vqs[VSOCK_VQ_RX]; -+ -+ vhost_transport_do_send_pkt(vsock, vq); -+} -+ -+static int -+vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) -+{ -+ struct vhost_vsock *vsock; -+ struct vhost_virtqueue *vq; -+ int len = pkt->len; -+ -+ /* Find the vhost_vsock according to guest context id */ -+ vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid)); -+ if (!vsock) { -+ virtio_transport_free_pkt(pkt); -+ return -ENODEV; -+ } -+ -+ vq = &vsock->vqs[VSOCK_VQ_RX]; -+ -+ if (pkt->reply) -+ atomic_inc(&vsock->queued_replies); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ list_add_tail(&pkt->list, &vsock->send_pkt_list); -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); -+ return len; -+} -+ -+static struct virtio_vsock_pkt * -+vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, -+ unsigned int out, unsigned int in) -+{ -+ struct virtio_vsock_pkt *pkt; -+ struct iov_iter iov_iter; -+ size_t nbytes; -+ size_t len; -+ -+ if (in != 0) { -+ vq_err(vq, "Expected 0 input buffers, got %u\n", in); -+ return NULL; -+ } -+ -+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); -+ if (!pkt) -+ return NULL; -+ -+ len = iov_length(vq->iov, out); -+ iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); -+ -+ nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); -+ if (nbytes != sizeof(pkt->hdr)) { -+ vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", -+ sizeof(pkt->hdr), nbytes); -+ kfree(pkt); -+ return NULL; -+ } -+ -+ if (le16_to_cpu(pkt->hdr.type) == 
VIRTIO_VSOCK_TYPE_STREAM) -+ pkt->len = le32_to_cpu(pkt->hdr.len); -+ -+ /* No payload */ -+ if (!pkt->len) -+ return pkt; -+ -+ /* The pkt is too big */ -+ if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { -+ kfree(pkt); -+ return NULL; -+ } -+ -+ pkt->buf = kmalloc(pkt->len, GFP_KERNEL); -+ if (!pkt->buf) { -+ kfree(pkt); -+ return NULL; -+ } -+ -+ nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); -+ if (nbytes != pkt->len) { -+ vq_err(vq, "Expected %u byte payload, got %zu bytes\n", -+ pkt->len, nbytes); -+ virtio_transport_free_pkt(pkt); -+ return NULL; -+ } -+ -+ return pkt; -+} -+ -+/* Is there space left for replies to rx packets? */ -+static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) -+{ -+ struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX]; -+ int val; -+ -+ smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */ -+ val = atomic_read(&vsock->queued_replies); -+ -+ return val < vq->num; -+} -+ -+static void vhost_vsock_handle_tx_kick(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -+ poll.work); -+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, -+ dev); -+ struct virtio_vsock_pkt *pkt; -+ int head; -+ unsigned int out, in; -+ bool added = false; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vq->private_data) -+ goto out; -+ -+ vhost_disable_notify(&vsock->dev, vq); -+ for (;;) { -+ if (!vhost_vsock_more_replies(vsock)) { -+ /* Stop tx until the device processes already -+ * pending replies. Leave tx virtqueue -+ * callbacks disabled. 
-+ */ -+ goto no_more_replies; -+ } -+ -+ head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), -+ &out, &in, NULL, NULL); -+ if (head < 0) -+ break; -+ -+ if (head == vq->num) { -+ if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { -+ vhost_disable_notify(&vsock->dev, vq); -+ continue; -+ } -+ break; -+ } -+ -+ pkt = vhost_vsock_alloc_pkt(vq, out, in); -+ if (!pkt) { -+ vq_err(vq, "Faulted on pkt\n"); -+ continue; -+ } -+ -+ /* Only accept correctly addressed packets */ -+ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) -+ virtio_transport_recv_pkt(pkt); -+ else -+ virtio_transport_free_pkt(pkt); -+ -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ added = true; -+ } -+ -+no_more_replies: -+ if (added) -+ vhost_signal(&vsock->dev, vq); -+ -+out: -+ mutex_unlock(&vq->mutex); -+} -+ -+static void vhost_vsock_handle_rx_kick(struct vhost_work *work) -+{ -+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -+ poll.work); -+ struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, -+ dev); -+ -+ vhost_transport_do_send_pkt(vsock, vq); -+} -+ -+static int vhost_vsock_start(struct vhost_vsock *vsock) -+{ -+ size_t i; -+ int ret; -+ -+ mutex_lock(&vsock->dev.mutex); -+ -+ ret = vhost_dev_check_owner(&vsock->dev); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ -+ if (!vhost_vq_access_ok(vq)) { -+ ret = -EFAULT; -+ mutex_unlock(&vq->mutex); -+ goto err_vq; -+ } -+ -+ if (!vq->private_data) { -+ vq->private_data = vsock; -+ vhost_vq_init_access(vq); -+ } -+ -+ mutex_unlock(&vq->mutex); -+ } -+ -+ mutex_unlock(&vsock->dev.mutex); -+ return 0; -+ -+err_vq: -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ vq->private_data = NULL; -+ mutex_unlock(&vq->mutex); -+ } -+err: -+ mutex_unlock(&vsock->dev.mutex); -+ return ret; -+} -+ 
-+static int vhost_vsock_stop(struct vhost_vsock *vsock) -+{ -+ size_t i; -+ int ret; -+ -+ mutex_lock(&vsock->dev.mutex); -+ -+ ret = vhost_dev_check_owner(&vsock->dev); -+ if (ret) -+ goto err; -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ struct vhost_virtqueue *vq = &vsock->vqs[i]; -+ -+ mutex_lock(&vq->mutex); -+ vq->private_data = NULL; -+ mutex_unlock(&vq->mutex); -+ } -+ -+err: -+ mutex_unlock(&vsock->dev.mutex); -+ return ret; -+} -+ -+static void vhost_vsock_free(struct vhost_vsock *vsock) -+{ -+ if (is_vmalloc_addr(vsock)) -+ vfree(vsock); -+ else -+ kfree(vsock); -+} -+ -+static int vhost_vsock_dev_open(struct inode *inode, struct file *file) -+{ -+ struct vhost_virtqueue **vqs; -+ struct vhost_vsock *vsock; -+ int ret; -+ -+ /* This struct is large and allocation could fail, fall back to vmalloc -+ * if there is no other way. -+ */ -+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); -+ if (!vsock) { -+ vsock = vmalloc(sizeof(*vsock)); -+ if (!vsock) -+ return -ENOMEM; -+ } -+ -+ vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); -+ if (!vqs) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ atomic_set(&vsock->queued_replies, 0); -+ -+ vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX]; -+ vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX]; -+ vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; -+ vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; -+ -+ vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); -+ -+ file->private_data = vsock; -+ spin_lock_init(&vsock->send_pkt_list_lock); -+ INIT_LIST_HEAD(&vsock->send_pkt_list); -+ vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_add_tail(&vsock->list, &vhost_vsock_list); -+ spin_unlock_bh(&vhost_vsock_lock); -+ return 0; -+ -+out: -+ vhost_vsock_free(vsock); -+ return ret; -+} -+ -+static void vhost_vsock_flush(struct vhost_vsock *vsock) -+{ -+ int i; -+ -+ for (i = 0; 
i < ARRAY_SIZE(vsock->vqs); i++) -+ if (vsock->vqs[i].handle_kick) -+ vhost_poll_flush(&vsock->vqs[i].poll); -+ vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); -+} -+ -+static void vhost_vsock_reset_orphans(struct sock *sk) -+{ -+ struct vsock_sock *vsk = vsock_sk(sk); -+ -+ /* vmci_transport.c doesn't take sk_lock here either. At least we're -+ * under vsock_table_lock so the sock cannot disappear while we're -+ * executing. -+ */ -+ -+ if (!vhost_vsock_get(vsk->local_addr.svm_cid)) { -+ sock_set_flag(sk, SOCK_DONE); -+ vsk->peer_shutdown = SHUTDOWN_MASK; -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = ECONNRESET; -+ sk->sk_error_report(sk); -+ } -+} -+ -+static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -+{ -+ struct vhost_vsock *vsock = file->private_data; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ list_del(&vsock->list); -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ /* Iterating over all connections for all CIDs to find orphans is -+ * inefficient. Room for improvement here. 
*/ -+ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); -+ -+ vhost_vsock_stop(vsock); -+ vhost_vsock_flush(vsock); -+ vhost_dev_stop(&vsock->dev); -+ -+ spin_lock_bh(&vsock->send_pkt_list_lock); -+ while (!list_empty(&vsock->send_pkt_list)) { -+ struct virtio_vsock_pkt *pkt; -+ -+ pkt = list_first_entry(&vsock->send_pkt_list, -+ struct virtio_vsock_pkt, list); -+ list_del_init(&pkt->list); -+ virtio_transport_free_pkt(pkt); -+ } -+ spin_unlock_bh(&vsock->send_pkt_list_lock); -+ -+ vhost_dev_cleanup(&vsock->dev, false); -+ kfree(vsock->dev.vqs); -+ vhost_vsock_free(vsock); -+ return 0; -+} -+ -+static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid) -+{ -+ struct vhost_vsock *other; -+ -+ /* Refuse reserved CIDs */ -+ if (guest_cid <= VMADDR_CID_HOST || -+ guest_cid == U32_MAX) -+ return -EINVAL; -+ -+ /* 64-bit CIDs are not yet supported */ -+ if (guest_cid > U32_MAX) -+ return -EINVAL; -+ -+ /* Refuse if CID is already in use */ -+ other = vhost_vsock_get(guest_cid); -+ if (other && other != vsock) -+ return -EADDRINUSE; -+ -+ spin_lock_bh(&vhost_vsock_lock); -+ vsock->guest_cid = guest_cid; -+ spin_unlock_bh(&vhost_vsock_lock); -+ -+ return 0; -+} -+ -+static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) -+{ -+ struct vhost_virtqueue *vq; -+ int i; -+ -+ if (features & ~VHOST_VSOCK_FEATURES) -+ return -EOPNOTSUPP; -+ -+ mutex_lock(&vsock->dev.mutex); -+ if ((features & (1 << VHOST_F_LOG_ALL)) && -+ !vhost_log_access_ok(&vsock->dev)) { -+ mutex_unlock(&vsock->dev.mutex); -+ return -EFAULT; -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { -+ vq = &vsock->vqs[i]; -+ mutex_lock(&vq->mutex); -+ vq->acked_features = features; -+ mutex_unlock(&vq->mutex); -+ } -+ mutex_unlock(&vsock->dev.mutex); -+ return 0; -+} -+ -+static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, -+ unsigned long arg) -+{ -+ struct vhost_vsock *vsock = f->private_data; -+ void __user *argp = (void __user *)arg; -+ u64 
guest_cid; -+ u64 features; -+ int start; -+ int r; -+ -+ switch (ioctl) { -+ case VHOST_VSOCK_SET_GUEST_CID: -+ if (copy_from_user(&guest_cid, argp, sizeof(guest_cid))) -+ return -EFAULT; -+ return vhost_vsock_set_cid(vsock, guest_cid); -+ case VHOST_VSOCK_SET_RUNNING: -+ if (copy_from_user(&start, argp, sizeof(start))) -+ return -EFAULT; -+ if (start) -+ return vhost_vsock_start(vsock); -+ else -+ return vhost_vsock_stop(vsock); -+ case VHOST_GET_FEATURES: -+ features = VHOST_VSOCK_FEATURES; -+ if (copy_to_user(argp, &features, sizeof(features))) -+ return -EFAULT; -+ return 0; -+ case VHOST_SET_FEATURES: -+ if (copy_from_user(&features, argp, sizeof(features))) -+ return -EFAULT; -+ return vhost_vsock_set_features(vsock, features); -+ default: -+ mutex_lock(&vsock->dev.mutex); -+ r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); -+ if (r == -ENOIOCTLCMD) -+ r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); -+ else -+ vhost_vsock_flush(vsock); -+ mutex_unlock(&vsock->dev.mutex); -+ return r; -+ } -+} -+ -+static const struct file_operations vhost_vsock_fops = { -+ .owner = THIS_MODULE, -+ .open = vhost_vsock_dev_open, -+ .release = vhost_vsock_dev_release, -+ .llseek = noop_llseek, -+ .unlocked_ioctl = vhost_vsock_dev_ioctl, -+}; -+ -+static struct miscdevice vhost_vsock_misc = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "vhost-vsock", -+ .fops = &vhost_vsock_fops, -+}; -+ -+static struct virtio_transport vhost_transport = { -+ .transport = { -+ .get_local_cid = vhost_transport_get_local_cid, -+ -+ .init = virtio_transport_do_socket_init, -+ .destruct = virtio_transport_destruct, -+ .release = virtio_transport_release, -+ .connect = virtio_transport_connect, -+ .shutdown = virtio_transport_shutdown, -+ -+ .dgram_enqueue = virtio_transport_dgram_enqueue, -+ .dgram_dequeue = virtio_transport_dgram_dequeue, -+ .dgram_bind = virtio_transport_dgram_bind, -+ .dgram_allow = virtio_transport_dgram_allow, -+ -+ .stream_enqueue = virtio_transport_stream_enqueue, -+ 
.stream_dequeue = virtio_transport_stream_dequeue, -+ .stream_has_data = virtio_transport_stream_has_data, -+ .stream_has_space = virtio_transport_stream_has_space, -+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, -+ .stream_is_active = virtio_transport_stream_is_active, -+ .stream_allow = virtio_transport_stream_allow, -+ -+ .notify_poll_in = virtio_transport_notify_poll_in, -+ .notify_poll_out = virtio_transport_notify_poll_out, -+ .notify_recv_init = virtio_transport_notify_recv_init, -+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, -+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, -+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, -+ .notify_send_init = virtio_transport_notify_send_init, -+ .notify_send_pre_block = virtio_transport_notify_send_pre_block, -+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, -+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, -+ -+ .set_buffer_size = virtio_transport_set_buffer_size, -+ .set_min_buffer_size = virtio_transport_set_min_buffer_size, -+ .set_max_buffer_size = virtio_transport_set_max_buffer_size, -+ .get_buffer_size = virtio_transport_get_buffer_size, -+ .get_min_buffer_size = virtio_transport_get_min_buffer_size, -+ .get_max_buffer_size = virtio_transport_get_max_buffer_size, -+ }, -+ -+ .send_pkt = vhost_transport_send_pkt, -+}; -+ -+static int __init vhost_vsock_init(void) -+{ -+ int ret; -+ -+ ret = vsock_core_init(&vhost_transport.transport); -+ if (ret < 0) -+ return ret; -+ return misc_register(&vhost_vsock_misc); -+}; -+ -+static void __exit vhost_vsock_exit(void) -+{ -+ misc_deregister(&vhost_vsock_misc); -+ vsock_core_exit(); -+}; -+ -+module_init(vhost_vsock_init); -+module_exit(vhost_vsock_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Asias He"); -+MODULE_DESCRIPTION("vhost transport for vsock "); -diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h -index 
ab3731917bac..b30647697774 100644 ---- a/include/uapi/linux/vhost.h -+++ b/include/uapi/linux/vhost.h -@@ -169,4 +169,9 @@ struct vhost_scsi_target { - #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) - #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) - -+/* VHOST_VSOCK specific defines */ -+ -+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) -+#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) -+ - #endif --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0010-VSOCK-Add-Makefile-and-Kconfig.patch b/kernel/patches-4.4.x/0010-VSOCK-Add-Makefile-and-Kconfig.patch deleted file mode 100644 index 50f31558c..000000000 --- a/kernel/patches-4.4.x/0010-VSOCK-Add-Makefile-and-Kconfig.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 2a53a98986e77f240013b679bdd5080b84f47d72 Mon Sep 17 00:00:00 2001 -From: Asias He -Date: Thu, 28 Jul 2016 15:36:35 +0100 -Subject: [PATCH 10/44] VSOCK: Add Makefile and Kconfig - -Enable virtio-vsock and vhost-vsock. - -Signed-off-by: Asias He -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 304ba62fd4e670c1a5784585da0fac9f7309ef6c) ---- - drivers/vhost/Kconfig | 14 ++++++++++++++ - drivers/vhost/Makefile | 4 ++++ - net/vmw_vsock/Kconfig | 20 ++++++++++++++++++++ - net/vmw_vsock/Makefile | 6 ++++++ - 4 files changed, 44 insertions(+) - -diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig -index 533eaf04f12f..2b5f588f5b1e 100644 ---- a/drivers/vhost/Kconfig -+++ b/drivers/vhost/Kconfig -@@ -21,6 +21,20 @@ config VHOST_SCSI - Say M here to enable the vhost_scsi TCM fabric module - for use with virtio-scsi guests - -+config VHOST_VSOCK -+ tristate "vhost virtio-vsock driver" -+ depends on VSOCKETS && EVENTFD -+ select VIRTIO_VSOCKETS_COMMON -+ select VHOST -+ default n -+ ---help--- -+ This kernel module can be loaded in the host kernel to provide AF_VSOCK -+ sockets for communicating with guests. 
The guests must have the -+ virtio_transport.ko driver loaded to use the virtio-vsock device. -+ -+ To compile this driver as a module, choose M here: the module will be called -+ vhost_vsock. -+ - config VHOST_RING - tristate - ---help--- -diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile -index e0441c34db1c..6b012b986b57 100644 ---- a/drivers/vhost/Makefile -+++ b/drivers/vhost/Makefile -@@ -4,5 +4,9 @@ vhost_net-y := net.o - obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o - vhost_scsi-y := scsi.o - -+obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o -+vhost_vsock-y := vsock.o -+ - obj-$(CONFIG_VHOST_RING) += vringh.o -+ - obj-$(CONFIG_VHOST) += vhost.o -diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig -index 14810abedc2e..8831e7c42167 100644 ---- a/net/vmw_vsock/Kconfig -+++ b/net/vmw_vsock/Kconfig -@@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS - - To compile this driver as a module, choose M here: the module - will be called vmw_vsock_vmci_transport. If unsure, say N. -+ -+config VIRTIO_VSOCKETS -+ tristate "virtio transport for Virtual Sockets" -+ depends on VSOCKETS && VIRTIO -+ select VIRTIO_VSOCKETS_COMMON -+ help -+ This module implements a virtio transport for Virtual Sockets. -+ -+ Enable this transport if your Virtual Machine host supports Virtual -+ Sockets over virtio. -+ -+ To compile this driver as a module, choose M here: the module will be -+ called vmw_vsock_virtio_transport. If unsure, say N. -+ -+config VIRTIO_VSOCKETS_COMMON -+ tristate -+ help -+ This option is selected by any driver which needs to access -+ the virtio_vsock. The module will be called -+ vmw_vsock_virtio_transport_common. 
-diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile -index 2ce52d70f224..bc27c70e0e59 100644 ---- a/net/vmw_vsock/Makefile -+++ b/net/vmw_vsock/Makefile -@@ -1,7 +1,13 @@ - obj-$(CONFIG_VSOCKETS) += vsock.o - obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o -+obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o -+obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o - - vsock-y += af_vsock.o vsock_addr.o - - vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ - vmci_transport_notify_qstate.o -+ -+vmw_vsock_virtio_transport-y += virtio_transport.o -+ -+vmw_vsock_virtio_transport_common-y += virtio_transport_common.o --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0011-VSOCK-Use-kvfree.patch b/kernel/patches-4.4.x/0011-VSOCK-Use-kvfree.patch deleted file mode 100644 index a2c1db831..000000000 --- a/kernel/patches-4.4.x/0011-VSOCK-Use-kvfree.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 1047f5e89fec6c09e4174acb5561782954f2fc4b Mon Sep 17 00:00:00 2001 -From: Wei Yongjun -Date: Tue, 2 Aug 2016 13:50:42 +0000 -Subject: [PATCH 11/44] VSOCK: Use kvfree() - -Use kvfree() instead of open-coding it. - -Signed-off-by: Wei Yongjun -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit b226acab2f6aaa45c2af27279b63f622b23a44bd) ---- - drivers/vhost/vsock.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -index 028ca16c2d36..0ddf3a2dbfc4 100644 ---- a/drivers/vhost/vsock.c -+++ b/drivers/vhost/vsock.c -@@ -434,10 +434,7 @@ err: - - static void vhost_vsock_free(struct vhost_vsock *vsock) - { -- if (is_vmalloc_addr(vsock)) -- vfree(vsock); -- else -- kfree(vsock); -+ kvfree(vsock); - } - - static int vhost_vsock_dev_open(struct inode *inode, struct file *file) --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch b/kernel/patches-4.4.x/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch deleted file mode 100644 index 54f95a61a..000000000 --- a/kernel/patches-4.4.x/0012-vhost-vsock-fix-vhost-virtio_vsock_pkt-use-after-fre.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 657e8991b110a87c0df324f4d27e2398df81e870 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 4 Aug 2016 14:52:53 +0100 -Subject: [PATCH 12/44] vhost/vsock: fix vhost virtio_vsock_pkt use-after-free - -Stash the packet length in a local variable before handing over -ownership of the packet to virtio_transport_recv_pkt() or -virtio_transport_free_pkt(). - -This patch solves the use-after-free since pkt is no longer guaranteed -to be alive. - -Reported-by: Dan Carpenter -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 3fda5d6e580193fa005014355b3a61498f1b3ae0) ---- - drivers/vhost/vsock.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -index 0ddf3a2dbfc4..e3b30ea9ece5 100644 ---- a/drivers/vhost/vsock.c -+++ b/drivers/vhost/vsock.c -@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) - - vhost_disable_notify(&vsock->dev, vq); - for (;;) { -+ u32 len; -+ - if (!vhost_vsock_more_replies(vsock)) { - /* Stop tx until the device processes already - * pending replies. Leave tx virtqueue -@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) - continue; - } - -+ len = pkt->len; -+ - /* Only accept correctly addressed packets */ - if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) - virtio_transport_recv_pkt(pkt); - else - virtio_transport_free_pkt(pkt); - -- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); -+ vhost_add_used(vq, head, sizeof(pkt->hdr) + len); - added = true; - } - --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0013-virtio-vsock-fix-include-guard-typo.patch b/kernel/patches-4.4.x/0013-virtio-vsock-fix-include-guard-typo.patch deleted file mode 100644 index a9e5fc2a3..000000000 --- a/kernel/patches-4.4.x/0013-virtio-vsock-fix-include-guard-typo.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 4ff9eab3a8e61a8307e76570bcec31472fc3b090 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Fri, 5 Aug 2016 13:52:09 +0100 -Subject: [PATCH 13/44] virtio-vsock: fix include guard typo - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 28ad55578b8a76390d966b09da8c7fa3644f5140) ---- - include/uapi/linux/virtio_vsock.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h -index 6b011c19b50f..1d57ed3d84d2 100644 ---- a/include/uapi/linux/virtio_vsock.h -+++ b/include/uapi/linux/virtio_vsock.h -@@ -32,7 +32,7 @@ - */ - - #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H --#define _UAPI_LINUX_VIRTIO_VOSCK_H -+#define _UAPI_LINUX_VIRTIO_VSOCK_H - - #include - #include --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch b/kernel/patches-4.4.x/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch deleted file mode 100644 index cd54345e2..000000000 --- a/kernel/patches-4.4.x/0014-vhost-vsock-drop-space-available-check-for-TX-vq.patch +++ /dev/null @@ -1,61 +0,0 @@ -From c17a67f9b7015fb4d3a15a0d783486d770a23eca Mon Sep 17 00:00:00 2001 -From: Gerard Garcia -Date: Wed, 10 Aug 2016 17:24:34 +0200 -Subject: [PATCH 14/44] vhost/vsock: drop space available check for TX vq - -Remove unnecessary use of enable/disable callback notifications -and the incorrect more space available check. - -The virtio_transport_tx_work handles when the TX virtqueue -has more buffers available. - -Signed-off-by: Gerard Garcia -Acked-by: Stefan Hajnoczi -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 21bc54fc0cdc31de72b57d2b3c79cf9c2b83cf39) ---- - net/vmw_vsock/virtio_transport.c | 10 +++------- - 1 file changed, 3 insertions(+), 7 deletions(-) - -diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c -index 699dfabdbccd..936d7eee62d0 100644 ---- a/net/vmw_vsock/virtio_transport.c -+++ b/net/vmw_vsock/virtio_transport.c -@@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) - - vq = vsock->vqs[VSOCK_VQ_TX]; - -- /* Avoid unnecessary interrupts while we're processing the ring */ -- virtqueue_disable_cb(vq); -- - for (;;) { - struct virtio_vsock_pkt *pkt; - struct scatterlist hdr, buf, *sgs[2]; -@@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) - spin_lock_bh(&vsock->send_pkt_list_lock); - if (list_empty(&vsock->send_pkt_list)) { - spin_unlock_bh(&vsock->send_pkt_list_lock); -- virtqueue_enable_cb(vq); - break; - } - -@@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work) - } - - ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); -+ /* Usually this means that there is no more space available in -+ * the vq -+ */ - if (ret < 0) { - spin_lock_bh(&vsock->send_pkt_list_lock); - list_add(&pkt->list, &vsock->send_pkt_list); - spin_unlock_bh(&vsock->send_pkt_list_lock); -- -- if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) -- continue; /* retry now that we have more space */ - break; - } - --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch b/kernel/patches-4.4.x/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch deleted file mode 100644 index 7f5317ccb..000000000 --- a/kernel/patches-4.4.x/0015-VSOCK-Only-allow-host-network-namespace-to-use-AF_VS.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 1acbbfced43f6219f3f1596cf4070d738625979c Mon Sep 17 00:00:00 2001 -From: Ian Campbell -Date: Mon, 4 Apr 2016 14:50:10 +0100 -Subject: [PATCH 15/44] VSOCK: Only allow 
host network namespace to use - AF_VSOCK. - -The VSOCK addressing schema does not really lend itself to simply creating an -alternative end point address within a namespace. - -Signed-off-by: Ian Campbell ---- - net/vmw_vsock/af_vsock.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index 17dbbe64cd73..1bb1b016e945 100644 ---- a/net/vmw_vsock/af_vsock.c -+++ b/net/vmw_vsock/af_vsock.c -@@ -1852,6 +1852,9 @@ static const struct proto_ops vsock_stream_ops = { - static int vsock_create(struct net *net, struct socket *sock, - int protocol, int kern) - { -+ if (!net_eq(net, &init_net)) -+ return -EAFNOSUPPORT; -+ - if (!sock) - return -EINVAL; - --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch b/kernel/patches-4.4.x/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch deleted file mode 100644 index d432b3abf..000000000 --- a/kernel/patches-4.4.x/0016-drivers-hv-Define-the-channel-type-for-Hyper-V-PCI-E.patch +++ /dev/null @@ -1,63 +0,0 @@ -From ad1f53a263777b329fe0ce5bcd356aca29fa8708 Mon Sep 17 00:00:00 2001 -From: Jake Oshins -Date: Mon, 14 Dec 2015 16:01:41 -0800 -Subject: [PATCH 16/44] drivers:hv: Define the channel type for Hyper-V PCI - Express pass-through - -This defines the channel type for PCI front-ends in Hyper-V VMs. - -Signed-off-by: Jake Oshins -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 3053c762444a83ec6a8777f9476668b23b8ab180) ---- - drivers/hv/channel_mgmt.c | 3 +++ - include/linux/hyperv.h | 11 +++++++++++ - 2 files changed, 14 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 37238dffd947..a562318b856b 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -359,6 +359,7 @@ enum { - SCSI, - NIC, - ND_NIC, -+ PCIE, - MAX_PERF_CHN, - }; - -@@ -376,6 +377,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ - { HV_ND_GUID, }, -+ /* PCI Express Pass Through */ -+ { HV_PCIE_GUID, }, - }; - - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index ae6a711dcd1d..10dda1e3b560 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1155,6 +1155,17 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - 0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 \ - } - -+/* -+ * PCI Express Pass Through -+ * {44C4F61D-4444-4400-9D52-802E27EDE19F} -+ */ -+ -+#define HV_PCIE_GUID \ -+ .guid = { \ -+ 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ -+ 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ -+ } -+ - /* - * Common header for Hyper-V ICs - */ --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch b/kernel/patches-4.4.x/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch deleted file mode 100644 index 0f01fef85..000000000 --- a/kernel/patches-4.4.x/0017-Drivers-hv-vmbus-Use-uuid_le-type-consistently.patch +++ /dev/null @@ -1,297 +0,0 @@ -From 18e5bb427d8f7b5b07529fcc4dace32a4120a964 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Mon, 14 Dec 2015 16:01:43 -0800 -Subject: [PATCH 17/44] Drivers: hv: vmbus: Use uuid_le type consistently - -Consistently use uuid_le type in the Hyper-V driver code. - -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit af3ff643ea91ba64dd8d0b1cbed54d44512f96cd) ---- - drivers/hv/channel_mgmt.c | 2 +- - drivers/hv/vmbus_drv.c | 10 ++--- - include/linux/hyperv.h | 92 ++++++++++++++--------------------------- - include/linux/mod_devicetable.h | 2 +- - scripts/mod/file2alias.c | 2 +- - 5 files changed, 40 insertions(+), 68 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index a562318b856b..339277b76468 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -409,7 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui - struct cpumask *alloced_mask; - - for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!memcmp(type_guid->b, hp_devs[i].guid, -+ if (!memcmp(type_guid->b, &hp_devs[i].guid, - sizeof(uuid_le))) { - perf_chn = true; - break; -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 802dcb409030..f1fbb6b98f5c 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -533,7 +533,7 @@ static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) - - static const uuid_le null_guid; - --static inline bool is_null_guid(const __u8 *guid) -+static inline bool is_null_guid(const uuid_le *guid) - { - if (memcmp(guid, &null_guid, sizeof(uuid_le))) - return false; -@@ -546,9 +546,9 @@ static inline bool is_null_guid(const __u8 *guid) - */ - static const struct hv_vmbus_device_id *hv_vmbus_get_id( - const struct hv_vmbus_device_id *id, -- const __u8 *guid) -+ const uuid_le *guid) - { -- for (; !is_null_guid(id->guid); id++) -+ for (; !is_null_guid(&id->guid); id++) - if (!memcmp(&id->guid, guid, sizeof(uuid_le))) - return id; - -@@ -565,7 +565,7 @@ static int vmbus_match(struct device *device, struct device_driver *driver) - struct hv_driver *drv = drv_to_hv_drv(driver); - struct hv_device *hv_dev = device_to_hv_device(device); - -- if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b)) 
-+ if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) - return 1; - - return 0; -@@ -582,7 +582,7 @@ static int vmbus_probe(struct device *child_device) - struct hv_device *dev = device_to_hv_device(child_device); - const struct hv_vmbus_device_id *dev_id; - -- dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b); -+ dev_id = hv_vmbus_get_id(drv->id_table, &dev->dev_type); - if (drv->probe) { - ret = drv->probe(dev, dev_id); - if (ret != 0) -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 10dda1e3b560..4712d7d07b8c 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1012,6 +1012,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - .guid = { g0, g1, g2, g3, g4, g5, g6, g7, \ - g8, g9, ga, gb, gc, gd, ge, gf }, - -+ -+ - /* - * GUID definitions of various offer types - services offered to the guest. - */ -@@ -1021,118 +1023,94 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - * {f8615163-df3e-46c5-913f-f2d2f965ed0e} - */ - #define HV_NIC_GUID \ -- .guid = { \ -- 0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, \ -- 0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e \ -- } -+ .guid = UUID_LE(0xf8615163, 0xdf3e, 0x46c5, 0x91, 0x3f, \ -+ 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e) - - /* - * IDE GUID - * {32412632-86cb-44a2-9b5c-50d1417354f5} - */ - #define HV_IDE_GUID \ -- .guid = { \ -- 0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ -- 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 \ -- } -+ .guid = UUID_LE(0x32412632, 0x86cb, 0x44a2, 0x9b, 0x5c, \ -+ 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5) - - /* - * SCSI GUID - * {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} - */ - #define HV_SCSI_GUID \ -- .guid = { \ -- 0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ -- 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f \ -- } -+ .guid = UUID_LE(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \ -+ 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f) - - /* - * Shutdown GUID - * {0e0b6031-5213-4934-818b-38d90ced39db} - 
*/ - #define HV_SHUTDOWN_GUID \ -- .guid = { \ -- 0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49, \ -- 0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb \ -- } -+ .guid = UUID_LE(0x0e0b6031, 0x5213, 0x4934, 0x81, 0x8b, \ -+ 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb) - - /* - * Time Synch GUID - * {9527E630-D0AE-497b-ADCE-E80AB0175CAF} - */ - #define HV_TS_GUID \ -- .guid = { \ -- 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, \ -- 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf \ -- } -+ .guid = UUID_LE(0x9527e630, 0xd0ae, 0x497b, 0xad, 0xce, \ -+ 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf) - - /* - * Heartbeat GUID - * {57164f39-9115-4e78-ab55-382f3bd5422d} - */ - #define HV_HEART_BEAT_GUID \ -- .guid = { \ -- 0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, \ -- 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d \ -- } -+ .guid = UUID_LE(0x57164f39, 0x9115, 0x4e78, 0xab, 0x55, \ -+ 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d) - - /* - * KVP GUID - * {a9a0f4e7-5a45-4d96-b827-8a841e8c03e6} - */ - #define HV_KVP_GUID \ -- .guid = { \ -- 0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, \ -- 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6 \ -- } -+ .guid = UUID_LE(0xa9a0f4e7, 0x5a45, 0x4d96, 0xb8, 0x27, \ -+ 0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6) - - /* - * Dynamic memory GUID - * {525074dc-8985-46e2-8057-a307dc18a502} - */ - #define HV_DM_GUID \ -- .guid = { \ -- 0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46, \ -- 0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 \ -- } -+ .guid = UUID_LE(0x525074dc, 0x8985, 0x46e2, 0x80, 0x57, \ -+ 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02) - - /* - * Mouse GUID - * {cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a} - */ - #define HV_MOUSE_GUID \ -- .guid = { \ -- 0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c, \ -- 0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \ -- } -+ .guid = UUID_LE(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \ -+ 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) - - /* - * VSS (Backup/Restore) GUID - */ - #define HV_VSS_GUID \ -- .guid = { \ -- 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 
0x42, \ -- 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \ -- } -+ .guid = UUID_LE(0x35fa2e29, 0xea23, 0x4236, 0x96, 0xae, \ -+ 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40) - /* - * Synthetic Video GUID - * {DA0A7802-E377-4aac-8E77-0558EB1073F8} - */ - #define HV_SYNTHVID_GUID \ -- .guid = { \ -- 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a, \ -- 0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 \ -- } -+ .guid = UUID_LE(0xda0a7802, 0xe377, 0x4aac, 0x8e, 0x77, \ -+ 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8) - - /* - * Synthetic FC GUID - * {2f9bcc4a-0069-4af3-b76b-6fd0be528cda} - */ - #define HV_SYNTHFC_GUID \ -- .guid = { \ -- 0x4A, 0xCC, 0x9B, 0x2F, 0x69, 0x00, 0xF3, 0x4A, \ -- 0xB7, 0x6B, 0x6F, 0xD0, 0xBE, 0x52, 0x8C, 0xDA \ -- } -+ .guid = UUID_LE(0x2f9bcc4a, 0x0069, 0x4af3, 0xb7, 0x6b, \ -+ 0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda) - - /* - * Guest File Copy Service -@@ -1140,20 +1118,16 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - */ - - #define HV_FCOPY_GUID \ -- .guid = { \ -- 0xE3, 0x4B, 0xD1, 0x34, 0xE4, 0xDE, 0xC8, 0x41, \ -- 0x9A, 0xE7, 0x6B, 0x17, 0x49, 0x77, 0xC1, 0x92 \ -- } -+ .guid = UUID_LE(0x34d14be3, 0xdee4, 0x41c8, 0x9a, 0xe7, \ -+ 0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92) - - /* - * NetworkDirect. This is the guest RDMA service. 
- * {8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501} - */ - #define HV_ND_GUID \ -- .guid = { \ -- 0x3d, 0xaf, 0x2e, 0x8c, 0xa7, 0x32, 0x09, 0x4b, \ -- 0xab, 0x99, 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01 \ -- } -+ .guid = UUID_LE(0x8c2eaf3d, 0x32a7, 0x4b09, 0xab, 0x99, \ -+ 0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01) - - /* - * PCI Express Pass Through -@@ -1161,10 +1135,8 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - */ - - #define HV_PCIE_GUID \ -- .guid = { \ -- 0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44, \ -- 0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F \ -- } -+ .guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \ -+ 0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f) - - /* - * Common header for Hyper-V ICs -diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h -index 64f36e09a790..6e4c645e1c0d 100644 ---- a/include/linux/mod_devicetable.h -+++ b/include/linux/mod_devicetable.h -@@ -404,7 +404,7 @@ struct virtio_device_id { - * For Hyper-V devices we use the device guid as the id. 
- */ - struct hv_vmbus_device_id { -- __u8 guid[16]; -+ uuid_le guid; - kernel_ulong_t driver_data; /* Data private to the driver */ - }; - -diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c -index 9f5cdd49ff0b..8e8c69bee78f 100644 ---- a/scripts/mod/file2alias.c -+++ b/scripts/mod/file2alias.c -@@ -917,7 +917,7 @@ static int do_vmbus_entry(const char *filename, void *symval, - char guid_name[(sizeof(*guid) + 1) * 2]; - - for (i = 0; i < (sizeof(*guid) * 2); i += 2) -- sprintf(&guid_name[i], "%02x", TO_NATIVE((*guid)[i/2])); -+ sprintf(&guid_name[i], "%02x", TO_NATIVE((guid->b)[i/2])); - - strcpy(alias, "vmbus:"); - strcat(alias, guid_name); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch b/kernel/patches-4.4.x/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch deleted file mode 100644 index 858e1b4e4..000000000 --- a/kernel/patches-4.4.x/0018-Drivers-hv-vmbus-Use-uuid_le_cmp-for-comparing-GUIDs.patch +++ /dev/null @@ -1,55 +0,0 @@ -From b5447ef6e9e46f644b5d7d31d4557fae93d0de61 Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Mon, 14 Dec 2015 16:01:44 -0800 -Subject: [PATCH 18/44] Drivers: hv: vmbus: Use uuid_le_cmp() for comparing - GUIDs - -Use uuid_le_cmp() for comparing GUIDs. - -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 4ae9250893485f380275e7d5cb291df87c4d9710) ---- - drivers/hv/channel_mgmt.c | 3 +-- - drivers/hv/vmbus_drv.c | 4 ++-- - 2 files changed, 3 insertions(+), 4 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 339277b76468..9b4525c56376 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -409,8 +409,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui - struct cpumask *alloced_mask; - - for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!memcmp(type_guid->b, &hp_devs[i].guid, -- sizeof(uuid_le))) { -+ if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { - perf_chn = true; - break; - } -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index f1fbb6b98f5c..e71f3561dbab 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -535,7 +535,7 @@ static const uuid_le null_guid; - - static inline bool is_null_guid(const uuid_le *guid) - { -- if (memcmp(guid, &null_guid, sizeof(uuid_le))) -+ if (uuid_le_cmp(*guid, null_guid)) - return false; - return true; - } -@@ -549,7 +549,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id( - const uuid_le *guid) - { - for (; !is_null_guid(&id->guid); id++) -- if (!memcmp(&id->guid, guid, sizeof(uuid_le))) -+ if (!uuid_le_cmp(id->guid, *guid)) - return id; - - return NULL; --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch b/kernel/patches-4.4.x/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch deleted file mode 100644 index 56f1d391c..000000000 --- a/kernel/patches-4.4.x/0019-Drivers-hv-vmbus-do-sanity-check-of-channel-state-in.patch +++ /dev/null @@ -1,42 +0,0 @@ -From efcba5fd10440c87e32b1e1c761821828850a8d6 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:48 -0800 -Subject: [PATCH 19/44] Drivers: hv: vmbus: do sanity check of channel state in - 
vmbus_close_internal() - -This fixes an incorrect assumption of channel state in the function. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 64b7faf903dae2df94d89edf2c688b16751800e4) ---- - drivers/hv/channel.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index d037454fe7b8..b00cdfb725de 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -514,6 +514,18 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - tasklet = hv_context.event_dpc[channel->target_cpu]; - tasklet_disable(tasklet); - -+ /* -+ * In case a device driver's probe() fails (e.g., -+ * util_probe() -> vmbus_open() returns -ENOMEM) and the device is -+ * rescinded later (e.g., we dynamically disble an Integrated Service -+ * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): -+ * here we should skip most of the below cleanup work. -+ */ -+ if (channel->state != CHANNEL_OPENED_STATE) { -+ ret = -EINVAL; -+ goto out; -+ } -+ - channel->state = CHANNEL_OPEN_STATE; - channel->sc_creation_callback = NULL; - /* Stop callback and cancel the timer asap */ --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch b/kernel/patches-4.4.x/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch deleted file mode 100644 index b03e04236..000000000 --- a/kernel/patches-4.4.x/0020-Drivers-hv-vmbus-release-relid-on-error-in-vmbus_pro.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 5978b66f3462754c88616070a6f41f9f8ff6c3d0 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:50 -0800 -Subject: [PATCH 20/44] Drivers: hv: vmbus: release relid on error in - vmbus_process_offer() - -We want to simplify vmbus_onoffer_rescind() by not invoking -hv_process_channel_removal(NULL, ...). - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit f52078cf5711ce47c113a58702b35c8ff5f212f5) ---- - drivers/hv/channel_mgmt.c | 21 +++++++++++++++------ - 1 file changed, 15 insertions(+), 6 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 9b4525c56376..8529dd2ebc3d 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -178,19 +178,22 @@ static void percpu_channel_deq(void *arg) - } - - --void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -+static void vmbus_release_relid(u32 relid) - { - struct vmbus_channel_relid_released msg; -- unsigned long flags; -- struct vmbus_channel *primary_channel; - - memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); - msg.child_relid = relid; - msg.header.msgtype = CHANNELMSG_RELID_RELEASED; - vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released)); -+} - -- if (channel == NULL) -- return; -+void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -+{ -+ unsigned long flags; -+ struct vmbus_channel *primary_channel; -+ -+ vmbus_release_relid(relid); - - BUG_ON(!channel->rescind); - -@@ -337,6 +340,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - return; - - err_deq_chan: -+ vmbus_release_relid(newchannel->offermsg.child_relid); -+ - spin_lock_irqsave(&vmbus_connection.channel_lock, flags); - list_del(&newchannel->listentry); - spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -@@ -640,7 +645,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - channel = relid2channel(rescind->child_relid); - - if (channel == NULL) { -- hv_process_channel_removal(NULL, rescind->child_relid); -+ /* -+ * This is very impossible, because in -+ * vmbus_process_offer(), we have already invoked -+ * vmbus_release_relid() on error. 
-+ */ - return; - } - --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch b/kernel/patches-4.4.x/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch deleted file mode 100644 index ee9b6d14d..000000000 --- a/kernel/patches-4.4.x/0021-Drivers-hv-vmbus-channge-vmbus_connection.channel_lo.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 965725635521dbba2a4a82f8ab40a343bd46e018 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 14 Dec 2015 16:01:51 -0800 -Subject: [PATCH 21/44] Drivers: hv: vmbus: channge - vmbus_connection.channel_lock to mutex - -spinlock is unnecessary here. -mutex is enough. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit d6f591e339d23f434efda11917da511870891472) ---- - drivers/hv/channel_mgmt.c | 12 ++++++------ - drivers/hv/connection.c | 7 +++---- - drivers/hv/hyperv_vmbus.h | 2 +- - 3 files changed, 10 insertions(+), 11 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 8529dd2ebc3d..306c7dff6c77 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -207,9 +207,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - } - - if (channel->primary_channel == NULL) { -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_del(&channel->listentry); -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - primary_channel = channel; - } else { -@@ -254,7 +254,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - unsigned long flags; - - /* Make sure this is a new offer */ -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if 
(!uuid_le_cmp(channel->offermsg.offer.if_type, -@@ -270,7 +270,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - list_add_tail(&newchannel->listentry, - &vmbus_connection.chn_list); - -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - if (!fnew) { - /* -@@ -342,9 +342,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - err_deq_chan: - vmbus_release_relid(newchannel->offermsg.child_relid); - -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_del(&newchannel->listentry); -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); - - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); -diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 2bbc53025549..c0c0aaf88228 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -146,7 +146,7 @@ int vmbus_connect(void) - spin_lock_init(&vmbus_connection.channelmsg_lock); - - INIT_LIST_HEAD(&vmbus_connection.chn_list); -- spin_lock_init(&vmbus_connection.channel_lock); -+ mutex_init(&vmbus_connection.channel_mutex); - - /* - * Setup the vmbus event connection for channel interrupt -@@ -282,11 +282,10 @@ struct vmbus_channel *relid2channel(u32 relid) - { - struct vmbus_channel *channel; - struct vmbus_channel *found_channel = NULL; -- unsigned long flags; - struct list_head *cur, *tmp; - struct vmbus_channel *cur_sc; - -- spin_lock_irqsave(&vmbus_connection.channel_lock, flags); -+ mutex_lock(&vmbus_connection.channel_mutex); - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (channel->offermsg.child_relid == relid) { - found_channel = channel; -@@ -305,7 +304,7 @@ struct vmbus_channel *relid2channel(u32 relid) - } - } - } -- spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); -+ mutex_unlock(&vmbus_connection.channel_mutex); 
- - return found_channel; - } -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 75e383e6d03d..9a95beb87015 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -683,7 +683,7 @@ struct vmbus_connection { - - /* List of channels */ - struct list_head chn_list; -- spinlock_t channel_lock; -+ struct mutex channel_mutex; - - struct workqueue_struct *work_queue; - }; --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch b/kernel/patches-4.4.x/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch deleted file mode 100644 index d4cc98f59..000000000 --- a/kernel/patches-4.4.x/0022-Drivers-hv-remove-code-duplication-between-vmbus_rec.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 658b7c7c06ede2e0211c8accfecaae4cec85add4 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Mon, 14 Dec 2015 19:02:00 -0800 -Subject: [PATCH 22/44] Drivers: hv: remove code duplication between - vmbus_recvpacket()/vmbus_recvpacket_raw() - -vmbus_recvpacket() and vmbus_recvpacket_raw() are almost identical but -there are two discrepancies: -1) vmbus_recvpacket() doesn't propagate errors from hv_ringbuffer_read() - which looks like it is not desired. -2) There is an error message printed in packetlen > bufferlen case in - vmbus_recvpacket(). I'm removing it as it is usless for users to see - such messages and /vmbus_recvpacket_raw() doesn't have it. - -Signed-off-by: Vitaly Kuznetsov -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 667d374064b0cc48b6122101b287908d1b392bdb) ---- - drivers/hv/channel.c | 65 ++++++++++++++++++---------------------------------- - 1 file changed, 22 insertions(+), 43 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index b00cdfb725de..def21d34f3ea 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -924,8 +924,10 @@ EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); - * - * Mainly used by Hyper-V drivers. - */ --int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -- u32 bufferlen, u32 *buffer_actual_len, u64 *requestid) -+static inline int -+__vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -+ u32 bufferlen, u32 *buffer_actual_len, u64 *requestid, -+ bool raw) - { - struct vmpacket_descriptor desc; - u32 packetlen; -@@ -943,27 +945,34 @@ int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, - return 0; - - packetlen = desc.len8 << 3; -- userlen = packetlen - (desc.offset8 << 3); -+ if (!raw) -+ userlen = packetlen - (desc.offset8 << 3); -+ else -+ userlen = packetlen; - - *buffer_actual_len = userlen; - -- if (userlen > bufferlen) { -- -- pr_err("Buffer too small - got %d needs %d\n", -- bufferlen, userlen); -- return -ETOOSMALL; -- } -+ if (userlen > bufferlen) -+ return -ENOBUFS; - - *requestid = desc.trans_id; - - /* Copy over the packet to the user buffer */ - ret = hv_ringbuffer_read(&channel->inbound, buffer, userlen, -- (desc.offset8 << 3), &signal); -+ raw ? 
0 : desc.offset8 << 3, &signal); - - if (signal) - vmbus_setevent(channel); - -- return 0; -+ return ret; -+} -+ -+int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, -+ u32 bufferlen, u32 *buffer_actual_len, -+ u64 *requestid) -+{ -+ return __vmbus_recvpacket(channel, buffer, bufferlen, -+ buffer_actual_len, requestid, false); - } - EXPORT_SYMBOL(vmbus_recvpacket); - -@@ -974,37 +983,7 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, - u32 bufferlen, u32 *buffer_actual_len, - u64 *requestid) - { -- struct vmpacket_descriptor desc; -- u32 packetlen; -- int ret; -- bool signal = false; -- -- *buffer_actual_len = 0; -- *requestid = 0; -- -- -- ret = hv_ringbuffer_peek(&channel->inbound, &desc, -- sizeof(struct vmpacket_descriptor)); -- if (ret != 0) -- return 0; -- -- -- packetlen = desc.len8 << 3; -- -- *buffer_actual_len = packetlen; -- -- if (packetlen > bufferlen) -- return -ENOBUFS; -- -- *requestid = desc.trans_id; -- -- /* Copy over the entire packet to the user buffer */ -- ret = hv_ringbuffer_read(&channel->inbound, buffer, packetlen, 0, -- &signal); -- -- if (signal) -- vmbus_setevent(channel); -- -- return ret; -+ return __vmbus_recvpacket(channel, buffer, bufferlen, -+ buffer_actual_len, requestid, true); - } - EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch b/kernel/patches-4.4.x/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch deleted file mode 100644 index 7ecdbbc50..000000000 --- a/kernel/patches-4.4.x/0023-Drivers-hv-vmbus-fix-the-building-warning-with-hyper.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 899f16803bb388cfc959a28368723e63c07beb09 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Dec 2015 12:21:22 -0800 -Subject: [PATCH 23/44] Drivers: hv: vmbus: fix the building warning with - hyperv-keyboard - -With the recent change af3ff643ea91ba64dd8d0b1cbed54d44512f96cd -(Drivers: hv: 
vmbus: Use uuid_le type consistently), we always get this -warning: - - CC [M] drivers/input/serio/hyperv-keyboard.o -drivers/input/serio/hyperv-keyboard.c:427:2: warning: missing braces around - initializer [-Wmissing-braces] - { HV_KBD_GUID, }, - ^ -drivers/input/serio/hyperv-keyboard.c:427:2: warning: (near initialization - for .id_table[0].guid.b.) [-Wmissing-braces] - -The patch fixes the warning. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 2048157ad02e65f6327118dd4a7b9c9f1fd12f77) ---- - drivers/input/serio/hyperv-keyboard.c | 10 ---------- - include/linux/hyperv.h | 8 ++++++++ - 2 files changed, 8 insertions(+), 10 deletions(-) - -diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c -index e74e5d6e5f9f..c948866edf87 100644 ---- a/drivers/input/serio/hyperv-keyboard.c -+++ b/drivers/input/serio/hyperv-keyboard.c -@@ -412,16 +412,6 @@ static int hv_kbd_remove(struct hv_device *hv_dev) - return 0; - } - --/* -- * Keyboard GUID -- * {f912ad6d-2b17-48ea-bd65-f927a61c7684} -- */ --#define HV_KBD_GUID \ -- .guid = { \ -- 0x6d, 0xad, 0x12, 0xf9, 0x17, 0x2b, 0xea, 0x48, \ -- 0xbd, 0x65, 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84 \ -- } -- - static const struct hv_vmbus_device_id id_table[] = { - /* Keyboard guid */ - { HV_KBD_GUID, }, -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 4712d7d07b8c..9e2de6a7cc96 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1090,6 +1090,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - .guid = UUID_LE(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \ - 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a) - -+/* -+ * Keyboard GUID -+ * {f912ad6d-2b17-48ea-bd65-f927a61c7684} -+ */ -+#define HV_KBD_GUID \ -+ .guid = UUID_LE(0xf912ad6d, 0x2b17, 0x48ea, 0xbd, 0x65, \ -+ 0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84) -+ - /* - * VSS (Backup/Restore) GUID - */ --- -2.14.1 - diff --git 
a/kernel/patches-4.4.x/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch b/kernel/patches-4.4.x/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch deleted file mode 100644 index ecd745759..000000000 --- a/kernel/patches-4.4.x/0024-Drivers-hv-vmbus-Treat-Fibre-Channel-devices-as-perf.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 234561ebf27f49a8ec564f28ec7a16f38cfa812b Mon Sep 17 00:00:00 2001 -From: "K. Y. Srinivasan" -Date: Tue, 15 Dec 2015 16:27:27 -0800 -Subject: [PATCH 24/44] Drivers: hv: vmbus: Treat Fibre Channel devices as - performance critical - -For performance critical devices, we distribute the incoming -channel interrupt load across available CPUs in the guest. -Include Fibre channel devices in the set of devices for which -we would distribute the interrupt load. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 879a650a273bc3efb9d472886b8ced12630ea8ed) ---- - drivers/hv/channel_mgmt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 306c7dff6c77..763d0c19c16f 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -362,6 +362,7 @@ err_free_chan: - enum { - IDE = 0, - SCSI, -+ FC, - NIC, - ND_NIC, - PCIE, -@@ -378,6 +379,8 @@ static const struct hv_vmbus_device_id hp_devs[] = { - { HV_IDE_GUID, }, - /* Storage - SCSI */ - { HV_SCSI_GUID, }, -+ /* Storage - FC */ -+ { HV_SYNTHFC_GUID, }, - /* Network */ - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch b/kernel/patches-4.4.x/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch deleted file mode 100644 index 234efe587..000000000 --- a/kernel/patches-4.4.x/0025-Drivers-hv-vmbus-Add-vendor-and-device-atttributes.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 1469a802b8bfbb817986cca9e7c8904524c5fd32 Mon Sep 17 00:00:00 2001 -From: 
"K. Y. Srinivasan" -Date: Fri, 25 Dec 2015 20:00:30 -0800 -Subject: [PATCH 25/44] Drivers: hv: vmbus: Add vendor and device atttributes - -Add vendor and device attributes to VMBUS devices. These will be used -by Hyper-V tools as well user-level RDMA libraries that will use the -vendor/device tuple to discover the RDMA device. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 7047f17d70fc0599563d30d0791692cb5fe42ae6) ---- - Documentation/ABI/stable/sysfs-bus-vmbus | 14 +++ - drivers/hv/channel_mgmt.c | 166 +++++++++++++++++++++++-------- - drivers/hv/vmbus_drv.c | 21 ++++ - include/linux/hyperv.h | 28 ++++++ - 4 files changed, 186 insertions(+), 43 deletions(-) - -diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus -index 636e938d5e33..5d0125f7bcaf 100644 ---- a/Documentation/ABI/stable/sysfs-bus-vmbus -+++ b/Documentation/ABI/stable/sysfs-bus-vmbus -@@ -27,3 +27,17 @@ Description: The mapping of which primary/sub channels are bound to which - Virtual Processors. - Format: - Users: tools/hv/lsvmbus -+ -+What: /sys/bus/vmbus/devices/vmbus_*/device -+Date: Dec. 2015 -+KernelVersion: 4.5 -+Contact: K. Y. Srinivasan -+Description: The 16 bit device ID of the device -+Users: tools/hv/lsvmbus and user level RDMA libraries -+ -+What: /sys/bus/vmbus/devices/vmbus_*/vendor -+Date: Dec. 2015 -+KernelVersion: 4.5 -+Contact: K. Y. 
Srinivasan -+Description: The 16 bit vendor ID of the device -+Users: tools/hv/lsvmbus and user level RDMA libraries -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 763d0c19c16f..d6c611457601 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -33,8 +33,122 @@ - - #include "hyperv_vmbus.h" - --static void init_vp_index(struct vmbus_channel *channel, -- const uuid_le *type_guid); -+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); -+ -+static const struct vmbus_device vmbus_devs[] = { -+ /* IDE */ -+ { .dev_type = HV_IDE, -+ HV_IDE_GUID, -+ .perf_device = true, -+ }, -+ -+ /* SCSI */ -+ { .dev_type = HV_SCSI, -+ HV_SCSI_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Fibre Channel */ -+ { .dev_type = HV_FC, -+ HV_SYNTHFC_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Synthetic NIC */ -+ { .dev_type = HV_NIC, -+ HV_NIC_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Network Direct */ -+ { .dev_type = HV_ND, -+ HV_ND_GUID, -+ .perf_device = true, -+ }, -+ -+ /* PCIE */ -+ { .dev_type = HV_PCIE, -+ HV_PCIE_GUID, -+ .perf_device = true, -+ }, -+ -+ /* Synthetic Frame Buffer */ -+ { .dev_type = HV_FB, -+ HV_SYNTHVID_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Synthetic Keyboard */ -+ { .dev_type = HV_KBD, -+ HV_KBD_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Synthetic MOUSE */ -+ { .dev_type = HV_MOUSE, -+ HV_MOUSE_GUID, -+ .perf_device = false, -+ }, -+ -+ /* KVP */ -+ { .dev_type = HV_KVP, -+ HV_KVP_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Time Synch */ -+ { .dev_type = HV_TS, -+ HV_TS_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Heartbeat */ -+ { .dev_type = HV_HB, -+ HV_HEART_BEAT_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Shutdown */ -+ { .dev_type = HV_SHUTDOWN, -+ HV_SHUTDOWN_GUID, -+ .perf_device = false, -+ }, -+ -+ /* File copy */ -+ { .dev_type = HV_FCOPY, -+ HV_FCOPY_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Backup */ -+ { .dev_type = HV_BACKUP, -+ HV_VSS_GUID, -+ .perf_device 
= false, -+ }, -+ -+ /* Dynamic Memory */ -+ { .dev_type = HV_DM, -+ HV_DM_GUID, -+ .perf_device = false, -+ }, -+ -+ /* Unknown GUID */ -+ { .dev_type = HV_UNKOWN, -+ .perf_device = false, -+ }, -+}; -+ -+static u16 hv_get_dev_type(const uuid_le *guid) -+{ -+ u16 i; -+ -+ for (i = HV_IDE; i < HV_UNKOWN; i++) { -+ if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) -+ return i; -+ } -+ pr_info("Unknown GUID: %pUl\n", guid); -+ return i; -+} - - /** - * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message -@@ -252,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - struct vmbus_channel *channel; - bool fnew = true; - unsigned long flags; -+ u16 dev_type; - - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); -@@ -289,7 +404,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - goto err_free_chan; - } - -- init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); -+ dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type); -+ -+ init_vp_index(newchannel, dev_type); - - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); -@@ -326,6 +443,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - if (!newchannel->device_obj) - goto err_deq_chan; - -+ newchannel->device_obj->device_id = dev_type; - /* - * Add the new device to the bus. This will kick off device-driver - * binding which eventually invokes the device driver's AddDevice() -@@ -359,37 +477,6 @@ err_free_chan: - free_channel(newchannel); - } - --enum { -- IDE = 0, -- SCSI, -- FC, -- NIC, -- ND_NIC, -- PCIE, -- MAX_PERF_CHN, --}; -- --/* -- * This is an array of device_ids (device types) that are performance critical. -- * We attempt to distribute the interrupt load for these devices across -- * all available CPUs. 
-- */ --static const struct hv_vmbus_device_id hp_devs[] = { -- /* IDE */ -- { HV_IDE_GUID, }, -- /* Storage - SCSI */ -- { HV_SCSI_GUID, }, -- /* Storage - FC */ -- { HV_SYNTHFC_GUID, }, -- /* Network */ -- { HV_NIC_GUID, }, -- /* NetworkDirect Guest RDMA */ -- { HV_ND_GUID, }, -- /* PCI Express Pass Through */ -- { HV_PCIE_GUID, }, --}; -- -- - /* - * We use this state to statically distribute the channel interrupt load. - */ -@@ -406,22 +493,15 @@ static int next_numa_node_id; - * For pre-win8 hosts or non-performance critical channels we assign the - * first CPU in the first NUMA node. - */ --static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) -+static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) - { - u32 cur_cpu; -- int i; -- bool perf_chn = false; -+ bool perf_chn = vmbus_devs[dev_type].perf_device; - struct vmbus_channel *primary = channel->primary_channel; - int next_node; - struct cpumask available_mask; - struct cpumask *alloced_mask; - -- for (i = IDE; i < MAX_PERF_CHN; i++) { -- if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { -- perf_chn = true; -- break; -- } -- } - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { - /* -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index e71f3561dbab..f688c051ca17 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -480,6 +480,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev, - } - static DEVICE_ATTR_RO(channel_vp_mapping); - -+static ssize_t vendor_show(struct device *dev, -+ struct device_attribute *dev_attr, -+ char *buf) -+{ -+ struct hv_device *hv_dev = device_to_hv_device(dev); -+ return sprintf(buf, "0x%x\n", hv_dev->vendor_id); -+} -+static DEVICE_ATTR_RO(vendor); -+ -+static ssize_t device_show(struct device *dev, -+ struct device_attribute *dev_attr, -+ char *buf) -+{ -+ struct hv_device *hv_dev = device_to_hv_device(dev); -+ return sprintf(buf, 
"0x%x\n", hv_dev->device_id); -+} -+static DEVICE_ATTR_RO(device); -+ - /* Set up per device attributes in /sys/bus/vmbus/devices/ */ - static struct attribute *vmbus_attrs[] = { - &dev_attr_id.attr, -@@ -505,6 +523,8 @@ static struct attribute *vmbus_attrs[] = { - &dev_attr_in_read_bytes_avail.attr, - &dev_attr_in_write_bytes_avail.attr, - &dev_attr_channel_vp_mapping.attr, -+ &dev_attr_vendor.attr, -+ &dev_attr_device.attr, - NULL, - }; - ATTRIBUTE_GROUPS(vmbus); -@@ -963,6 +983,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type, - memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); - memcpy(&child_device_obj->dev_instance, instance, - sizeof(uuid_le)); -+ child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ - - - return child_device_obj; -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 9e2de6a7cc96..51c98fd6044d 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -635,6 +635,32 @@ enum hv_signal_policy { - HV_SIGNAL_POLICY_EXPLICIT, - }; - -+enum vmbus_device_type { -+ HV_IDE = 0, -+ HV_SCSI, -+ HV_FC, -+ HV_NIC, -+ HV_ND, -+ HV_PCIE, -+ HV_FB, -+ HV_KBD, -+ HV_MOUSE, -+ HV_KVP, -+ HV_TS, -+ HV_HB, -+ HV_SHUTDOWN, -+ HV_FCOPY, -+ HV_BACKUP, -+ HV_DM, -+ HV_UNKOWN, -+}; -+ -+struct vmbus_device { -+ u16 dev_type; -+ uuid_le guid; -+ bool perf_device; -+}; -+ - struct vmbus_channel { - /* Unique channel id */ - int id; -@@ -961,6 +987,8 @@ struct hv_device { - - /* the device instance id of this device */ - uuid_le dev_instance; -+ u16 vendor_id; -+ u16 device_id; - - struct device device; - --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch b/kernel/patches-4.4.x/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch deleted file mode 100644 index 0d198c0c4..000000000 --- a/kernel/patches-4.4.x/0026-Drivers-hv-vmbus-add-a-helper-function-to-set-a-chan.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 
2ed4bc4174ebc1dd73a7f8db67a82779f88d5566 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:37 -0800 -Subject: [PATCH 26/44] Drivers: hv: vmbus: add a helper function to set a - channel's pending send size - -This will be used by the coming net/hvsock driver. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 3c75354d043ad546148d6992e40033ecaefc5ea5) ---- - include/linux/hyperv.h | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 51c98fd6044d..934542ac1394 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -818,6 +818,12 @@ static inline void *get_per_channel_state(struct vmbus_channel *c) - return c->per_channel_state; - } - -+static inline void set_channel_pending_send_size(struct vmbus_channel *c, -+ u32 size) -+{ -+ c->outbound.ring_buffer->pending_send_sz = size; -+} -+ - void vmbus_onmessage(void *context); - - int vmbus_request_offers(void); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch b/kernel/patches-4.4.x/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch deleted file mode 100644 index 52f141080..000000000 --- a/kernel/patches-4.4.x/0027-Drivers-hv-vmbus-define-the-new-offer-type-for-Hyper.patch +++ /dev/null @@ -1,44 +0,0 @@ -From a970930ec2236429f979d92d6a2c03420437fa82 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:38 -0800 -Subject: [PATCH 27/44] Drivers: hv: vmbus: define the new offer type for - Hyper-V socket (hvsock) - -A helper function is also added. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit e8d6ca023efce3bd80050dcd9e708ee3cf8babd4) ---- - include/linux/hyperv.h | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 934542ac1394..a4f105d55881 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -237,6 +237,7 @@ struct vmbus_channel_offer { - #define VMBUS_CHANNEL_LOOPBACK_OFFER 0x100 - #define VMBUS_CHANNEL_PARENT_OFFER 0x200 - #define VMBUS_CHANNEL_REQUEST_MONITORED_NOTIFICATION 0x400 -+#define VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER 0x2000 - - struct vmpacket_descriptor { - u16 type; -@@ -797,6 +798,12 @@ struct vmbus_channel { - enum hv_signal_policy signal_policy; - }; - -+static inline bool is_hvsock_channel(const struct vmbus_channel *c) -+{ -+ return !!(c->offermsg.offer.chn_flags & -+ VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); -+} -+ - static inline void set_channel_signal_state(struct vmbus_channel *c, - enum hv_signal_policy policy) - { --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch b/kernel/patches-4.4.x/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch deleted file mode 100644 index cfbf098d9..000000000 --- a/kernel/patches-4.4.x/0028-Drivers-hv-vmbus-vmbus_sendpacket_ctl-hvsock-avoid-u.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 617883388dc298c729f278d67560f5084a6b3383 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:39 -0800 -Subject: [PATCH 28/44] Drivers: hv: vmbus: vmbus_sendpacket_ctl: hvsock: avoid - unnecessary signaling - -When the hvsock channel's outbound ringbuffer is full (i.e., -hv_ringbuffer_write() returns -EAGAIN), we should avoid the unnecessary -signaling the host. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 5f363bc38f810d238d1e8b19998625ddec3b8138) ---- - drivers/hv/channel.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index def21d34f3ea..a42104ed0b59 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -661,6 +661,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - * If we cannot write to the ring-buffer; signal the host - * even if we may not have written anything. This is a rare - * enough condition that it should not matter. -+ * NOTE: in this case, the hvsock channel is an exception, because -+ * it looks the host side's hvsock implementation has a throttling -+ * mechanism which can hurt the performance otherwise. - */ - - if (channel->signal_policy) -@@ -668,7 +671,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - else - kick_q = true; - -- if (((ret == 0) && kick_q && signal) || (ret)) -+ if (((ret == 0) && kick_q && signal) || -+ (ret && !is_hvsock_channel(channel))) - vmbus_setevent(channel); - - return ret; --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch b/kernel/patches-4.4.x/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch deleted file mode 100644 index c47ca3cee..000000000 --- a/kernel/patches-4.4.x/0029-Drivers-hv-vmbus-define-a-new-VMBus-message-type-for.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 9c7846299cb55ed7743d7706094bf83f5a9df447 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:40 -0800 -Subject: [PATCH 29/44] Drivers: hv: vmbus: define a new VMBus message type for - hvsock - -A function to send the type of message is also added. - -The coming net/hvsock driver will use this function to proactively request -the host to offer a VMBus channel for a new hvsock connection. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 5c23a1a5c60b0f472cfa61cd7d8279f8aaeb5b64) ---- - drivers/hv/channel.c | 15 +++++++++++++++ - drivers/hv/channel_mgmt.c | 4 ++++ - include/linux/hyperv.h | 13 +++++++++++++ - 3 files changed, 32 insertions(+) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index a42104ed0b59..b27054e4eac2 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -214,6 +214,21 @@ error0: - } - EXPORT_SYMBOL_GPL(vmbus_open); - -+/* Used for Hyper-V Socket: a guest client's connect() to the host */ -+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, -+ const uuid_le *shv_host_servie_id) -+{ -+ struct vmbus_channel_tl_connect_request conn_msg; -+ -+ memset(&conn_msg, 0, sizeof(conn_msg)); -+ conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST; -+ conn_msg.guest_endpoint_id = *shv_guest_servie_id; -+ conn_msg.host_service_id = *shv_host_servie_id; -+ -+ return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); -+} -+EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); -+ - /* - * create_gpadl_header - Creates a gpadl for the specified buffer - */ -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index d6c611457601..60ca25b93b4c 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -958,6 +958,10 @@ struct vmbus_channel_message_table_entry - {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, - {CHANNELMSG_UNLOAD, 0, NULL}, - {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, -+ {CHANNELMSG_18, 0, NULL}, -+ {CHANNELMSG_19, 0, NULL}, -+ {CHANNELMSG_20, 0, NULL}, -+ {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, - }; - - /* -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index a4f105d55881..191bc5d0ffbf 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -394,6 +394,10 @@ enum vmbus_channel_message_type { - CHANNELMSG_VERSION_RESPONSE = 15, - CHANNELMSG_UNLOAD = 16, - CHANNELMSG_UNLOAD_RESPONSE = 
17, -+ CHANNELMSG_18 = 18, -+ CHANNELMSG_19 = 19, -+ CHANNELMSG_20 = 20, -+ CHANNELMSG_TL_CONNECT_REQUEST = 21, - CHANNELMSG_COUNT - }; - -@@ -564,6 +568,13 @@ struct vmbus_channel_initiate_contact { - u64 monitor_page2; - } __packed; - -+/* Hyper-V socket: guest's connect()-ing to host */ -+struct vmbus_channel_tl_connect_request { -+ struct vmbus_channel_message_header header; -+ uuid_le guest_endpoint_id; -+ uuid_le host_service_id; -+} __packed; -+ - struct vmbus_channel_version_response { - struct vmbus_channel_message_header header; - u8 version_supported; -@@ -1295,4 +1306,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); - - extern __u32 vmbus_proto_version; - -+int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, -+ const uuid_le *shv_host_servie_id); - #endif /* _HYPERV_H */ --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch b/kernel/patches-4.4.x/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch deleted file mode 100644 index 8588639e7..000000000 --- a/kernel/patches-4.4.x/0030-Drivers-hv-vmbus-add-a-hvsock-flag-in-struct-hv_driv.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 332af068b03da4500f78879c376ffde6337be19c Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:41 -0800 -Subject: [PATCH 30/44] Drivers: hv: vmbus: add a hvsock flag in struct - hv_driver - -Only the coming hv_sock driver has a "true" value for this flag. - -We treat the hvsock offers/channels as special VMBus devices. -Since the hv_sock driver handles all the hvsock offers/channels, we need to -tweak vmbus_match() for hv_sock driver, so we introduce this flag. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 8981da320a11217589aa3c50f9e891bcdef07ece) ---- - drivers/hv/vmbus_drv.c | 4 ++++ - include/linux/hyperv.h | 14 ++++++++++++++ - 2 files changed, 18 insertions(+) - -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index f688c051ca17..a220efc297c4 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -585,6 +585,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver) - struct hv_driver *drv = drv_to_hv_drv(driver); - struct hv_device *hv_dev = device_to_hv_device(device); - -+ /* The hv_sock driver handles all hv_sock offers. */ -+ if (is_hvsock_channel(hv_dev->channel)) -+ return drv->hvsock; -+ - if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) - return 1; - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 191bc5d0ffbf..05966e279ec8 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -992,6 +992,20 @@ extern void vmbus_ontimer(unsigned long data); - struct hv_driver { - const char *name; - -+ /* -+ * A hvsock offer, which has a VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER -+ * channel flag, actually doesn't mean a synthetic device because the -+ * offer's if_type/if_instance can change for every new hvsock -+ * connection. -+ * -+ * However, to facilitate the notification of new-offer/rescind-offer -+ * from vmbus driver to hvsock driver, we can handle hvsock offer as -+ * a special vmbus device, and hence we need the below flag to -+ * indicate if the driver is the hvsock driver or not: we need to -+ * specially treat the hvosck offer & driver in vmbus_match(). 
-+ */ -+ bool hvsock; -+ - /* the device type supported by this driver */ - uuid_le dev_type; - const struct hv_vmbus_device_id *id_table; --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch b/kernel/patches-4.4.x/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch deleted file mode 100644 index fef2a9514..000000000 --- a/kernel/patches-4.4.x/0031-Drivers-hv-vmbus-add-a-per-channel-rescind-callback.patch +++ /dev/null @@ -1,72 +0,0 @@ -From acda69845b33f53f2dca9f79e59d45361797cd23 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:42 -0800 -Subject: [PATCH 31/44] Drivers: hv: vmbus: add a per-channel rescind callback - -This will be used by the coming hv_sock driver. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 499e8401a515d04daa986b995da710d2b9737764) ---- - drivers/hv/channel_mgmt.c | 11 +++++++++++ - include/linux/hyperv.h | 9 +++++++++ - 2 files changed, 20 insertions(+) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 60ca25b93b4c..76864c98a110 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -741,6 +741,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - spin_unlock_irqrestore(&channel->lock, flags); - - if (channel->device_obj) { -+ if (channel->chn_rescind_callback) { -+ channel->chn_rescind_callback(channel); -+ return; -+ } - /* - * We will have to unregister this device from the - * driver core. 
-@@ -1110,3 +1114,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary) - return ret; - } - EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); -+ -+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, -+ void (*chn_rescind_cb)(struct vmbus_channel *)) -+{ -+ channel->chn_rescind_callback = chn_rescind_cb; -+} -+EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 05966e279ec8..ad04017ba06f 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -767,6 +767,12 @@ struct vmbus_channel { - */ - void (*sc_creation_callback)(struct vmbus_channel *new_sc); - -+ /* -+ * Channel rescind callback. Some channels (the hvsock ones), need to -+ * register a callback which is invoked in vmbus_onoffer_rescind(). -+ */ -+ void (*chn_rescind_callback)(struct vmbus_channel *channel); -+ - /* - * The spinlock to protect the structure. It is being used to protect - * test-and-set access to various attributes of the structure as well -@@ -853,6 +859,9 @@ int vmbus_request_offers(void); - void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel, - void (*sc_cr_cb)(struct vmbus_channel *new_sc)); - -+void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, -+ void (*chn_rescind_cb)(struct vmbus_channel *)); -+ - /* - * Retrieve the (sub) channel on which to send an outgoing request. 
- * When a primary channel has multiple sub-channels, we choose a --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch b/kernel/patches-4.4.x/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch deleted file mode 100644 index 197a2fe96..000000000 --- a/kernel/patches-4.4.x/0032-Drivers-hv-vmbus-add-an-API-vmbus_hvsock_device_unre.patch +++ /dev/null @@ -1,153 +0,0 @@ -From db167d0152627bbfa667ca2ad8a80fcd67c83530 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Wed, 27 Jan 2016 22:29:43 -0800 -Subject: [PATCH 32/44] Drivers: hv: vmbus: add an API - vmbus_hvsock_device_unregister() - -The hvsock driver needs this API to release all the resources related -to the channel. - -Signed-off-by: Dexuan Cui -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 85d9aa705184a4504d0330017e3956fcdae8a9d6) ---- - drivers/hv/channel_mgmt.c | 33 ++++++++++++++++++++++++++++----- - drivers/hv/connection.c | 4 ++-- - include/linux/hyperv.h | 2 ++ - 3 files changed, 32 insertions(+), 7 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 76864c98a110..cf311be88cb4 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -310,6 +310,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - vmbus_release_relid(relid); - - BUG_ON(!channel->rescind); -+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - - if (channel->target_cpu != get_cpu()) { - put_cpu(); -@@ -321,9 +322,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - } - - if (channel->primary_channel == NULL) { -- mutex_lock(&vmbus_connection.channel_mutex); - list_del(&channel->listentry); -- mutex_unlock(&vmbus_connection.channel_mutex); - - primary_channel = channel; - } else { -@@ -367,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - bool fnew = true; - unsigned long 
flags; - u16 dev_type; -+ int ret; - - /* Make sure this is a new offer */ - mutex_lock(&vmbus_connection.channel_mutex); -@@ -449,7 +449,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - * binding which eventually invokes the device driver's AddDevice() - * method. - */ -- if (vmbus_device_register(newchannel->device_obj) != 0) { -+ mutex_lock(&vmbus_connection.channel_mutex); -+ ret = vmbus_device_register(newchannel->device_obj); -+ mutex_unlock(&vmbus_connection.channel_mutex); -+ -+ if (ret != 0) { - pr_err("unable to add child device object (relid %d)\n", - newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); -@@ -725,6 +729,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - struct device *dev; - - rescind = (struct vmbus_channel_rescind_offer *)hdr; -+ -+ mutex_lock(&vmbus_connection.channel_mutex); - channel = relid2channel(rescind->child_relid); - - if (channel == NULL) { -@@ -733,7 +739,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - * vmbus_process_offer(), we have already invoked - * vmbus_release_relid() on error. 
- */ -- return; -+ goto out; - } - - spin_lock_irqsave(&channel->lock, flags); -@@ -743,7 +749,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - if (channel->device_obj) { - if (channel->chn_rescind_callback) { - channel->chn_rescind_callback(channel); -- return; -+ goto out; - } - /* - * We will have to unregister this device from the -@@ -758,8 +764,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) - hv_process_channel_removal(channel, - channel->offermsg.child_relid); - } -+ -+out: -+ mutex_unlock(&vmbus_connection.channel_mutex); - } - -+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) -+{ -+ mutex_lock(&vmbus_connection.channel_mutex); -+ -+ BUG_ON(!is_hvsock_channel(channel)); -+ -+ channel->rescind = true; -+ vmbus_device_unregister(channel->device_obj); -+ -+ mutex_unlock(&vmbus_connection.channel_mutex); -+} -+EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); -+ -+ - /* - * vmbus_onoffers_delivered - - * This is invoked when all offers have been delivered. 
-diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index c0c0aaf88228..4a1679d54c1f 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -285,7 +285,8 @@ struct vmbus_channel *relid2channel(u32 relid) - struct list_head *cur, *tmp; - struct vmbus_channel *cur_sc; - -- mutex_lock(&vmbus_connection.channel_mutex); -+ BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); -+ - list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { - if (channel->offermsg.child_relid == relid) { - found_channel = channel; -@@ -304,7 +305,6 @@ struct vmbus_channel *relid2channel(u32 relid) - } - } - } -- mutex_unlock(&vmbus_connection.channel_mutex); - - return found_channel; - } -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index ad04017ba06f..993318a6d147 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1071,6 +1071,8 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, - const char *mod_name); - void vmbus_driver_unregister(struct hv_driver *hv_driver); - -+void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); -+ - int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - resource_size_t min, resource_size_t max, - resource_size_t size, resource_size_t align, --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch b/kernel/patches-4.4.x/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch deleted file mode 100644 index af2bab77e..000000000 --- a/kernel/patches-4.4.x/0033-Drivers-hv-vmbus-Give-control-over-how-the-ring-acce.patch +++ /dev/null @@ -1,208 +0,0 @@ -From ec8df15f6500c5c8df2eaccf30a3e881361775e5 Mon Sep 17 00:00:00 2001 -From: "K. Y. 
Srinivasan" -Date: Wed, 27 Jan 2016 22:29:45 -0800 -Subject: [PATCH 33/44] Drivers: hv: vmbus: Give control over how the ring - access is serialized - -On the channel send side, many of the VMBUS -device drivers explicity serialize access to the -outgoing ring buffer. Give more control to the -VMBUS device drivers in terms how to serialize -accesss to the outgoing ring buffer. -The default behavior will be to aquire the -ring lock to preserve the current behavior. - -Signed-off-by: K. Y. Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit fe760e4d64fe5c17c39e86c410d41f6587ee88bc) ---- - drivers/hv/channel.c | 15 +++++++++++---- - drivers/hv/channel_mgmt.c | 1 + - drivers/hv/hyperv_vmbus.h | 2 +- - drivers/hv/ring_buffer.c | 13 ++++++++----- - include/linux/hyperv.h | 16 ++++++++++++++++ - 5 files changed, 37 insertions(+), 10 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index b27054e4eac2..4077e7243151 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -641,6 +641,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - u64 aligned_data = 0; - int ret; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - int num_vecs = ((bufferlen != 0) ? 
3 : 1); - - -@@ -660,7 +661,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, -- &signal); -+ &signal, lock); - - /* - * Signalling the host is conditional on many factors: -@@ -740,6 +741,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - - if (pagecount > MAX_PAGE_BUFFER_COUNT) - return -EINVAL; -@@ -776,7 +778,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - /* - * Signalling the host is conditional on many factors: -@@ -839,6 +842,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - - packetlen = desc_size + bufferlen; - packetlen_aligned = ALIGN(packetlen, sizeof(u64)); -@@ -858,7 +862,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - if (ret == 0 && signal) - vmbus_setevent(channel); -@@ -883,6 +888,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - struct kvec bufferlist[3]; - u64 aligned_data = 0; - bool signal = false; -+ bool lock = channel->acquire_ring_lock; - u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, - multi_pagebuffer->len); - -@@ 
-921,7 +927,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - -- ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); -+ ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, -+ &signal, lock); - - if (ret == 0 && signal) - vmbus_setevent(channel); -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index cf311be88cb4..b40f429aaa13 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -259,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void) - return NULL; - - channel->id = atomic_inc_return(&chan_num); -+ channel->acquire_ring_lock = true; - spin_lock_init(&channel->inbound_lock); - spin_lock_init(&channel->lock); - -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 9a95beb87015..9976774d6abc 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -617,7 +617,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); - - int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, - struct kvec *kv_list, -- u32 kv_count, bool *signal); -+ u32 kv_count, bool *signal, bool lock); - - int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer, - u32 buflen); -diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c -index 70a1a9a22f87..89a428f7dc46 100644 ---- a/drivers/hv/ring_buffer.c -+++ b/drivers/hv/ring_buffer.c -@@ -388,7 +388,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) - * - */ - int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, -- struct kvec *kv_list, u32 kv_count, bool *signal) -+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock) - { - int i = 0; - u32 bytes_avail_towrite; -@@ -398,14 +398,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - u32 next_write_location; - u32 old_write; - u64 prev_indices = 
0; -- unsigned long flags; -+ unsigned long flags = 0; - - for (i = 0; i < kv_count; i++) - totalbytes_towrite += kv_list[i].iov_len; - - totalbytes_towrite += sizeof(u64); - -- spin_lock_irqsave(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_lock_irqsave(&outring_info->ring_lock, flags); - - hv_get_ringbuffer_availbytes(outring_info, - &bytes_avail_toread, -@@ -416,7 +417,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - /* Otherwise, the next time around, we think the ring buffer */ - /* is empty since the read index == write index */ - if (bytes_avail_towrite <= totalbytes_towrite) { -- spin_unlock_irqrestore(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_unlock_irqrestore(&outring_info->ring_lock, flags); - return -EAGAIN; - } - -@@ -447,7 +449,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - hv_set_next_write_location(outring_info, next_write_location); - - -- spin_unlock_irqrestore(&outring_info->ring_lock, flags); -+ if (lock) -+ spin_unlock_irqrestore(&outring_info->ring_lock, flags); - - *signal = hv_need_to_signal(old_write, outring_info); - return 0; -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 993318a6d147..6c9695ef757e 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -813,8 +813,24 @@ struct vmbus_channel { - * signaling control. - */ - enum hv_signal_policy signal_policy; -+ /* -+ * On the channel send side, many of the VMBUS -+ * device drivers explicity serialize access to the -+ * outgoing ring buffer. Give more control to the -+ * VMBUS device drivers in terms how to serialize -+ * accesss to the outgoing ring buffer. -+ * The default behavior will be to aquire the -+ * ring lock to preserve the current behavior. 
-+ */ -+ bool acquire_ring_lock; -+ - }; - -+static inline void set_channel_lock_state(struct vmbus_channel *c, bool state) -+{ -+ c->acquire_ring_lock = state; -+} -+ - static inline bool is_hvsock_channel(const struct vmbus_channel *c) - { - return !!(c->offermsg.offer.chn_flags & --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch b/kernel/patches-4.4.x/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch deleted file mode 100644 index 65d23ab55..000000000 --- a/kernel/patches-4.4.x/0034-Drivers-hv-vmbus-avoid-wait_for_completion-on-crash.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 62a87e03ec7cc707ac0df489b2a5b36b015441af Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 26 Feb 2016 15:13:16 -0800 -Subject: [PATCH 34/44] Drivers: hv: vmbus: avoid wait_for_completion() on - crash -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -wait_for_completion() may sleep, it enables interrupts and this -is something we really want to avoid on crashes because interrupt -handlers can cause other crashes. Switch to the recently introduced -vmbus_wait_for_unload() doing busy wait instead. - -Reported-by: Radim Krcmar -Signed-off-by: Vitaly Kuznetsov -Reviewed-by: Radim Kr.mĂ¡ -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit 75ff3a8a9168df750b5bd0589e897a6c0517a9f1) ---- - drivers/hv/channel_mgmt.c | 4 ++-- - drivers/hv/connection.c | 2 +- - drivers/hv/hyperv_vmbus.h | 2 +- - drivers/hv/vmbus_drv.c | 4 ++-- - 4 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index b40f429aaa13..f70e35278b94 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -641,7 +641,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) - complete(&vmbus_connection.unload_event); - } - --void vmbus_initiate_unload(void) -+void vmbus_initiate_unload(bool crash) - { - struct vmbus_channel_message_header hdr; - -@@ -658,7 +658,7 @@ void vmbus_initiate_unload(void) - * vmbus_initiate_unload() is also called on crash and the crash can be - * happening in an interrupt context, where scheduling is impossible. - */ -- if (!in_interrupt()) -+ if (!crash) - wait_for_completion(&vmbus_connection.unload_event); - else - vmbus_wait_for_unload(); -diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c -index 4a1679d54c1f..46cc9c639775 100644 ---- a/drivers/hv/connection.c -+++ b/drivers/hv/connection.c -@@ -233,7 +233,7 @@ void vmbus_disconnect(void) - /* - * First send the unload request to the host. 
- */ -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(false); - - if (vmbus_connection.work_queue) { - drain_workqueue(vmbus_connection.work_queue); -diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 9976774d6abc..4aadad6a0cde 100644 ---- a/drivers/hv/hyperv_vmbus.h -+++ b/drivers/hv/hyperv_vmbus.h -@@ -756,7 +756,7 @@ void hv_vss_onchannelcallback(void *); - int hv_fcopy_init(struct hv_util_service *); - void hv_fcopy_deinit(void); - void hv_fcopy_onchannelcallback(void *); --void vmbus_initiate_unload(void); -+void vmbus_initiate_unload(bool crash); - - static inline void hv_poll_channel(struct vmbus_channel *channel, - void (*cb)(void *)) -diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index a220efc297c4..d9801855ad4e 100644 ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -1276,7 +1276,7 @@ static void hv_kexec_handler(void) - int cpu; - - hv_synic_clockevents_cleanup(); -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(false); - for_each_online_cpu(cpu) - smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); - hv_cleanup(false); -@@ -1284,7 +1284,7 @@ static void hv_kexec_handler(void) - - static void hv_crash_handler(struct pt_regs *regs) - { -- vmbus_initiate_unload(); -+ vmbus_initiate_unload(true); - /* - * In crash handler we can't schedule synic cleanup for all CPUs, - * doing the cleanup for current CPU only. 
This should be sufficient --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch b/kernel/patches-4.4.x/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch deleted file mode 100644 index d9dd920ff..000000000 --- a/kernel/patches-4.4.x/0035-Drivers-hv-vmbus-avoid-unneeded-compiler-optimizatio.patch +++ /dev/null @@ -1,39 +0,0 @@ -From c3fdfcfb86581b96bf0e0afc501be497fe84303f Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 26 Feb 2016 15:13:18 -0800 -Subject: [PATCH 35/44] Drivers: hv: vmbus: avoid unneeded compiler - optimizations in vmbus_wait_for_unload() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Message header is modified by the hypervisor and we read it in a loop, -we need to prevent compilers from optimizing accesses. There are no such -optimizations at this moment, this is just a future proof. - -Suggested-by: Radim Krcmar -Signed-off-by: Vitaly Kuznetsov -Reviewed-by: Radim Kr.mĂ¡ -Signed-off-by: K. Y. 
Srinivasan -Signed-off-by: Greg Kroah-Hartman -(cherry picked from commit d452ab7b4c65dfcaee88a0d6866eeeb98a3d1884) ---- - drivers/hv/channel_mgmt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index f70e35278b94..c892db5df665 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -605,7 +605,7 @@ static void vmbus_wait_for_unload(void) - bool unloaded = false; - - while (1) { -- if (msg->header.message_type == HVMSG_NONE) { -+ if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) { - mdelay(10); - continue; - } --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0036-kcm-Kernel-Connection-Multiplexor-module.patch b/kernel/patches-4.4.x/0036-kcm-Kernel-Connection-Multiplexor-module.patch deleted file mode 100644 index b030b9a9e..000000000 --- a/kernel/patches-4.4.x/0036-kcm-Kernel-Connection-Multiplexor-module.patch +++ /dev/null @@ -1,2312 +0,0 @@ -From 1c021b50e0614cb9ac189ba3a15e31ad4c2c57fb Mon Sep 17 00:00:00 2001 -From: Tom Herbert -Date: Mon, 7 Mar 2016 14:11:06 -0800 -Subject: [PATCH 36/44] kcm: Kernel Connection Multiplexor module - -This module implements the Kernel Connection Multiplexor. - -Kernel Connection Multiplexor (KCM) is a facility that provides a -message based interface over TCP for generic application protocols. -With KCM an application can efficiently send and receive application -protocol messages over TCP using datagram sockets. - -For more information see the included Documentation/networking/kcm.txt - -Signed-off-by: Tom Herbert -Signed-off-by: David S. 
Miller -(cherry picked from commit ab7ac4eb9832e32a09f4e8042705484d2fb0aad3) ---- - include/linux/socket.h | 6 +- - include/net/kcm.h | 125 +++ - include/uapi/linux/kcm.h | 39 + - net/Kconfig | 1 + - net/Makefile | 1 + - net/kcm/Kconfig | 9 + - net/kcm/Makefile | 3 + - net/kcm/kcmsock.c | 2015 ++++++++++++++++++++++++++++++++++++++++++++++ - 8 files changed, 2198 insertions(+), 1 deletion(-) - create mode 100644 include/net/kcm.h - create mode 100644 include/uapi/linux/kcm.h - create mode 100644 net/kcm/Kconfig - create mode 100644 net/kcm/Makefile - create mode 100644 net/kcm/kcmsock.c - -diff --git a/include/linux/socket.h b/include/linux/socket.h -index 5bf59c8493b7..4e1ea53aa329 100644 ---- a/include/linux/socket.h -+++ b/include/linux/socket.h -@@ -200,7 +200,9 @@ struct ucred { - #define AF_ALG 38 /* Algorithm sockets */ - #define AF_NFC 39 /* NFC sockets */ - #define AF_VSOCK 40 /* vSockets */ --#define AF_MAX 41 /* For now.. */ -+#define AF_KCM 41 /* Kernel Connection Multiplexor*/ -+ -+#define AF_MAX 42 /* For now.. */ - - /* Protocol families, same as address families. */ - #define PF_UNSPEC AF_UNSPEC -@@ -246,6 +248,7 @@ struct ucred { - #define PF_ALG AF_ALG - #define PF_NFC AF_NFC - #define PF_VSOCK AF_VSOCK -+#define PF_KCM AF_KCM - #define PF_MAX AF_MAX - - /* Maximum queue length specifiable by listen. */ -@@ -322,6 +325,7 @@ struct ucred { - #define SOL_CAIF 278 - #define SOL_ALG 279 - #define SOL_NFC 280 -+#define SOL_KCM 281 - - /* IPX options */ - #define IPX_TYPE 1 -diff --git a/include/net/kcm.h b/include/net/kcm.h -new file mode 100644 -index 000000000000..1bcae39070ec ---- /dev/null -+++ b/include/net/kcm.h -@@ -0,0 +1,125 @@ -+/* -+ * Kernel Connection Multiplexor -+ * -+ * Copyright (c) 2016 Tom Herbert -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation. 
-+ */ -+ -+#ifndef __NET_KCM_H_ -+#define __NET_KCM_H_ -+ -+#include -+#include -+#include -+ -+extern unsigned int kcm_net_id; -+ -+struct kcm_tx_msg { -+ unsigned int sent; -+ unsigned int fragidx; -+ unsigned int frag_offset; -+ unsigned int msg_flags; -+ struct sk_buff *frag_skb; -+ struct sk_buff *last_skb; -+}; -+ -+struct kcm_rx_msg { -+ int full_len; -+ int accum_len; -+ int offset; -+}; -+ -+/* Socket structure for KCM client sockets */ -+struct kcm_sock { -+ struct sock sk; -+ struct kcm_mux *mux; -+ struct list_head kcm_sock_list; -+ int index; -+ u32 done : 1; -+ struct work_struct done_work; -+ -+ /* Transmit */ -+ struct kcm_psock *tx_psock; -+ struct work_struct tx_work; -+ struct list_head wait_psock_list; -+ struct sk_buff *seq_skb; -+ -+ /* Don't use bit fields here, these are set under different locks */ -+ bool tx_wait; -+ bool tx_wait_more; -+ -+ /* Receive */ -+ struct kcm_psock *rx_psock; -+ struct list_head wait_rx_list; /* KCMs waiting for receiving */ -+ bool rx_wait; -+ u32 rx_disabled : 1; -+}; -+ -+struct bpf_prog; -+ -+/* Structure for an attached lower socket */ -+struct kcm_psock { -+ struct sock *sk; -+ struct kcm_mux *mux; -+ int index; -+ -+ u32 tx_stopped : 1; -+ u32 rx_stopped : 1; -+ u32 done : 1; -+ u32 unattaching : 1; -+ -+ void (*save_state_change)(struct sock *sk); -+ void (*save_data_ready)(struct sock *sk); -+ void (*save_write_space)(struct sock *sk); -+ -+ struct list_head psock_list; -+ -+ /* Receive */ -+ struct sk_buff *rx_skb_head; -+ struct sk_buff **rx_skb_nextp; -+ struct sk_buff *ready_rx_msg; -+ struct list_head psock_ready_list; -+ struct work_struct rx_work; -+ struct delayed_work rx_delayed_work; -+ struct bpf_prog *bpf_prog; -+ struct kcm_sock *rx_kcm; -+ -+ /* Transmit */ -+ struct kcm_sock *tx_kcm; -+ struct list_head psock_avail_list; -+}; -+ -+/* Per net MUX list */ -+struct kcm_net { -+ struct mutex mutex; -+ struct list_head mux_list; -+ int count; -+}; -+ -+/* Structure for a MUX */ -+struct kcm_mux 
{ -+ struct list_head kcm_mux_list; -+ struct rcu_head rcu; -+ struct kcm_net *knet; -+ -+ struct list_head kcm_socks; /* All KCM sockets on MUX */ -+ int kcm_socks_cnt; /* Total KCM socket count for MUX */ -+ struct list_head psocks; /* List of all psocks on MUX */ -+ int psocks_cnt; /* Total attached sockets */ -+ -+ /* Receive */ -+ spinlock_t rx_lock ____cacheline_aligned_in_smp; -+ struct list_head kcm_rx_waiters; /* KCMs waiting for receiving */ -+ struct list_head psocks_ready; /* List of psocks with a msg ready */ -+ struct sk_buff_head rx_hold_queue; -+ -+ /* Transmit */ -+ spinlock_t lock ____cacheline_aligned_in_smp; /* TX and mux locking */ -+ struct list_head psocks_avail; /* List of available psocks */ -+ struct list_head kcm_tx_waiters; /* KCMs waiting for a TX psock */ -+}; -+ -+#endif /* __NET_KCM_H_ */ -diff --git a/include/uapi/linux/kcm.h b/include/uapi/linux/kcm.h -new file mode 100644 -index 000000000000..d72350fd048d ---- /dev/null -+++ b/include/uapi/linux/kcm.h -@@ -0,0 +1,39 @@ -+/* -+ * Kernel Connection Multiplexor -+ * -+ * Copyright (c) 2016 Tom Herbert -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 -+ * as published by the Free Software Foundation. -+ * -+ * User API to clone KCM sockets and attach transport socket to a KCM -+ * multiplexor. 
-+ */ -+ -+#ifndef KCM_KERNEL_H -+#define KCM_KERNEL_H -+ -+struct kcm_attach { -+ int fd; -+ int bpf_fd; -+}; -+ -+struct kcm_unattach { -+ int fd; -+}; -+ -+struct kcm_clone { -+ int fd; -+}; -+ -+#define SIOCKCMATTACH (SIOCPROTOPRIVATE + 0) -+#define SIOCKCMUNATTACH (SIOCPROTOPRIVATE + 1) -+#define SIOCKCMCLONE (SIOCPROTOPRIVATE + 2) -+ -+#define KCMPROTO_CONNECTED 0 -+ -+/* Socket options */ -+#define KCM_RECV_DISABLE 1 -+ -+#endif -diff --git a/net/Kconfig b/net/Kconfig -index 127da94ae25e..b8439e61f9a0 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -351,6 +351,7 @@ source "net/can/Kconfig" - source "net/irda/Kconfig" - source "net/bluetooth/Kconfig" - source "net/rxrpc/Kconfig" -+source "net/kcm/Kconfig" - - config FIB_RULES - bool -diff --git a/net/Makefile b/net/Makefile -index a5d04098dfce..81d14119eab5 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/ - obj-$(CONFIG_BT) += bluetooth/ - obj-$(CONFIG_SUNRPC) += sunrpc/ - obj-$(CONFIG_AF_RXRPC) += rxrpc/ -+obj-$(CONFIG_AF_KCM) += kcm/ - obj-$(CONFIG_ATM) += atm/ - obj-$(CONFIG_L2TP) += l2tp/ - obj-$(CONFIG_DECNET) += decnet/ -diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig -new file mode 100644 -index 000000000000..4f28332c7f15 ---- /dev/null -+++ b/net/kcm/Kconfig -@@ -0,0 +1,9 @@ -+ -+config AF_KCM -+ tristate "KCM sockets" -+ depends on INET -+ select BPF_SYSCALL -+ ---help--- -+ KCM (Kernel Connection Multiplexor) sockets provide a method -+ for multiplexing messages of a message based application -+ protocol over kernel connectons (e.g. TCP connections). 
-diff --git a/net/kcm/Makefile b/net/kcm/Makefile -new file mode 100644 -index 000000000000..cb525f7c5a13 ---- /dev/null -+++ b/net/kcm/Makefile -@@ -0,0 +1,3 @@ -+obj-$(CONFIG_AF_KCM) += kcm.o -+ -+kcm-y := kcmsock.o -diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c -new file mode 100644 -index 000000000000..649d246c6799 ---- /dev/null -+++ b/net/kcm/kcmsock.c -@@ -0,0 +1,2015 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+unsigned int kcm_net_id; -+ -+static struct kmem_cache *kcm_psockp __read_mostly; -+static struct kmem_cache *kcm_muxp __read_mostly; -+static struct workqueue_struct *kcm_wq; -+ -+static inline struct kcm_sock *kcm_sk(const struct sock *sk) -+{ -+ return (struct kcm_sock *)sk; -+} -+ -+static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) -+{ -+ return (struct kcm_tx_msg *)skb->cb; -+} -+ -+static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -+{ -+ return (struct kcm_rx_msg *)((void *)skb->cb + -+ offsetof(struct qdisc_skb_cb, data)); -+} -+ -+static void report_csk_error(struct sock *csk, int err) -+{ -+ csk->sk_err = EPIPE; -+ csk->sk_error_report(csk); -+} -+ -+/* Callback lock held */ -+static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, -+ struct sk_buff *skb) -+{ -+ struct sock *csk = psock->sk; -+ -+ /* Unrecoverable error in receive */ -+ -+ if (psock->rx_stopped) -+ return; -+ -+ psock->rx_stopped = 1; -+ -+ /* Report an error on the lower socket */ -+ report_csk_error(csk, err); -+} -+ -+static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, -+ bool wakeup_kcm) -+{ -+ struct sock *csk = psock->sk; -+ struct kcm_mux *mux = psock->mux; -+ -+ /* Unrecoverable error in transmit */ -+ -+ spin_lock_bh(&mux->lock); -+ -+ if (psock->tx_stopped) { -+ spin_unlock_bh(&mux->lock); -+ return; -+ } -+ -+ psock->tx_stopped 
= 1; -+ -+ if (!psock->tx_kcm) { -+ /* Take off psocks_avail list */ -+ list_del(&psock->psock_avail_list); -+ } else if (wakeup_kcm) { -+ /* In this case psock is being aborted while outside of -+ * write_msgs and psock is reserved. Schedule tx_work -+ * to handle the failure there. Need to commit tx_stopped -+ * before queuing work. -+ */ -+ smp_mb(); -+ -+ queue_work(kcm_wq, &psock->tx_kcm->tx_work); -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+ /* Report error on lower socket */ -+ report_csk_error(csk, err); -+} -+ -+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -+ -+/* KCM is ready to receive messages on its queue-- either the KCM is new or -+ * has become unblocked after being blocked on full socket buffer. Queue any -+ * pending ready messages on a psock. RX mux lock held. -+ */ -+static void kcm_rcv_ready(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ struct sk_buff *skb; -+ -+ if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled)) -+ return; -+ -+ while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) { -+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { -+ /* Assuming buffer limit has been reached */ -+ skb_queue_head(&mux->rx_hold_queue, skb); -+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); -+ return; -+ } -+ } -+ -+ while (!list_empty(&mux->psocks_ready)) { -+ psock = list_first_entry(&mux->psocks_ready, struct kcm_psock, -+ psock_ready_list); -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) { -+ /* Assuming buffer limit has been reached */ -+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk)); -+ return; -+ } -+ -+ /* Consumed the ready message on the psock. Schedule rx_work to -+ * get more messages. 
-+ */ -+ list_del(&psock->psock_ready_list); -+ psock->ready_rx_msg = NULL; -+ -+ /* Commit clearing of ready_rx_msg for queuing work */ -+ smp_mb(); -+ -+ queue_work(kcm_wq, &psock->rx_work); -+ } -+ -+ /* Buffer limit is okay now, add to ready list */ -+ list_add_tail(&kcm->wait_rx_list, -+ &kcm->mux->kcm_rx_waiters); -+ kcm->rx_wait = true; -+} -+ -+static void kcm_rfree(struct sk_buff *skb) -+{ -+ struct sock *sk = skb->sk; -+ struct kcm_sock *kcm = kcm_sk(sk); -+ struct kcm_mux *mux = kcm->mux; -+ unsigned int len = skb->truesize; -+ -+ sk_mem_uncharge(sk, len); -+ atomic_sub(len, &sk->sk_rmem_alloc); -+ -+ /* For reading rx_wait and rx_psock without holding lock */ -+ smp_mb__after_atomic(); -+ -+ if (!kcm->rx_wait && !kcm->rx_psock && -+ sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { -+ spin_lock_bh(&mux->rx_lock); -+ kcm_rcv_ready(kcm); -+ spin_unlock_bh(&mux->rx_lock); -+ } -+} -+ -+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) -+{ -+ struct sk_buff_head *list = &sk->sk_receive_queue; -+ -+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) -+ return -ENOMEM; -+ -+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) -+ return -ENOBUFS; -+ -+ skb->dev = NULL; -+ -+ skb_orphan(skb); -+ skb->sk = sk; -+ skb->destructor = kcm_rfree; -+ atomic_add(skb->truesize, &sk->sk_rmem_alloc); -+ sk_mem_charge(sk, skb->truesize); -+ -+ skb_queue_tail(list, skb); -+ -+ if (!sock_flag(sk, SOCK_DEAD)) -+ sk->sk_data_ready(sk); -+ -+ return 0; -+} -+ -+/* Requeue received messages for a kcm socket to other kcm sockets. This is -+ * called with a kcm socket is receive disabled. -+ * RX mux lock held. 
-+ */ -+static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) -+{ -+ struct sk_buff *skb; -+ struct kcm_sock *kcm; -+ -+ while ((skb = __skb_dequeue(head))) { -+ /* Reset destructor to avoid calling kcm_rcv_ready */ -+ skb->destructor = sock_rfree; -+ skb_orphan(skb); -+try_again: -+ if (list_empty(&mux->kcm_rx_waiters)) { -+ skb_queue_tail(&mux->rx_hold_queue, skb); -+ continue; -+ } -+ -+ kcm = list_first_entry(&mux->kcm_rx_waiters, -+ struct kcm_sock, wait_rx_list); -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) { -+ /* Should mean socket buffer full */ -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ -+ /* Commit rx_wait to read in kcm_free */ -+ smp_wmb(); -+ -+ goto try_again; -+ } -+ } -+} -+ -+/* Lower sock lock held */ -+static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, -+ struct sk_buff *head) -+{ -+ struct kcm_mux *mux = psock->mux; -+ struct kcm_sock *kcm; -+ -+ WARN_ON(psock->ready_rx_msg); -+ -+ if (psock->rx_kcm) -+ return psock->rx_kcm; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ if (psock->rx_kcm) { -+ spin_unlock_bh(&mux->rx_lock); -+ return psock->rx_kcm; -+ } -+ -+ if (list_empty(&mux->kcm_rx_waiters)) { -+ psock->ready_rx_msg = head; -+ list_add_tail(&psock->psock_ready_list, -+ &mux->psocks_ready); -+ spin_unlock_bh(&mux->rx_lock); -+ return NULL; -+ } -+ -+ kcm = list_first_entry(&mux->kcm_rx_waiters, -+ struct kcm_sock, wait_rx_list); -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ -+ psock->rx_kcm = kcm; -+ kcm->rx_psock = psock; -+ -+ spin_unlock_bh(&mux->rx_lock); -+ -+ return kcm; -+} -+ -+static void kcm_done(struct kcm_sock *kcm); -+ -+static void kcm_done_work(struct work_struct *w) -+{ -+ kcm_done(container_of(w, struct kcm_sock, done_work)); -+} -+ -+/* Lower sock held */ -+static void unreserve_rx_kcm(struct kcm_psock *psock, -+ bool rcv_ready) -+{ -+ struct kcm_sock *kcm = psock->rx_kcm; -+ struct kcm_mux *mux = psock->mux; -+ -+ if (!kcm) -+ return; -+ -+ 
spin_lock_bh(&mux->rx_lock); -+ -+ psock->rx_kcm = NULL; -+ kcm->rx_psock = NULL; -+ -+ /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with -+ * kcm_rfree -+ */ -+ smp_mb(); -+ -+ if (unlikely(kcm->done)) { -+ spin_unlock_bh(&mux->rx_lock); -+ -+ /* Need to run kcm_done in a task since we need to qcquire -+ * callback locks which may already be held here. -+ */ -+ INIT_WORK(&kcm->done_work, kcm_done_work); -+ schedule_work(&kcm->done_work); -+ return; -+ } -+ -+ if (unlikely(kcm->rx_disabled)) { -+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); -+ } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) { -+ /* Check for degenerative race with rx_wait that all -+ * data was dequeued (accounted for in kcm_rfree). -+ */ -+ kcm_rcv_ready(kcm); -+ } -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+/* Macro to invoke filter function. */ -+#define KCM_RUN_FILTER(prog, ctx) \ -+ (*prog->bpf_func)(ctx, prog->insnsi) -+ -+/* Lower socket lock held */ -+static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, -+ unsigned int orig_offset, size_t orig_len) -+{ -+ struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; -+ struct kcm_rx_msg *rxm; -+ struct kcm_sock *kcm; -+ struct sk_buff *head, *skb; -+ size_t eaten = 0, cand_len; -+ ssize_t extra; -+ int err; -+ bool cloned_orig = false; -+ -+ if (psock->ready_rx_msg) -+ return 0; -+ -+ head = psock->rx_skb_head; -+ if (head) { -+ /* Message already in progress */ -+ -+ if (unlikely(orig_offset)) { -+ /* Getting data with a non-zero offset when a message is -+ * in progress is not expected. If it does happen, we -+ * need to clone and pull since we can't deal with -+ * offsets in the skbs for a message expect in the head. 
-+ */ -+ orig_skb = skb_clone(orig_skb, GFP_ATOMIC); -+ if (!orig_skb) { -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ if (!pskb_pull(orig_skb, orig_offset)) { -+ kfree_skb(orig_skb); -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ cloned_orig = true; -+ orig_offset = 0; -+ } -+ -+ if (!psock->rx_skb_nextp) { -+ /* We are going to append to the frags_list of head. -+ * Need to unshare the frag_list. -+ */ -+ err = skb_unclone(head, GFP_ATOMIC); -+ if (err) { -+ desc->error = err; -+ return 0; -+ } -+ -+ if (unlikely(skb_shinfo(head)->frag_list)) { -+ /* We can't append to an sk_buff that already -+ * has a frag_list. We create a new head, point -+ * the frag_list of that to the old head, and -+ * then are able to use the old head->next for -+ * appending to the message. -+ */ -+ if (WARN_ON(head->next)) { -+ desc->error = -EINVAL; -+ return 0; -+ } -+ -+ skb = alloc_skb(0, GFP_ATOMIC); -+ if (!skb) { -+ desc->error = -ENOMEM; -+ return 0; -+ } -+ skb->len = head->len; -+ skb->data_len = head->len; -+ skb->truesize = head->truesize; -+ *kcm_rx_msg(skb) = *kcm_rx_msg(head); -+ psock->rx_skb_nextp = &head->next; -+ skb_shinfo(skb)->frag_list = head; -+ psock->rx_skb_head = skb; -+ head = skb; -+ } else { -+ psock->rx_skb_nextp = -+ &skb_shinfo(head)->frag_list; -+ } -+ } -+ } -+ -+ while (eaten < orig_len) { -+ /* Always clone since we will consume something */ -+ skb = skb_clone(orig_skb, GFP_ATOMIC); -+ if (!skb) { -+ desc->error = -ENOMEM; -+ break; -+ } -+ -+ cand_len = orig_len - eaten; -+ -+ head = psock->rx_skb_head; -+ if (!head) { -+ head = skb; -+ psock->rx_skb_head = head; -+ /* Will set rx_skb_nextp on next packet if needed */ -+ psock->rx_skb_nextp = NULL; -+ rxm = kcm_rx_msg(head); -+ memset(rxm, 0, sizeof(*rxm)); -+ rxm->offset = orig_offset + eaten; -+ } else { -+ /* Unclone since we may be appending to an skb that we -+ * already share a frag_list with. 
-+ */ -+ err = skb_unclone(skb, GFP_ATOMIC); -+ if (err) { -+ desc->error = err; -+ break; -+ } -+ -+ rxm = kcm_rx_msg(head); -+ *psock->rx_skb_nextp = skb; -+ psock->rx_skb_nextp = &skb->next; -+ head->data_len += skb->len; -+ head->len += skb->len; -+ head->truesize += skb->truesize; -+ } -+ -+ if (!rxm->full_len) { -+ ssize_t len; -+ -+ len = KCM_RUN_FILTER(psock->bpf_prog, head); -+ -+ if (!len) { -+ /* Need more header to determine length */ -+ rxm->accum_len += cand_len; -+ eaten += cand_len; -+ WARN_ON(eaten != orig_len); -+ break; -+ } else if (len <= (ssize_t)head->len - -+ skb->len - rxm->offset) { -+ /* Length must be into new skb (and also -+ * greater than zero) -+ */ -+ desc->error = -EPROTO; -+ psock->rx_skb_head = NULL; -+ kcm_abort_rx_psock(psock, EPROTO, head); -+ break; -+ } -+ -+ rxm->full_len = len; -+ } -+ -+ extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; -+ -+ if (extra < 0) { -+ /* Message not complete yet. */ -+ rxm->accum_len += cand_len; -+ eaten += cand_len; -+ WARN_ON(eaten != orig_len); -+ break; -+ } -+ -+ /* Positive extra indicates ore bytes than needed for the -+ * message -+ */ -+ -+ WARN_ON(extra > cand_len); -+ -+ eaten += (cand_len - extra); -+ -+ /* Hurray, we have a new message! */ -+ psock->rx_skb_head = NULL; -+ -+try_queue: -+ kcm = reserve_rx_kcm(psock, head); -+ if (!kcm) { -+ /* Unable to reserve a KCM, message is held in psock. 
*/ -+ break; -+ } -+ -+ if (kcm_queue_rcv_skb(&kcm->sk, head)) { -+ /* Should mean socket buffer full */ -+ unreserve_rx_kcm(psock, false); -+ goto try_queue; -+ } -+ } -+ -+ if (cloned_orig) -+ kfree_skb(orig_skb); -+ -+ return eaten; -+} -+ -+/* Called with lock held on lower socket */ -+static int psock_tcp_read_sock(struct kcm_psock *psock) -+{ -+ read_descriptor_t desc; -+ -+ desc.arg.data = psock; -+ desc.error = 0; -+ desc.count = 1; /* give more than one skb per call */ -+ -+ /* sk should be locked here, so okay to do tcp_read_sock */ -+ tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); -+ -+ unreserve_rx_kcm(psock, true); -+ -+ return desc.error; -+} -+ -+/* Lower sock lock held */ -+static void psock_tcp_data_ready(struct sock *sk) -+{ -+ struct kcm_psock *psock; -+ -+ read_lock_bh(&sk->sk_callback_lock); -+ -+ psock = (struct kcm_psock *)sk->sk_user_data; -+ if (unlikely(!psock || psock->rx_stopped)) -+ goto out; -+ -+ if (psock->ready_rx_msg) -+ goto out; -+ -+ if (psock_tcp_read_sock(psock) == -ENOMEM) -+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); -+ -+out: -+ read_unlock_bh(&sk->sk_callback_lock); -+} -+ -+static void do_psock_rx_work(struct kcm_psock *psock) -+{ -+ read_descriptor_t rd_desc; -+ struct sock *csk = psock->sk; -+ -+ /* We need the read lock to synchronize with psock_tcp_data_ready. We -+ * need the socket lock for calling tcp_read_sock. 
-+ */ -+ lock_sock(csk); -+ read_lock_bh(&csk->sk_callback_lock); -+ -+ if (unlikely(csk->sk_user_data != psock)) -+ goto out; -+ -+ if (unlikely(psock->rx_stopped)) -+ goto out; -+ -+ if (psock->ready_rx_msg) -+ goto out; -+ -+ rd_desc.arg.data = psock; -+ -+ if (psock_tcp_read_sock(psock) == -ENOMEM) -+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); -+ -+out: -+ read_unlock_bh(&csk->sk_callback_lock); -+ release_sock(csk); -+} -+ -+static void psock_rx_work(struct work_struct *w) -+{ -+ do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); -+} -+ -+static void psock_rx_delayed_work(struct work_struct *w) -+{ -+ do_psock_rx_work(container_of(w, struct kcm_psock, -+ rx_delayed_work.work)); -+} -+ -+static void psock_tcp_state_change(struct sock *sk) -+{ -+ /* TCP only does a POLLIN for a half close. Do a POLLHUP here -+ * since application will normally not poll with POLLIN -+ * on the TCP sockets. -+ */ -+ -+ report_csk_error(sk, EPIPE); -+} -+ -+static void psock_tcp_write_space(struct sock *sk) -+{ -+ struct kcm_psock *psock; -+ struct kcm_mux *mux; -+ struct kcm_sock *kcm; -+ -+ read_lock_bh(&sk->sk_callback_lock); -+ -+ psock = (struct kcm_psock *)sk->sk_user_data; -+ if (unlikely(!psock)) -+ goto out; -+ -+ mux = psock->mux; -+ -+ spin_lock_bh(&mux->lock); -+ -+ /* Check if the socket is reserved so someone is waiting for sending. */ -+ kcm = psock->tx_kcm; -+ if (kcm) -+ queue_work(kcm_wq, &kcm->tx_work); -+ -+ spin_unlock_bh(&mux->lock); -+out: -+ read_unlock_bh(&sk->sk_callback_lock); -+} -+ -+static void unreserve_psock(struct kcm_sock *kcm); -+ -+/* kcm sock is locked. 
*/ -+static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ -+ psock = kcm->tx_psock; -+ -+ smp_rmb(); /* Must read tx_psock before tx_wait */ -+ -+ if (psock) { -+ WARN_ON(kcm->tx_wait); -+ if (unlikely(psock->tx_stopped)) -+ unreserve_psock(kcm); -+ else -+ return kcm->tx_psock; -+ } -+ -+ spin_lock_bh(&mux->lock); -+ -+ /* Check again under lock to see if psock was reserved for this -+ * psock via psock_unreserve. -+ */ -+ psock = kcm->tx_psock; -+ if (unlikely(psock)) { -+ WARN_ON(kcm->tx_wait); -+ spin_unlock_bh(&mux->lock); -+ return kcm->tx_psock; -+ } -+ -+ if (!list_empty(&mux->psocks_avail)) { -+ psock = list_first_entry(&mux->psocks_avail, -+ struct kcm_psock, -+ psock_avail_list); -+ list_del(&psock->psock_avail_list); -+ if (kcm->tx_wait) { -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ } -+ kcm->tx_psock = psock; -+ psock->tx_kcm = kcm; -+ } else if (!kcm->tx_wait) { -+ list_add_tail(&kcm->wait_psock_list, -+ &mux->kcm_tx_waiters); -+ kcm->tx_wait = true; -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+ return psock; -+} -+ -+/* mux lock held */ -+static void psock_now_avail(struct kcm_psock *psock) -+{ -+ struct kcm_mux *mux = psock->mux; -+ struct kcm_sock *kcm; -+ -+ if (list_empty(&mux->kcm_tx_waiters)) { -+ list_add_tail(&psock->psock_avail_list, -+ &mux->psocks_avail); -+ } else { -+ kcm = list_first_entry(&mux->kcm_tx_waiters, -+ struct kcm_sock, -+ wait_psock_list); -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ psock->tx_kcm = kcm; -+ -+ /* Commit before changing tx_psock since that is read in -+ * reserve_psock before queuing work. -+ */ -+ smp_mb(); -+ -+ kcm->tx_psock = psock; -+ queue_work(kcm_wq, &kcm->tx_work); -+ } -+} -+ -+/* kcm sock is locked. 
*/ -+static void unreserve_psock(struct kcm_sock *kcm) -+{ -+ struct kcm_psock *psock; -+ struct kcm_mux *mux = kcm->mux; -+ -+ spin_lock_bh(&mux->lock); -+ -+ psock = kcm->tx_psock; -+ -+ if (WARN_ON(!psock)) { -+ spin_unlock_bh(&mux->lock); -+ return; -+ } -+ -+ smp_rmb(); /* Read tx_psock before tx_wait */ -+ -+ WARN_ON(kcm->tx_wait); -+ -+ kcm->tx_psock = NULL; -+ psock->tx_kcm = NULL; -+ -+ if (unlikely(psock->tx_stopped)) { -+ if (psock->done) { -+ /* Deferred free */ -+ list_del(&psock->psock_list); -+ mux->psocks_cnt--; -+ sock_put(psock->sk); -+ fput(psock->sk->sk_socket->file); -+ kmem_cache_free(kcm_psockp, psock); -+ } -+ -+ /* Don't put back on available list */ -+ -+ spin_unlock_bh(&mux->lock); -+ -+ return; -+ } -+ -+ psock_now_avail(psock); -+ -+ spin_unlock_bh(&mux->lock); -+} -+ -+/* Write any messages ready on the kcm socket. Called with kcm sock lock -+ * held. Return bytes actually sent or error. -+ */ -+static int kcm_write_msgs(struct kcm_sock *kcm) -+{ -+ struct sock *sk = &kcm->sk; -+ struct kcm_psock *psock; -+ struct sk_buff *skb, *head; -+ struct kcm_tx_msg *txm; -+ unsigned short fragidx, frag_offset; -+ unsigned int sent, total_sent = 0; -+ int ret = 0; -+ -+ kcm->tx_wait_more = false; -+ psock = kcm->tx_psock; -+ if (unlikely(psock && psock->tx_stopped)) { -+ /* A reserved psock was aborted asynchronously. Unreserve -+ * it and we'll retry the message. 
-+ */ -+ unreserve_psock(kcm); -+ if (skb_queue_empty(&sk->sk_write_queue)) -+ return 0; -+ -+ kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0; -+ -+ } else if (skb_queue_empty(&sk->sk_write_queue)) { -+ return 0; -+ } -+ -+ head = skb_peek(&sk->sk_write_queue); -+ txm = kcm_tx_msg(head); -+ -+ if (txm->sent) { -+ /* Send of first skbuff in queue already in progress */ -+ if (WARN_ON(!psock)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ sent = txm->sent; -+ frag_offset = txm->frag_offset; -+ fragidx = txm->fragidx; -+ skb = txm->frag_skb; -+ -+ goto do_frag; -+ } -+ -+try_again: -+ psock = reserve_psock(kcm); -+ if (!psock) -+ goto out; -+ -+ do { -+ skb = head; -+ txm = kcm_tx_msg(head); -+ sent = 0; -+ -+do_frag_list: -+ if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; -+ fragidx++) { -+ skb_frag_t *frag; -+ -+ frag_offset = 0; -+do_frag: -+ frag = &skb_shinfo(skb)->frags[fragidx]; -+ if (WARN_ON(!frag->size)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ ret = kernel_sendpage(psock->sk->sk_socket, -+ frag->page.p, -+ frag->page_offset + frag_offset, -+ frag->size - frag_offset, -+ MSG_DONTWAIT); -+ if (ret <= 0) { -+ if (ret == -EAGAIN) { -+ /* Save state to try again when there's -+ * write space on the socket -+ */ -+ txm->sent = sent; -+ txm->frag_offset = frag_offset; -+ txm->fragidx = fragidx; -+ txm->frag_skb = skb; -+ -+ ret = 0; -+ goto out; -+ } -+ -+ /* Hard failure in sending message, abort this -+ * psock since it has lost framing -+ * synchonization and retry sending the -+ * message from the beginning. -+ */ -+ kcm_abort_tx_psock(psock, ret ? 
-ret : EPIPE, -+ true); -+ unreserve_psock(kcm); -+ -+ txm->sent = 0; -+ ret = 0; -+ -+ goto try_again; -+ } -+ -+ sent += ret; -+ frag_offset += ret; -+ if (frag_offset < frag->size) { -+ /* Not finished with this frag */ -+ goto do_frag; -+ } -+ } -+ -+ if (skb == head) { -+ if (skb_has_frag_list(skb)) { -+ skb = skb_shinfo(skb)->frag_list; -+ goto do_frag_list; -+ } -+ } else if (skb->next) { -+ skb = skb->next; -+ goto do_frag_list; -+ } -+ -+ /* Successfully sent the whole packet, account for it. */ -+ skb_dequeue(&sk->sk_write_queue); -+ kfree_skb(head); -+ sk->sk_wmem_queued -= sent; -+ total_sent += sent; -+ } while ((head = skb_peek(&sk->sk_write_queue))); -+out: -+ if (!head) { -+ /* Done with all queued messages. */ -+ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); -+ unreserve_psock(kcm); -+ } -+ -+ /* Check if write space is available */ -+ sk->sk_write_space(sk); -+ -+ return total_sent ? : ret; -+} -+ -+static void kcm_tx_work(struct work_struct *w) -+{ -+ struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work); -+ struct sock *sk = &kcm->sk; -+ int err; -+ -+ lock_sock(sk); -+ -+ /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx -+ * aborts -+ */ -+ err = kcm_write_msgs(kcm); -+ if (err < 0) { -+ /* Hard failure in write, report error on KCM socket */ -+ pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err); -+ report_csk_error(&kcm->sk, -err); -+ goto out; -+ } -+ -+ /* Primarily for SOCK_SEQPACKET sockets */ -+ if (likely(sk->sk_socket) && -+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { -+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -+ sk->sk_write_space(sk); -+ } -+ -+out: -+ release_sock(sk); -+} -+ -+static void kcm_push(struct kcm_sock *kcm) -+{ -+ if (kcm->tx_wait_more) -+ kcm_write_msgs(kcm); -+} -+ -+static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -+{ -+ struct sock *sk = sock->sk; -+ struct kcm_sock *kcm = kcm_sk(sk); -+ struct sk_buff *skb = NULL, *head = NULL; -+ size_t 
copy, copied = 0; -+ long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -+ int eor = (sock->type == SOCK_DGRAM) ? -+ !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); -+ int err = -EPIPE; -+ -+ lock_sock(sk); -+ -+ /* Per tcp_sendmsg this should be in poll */ -+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); -+ -+ if (sk->sk_err) -+ goto out_error; -+ -+ if (kcm->seq_skb) { -+ /* Previously opened message */ -+ head = kcm->seq_skb; -+ skb = kcm_tx_msg(head)->last_skb; -+ goto start; -+ } -+ -+ /* Call the sk_stream functions to manage the sndbuf mem. */ -+ if (!sk_stream_memory_free(sk)) { -+ kcm_push(kcm); -+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ } -+ -+ /* New message, alloc head skb */ -+ head = alloc_skb(0, sk->sk_allocation); -+ while (!head) { -+ kcm_push(kcm); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ -+ head = alloc_skb(0, sk->sk_allocation); -+ } -+ -+ skb = head; -+ -+ /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling -+ * csum_and_copy_from_iter from skb_do_copy_data_nocache. 
-+ */ -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ -+start: -+ while (msg_data_left(msg)) { -+ bool merge = true; -+ int i = skb_shinfo(skb)->nr_frags; -+ struct page_frag *pfrag = sk_page_frag(sk); -+ -+ if (!sk_page_frag_refill(sk, pfrag)) -+ goto wait_for_memory; -+ -+ if (!skb_can_coalesce(skb, i, pfrag->page, -+ pfrag->offset)) { -+ if (i == MAX_SKB_FRAGS) { -+ struct sk_buff *tskb; -+ -+ tskb = alloc_skb(0, sk->sk_allocation); -+ if (!tskb) -+ goto wait_for_memory; -+ -+ if (head == skb) -+ skb_shinfo(head)->frag_list = tskb; -+ else -+ skb->next = tskb; -+ -+ skb = tskb; -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ continue; -+ } -+ merge = false; -+ } -+ -+ copy = min_t(int, msg_data_left(msg), -+ pfrag->size - pfrag->offset); -+ -+ if (!sk_wmem_schedule(sk, copy)) -+ goto wait_for_memory; -+ -+ err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, -+ pfrag->page, -+ pfrag->offset, -+ copy); -+ if (err) -+ goto out_error; -+ -+ /* Update the skb. */ -+ if (merge) { -+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); -+ } else { -+ skb_fill_page_desc(skb, i, pfrag->page, -+ pfrag->offset, copy); -+ get_page(pfrag->page); -+ } -+ -+ pfrag->offset += copy; -+ copied += copy; -+ if (head != skb) { -+ head->len += copy; -+ head->data_len += copy; -+ } -+ -+ continue; -+ -+wait_for_memory: -+ kcm_push(kcm); -+ err = sk_stream_wait_memory(sk, &timeo); -+ if (err) -+ goto out_error; -+ } -+ -+ if (eor) { -+ bool not_busy = skb_queue_empty(&sk->sk_write_queue); -+ -+ /* Message complete, queue it on send buffer */ -+ __skb_queue_tail(&sk->sk_write_queue, head); -+ kcm->seq_skb = NULL; -+ -+ if (msg->msg_flags & MSG_BATCH) { -+ kcm->tx_wait_more = true; -+ } else if (kcm->tx_wait_more || not_busy) { -+ err = kcm_write_msgs(kcm); -+ if (err < 0) { -+ /* We got a hard error in write_msgs but have -+ * already queued this message. 
Report an error -+ * in the socket, but don't affect return value -+ * from sendmsg -+ */ -+ pr_warn("KCM: Hard failure on kcm_write_msgs\n"); -+ report_csk_error(&kcm->sk, -err); -+ } -+ } -+ } else { -+ /* Message not complete, save state */ -+partial_message: -+ kcm->seq_skb = head; -+ kcm_tx_msg(head)->last_skb = skb; -+ } -+ -+ release_sock(sk); -+ return copied; -+ -+out_error: -+ kcm_push(kcm); -+ -+ if (copied && sock->type == SOCK_SEQPACKET) { -+ /* Wrote some bytes before encountering an -+ * error, return partial success. -+ */ -+ goto partial_message; -+ } -+ -+ if (head != kcm->seq_skb) -+ kfree_skb(head); -+ -+ err = sk_stream_error(sk, msg->msg_flags, err); -+ -+ /* make sure we wake any epoll edge trigger waiter */ -+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) -+ sk->sk_write_space(sk); -+ -+ release_sock(sk); -+ return err; -+} -+ -+static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, -+ long timeo, int *err) -+{ -+ struct sk_buff *skb; -+ -+ while (!(skb = skb_peek(&sk->sk_receive_queue))) { -+ if (sk->sk_err) { -+ *err = sock_error(sk); -+ return NULL; -+ } -+ -+ if (sock_flag(sk, SOCK_DONE)) -+ return NULL; -+ -+ if ((flags & MSG_DONTWAIT) || !timeo) { -+ *err = -EAGAIN; -+ return NULL; -+ } -+ -+ sk_wait_data(sk, &timeo, NULL); -+ -+ /* Handle signals */ -+ if (signal_pending(current)) { -+ *err = sock_intr_errno(timeo); -+ return NULL; -+ } -+ } -+ -+ return skb; -+} -+ -+static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, -+ size_t len, int flags) -+{ -+ struct sock *sk = sock->sk; -+ int err = 0; -+ long timeo; -+ struct kcm_rx_msg *rxm; -+ int copied = 0; -+ struct sk_buff *skb; -+ -+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -+ -+ lock_sock(sk); -+ -+ skb = kcm_wait_data(sk, flags, timeo, &err); -+ if (!skb) -+ goto out; -+ -+ /* Okay, have a message on the receive queue */ -+ -+ rxm = kcm_rx_msg(skb); -+ -+ if (len > rxm->full_len) -+ len = rxm->full_len; -+ -+ err = 
skb_copy_datagram_msg(skb, rxm->offset, msg, len); -+ if (err < 0) -+ goto out; -+ -+ copied = len; -+ if (likely(!(flags & MSG_PEEK))) { -+ if (copied < rxm->full_len) { -+ if (sock->type == SOCK_DGRAM) { -+ /* Truncated message */ -+ msg->msg_flags |= MSG_TRUNC; -+ goto msg_finished; -+ } -+ rxm->offset += copied; -+ rxm->full_len -= copied; -+ } else { -+msg_finished: -+ /* Finished with message */ -+ msg->msg_flags |= MSG_EOR; -+ skb_unlink(skb, &sk->sk_receive_queue); -+ kfree_skb(skb); -+ } -+ } -+ -+out: -+ release_sock(sk); -+ -+ return copied ? : err; -+} -+ -+/* kcm sock lock held */ -+static void kcm_recv_disable(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ -+ if (kcm->rx_disabled) -+ return; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ kcm->rx_disabled = 1; -+ -+ /* If a psock is reserved we'll do cleanup in unreserve */ -+ if (!kcm->rx_psock) { -+ if (kcm->rx_wait) { -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ } -+ -+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); -+ } -+ -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+/* kcm sock lock held */ -+static void kcm_recv_enable(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ -+ if (!kcm->rx_disabled) -+ return; -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ kcm->rx_disabled = 0; -+ kcm_rcv_ready(kcm); -+ -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+static int kcm_setsockopt(struct socket *sock, int level, int optname, -+ char __user *optval, unsigned int optlen) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ int val, valbool; -+ int err = 0; -+ -+ if (level != SOL_KCM) -+ return -ENOPROTOOPT; -+ -+ if (optlen < sizeof(int)) -+ return -EINVAL; -+ -+ if (get_user(val, (int __user *)optval)) -+ return -EINVAL; -+ -+ valbool = val ? 
1 : 0; -+ -+ switch (optname) { -+ case KCM_RECV_DISABLE: -+ lock_sock(&kcm->sk); -+ if (valbool) -+ kcm_recv_disable(kcm); -+ else -+ kcm_recv_enable(kcm); -+ release_sock(&kcm->sk); -+ break; -+ default: -+ err = -ENOPROTOOPT; -+ } -+ -+ return err; -+} -+ -+static int kcm_getsockopt(struct socket *sock, int level, int optname, -+ char __user *optval, int __user *optlen) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ int val, len; -+ -+ if (level != SOL_KCM) -+ return -ENOPROTOOPT; -+ -+ if (get_user(len, optlen)) -+ return -EFAULT; -+ -+ len = min_t(unsigned int, len, sizeof(int)); -+ if (len < 0) -+ return -EINVAL; -+ -+ switch (optname) { -+ case KCM_RECV_DISABLE: -+ val = kcm->rx_disabled; -+ break; -+ default: -+ return -ENOPROTOOPT; -+ } -+ -+ if (put_user(len, optlen)) -+ return -EFAULT; -+ if (copy_to_user(optval, &val, len)) -+ return -EFAULT; -+ return 0; -+} -+ -+static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) -+{ -+ struct kcm_sock *tkcm; -+ struct list_head *head; -+ int index = 0; -+ -+ /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so -+ * we set sk_state, otherwise epoll_wait always returns right away with -+ * POLLHUP -+ */ -+ kcm->sk.sk_state = TCP_ESTABLISHED; -+ -+ /* Add to mux's kcm sockets list */ -+ kcm->mux = mux; -+ spin_lock_bh(&mux->lock); -+ -+ head = &mux->kcm_socks; -+ list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) { -+ if (tkcm->index != index) -+ break; -+ head = &tkcm->kcm_sock_list; -+ index++; -+ } -+ -+ list_add(&kcm->kcm_sock_list, head); -+ kcm->index = index; -+ -+ mux->kcm_socks_cnt++; -+ spin_unlock_bh(&mux->lock); -+ -+ INIT_WORK(&kcm->tx_work, kcm_tx_work); -+ -+ spin_lock_bh(&mux->rx_lock); -+ kcm_rcv_ready(kcm); -+ spin_unlock_bh(&mux->rx_lock); -+} -+ -+static int kcm_attach(struct socket *sock, struct socket *csock, -+ struct bpf_prog *prog) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ struct kcm_mux *mux = kcm->mux; -+ struct sock *csk; -+ struct 
kcm_psock *psock = NULL, *tpsock; -+ struct list_head *head; -+ int index = 0; -+ -+ if (csock->ops->family != PF_INET && -+ csock->ops->family != PF_INET6) -+ return -EINVAL; -+ -+ csk = csock->sk; -+ if (!csk) -+ return -EINVAL; -+ -+ /* Only support TCP for now */ -+ if (csk->sk_protocol != IPPROTO_TCP) -+ return -EINVAL; -+ -+ psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); -+ if (!psock) -+ return -ENOMEM; -+ -+ psock->mux = mux; -+ psock->sk = csk; -+ psock->bpf_prog = prog; -+ INIT_WORK(&psock->rx_work, psock_rx_work); -+ INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); -+ -+ sock_hold(csk); -+ -+ write_lock_bh(&csk->sk_callback_lock); -+ psock->save_data_ready = csk->sk_data_ready; -+ psock->save_write_space = csk->sk_write_space; -+ psock->save_state_change = csk->sk_state_change; -+ csk->sk_user_data = psock; -+ csk->sk_data_ready = psock_tcp_data_ready; -+ csk->sk_write_space = psock_tcp_write_space; -+ csk->sk_state_change = psock_tcp_state_change; -+ write_unlock_bh(&csk->sk_callback_lock); -+ -+ /* Finished initialization, now add the psock to the MUX. 
*/ -+ spin_lock_bh(&mux->lock); -+ head = &mux->psocks; -+ list_for_each_entry(tpsock, &mux->psocks, psock_list) { -+ if (tpsock->index != index) -+ break; -+ head = &tpsock->psock_list; -+ index++; -+ } -+ -+ list_add(&psock->psock_list, head); -+ psock->index = index; -+ -+ mux->psocks_cnt++; -+ psock_now_avail(psock); -+ spin_unlock_bh(&mux->lock); -+ -+ /* Schedule RX work in case there are already bytes queued */ -+ queue_work(kcm_wq, &psock->rx_work); -+ -+ return 0; -+} -+ -+static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) -+{ -+ struct socket *csock; -+ struct bpf_prog *prog; -+ int err; -+ -+ csock = sockfd_lookup(info->fd, &err); -+ if (!csock) -+ return -ENOENT; -+ -+ prog = bpf_prog_get(info->bpf_fd); -+ if (IS_ERR(prog)) { -+ err = PTR_ERR(prog); -+ goto out; -+ } -+ -+ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { -+ bpf_prog_put(prog); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ err = kcm_attach(sock, csock, prog); -+ if (err) { -+ bpf_prog_put(prog); -+ goto out; -+ } -+ -+ /* Keep reference on file also */ -+ -+ return 0; -+out: -+ fput(csock->file); -+ return err; -+} -+ -+static void kcm_unattach(struct kcm_psock *psock) -+{ -+ struct sock *csk = psock->sk; -+ struct kcm_mux *mux = psock->mux; -+ -+ /* Stop getting callbacks from TCP socket. After this there should -+ * be no way to reserve a kcm for this psock. -+ */ -+ write_lock_bh(&csk->sk_callback_lock); -+ csk->sk_user_data = NULL; -+ csk->sk_data_ready = psock->save_data_ready; -+ csk->sk_write_space = psock->save_write_space; -+ csk->sk_state_change = psock->save_state_change; -+ psock->rx_stopped = 1; -+ -+ if (WARN_ON(psock->rx_kcm)) { -+ write_unlock_bh(&csk->sk_callback_lock); -+ return; -+ } -+ -+ spin_lock_bh(&mux->rx_lock); -+ -+ /* Stop receiver activities. After this point psock should not be -+ * able to get onto ready list either through callbacks or work. 
-+ */ -+ if (psock->ready_rx_msg) { -+ list_del(&psock->psock_ready_list); -+ kfree_skb(psock->ready_rx_msg); -+ psock->ready_rx_msg = NULL; -+ } -+ -+ spin_unlock_bh(&mux->rx_lock); -+ -+ write_unlock_bh(&csk->sk_callback_lock); -+ -+ cancel_work_sync(&psock->rx_work); -+ cancel_delayed_work_sync(&psock->rx_delayed_work); -+ -+ bpf_prog_put(psock->bpf_prog); -+ -+ kfree_skb(psock->rx_skb_head); -+ psock->rx_skb_head = NULL; -+ -+ spin_lock_bh(&mux->lock); -+ -+ if (psock->tx_kcm) { -+ /* psock was reserved. Just mark it finished and we will clean -+ * up in the kcm paths, we need kcm lock which can not be -+ * acquired here. -+ */ -+ spin_unlock_bh(&mux->lock); -+ -+ /* We are unattaching a socket that is reserved. Abort the -+ * socket since we may be out of sync in sending on it. We need -+ * to do this without the mux lock. -+ */ -+ kcm_abort_tx_psock(psock, EPIPE, false); -+ -+ spin_lock_bh(&mux->lock); -+ if (!psock->tx_kcm) { -+ /* psock now unreserved in window mux was unlocked */ -+ goto no_reserved; -+ } -+ psock->done = 1; -+ -+ /* Commit done before queuing work to process it */ -+ smp_mb(); -+ -+ /* Queue tx work to make sure psock->done is handled */ -+ queue_work(kcm_wq, &psock->tx_kcm->tx_work); -+ spin_unlock_bh(&mux->lock); -+ } else { -+no_reserved: -+ if (!psock->tx_stopped) -+ list_del(&psock->psock_avail_list); -+ list_del(&psock->psock_list); -+ mux->psocks_cnt--; -+ spin_unlock_bh(&mux->lock); -+ -+ sock_put(csk); -+ fput(csk->sk_socket->file); -+ kmem_cache_free(kcm_psockp, psock); -+ } -+} -+ -+static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) -+{ -+ struct kcm_sock *kcm = kcm_sk(sock->sk); -+ struct kcm_mux *mux = kcm->mux; -+ struct kcm_psock *psock; -+ struct socket *csock; -+ struct sock *csk; -+ int err; -+ -+ csock = sockfd_lookup(info->fd, &err); -+ if (!csock) -+ return -ENOENT; -+ -+ csk = csock->sk; -+ if (!csk) { -+ err = -EINVAL; -+ goto out; -+ } -+ -+ err = -ENOENT; -+ -+ spin_lock_bh(&mux->lock); 
-+ -+ list_for_each_entry(psock, &mux->psocks, psock_list) { -+ if (psock->sk != csk) -+ continue; -+ -+ /* Found the matching psock */ -+ -+ if (psock->unattaching || WARN_ON(psock->done)) { -+ err = -EALREADY; -+ break; -+ } -+ -+ psock->unattaching = 1; -+ -+ spin_unlock_bh(&mux->lock); -+ -+ kcm_unattach(psock); -+ -+ err = 0; -+ goto out; -+ } -+ -+ spin_unlock_bh(&mux->lock); -+ -+out: -+ fput(csock->file); -+ return err; -+} -+ -+static struct proto kcm_proto = { -+ .name = "KCM", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct kcm_sock), -+}; -+ -+/* Clone a kcm socket. */ -+static int kcm_clone(struct socket *osock, struct kcm_clone *info, -+ struct socket **newsockp) -+{ -+ struct socket *newsock; -+ struct sock *newsk; -+ struct file *newfile; -+ int err, newfd; -+ -+ err = -ENFILE; -+ newsock = sock_alloc(); -+ if (!newsock) -+ goto out; -+ -+ newsock->type = osock->type; -+ newsock->ops = osock->ops; -+ -+ __module_get(newsock->ops->owner); -+ -+ newfd = get_unused_fd_flags(0); -+ if (unlikely(newfd < 0)) { -+ err = newfd; -+ goto out_fd_fail; -+ } -+ -+ newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); -+ if (unlikely(IS_ERR(newfile))) { -+ err = PTR_ERR(newfile); -+ goto out_sock_alloc_fail; -+ } -+ -+ newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, -+ &kcm_proto, true); -+ if (!newsk) { -+ err = -ENOMEM; -+ goto out_sk_alloc_fail; -+ } -+ -+ sock_init_data(newsock, newsk); -+ init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux); -+ -+ fd_install(newfd, newfile); -+ *newsockp = newsock; -+ info->fd = newfd; -+ -+ return 0; -+ -+out_sk_alloc_fail: -+ fput(newfile); -+out_sock_alloc_fail: -+ put_unused_fd(newfd); -+out_fd_fail: -+ sock_release(newsock); -+out: -+ return err; -+} -+ -+static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -+{ -+ int err; -+ -+ switch (cmd) { -+ case SIOCKCMATTACH: { -+ struct kcm_attach info; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) 
-+ err = -EFAULT; -+ -+ err = kcm_attach_ioctl(sock, &info); -+ -+ break; -+ } -+ case SIOCKCMUNATTACH: { -+ struct kcm_unattach info; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) -+ err = -EFAULT; -+ -+ err = kcm_unattach_ioctl(sock, &info); -+ -+ break; -+ } -+ case SIOCKCMCLONE: { -+ struct kcm_clone info; -+ struct socket *newsock = NULL; -+ -+ if (copy_from_user(&info, (void __user *)arg, sizeof(info))) -+ err = -EFAULT; -+ -+ err = kcm_clone(sock, &info, &newsock); -+ -+ if (!err) { -+ if (copy_to_user((void __user *)arg, &info, -+ sizeof(info))) { -+ err = -EFAULT; -+ sock_release(newsock); -+ } -+ } -+ -+ break; -+ } -+ default: -+ err = -ENOIOCTLCMD; -+ break; -+ } -+ -+ return err; -+} -+ -+static void free_mux(struct rcu_head *rcu) -+{ -+ struct kcm_mux *mux = container_of(rcu, -+ struct kcm_mux, rcu); -+ -+ kmem_cache_free(kcm_muxp, mux); -+} -+ -+static void release_mux(struct kcm_mux *mux) -+{ -+ struct kcm_net *knet = mux->knet; -+ struct kcm_psock *psock, *tmp_psock; -+ -+ /* Release psocks */ -+ list_for_each_entry_safe(psock, tmp_psock, -+ &mux->psocks, psock_list) { -+ if (!WARN_ON(psock->unattaching)) -+ kcm_unattach(psock); -+ } -+ -+ if (WARN_ON(mux->psocks_cnt)) -+ return; -+ -+ __skb_queue_purge(&mux->rx_hold_queue); -+ -+ mutex_lock(&knet->mutex); -+ list_del_rcu(&mux->kcm_mux_list); -+ knet->count--; -+ mutex_unlock(&knet->mutex); -+ -+ call_rcu(&mux->rcu, free_mux); -+} -+ -+static void kcm_done(struct kcm_sock *kcm) -+{ -+ struct kcm_mux *mux = kcm->mux; -+ struct sock *sk = &kcm->sk; -+ int socks_cnt; -+ -+ spin_lock_bh(&mux->rx_lock); -+ if (kcm->rx_psock) { -+ /* Cleanup in unreserve_rx_kcm */ -+ WARN_ON(kcm->done); -+ kcm->rx_disabled = 1; -+ kcm->done = 1; -+ spin_unlock_bh(&mux->rx_lock); -+ return; -+ } -+ -+ if (kcm->rx_wait) { -+ list_del(&kcm->wait_rx_list); -+ kcm->rx_wait = false; -+ } -+ /* Move any pending receive messages to other kcm sockets */ -+ requeue_rx_msgs(mux, &sk->sk_receive_queue); -+ -+ 
spin_unlock_bh(&mux->rx_lock); -+ -+ if (WARN_ON(sk_rmem_alloc_get(sk))) -+ return; -+ -+ /* Detach from MUX */ -+ spin_lock_bh(&mux->lock); -+ -+ list_del(&kcm->kcm_sock_list); -+ mux->kcm_socks_cnt--; -+ socks_cnt = mux->kcm_socks_cnt; -+ -+ spin_unlock_bh(&mux->lock); -+ -+ if (!socks_cnt) { -+ /* We are done with the mux now. */ -+ release_mux(mux); -+ } -+ -+ WARN_ON(kcm->rx_wait); -+ -+ sock_put(&kcm->sk); -+} -+ -+/* Called by kcm_release to close a KCM socket. -+ * If this is the last KCM socket on the MUX, destroy the MUX. -+ */ -+static int kcm_release(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct kcm_sock *kcm; -+ struct kcm_mux *mux; -+ struct kcm_psock *psock; -+ -+ if (!sk) -+ return 0; -+ -+ kcm = kcm_sk(sk); -+ mux = kcm->mux; -+ -+ sock_orphan(sk); -+ kfree_skb(kcm->seq_skb); -+ -+ lock_sock(sk); -+ /* Purge queue under lock to avoid race condition with tx_work trying -+ * to act when queue is nonempty. If tx_work runs after this point -+ * it will just return. -+ */ -+ __skb_queue_purge(&sk->sk_write_queue); -+ release_sock(sk); -+ -+ spin_lock_bh(&mux->lock); -+ if (kcm->tx_wait) { -+ /* Take of tx_wait list, after this point there should be no way -+ * that a psock will be assigned to this kcm. -+ */ -+ list_del(&kcm->wait_psock_list); -+ kcm->tx_wait = false; -+ } -+ spin_unlock_bh(&mux->lock); -+ -+ /* Cancel work. After this point there should be no outside references -+ * to the kcm socket. -+ */ -+ cancel_work_sync(&kcm->tx_work); -+ -+ lock_sock(sk); -+ psock = kcm->tx_psock; -+ if (psock) { -+ /* A psock was reserved, so we need to kill it since it -+ * may already have some bytes queued from a message. We -+ * need to do this after removing kcm from tx_wait list. 
-+ */ -+ kcm_abort_tx_psock(psock, EPIPE, false); -+ unreserve_psock(kcm); -+ } -+ release_sock(sk); -+ -+ WARN_ON(kcm->tx_wait); -+ WARN_ON(kcm->tx_psock); -+ -+ sock->sk = NULL; -+ -+ kcm_done(kcm); -+ -+ return 0; -+} -+ -+static const struct proto_ops kcm_ops = { -+ .family = PF_KCM, -+ .owner = THIS_MODULE, -+ .release = kcm_release, -+ .bind = sock_no_bind, -+ .connect = sock_no_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = sock_no_accept, -+ .getname = sock_no_getname, -+ .poll = datagram_poll, -+ .ioctl = kcm_ioctl, -+ .listen = sock_no_listen, -+ .shutdown = sock_no_shutdown, -+ .setsockopt = kcm_setsockopt, -+ .getsockopt = kcm_getsockopt, -+ .sendmsg = kcm_sendmsg, -+ .recvmsg = kcm_recvmsg, -+ .mmap = sock_no_mmap, -+ .sendpage = sock_no_sendpage, -+}; -+ -+/* Create proto operation for kcm sockets */ -+static int kcm_create(struct net *net, struct socket *sock, -+ int protocol, int kern) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ struct sock *sk; -+ struct kcm_mux *mux; -+ -+ switch (sock->type) { -+ case SOCK_DGRAM: -+ case SOCK_SEQPACKET: -+ sock->ops = &kcm_ops; -+ break; -+ default: -+ return -ESOCKTNOSUPPORT; -+ } -+ -+ if (protocol != KCMPROTO_CONNECTED) -+ return -EPROTONOSUPPORT; -+ -+ sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern); -+ if (!sk) -+ return -ENOMEM; -+ -+ /* Allocate a kcm mux, shared between KCM sockets */ -+ mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL); -+ if (!mux) { -+ sk_free(sk); -+ return -ENOMEM; -+ } -+ -+ spin_lock_init(&mux->lock); -+ spin_lock_init(&mux->rx_lock); -+ INIT_LIST_HEAD(&mux->kcm_socks); -+ INIT_LIST_HEAD(&mux->kcm_rx_waiters); -+ INIT_LIST_HEAD(&mux->kcm_tx_waiters); -+ -+ INIT_LIST_HEAD(&mux->psocks); -+ INIT_LIST_HEAD(&mux->psocks_ready); -+ INIT_LIST_HEAD(&mux->psocks_avail); -+ -+ mux->knet = knet; -+ -+ /* Add new MUX to list */ -+ mutex_lock(&knet->mutex); -+ list_add_rcu(&mux->kcm_mux_list, &knet->mux_list); -+ knet->count++; -+ mutex_unlock(&knet->mutex); 
-+ -+ skb_queue_head_init(&mux->rx_hold_queue); -+ -+ /* Init KCM socket */ -+ sock_init_data(sock, sk); -+ init_kcm_sock(kcm_sk(sk), mux); -+ -+ return 0; -+} -+ -+static struct net_proto_family kcm_family_ops = { -+ .family = PF_KCM, -+ .create = kcm_create, -+ .owner = THIS_MODULE, -+}; -+ -+static __net_init int kcm_init_net(struct net *net) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ -+ INIT_LIST_HEAD_RCU(&knet->mux_list); -+ mutex_init(&knet->mutex); -+ -+ return 0; -+} -+ -+static __net_exit void kcm_exit_net(struct net *net) -+{ -+ struct kcm_net *knet = net_generic(net, kcm_net_id); -+ -+ /* All KCM sockets should be closed at this point, which should mean -+ * that all multiplexors and psocks have been destroyed. -+ */ -+ WARN_ON(!list_empty(&knet->mux_list)); -+} -+ -+static struct pernet_operations kcm_net_ops = { -+ .init = kcm_init_net, -+ .exit = kcm_exit_net, -+ .id = &kcm_net_id, -+ .size = sizeof(struct kcm_net), -+}; -+ -+static int __init kcm_init(void) -+{ -+ int err = -ENOMEM; -+ -+ kcm_muxp = kmem_cache_create("kcm_mux_cache", -+ sizeof(struct kcm_mux), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -+ if (!kcm_muxp) -+ goto fail; -+ -+ kcm_psockp = kmem_cache_create("kcm_psock_cache", -+ sizeof(struct kcm_psock), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); -+ if (!kcm_psockp) -+ goto fail; -+ -+ kcm_wq = create_singlethread_workqueue("kkcmd"); -+ if (!kcm_wq) -+ goto fail; -+ -+ err = proto_register(&kcm_proto, 1); -+ if (err) -+ goto fail; -+ -+ err = sock_register(&kcm_family_ops); -+ if (err) -+ goto sock_register_fail; -+ -+ err = register_pernet_device(&kcm_net_ops); -+ if (err) -+ goto net_ops_fail; -+ -+ return 0; -+ -+net_ops_fail: -+ sock_unregister(PF_KCM); -+ -+sock_register_fail: -+ proto_unregister(&kcm_proto); -+ -+fail: -+ kmem_cache_destroy(kcm_muxp); -+ kmem_cache_destroy(kcm_psockp); -+ -+ if (kcm_wq) -+ destroy_workqueue(kcm_wq); -+ -+ return err; -+} -+ -+static void __exit kcm_exit(void) -+{ -+ 
unregister_pernet_device(&kcm_net_ops); -+ sock_unregister(PF_KCM); -+ proto_unregister(&kcm_proto); -+ destroy_workqueue(kcm_wq); -+ -+ kmem_cache_destroy(kcm_muxp); -+ kmem_cache_destroy(kcm_psockp); -+} -+ -+module_init(kcm_init); -+module_exit(kcm_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS_NETPROTO(PF_KCM); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch b/kernel/patches-4.4.x/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch deleted file mode 100644 index 41100eae6..000000000 --- a/kernel/patches-4.4.x/0037-net-add-the-AF_KCM-entries-to-family-name-tables.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 2852eb182a89c29873cd945e0b8346a92dc075d9 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Mar 2016 02:51:09 -0700 -Subject: [PATCH 37/44] net: add the AF_KCM entries to family name tables - -This is for the recent kcm driver, which introduces AF_KCM(41) in -b7ac4eb(kcm: Kernel Connection Multiplexor module). 
- -Signed-off-by: Dexuan Cui -Cc: Signed-off-by: Tom Herbert -Origin: https://patchwork.ozlabs.org/patch/600006 ---- - net/core/sock.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/net/core/sock.c b/net/core/sock.c -index bd2fad27891e..ef337bf176f7 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -263,7 +263,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , -- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" -+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , -+ "sk_lock-AF_MAX" - }; - static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , -@@ -279,7 +280,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , -- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" -+ "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , -+ "slock-AF_MAX" - }; - static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , -@@ -295,7 +297,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , -- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" -+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , -+ "clock-AF_MAX" - }; - - /* --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0038-net-Add-Qualcomm-IPC-router.patch b/kernel/patches-4.4.x/0038-net-Add-Qualcomm-IPC-router.patch deleted file mode 100644 index 
cbe4501d9..000000000 --- a/kernel/patches-4.4.x/0038-net-Add-Qualcomm-IPC-router.patch +++ /dev/null @@ -1,1307 +0,0 @@ -From aae3913b1b73c83de6fe5113ecc27453bfcb790b Mon Sep 17 00:00:00 2001 -From: Courtney Cavin -Date: Wed, 27 Apr 2016 12:13:03 -0700 -Subject: [PATCH 38/44] net: Add Qualcomm IPC router - -Add an implementation of Qualcomm's IPC router protocol, used to -communicate with service providing remote processors. - -Signed-off-by: Courtney Cavin -Signed-off-by: Bjorn Andersson -[bjorn: Cope with 0 being a valid node id and implement RTM_NEWADDR] -Signed-off-by: Bjorn Andersson -Origin: https://patchwork.ozlabs.org/patch/615774/ ---- - include/linux/socket.h | 4 +- - include/uapi/linux/qrtr.h | 12 + - net/Kconfig | 1 + - net/Makefile | 1 + - net/qrtr/Kconfig | 24 ++ - net/qrtr/Makefile | 2 + - net/qrtr/qrtr.c | 1007 +++++++++++++++++++++++++++++++++++++++++++++ - net/qrtr/qrtr.h | 31 ++ - net/qrtr/smd.c | 117 ++++++ - 9 files changed, 1198 insertions(+), 1 deletion(-) - create mode 100644 include/uapi/linux/qrtr.h - create mode 100644 net/qrtr/Kconfig - create mode 100644 net/qrtr/Makefile - create mode 100644 net/qrtr/qrtr.c - create mode 100644 net/qrtr/qrtr.h - create mode 100644 net/qrtr/smd.c - -diff --git a/include/linux/socket.h b/include/linux/socket.h -index 4e1ea53aa329..dbd81e7f21fd 100644 ---- a/include/linux/socket.h -+++ b/include/linux/socket.h -@@ -201,8 +201,9 @@ struct ucred { - #define AF_NFC 39 /* NFC sockets */ - #define AF_VSOCK 40 /* vSockets */ - #define AF_KCM 41 /* Kernel Connection Multiplexor*/ -+#define AF_QIPCRTR 42 /* Qualcomm IPC Router */ - --#define AF_MAX 42 /* For now.. */ -+#define AF_MAX 43 /* For now.. */ - - /* Protocol families, same as address families. */ - #define PF_UNSPEC AF_UNSPEC -@@ -249,6 +250,7 @@ struct ucred { - #define PF_NFC AF_NFC - #define PF_VSOCK AF_VSOCK - #define PF_KCM AF_KCM -+#define PF_QIPCRTR AF_QIPCRTR - #define PF_MAX AF_MAX - - /* Maximum queue length specifiable by listen. 
*/ -diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h -new file mode 100644 -index 000000000000..66c0748d26e2 ---- /dev/null -+++ b/include/uapi/linux/qrtr.h -@@ -0,0 +1,12 @@ -+#ifndef _LINUX_QRTR_H -+#define _LINUX_QRTR_H -+ -+#include -+ -+struct sockaddr_qrtr { -+ __kernel_sa_family_t sq_family; -+ __u32 sq_node; -+ __u32 sq_port; -+}; -+ -+#endif /* _LINUX_QRTR_H */ -diff --git a/net/Kconfig b/net/Kconfig -index b8439e61f9a0..1c9fda1c602d 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -233,6 +233,7 @@ source "net/mpls/Kconfig" - source "net/hsr/Kconfig" - source "net/switchdev/Kconfig" - source "net/l3mdev/Kconfig" -+source "net/qrtr/Kconfig" - - config RPS - bool -diff --git a/net/Makefile b/net/Makefile -index 81d14119eab5..bdd14553a774 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -78,3 +78,4 @@ endif - ifneq ($(CONFIG_NET_L3_MASTER_DEV),) - obj-y += l3mdev/ - endif -+obj-$(CONFIG_QRTR) += qrtr/ -diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig -new file mode 100644 -index 000000000000..0c2619d068bd ---- /dev/null -+++ b/net/qrtr/Kconfig -@@ -0,0 +1,24 @@ -+# Qualcomm IPC Router configuration -+# -+ -+config QRTR -+ bool "Qualcomm IPC Router support" -+ depends on ARCH_QCOM || COMPILE_TEST -+ ---help--- -+ Say Y if you intend to use Qualcomm IPC router protocol. The -+ protocol is used to communicate with services provided by other -+ hardware blocks in the system. -+ -+ In order to do service lookups, a userspace daemon is required to -+ maintain a service listing. -+ -+if QRTR -+ -+config QRTR_SMD -+ tristate "SMD IPC Router channels" -+ depends on QCOM_SMD || COMPILE_TEST -+ ---help--- -+ Say Y here to support SMD based ipcrouter channels. SMD is the -+ most common transport for IPC Router. 
-+ -+endif # QRTR -diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile -new file mode 100644 -index 000000000000..e282a84ffc5c ---- /dev/null -+++ b/net/qrtr/Makefile -@@ -0,0 +1,2 @@ -+obj-y := qrtr.o -+obj-$(CONFIG_QRTR_SMD) += smd.o -diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c -new file mode 100644 -index 000000000000..c985ecbe9bd6 ---- /dev/null -+++ b/net/qrtr/qrtr.c -@@ -0,0 +1,1007 @@ -+/* -+ * Copyright (c) 2015, Sony Mobile Communications Inc. -+ * Copyright (c) 2013, The Linux Foundation. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+#include -+#include -+#include -+#include /* For TIOCINQ/OUTQ */ -+ -+#include -+ -+#include "qrtr.h" -+ -+#define QRTR_PROTO_VER 1 -+ -+/* auto-bind range */ -+#define QRTR_MIN_EPH_SOCKET 0x4000 -+#define QRTR_MAX_EPH_SOCKET 0x7fff -+ -+enum qrtr_pkt_type { -+ QRTR_TYPE_DATA = 1, -+ QRTR_TYPE_HELLO = 2, -+ QRTR_TYPE_BYE = 3, -+ QRTR_TYPE_NEW_SERVER = 4, -+ QRTR_TYPE_DEL_SERVER = 5, -+ QRTR_TYPE_DEL_CLIENT = 6, -+ QRTR_TYPE_RESUME_TX = 7, -+ QRTR_TYPE_EXIT = 8, -+ QRTR_TYPE_PING = 9, -+}; -+ -+/** -+ * struct qrtr_hdr - (I|R)PCrouter packet header -+ * @version: protocol version -+ * @type: packet type; one of QRTR_TYPE_* -+ * @src_node_id: source node -+ * @src_port_id: source port -+ * @confirm_rx: boolean; whether a resume-tx packet should be send in reply -+ * @size: length of packet, excluding this header -+ * @dst_node_id: destination node -+ * @dst_port_id: destination port -+ */ -+struct qrtr_hdr { -+ __le32 version; -+ __le32 type; -+ __le32 src_node_id; -+ __le32 src_port_id; -+ __le32 confirm_rx; -+ __le32 size; -+ __le32 dst_node_id; -+ __le32 dst_port_id; -+} __packed; -+ -+#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) -+#define QRTR_NODE_BCAST ((unsigned int)-1) -+#define QRTR_PORT_CTRL ((unsigned int)-2) -+ -+struct qrtr_sock { -+ /* WARNING: sk must be the first member */ -+ struct sock sk; -+ struct sockaddr_qrtr us; -+ struct sockaddr_qrtr peer; -+}; -+ -+static inline struct qrtr_sock *qrtr_sk(struct sock *sk) -+{ -+ BUILD_BUG_ON(offsetof(struct qrtr_sock, sk) != 0); -+ return container_of(sk, struct qrtr_sock, sk); -+} -+ -+static unsigned int qrtr_local_nid = -1; -+ -+/* for node ids */ -+static RADIX_TREE(qrtr_nodes, GFP_KERNEL); -+/* broadcast list */ -+static LIST_HEAD(qrtr_all_nodes); -+/* lock for qrtr_nodes, qrtr_all_nodes and node reference */ -+static DEFINE_MUTEX(qrtr_node_lock); -+ -+/* local port allocation management */ -+static DEFINE_IDR(qrtr_ports); -+static DEFINE_MUTEX(qrtr_port_lock); -+ -+/** -+ * struct qrtr_node - 
endpoint node -+ * @ep_lock: lock for endpoint management and callbacks -+ * @ep: endpoint -+ * @ref: reference count for node -+ * @nid: node id -+ * @rx_queue: receive queue -+ * @work: scheduled work struct for recv work -+ * @item: list item for broadcast list -+ */ -+struct qrtr_node { -+ struct mutex ep_lock; -+ struct qrtr_endpoint *ep; -+ struct kref ref; -+ unsigned int nid; -+ -+ struct sk_buff_head rx_queue; -+ struct work_struct work; -+ struct list_head item; -+}; -+ -+/* Release node resources and free the node. -+ * -+ * Do not call directly, use qrtr_node_release. To be used with -+ * kref_put_mutex. As such, the node mutex is expected to be locked on call. -+ */ -+static void __qrtr_node_release(struct kref *kref) -+{ -+ struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); -+ -+ if (node->nid != QRTR_EP_NID_AUTO) -+ radix_tree_delete(&qrtr_nodes, node->nid); -+ -+ list_del(&node->item); -+ mutex_unlock(&qrtr_node_lock); -+ -+ skb_queue_purge(&node->rx_queue); -+ kfree(node); -+} -+ -+/* Increment reference to node. */ -+static struct qrtr_node *qrtr_node_acquire(struct qrtr_node *node) -+{ -+ if (node) -+ kref_get(&node->ref); -+ return node; -+} -+ -+/* Decrement reference to node and release as necessary. */ -+static void qrtr_node_release(struct qrtr_node *node) -+{ -+ if (!node) -+ return; -+ kref_put_mutex(&node->ref, __qrtr_node_release, &qrtr_node_lock); -+} -+ -+/* Pass an outgoing packet socket buffer to the endpoint driver. */ -+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ int rc = -ENODEV; -+ -+ mutex_lock(&node->ep_lock); -+ if (node->ep) -+ rc = node->ep->xmit(node->ep, skb); -+ else -+ kfree_skb(skb); -+ mutex_unlock(&node->ep_lock); -+ -+ return rc; -+} -+ -+/* Lookup node by id. 
-+ * -+ * callers must release with qrtr_node_release() -+ */ -+static struct qrtr_node *qrtr_node_lookup(unsigned int nid) -+{ -+ struct qrtr_node *node; -+ -+ mutex_lock(&qrtr_node_lock); -+ node = radix_tree_lookup(&qrtr_nodes, nid); -+ node = qrtr_node_acquire(node); -+ mutex_unlock(&qrtr_node_lock); -+ -+ return node; -+} -+ -+/* Assign node id to node. -+ * -+ * This is mostly useful for automatic node id assignment, based on -+ * the source id in the incoming packet. -+ */ -+static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) -+{ -+ if (node->nid != QRTR_EP_NID_AUTO || nid == QRTR_EP_NID_AUTO) -+ return; -+ -+ mutex_lock(&qrtr_node_lock); -+ radix_tree_insert(&qrtr_nodes, nid, node); -+ node->nid = nid; -+ mutex_unlock(&qrtr_node_lock); -+} -+ -+/** -+ * qrtr_endpoint_post() - post incoming data -+ * @ep: endpoint handle -+ * @data: data pointer -+ * @len: size of data in bytes -+ * -+ * Return: 0 on success; negative error code on failure -+ */ -+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) -+{ -+ struct qrtr_node *node = ep->node; -+ const struct qrtr_hdr *phdr = data; -+ struct sk_buff *skb; -+ unsigned int psize; -+ unsigned int size; -+ unsigned int type; -+ unsigned int ver; -+ unsigned int dst; -+ -+ if (len < QRTR_HDR_SIZE || len & 3) -+ return -EINVAL; -+ -+ ver = le32_to_cpu(phdr->version); -+ size = le32_to_cpu(phdr->size); -+ type = le32_to_cpu(phdr->type); -+ dst = le32_to_cpu(phdr->dst_port_id); -+ -+ psize = (size + 3) & ~3; -+ -+ if (ver != QRTR_PROTO_VER) -+ return -EINVAL; -+ -+ if (len != psize + QRTR_HDR_SIZE) -+ return -EINVAL; -+ -+ if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA) -+ return -EINVAL; -+ -+ skb = netdev_alloc_skb(NULL, len); -+ if (!skb) -+ return -ENOMEM; -+ -+ skb_reset_transport_header(skb); -+ memcpy(skb_put(skb, len), data, len); -+ -+ skb_queue_tail(&node->rx_queue, skb); -+ schedule_work(&node->work); -+ -+ return 0; -+} 
-+EXPORT_SYMBOL_GPL(qrtr_endpoint_post); -+ -+/* Allocate and construct a resume-tx packet. */ -+static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, -+ u32 dst_node, u32 port) -+{ -+ const int pkt_len = 20; -+ struct qrtr_hdr *hdr; -+ struct sk_buff *skb; -+ u32 *buf; -+ -+ skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); -+ if (!skb) -+ return NULL; -+ skb_reset_transport_header(skb); -+ -+ hdr = (struct qrtr_hdr *)skb_put(skb, QRTR_HDR_SIZE); -+ hdr->version = cpu_to_le32(QRTR_PROTO_VER); -+ hdr->type = cpu_to_le32(QRTR_TYPE_RESUME_TX); -+ hdr->src_node_id = cpu_to_le32(src_node); -+ hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); -+ hdr->confirm_rx = cpu_to_le32(0); -+ hdr->size = cpu_to_le32(pkt_len); -+ hdr->dst_node_id = cpu_to_le32(dst_node); -+ hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); -+ -+ buf = (u32 *)skb_put(skb, pkt_len); -+ memset(buf, 0, pkt_len); -+ buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); -+ buf[1] = cpu_to_le32(src_node); -+ buf[2] = cpu_to_le32(port); -+ -+ return skb; -+} -+ -+static struct qrtr_sock *qrtr_port_lookup(int port); -+static void qrtr_port_put(struct qrtr_sock *ipc); -+ -+/* Handle and route a received packet. -+ * -+ * This will auto-reply with resume-tx packet as necessary. 
-+ */ -+static void qrtr_node_rx_work(struct work_struct *work) -+{ -+ struct qrtr_node *node = container_of(work, struct qrtr_node, work); -+ struct sk_buff *skb; -+ -+ while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { -+ const struct qrtr_hdr *phdr; -+ u32 dst_node, dst_port; -+ struct qrtr_sock *ipc; -+ u32 src_node; -+ int confirm; -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ src_node = le32_to_cpu(phdr->src_node_id); -+ dst_node = le32_to_cpu(phdr->dst_node_id); -+ dst_port = le32_to_cpu(phdr->dst_port_id); -+ confirm = !!phdr->confirm_rx; -+ -+ qrtr_node_assign(node, src_node); -+ -+ ipc = qrtr_port_lookup(dst_port); -+ if (!ipc) { -+ kfree_skb(skb); -+ } else { -+ if (sock_queue_rcv_skb(&ipc->sk, skb)) -+ kfree_skb(skb); -+ -+ qrtr_port_put(ipc); -+ } -+ -+ if (confirm) { -+ skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); -+ if (!skb) -+ break; -+ if (qrtr_node_enqueue(node, skb)) -+ break; -+ } -+ } -+} -+ -+/** -+ * qrtr_endpoint_register() - register a new endpoint -+ * @ep: endpoint to register -+ * @nid: desired node id; may be QRTR_EP_NID_AUTO for auto-assignment -+ * Return: 0 on success; negative error code on failure -+ * -+ * The specified endpoint must have the xmit function pointer set on call. 
-+ */ -+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid) -+{ -+ struct qrtr_node *node; -+ -+ if (!ep || !ep->xmit) -+ return -EINVAL; -+ -+ node = kzalloc(sizeof(*node), GFP_KERNEL); -+ if (!node) -+ return -ENOMEM; -+ -+ INIT_WORK(&node->work, qrtr_node_rx_work); -+ kref_init(&node->ref); -+ mutex_init(&node->ep_lock); -+ skb_queue_head_init(&node->rx_queue); -+ node->nid = QRTR_EP_NID_AUTO; -+ node->ep = ep; -+ -+ qrtr_node_assign(node, nid); -+ -+ mutex_lock(&qrtr_node_lock); -+ list_add(&node->item, &qrtr_all_nodes); -+ mutex_unlock(&qrtr_node_lock); -+ ep->node = node; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_register); -+ -+/** -+ * qrtr_endpoint_unregister - unregister endpoint -+ * @ep: endpoint to unregister -+ */ -+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) -+{ -+ struct qrtr_node *node = ep->node; -+ -+ mutex_lock(&node->ep_lock); -+ node->ep = NULL; -+ mutex_unlock(&node->ep_lock); -+ -+ qrtr_node_release(node); -+ ep->node = NULL; -+} -+EXPORT_SYMBOL_GPL(qrtr_endpoint_unregister); -+ -+/* Lookup socket by port. -+ * -+ * Callers must release with qrtr_port_put() -+ */ -+static struct qrtr_sock *qrtr_port_lookup(int port) -+{ -+ struct qrtr_sock *ipc; -+ -+ if (port == QRTR_PORT_CTRL) -+ port = 0; -+ -+ mutex_lock(&qrtr_port_lock); -+ ipc = idr_find(&qrtr_ports, port); -+ if (ipc) -+ sock_hold(&ipc->sk); -+ mutex_unlock(&qrtr_port_lock); -+ -+ return ipc; -+} -+ -+/* Release acquired socket. */ -+static void qrtr_port_put(struct qrtr_sock *ipc) -+{ -+ sock_put(&ipc->sk); -+} -+ -+/* Remove port assignment. */ -+static void qrtr_port_remove(struct qrtr_sock *ipc) -+{ -+ int port = ipc->us.sq_port; -+ -+ if (port == QRTR_PORT_CTRL) -+ port = 0; -+ -+ __sock_put(&ipc->sk); -+ -+ mutex_lock(&qrtr_port_lock); -+ idr_remove(&qrtr_ports, port); -+ mutex_unlock(&qrtr_port_lock); -+} -+ -+/* Assign port number to socket. 
-+ * -+ * Specify port in the integer pointed to by port, and it will be adjusted -+ * on return as necesssary. -+ * -+ * Port may be: -+ * 0: Assign ephemeral port in [QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET] -+ * QRTR_MIN_EPH_SOCKET: Specified; available to all -+ */ -+static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) -+{ -+ int rc; -+ -+ mutex_lock(&qrtr_port_lock); -+ if (!*port) { -+ rc = idr_alloc(&qrtr_ports, ipc, -+ QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET + 1, -+ GFP_ATOMIC); -+ if (rc >= 0) -+ *port = rc; -+ } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { -+ rc = -EACCES; -+ } else if (*port == QRTR_PORT_CTRL) { -+ rc = idr_alloc(&qrtr_ports, ipc, 0, 1, GFP_ATOMIC); -+ } else { -+ rc = idr_alloc(&qrtr_ports, ipc, *port, *port + 1, GFP_ATOMIC); -+ if (rc >= 0) -+ *port = rc; -+ } -+ mutex_unlock(&qrtr_port_lock); -+ -+ if (rc == -ENOSPC) -+ return -EADDRINUSE; -+ else if (rc < 0) -+ return rc; -+ -+ sock_hold(&ipc->sk); -+ -+ return 0; -+} -+ -+/* Bind socket to address. -+ * -+ * Socket should be locked upon call. -+ */ -+static int __qrtr_bind(struct socket *sock, -+ const struct sockaddr_qrtr *addr, int zapped) -+{ -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int port; -+ int rc; -+ -+ /* rebinding ok */ -+ if (!zapped && addr->sq_port == ipc->us.sq_port) -+ return 0; -+ -+ port = addr->sq_port; -+ rc = qrtr_port_assign(ipc, &port); -+ if (rc) -+ return rc; -+ -+ /* unbind previous, if any */ -+ if (!zapped) -+ qrtr_port_remove(ipc); -+ ipc->us.sq_port = port; -+ -+ sock_reset_flag(sk, SOCK_ZAPPED); -+ -+ return 0; -+} -+ -+/* Auto bind to an ephemeral port. 
*/ -+static int qrtr_autobind(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct sockaddr_qrtr addr; -+ -+ if (!sock_flag(sk, SOCK_ZAPPED)) -+ return 0; -+ -+ addr.sq_family = AF_QIPCRTR; -+ addr.sq_node = qrtr_local_nid; -+ addr.sq_port = 0; -+ -+ return __qrtr_bind(sock, &addr, 1); -+} -+ -+/* Bind socket to specified sockaddr. */ -+static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int rc; -+ -+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) -+ return -EINVAL; -+ -+ if (addr->sq_node != ipc->us.sq_node) -+ return -EINVAL; -+ -+ lock_sock(sk); -+ rc = __qrtr_bind(sock, addr, sock_flag(sk, SOCK_ZAPPED)); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+/* Queue packet to local peer socket. */ -+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ const struct qrtr_hdr *phdr; -+ struct qrtr_sock *ipc; -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ -+ ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id)); -+ if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ -+ kfree_skb(skb); -+ return -ENODEV; -+ } -+ -+ if (sock_queue_rcv_skb(&ipc->sk, skb)) { -+ qrtr_port_put(ipc); -+ kfree_skb(skb); -+ return -ENOSPC; -+ } -+ -+ qrtr_port_put(ipc); -+ -+ return 0; -+} -+ -+/* Queue packet for broadcast. 
*/ -+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) -+{ -+ struct sk_buff *skbn; -+ -+ mutex_lock(&qrtr_node_lock); -+ list_for_each_entry(node, &qrtr_all_nodes, item) { -+ skbn = skb_clone(skb, GFP_KERNEL); -+ if (!skbn) -+ break; -+ skb_set_owner_w(skbn, skb->sk); -+ qrtr_node_enqueue(node, skbn); -+ } -+ mutex_unlock(&qrtr_node_lock); -+ -+ qrtr_local_enqueue(node, skb); -+ -+ return 0; -+} -+ -+static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); -+ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ struct qrtr_node *node; -+ struct qrtr_hdr *hdr; -+ struct sk_buff *skb; -+ size_t plen; -+ int rc; -+ -+ if (msg->msg_flags & ~(MSG_DONTWAIT)) -+ return -EINVAL; -+ -+ if (len > 65535) -+ return -EMSGSIZE; -+ -+ lock_sock(sk); -+ -+ if (addr) { -+ if (msg->msg_namelen < sizeof(*addr)) { -+ release_sock(sk); -+ return -EINVAL; -+ } -+ -+ if (addr->sq_family != AF_QIPCRTR) { -+ release_sock(sk); -+ return -EINVAL; -+ } -+ -+ rc = qrtr_autobind(sock); -+ if (rc) { -+ release_sock(sk); -+ return rc; -+ } -+ } else if (sk->sk_state == TCP_ESTABLISHED) { -+ addr = &ipc->peer; -+ } else { -+ release_sock(sk); -+ return -ENOTCONN; -+ } -+ -+ node = NULL; -+ if (addr->sq_node == QRTR_NODE_BCAST) { -+ enqueue_fn = qrtr_bcast_enqueue; -+ } else if (addr->sq_node == ipc->us.sq_node) { -+ enqueue_fn = qrtr_local_enqueue; -+ } else { -+ enqueue_fn = qrtr_node_enqueue; -+ node = qrtr_node_lookup(addr->sq_node); -+ if (!node) { -+ release_sock(sk); -+ return -ECONNRESET; -+ } -+ } -+ -+ plen = (len + 3) & ~3; -+ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, -+ msg->msg_flags & MSG_DONTWAIT, &rc); -+ if (!skb) -+ goto out_node; -+ -+ skb_reset_transport_header(skb); -+ skb_put(skb, len + QRTR_HDR_SIZE); -+ -+ hdr = (struct qrtr_hdr *)skb_transport_header(skb); -+ 
hdr->version = cpu_to_le32(QRTR_PROTO_VER); -+ hdr->src_node_id = cpu_to_le32(ipc->us.sq_node); -+ hdr->src_port_id = cpu_to_le32(ipc->us.sq_port); -+ hdr->confirm_rx = cpu_to_le32(0); -+ hdr->size = cpu_to_le32(len); -+ hdr->dst_node_id = cpu_to_le32(addr->sq_node); -+ hdr->dst_port_id = cpu_to_le32(addr->sq_port); -+ -+ rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, -+ &msg->msg_iter, len); -+ if (rc) { -+ kfree_skb(skb); -+ goto out_node; -+ } -+ -+ if (plen != len) { -+ skb_pad(skb, plen - len); -+ skb_put(skb, plen - len); -+ } -+ -+ if (ipc->us.sq_port == QRTR_PORT_CTRL) { -+ if (len < 4) { -+ rc = -EINVAL; -+ kfree_skb(skb); -+ goto out_node; -+ } -+ -+ /* control messages already require the type as 'command' */ -+ skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); -+ } else { -+ hdr->type = cpu_to_le32(QRTR_TYPE_DATA); -+ } -+ -+ rc = enqueue_fn(node, skb); -+ if (rc >= 0) -+ rc = len; -+ -+out_node: -+ qrtr_node_release(node); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, -+ size_t size, int flags) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); -+ const struct qrtr_hdr *phdr; -+ struct sock *sk = sock->sk; -+ struct sk_buff *skb; -+ int copied, rc; -+ -+ lock_sock(sk); -+ -+ if (sock_flag(sk, SOCK_ZAPPED)) { -+ release_sock(sk); -+ return -EADDRNOTAVAIL; -+ } -+ -+ skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, -+ flags & MSG_DONTWAIT, &rc); -+ if (!skb) { -+ release_sock(sk); -+ return rc; -+ } -+ -+ phdr = (const struct qrtr_hdr *)skb_transport_header(skb); -+ copied = le32_to_cpu(phdr->size); -+ if (copied > size) { -+ copied = size; -+ msg->msg_flags |= MSG_TRUNC; -+ } -+ -+ rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); -+ if (rc < 0) -+ goto out; -+ rc = copied; -+ -+ if (addr) { -+ addr->sq_family = AF_QIPCRTR; -+ addr->sq_node = le32_to_cpu(phdr->src_node_id); -+ addr->sq_port = le32_to_cpu(phdr->src_port_id); -+ msg->msg_namelen = 
sizeof(*addr); -+ } -+ -+out: -+ skb_free_datagram(sk, skb); -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_connect(struct socket *sock, struct sockaddr *saddr, -+ int len, int flags) -+{ -+ DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, saddr); -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ int rc; -+ -+ if (len < sizeof(*addr) || addr->sq_family != AF_QIPCRTR) -+ return -EINVAL; -+ -+ lock_sock(sk); -+ -+ sk->sk_state = TCP_CLOSE; -+ sock->state = SS_UNCONNECTED; -+ -+ rc = qrtr_autobind(sock); -+ if (rc) { -+ release_sock(sk); -+ return rc; -+ } -+ -+ ipc->peer = *addr; -+ sock->state = SS_CONNECTED; -+ sk->sk_state = TCP_ESTABLISHED; -+ -+ release_sock(sk); -+ -+ return 0; -+} -+ -+static int qrtr_getname(struct socket *sock, struct sockaddr *saddr, -+ int *len, int peer) -+{ -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sockaddr_qrtr qaddr; -+ struct sock *sk = sock->sk; -+ -+ lock_sock(sk); -+ if (peer) { -+ if (sk->sk_state != TCP_ESTABLISHED) { -+ release_sock(sk); -+ return -ENOTCONN; -+ } -+ -+ qaddr = ipc->peer; -+ } else { -+ qaddr = ipc->us; -+ } -+ release_sock(sk); -+ -+ *len = sizeof(qaddr); -+ qaddr.sq_family = AF_QIPCRTR; -+ -+ memcpy(saddr, &qaddr, sizeof(qaddr)); -+ -+ return 0; -+} -+ -+static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -+{ -+ void __user *argp = (void __user *)arg; -+ struct qrtr_sock *ipc = qrtr_sk(sock->sk); -+ struct sock *sk = sock->sk; -+ struct sockaddr_qrtr *sq; -+ struct sk_buff *skb; -+ struct ifreq ifr; -+ long len = 0; -+ int rc = 0; -+ -+ lock_sock(sk); -+ -+ switch (cmd) { -+ case TIOCOUTQ: -+ len = sk->sk_sndbuf - sk_wmem_alloc_get(sk); -+ if (len < 0) -+ len = 0; -+ rc = put_user(len, (int __user *)argp); -+ break; -+ case TIOCINQ: -+ skb = skb_peek(&sk->sk_receive_queue); -+ if (skb) -+ len = skb->len - QRTR_HDR_SIZE; -+ rc = put_user(len, (int __user *)argp); -+ break; -+ case SIOCGIFADDR: -+ if (copy_from_user(&ifr, argp, 
sizeof(ifr))) { -+ rc = -EFAULT; -+ break; -+ } -+ -+ sq = (struct sockaddr_qrtr *)&ifr.ifr_addr; -+ *sq = ipc->us; -+ if (copy_to_user(argp, &ifr, sizeof(ifr))) { -+ rc = -EFAULT; -+ break; -+ } -+ break; -+ case SIOCGSTAMP: -+ rc = sock_get_timestamp(sk, argp); -+ break; -+ case SIOCADDRT: -+ case SIOCDELRT: -+ case SIOCSIFADDR: -+ case SIOCGIFDSTADDR: -+ case SIOCSIFDSTADDR: -+ case SIOCGIFBRDADDR: -+ case SIOCSIFBRDADDR: -+ case SIOCGIFNETMASK: -+ case SIOCSIFNETMASK: -+ rc = -EINVAL; -+ break; -+ default: -+ rc = -ENOIOCTLCMD; -+ break; -+ } -+ -+ release_sock(sk); -+ -+ return rc; -+} -+ -+static int qrtr_release(struct socket *sock) -+{ -+ struct sock *sk = sock->sk; -+ struct qrtr_sock *ipc; -+ -+ if (!sk) -+ return 0; -+ -+ lock_sock(sk); -+ -+ ipc = qrtr_sk(sk); -+ sk->sk_shutdown = SHUTDOWN_MASK; -+ if (!sock_flag(sk, SOCK_DEAD)) -+ sk->sk_state_change(sk); -+ -+ sock_set_flag(sk, SOCK_DEAD); -+ sock->sk = NULL; -+ -+ if (!sock_flag(sk, SOCK_ZAPPED)) -+ qrtr_port_remove(ipc); -+ -+ skb_queue_purge(&sk->sk_receive_queue); -+ -+ release_sock(sk); -+ sock_put(sk); -+ -+ return 0; -+} -+ -+static const struct proto_ops qrtr_proto_ops = { -+ .owner = THIS_MODULE, -+ .family = AF_QIPCRTR, -+ .bind = qrtr_bind, -+ .connect = qrtr_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = sock_no_accept, -+ .listen = sock_no_listen, -+ .sendmsg = qrtr_sendmsg, -+ .recvmsg = qrtr_recvmsg, -+ .getname = qrtr_getname, -+ .ioctl = qrtr_ioctl, -+ .poll = datagram_poll, -+ .shutdown = sock_no_shutdown, -+ .setsockopt = sock_no_setsockopt, -+ .getsockopt = sock_no_getsockopt, -+ .release = qrtr_release, -+ .mmap = sock_no_mmap, -+ .sendpage = sock_no_sendpage, -+}; -+ -+static struct proto qrtr_proto = { -+ .name = "QIPCRTR", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct qrtr_sock), -+}; -+ -+static int qrtr_create(struct net *net, struct socket *sock, -+ int protocol, int kern) -+{ -+ struct qrtr_sock *ipc; -+ struct sock *sk; -+ -+ if (sock->type != 
SOCK_DGRAM) -+ return -EPROTOTYPE; -+ -+ sk = sk_alloc(net, AF_QIPCRTR, GFP_KERNEL, &qrtr_proto, kern); -+ if (!sk) -+ return -ENOMEM; -+ -+ sock_set_flag(sk, SOCK_ZAPPED); -+ -+ sock_init_data(sock, sk); -+ sock->ops = &qrtr_proto_ops; -+ -+ ipc = qrtr_sk(sk); -+ ipc->us.sq_family = AF_QIPCRTR; -+ ipc->us.sq_node = qrtr_local_nid; -+ ipc->us.sq_port = 0; -+ -+ return 0; -+} -+ -+static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { -+ [IFA_LOCAL] = { .type = NLA_U32 }, -+}; -+ -+static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) -+{ -+ struct nlattr *tb[IFA_MAX + 1]; -+ struct ifaddrmsg *ifm; -+ int rc; -+ -+ if (!netlink_capable(skb, CAP_NET_ADMIN)) -+ return -EPERM; -+ -+ if (!netlink_capable(skb, CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ ASSERT_RTNL(); -+ -+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); -+ if (rc < 0) -+ return rc; -+ -+ ifm = nlmsg_data(nlh); -+ if (!tb[IFA_LOCAL]) -+ return -EINVAL; -+ -+ qrtr_local_nid = nla_get_u32(tb[IFA_LOCAL]); -+ return 0; -+} -+ -+static const struct net_proto_family qrtr_family = { -+ .owner = THIS_MODULE, -+ .family = AF_QIPCRTR, -+ .create = qrtr_create, -+}; -+ -+static int __init qrtr_proto_init(void) -+{ -+ int rc; -+ -+ rc = proto_register(&qrtr_proto, 1); -+ if (rc) -+ return rc; -+ -+ rc = sock_register(&qrtr_family); -+ if (rc) { -+ proto_unregister(&qrtr_proto); -+ return rc; -+ } -+ -+ rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); -+ -+ return 0; -+} -+module_init(qrtr_proto_init); -+ -+static void __exit qrtr_proto_fini(void) -+{ -+ rtnl_unregister(PF_QIPCRTR, RTM_NEWADDR); -+ sock_unregister(qrtr_family.family); -+ proto_unregister(&qrtr_proto); -+} -+module_exit(qrtr_proto_fini); -+ -+MODULE_DESCRIPTION("Qualcomm IPC-router driver"); -+MODULE_LICENSE("GPL v2"); -diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h -new file mode 100644 -index 000000000000..2b848718f8fe ---- /dev/null -+++ b/net/qrtr/qrtr.h -@@ -0,0 +1,31 @@ -+#ifndef __QRTR_H_ 
-+#define __QRTR_H_ -+ -+#include -+ -+struct sk_buff; -+ -+/* endpoint node id auto assignment */ -+#define QRTR_EP_NID_AUTO (-1) -+ -+/** -+ * struct qrtr_endpoint - endpoint handle -+ * @xmit: Callback for outgoing packets -+ * -+ * The socket buffer passed to the xmit function becomes owned by the endpoint -+ * driver. As such, when the driver is done with the buffer, it should -+ * call kfree_skb() on failure, or consume_skb() on success. -+ */ -+struct qrtr_endpoint { -+ int (*xmit)(struct qrtr_endpoint *ep, struct sk_buff *skb); -+ /* private: not for endpoint use */ -+ struct qrtr_node *node; -+}; -+ -+int qrtr_endpoint_register(struct qrtr_endpoint *ep, unsigned int nid); -+ -+void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); -+ -+int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); -+ -+#endif -diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c -new file mode 100644 -index 000000000000..84ebce73aa23 ---- /dev/null -+++ b/net/qrtr/smd.c -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2015, Sony Mobile Communications Inc. -+ * Copyright (c) 2013, The Linux Foundation. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ */ -+ -+#include -+#include -+#include -+ -+#include "qrtr.h" -+ -+struct qrtr_smd_dev { -+ struct qrtr_endpoint ep; -+ struct qcom_smd_channel *channel; -+}; -+ -+/* from smd to qrtr */ -+static int qcom_smd_qrtr_callback(struct qcom_smd_device *sdev, -+ const void *data, size_t len) -+{ -+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); -+ int rc; -+ -+ if (!qdev) -+ return -EAGAIN; -+ -+ rc = qrtr_endpoint_post(&qdev->ep, data, len); -+ if (rc == -EINVAL) { -+ dev_err(&sdev->dev, "invalid ipcrouter packet\n"); -+ /* return 0 to let smd drop the packet */ -+ rc = 0; -+ } -+ -+ return rc; -+} -+ -+/* from qrtr to smd */ -+static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) -+{ -+ struct qrtr_smd_dev *qdev = container_of(ep, struct qrtr_smd_dev, ep); -+ int rc; -+ -+ rc = skb_linearize(skb); -+ if (rc) -+ goto out; -+ -+ rc = qcom_smd_send(qdev->channel, skb->data, skb->len); -+ -+out: -+ if (rc) -+ kfree_skb(skb); -+ else -+ consume_skb(skb); -+ return rc; -+} -+ -+static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) -+{ -+ struct qrtr_smd_dev *qdev; -+ int rc; -+ -+ qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); -+ if (!qdev) -+ return -ENOMEM; -+ -+ qdev->channel = sdev->channel; -+ qdev->ep.xmit = qcom_smd_qrtr_send; -+ -+ rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); -+ if (rc) -+ return rc; -+ -+ dev_set_drvdata(&sdev->dev, qdev); -+ -+ dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); -+ -+ return 0; -+} -+ -+static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) -+{ -+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); -+ -+ qrtr_endpoint_unregister(&qdev->ep); -+ -+ dev_set_drvdata(&sdev->dev, NULL); -+} -+ -+static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { -+ { "IPCRTR" }, -+ {} -+}; -+ -+static struct qcom_smd_driver qcom_smd_qrtr_driver = { -+ .probe = qcom_smd_qrtr_probe, -+ .remove = qcom_smd_qrtr_remove, -+ .callback = qcom_smd_qrtr_callback, -+ 
.smd_match_table = qcom_smd_qrtr_smd_match, -+ .driver = { -+ .name = "qcom_smd_qrtr", -+ .owner = THIS_MODULE, -+ }, -+}; -+ -+module_qcom_smd_driver(qcom_smd_qrtr_driver); -+ -+MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); -+MODULE_LICENSE("GPL v2"); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0039-hv_sock-introduce-Hyper-V-Sockets.patch b/kernel/patches-4.4.x/0039-hv_sock-introduce-Hyper-V-Sockets.patch deleted file mode 100644 index 4a1c9b1ab..000000000 --- a/kernel/patches-4.4.x/0039-hv_sock-introduce-Hyper-V-Sockets.patch +++ /dev/null @@ -1,1805 +0,0 @@ -From 400f7f589396ced3306669e1541fbe41094cc40f Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Sun, 15 May 2016 09:53:11 -0700 -Subject: [PATCH 39/44] hv_sock: introduce Hyper-V Sockets - -Hyper-V Sockets (hv_sock) supplies a byte-stream based communication -mechanism between the host and the guest. It's somewhat like TCP over -VMBus, but the transportation layer (VMBus) is much simpler than IP. - -With Hyper-V Sockets, applications between the host and the guest can talk -to each other directly by the traditional BSD-style socket APIs. - -Hyper-V Sockets is only available on new Windows hosts, like Windows Server -2016. More info is in this article "Make your own integration services": -https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service - -The patch implements the necessary support in the guest side by introducing -a new socket address family AF_HYPERV. - -Signed-off-by: Dexuan Cui -Cc: "K. Y. 
Srinivasan" -Cc: Haiyang Zhang -Cc: Vitaly Kuznetsov -Cc: Cathy Avery -Origin: https://patchwork.ozlabs.org/patch/622404/ ---- - MAINTAINERS | 2 + - include/linux/hyperv.h | 14 + - include/linux/socket.h | 4 +- - include/net/af_hvsock.h | 78 +++ - include/uapi/linux/hyperv.h | 25 + - net/Kconfig | 1 + - net/Makefile | 1 + - net/hv_sock/Kconfig | 10 + - net/hv_sock/Makefile | 3 + - net/hv_sock/af_hvsock.c | 1520 +++++++++++++++++++++++++++++++++++++++++++ - 10 files changed, 1657 insertions(+), 1 deletion(-) - create mode 100644 include/net/af_hvsock.h - create mode 100644 net/hv_sock/Kconfig - create mode 100644 net/hv_sock/Makefile - create mode 100644 net/hv_sock/af_hvsock.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index 12d49f58c4e0..fa87bddcf2df 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -5123,7 +5123,9 @@ F: drivers/input/serio/hyperv-keyboard.c - F: drivers/net/hyperv/ - F: drivers/scsi/storvsc_drv.c - F: drivers/video/fbdev/hyperv_fb.c -+F: net/hv_sock/ - F: include/linux/hyperv.h -+F: include/net/af_hvsock.h - F: tools/hv/ - F: Documentation/ABI/stable/sysfs-bus-vmbus - -diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h -index 6c9695ef757e..187d4bda4d8c 100644 ---- a/include/linux/hyperv.h -+++ b/include/linux/hyperv.h -@@ -1349,4 +1349,18 @@ extern __u32 vmbus_proto_version; - - int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, - const uuid_le *shv_host_servie_id); -+struct vmpipe_proto_header { -+ u32 pkt_type; -+ u32 data_size; -+}; -+ -+#define HVSOCK_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \ -+ sizeof(struct vmpipe_proto_header)) -+ -+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */ -+#define PREV_INDICES_LEN (sizeof(u64)) -+ -+#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ -+ ALIGN((payload_len), 8) + \ -+ PREV_INDICES_LEN) - #endif /* _HYPERV_H */ -diff --git a/include/linux/socket.h b/include/linux/socket.h -index dbd81e7f21fd..6634c47c1825 100644 ---- 
a/include/linux/socket.h -+++ b/include/linux/socket.h -@@ -202,8 +202,9 @@ struct ucred { - #define AF_VSOCK 40 /* vSockets */ - #define AF_KCM 41 /* Kernel Connection Multiplexor*/ - #define AF_QIPCRTR 42 /* Qualcomm IPC Router */ -+#define AF_HYPERV 43 /* Hyper-V Sockets */ - --#define AF_MAX 43 /* For now.. */ -+#define AF_MAX 44 /* For now.. */ - - /* Protocol families, same as address families. */ - #define PF_UNSPEC AF_UNSPEC -@@ -251,6 +252,7 @@ struct ucred { - #define PF_VSOCK AF_VSOCK - #define PF_KCM AF_KCM - #define PF_QIPCRTR AF_QIPCRTR -+#define PF_HYPERV AF_HYPERV - #define PF_MAX AF_MAX - - /* Maximum queue length specifiable by listen. */ -diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h -new file mode 100644 -index 000000000000..7c8c41e78157 ---- /dev/null -+++ b/include/net/af_hvsock.h -@@ -0,0 +1,78 @@ -+#ifndef __AF_HVSOCK_H__ -+#define __AF_HVSOCK_H__ -+ -+#include -+#include -+#include -+ -+/* Note: 3-page is the minimal recv ringbuffer size by default: -+ * -+ * the 1st page is used as the shared read/write index etc, rather than data: -+ * see hv_ringbuffer_init(); -+ * -+ * the payload length in the vmbus pipe message received from the host can -+ * be 4096 bytes, and considing the header of HVSOCK_HEADER_LEN bytes, we -+ * need at least 2 extra pages for ringbuffer data. -+ */ -+#define HVSOCK_RCV_BUF_SZ PAGE_SIZE -+#define DEF_RINGBUFFER_PAGES_HVSOCK_RCV 3 -+ -+/* As to send, here let's make sure the hvsock_send_buf struct can be held in 1 -+ * page, and since we want to use 2 pages for the send ringbuffer size (this is -+ * the minimal size by default, because the 1st page of the two is used as the -+ * shared read/write index etc, rather than data), we only have 1 page for -+ * ringbuffer data, this means: the max payload length for hvsock data is -+ * PAGE_SIZE - HVSOCK_PKT_LEN(0). And, let's reduce the length by 8-bytes -+ * because the ringbuffer can't be 100% full: see hv_ringbuffer_write(). 
-+ */ -+#define HVSOCK_SND_BUF_SZ (PAGE_SIZE - HVSOCK_PKT_LEN(0) - 8) -+#define DEF_RINGBUFFER_PAGES_HVSOCK_SND 2 -+ -+/* We only send data when the available space is "big enough". This artificial -+ * value must be less than HVSOCK_SND_BUF_SZ. -+ * -+ */ -+#define HVSOCK_SND_THRESHOLD (PAGE_SIZE / 2) -+ -+#define sk_to_hvsock(__sk) ((struct hvsock_sock *)(__sk)) -+#define hvsock_to_sk(__hvsk) ((struct sock *)(__hvsk)) -+ -+struct hvsock_send_buf { -+ struct vmpipe_proto_header hdr; -+ u8 buf[HVSOCK_SND_BUF_SZ]; -+}; -+ -+struct hvsock_recv_buf { -+ struct vmpipe_proto_header hdr; -+ u8 buf[HVSOCK_RCV_BUF_SZ]; -+ -+ unsigned int data_len; -+ unsigned int data_offset; -+}; -+ -+struct hvsock_sock { -+ /* sk must be the first member. */ -+ struct sock sk; -+ -+ struct sockaddr_hv local_addr; -+ struct sockaddr_hv remote_addr; -+ -+ /* protected by the global hvsock_mutex */ -+ struct list_head bound_list; -+ struct list_head connected_list; -+ -+ struct list_head accept_queue; -+ /* used by enqueue and dequeue */ -+ struct mutex accept_queue_mutex; -+ -+ struct delayed_work dwork; -+ -+ u32 peer_shutdown; -+ -+ struct vmbus_channel *channel; -+ -+ struct hvsock_send_buf *send; -+ struct hvsock_recv_buf *recv; -+}; -+ -+#endif /* __AF_HVSOCK_H__ */ -diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h -index e347b24ef9fb..408b832716b8 100644 ---- a/include/uapi/linux/hyperv.h -+++ b/include/uapi/linux/hyperv.h -@@ -26,6 +26,7 @@ - #define _UAPI_HYPERV_H - - #include -+#include - - /* - * Framework version for util services. -@@ -396,4 +397,28 @@ struct hv_kvp_ip_msg { - struct hv_kvp_ipaddr_value kvp_ip_val; - } __attribute__((packed)); - -+/* -+ * This is the address fromat of Hyper-V Sockets. -+ * Note: here we just borrow the kernel's built-in type uuid_le. When -+ * an application calls bind() or connect(), the 2 members of struct -+ * sockaddr_hv must be of GUID. 
-+ * The GUID format differs from the UUID format only in the byte order of -+ * the first 3 fields. Refer to: -+ * https://en.wikipedia.org/wiki/Globally_unique_identifier -+ */ -+#define guid_t uuid_le -+struct sockaddr_hv { -+ __kernel_sa_family_t shv_family; /* Address family */ -+ __le16 reserved; /* Must be Zero */ -+ guid_t shv_vm_id; /* VM ID */ -+ guid_t shv_service_id; /* Service ID */ -+}; -+ -+#define SHV_VMID_GUEST NULL_UUID_LE -+#define SHV_VMID_HOST NULL_UUID_LE -+ -+#define SHV_SERVICE_ID_ANY NULL_UUID_LE -+ -+#define SHV_PROTO_RAW 1 -+ - #endif /* _UAPI_HYPERV_H */ -diff --git a/net/Kconfig b/net/Kconfig -index 1c9fda1c602d..9eeccb75ee4e 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -228,6 +228,7 @@ source "net/dns_resolver/Kconfig" - source "net/batman-adv/Kconfig" - source "net/openvswitch/Kconfig" - source "net/vmw_vsock/Kconfig" -+source "net/hv_sock/Kconfig" - source "net/netlink/Kconfig" - source "net/mpls/Kconfig" - source "net/hsr/Kconfig" -diff --git a/net/Makefile b/net/Makefile -index bdd14553a774..ec175ddfac38 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -70,6 +70,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ - obj-$(CONFIG_NFC) += nfc/ - obj-$(CONFIG_OPENVSWITCH) += openvswitch/ - obj-$(CONFIG_VSOCKETS) += vmw_vsock/ -+obj-$(CONFIG_HYPERV_SOCK) += hv_sock/ - obj-$(CONFIG_MPLS) += mpls/ - obj-$(CONFIG_HSR) += hsr/ - ifneq ($(CONFIG_NET_SWITCHDEV),) -diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig -new file mode 100644 -index 000000000000..1f4184829dde ---- /dev/null -+++ b/net/hv_sock/Kconfig -@@ -0,0 +1,10 @@ -+config HYPERV_SOCK -+ tristate "Hyper-V Sockets" -+ depends on HYPERV -+ default m if HYPERV -+ help -+ Hyper-V Sockets is somewhat like TCP over VMBus, allowing -+ communication between Linux guest and Hyper-V host without TCP/IP. -+ -+ To compile this driver as a module, choose M here: the module -+ will be called hv_sock. 
-diff --git a/net/hv_sock/Makefile b/net/hv_sock/Makefile -new file mode 100644 -index 000000000000..716c01230129 ---- /dev/null -+++ b/net/hv_sock/Makefile -@@ -0,0 +1,3 @@ -+obj-$(CONFIG_HYPERV_SOCK) += hv_sock.o -+ -+hv_sock-y += af_hvsock.o -diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c -new file mode 100644 -index 000000000000..b91bd608bf39 ---- /dev/null -+++ b/net/hv_sock/af_hvsock.c -@@ -0,0 +1,1520 @@ -+/* -+ * Hyper-V Sockets -- a socket-based communication channel between the -+ * Hyper-V host and the virtual machines running on it. -+ * -+ * Copyright(c) 2016, Microsoft Corporation. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. The name of the author may not be used to endorse or promote -+ * products derived from this software without specific prior written -+ * permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+ -+static uint send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND; -+static uint recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV; -+static uint max_socket_number = 1024; -+ -+static atomic_t total_num_hvsock = ATOMIC_INIT(0); -+ -+module_param(send_ring_page, uint, 0444); -+MODULE_PARM_DESC(send_ring_page, "Send ring buffer size (# of pages)"); -+ -+module_param(recv_ring_page, uint, 0444); -+MODULE_PARM_DESC(recv_ring_page, "Receive ring buffer size (# of pages)"); -+ -+module_param(max_socket_number, uint, 0644); -+MODULE_PARM_DESC(max_socket_number, "The max number of created sockets"); -+ -+static struct proto hvsock_proto = { -+ .name = "HV_SOCK", -+ .owner = THIS_MODULE, -+ .obj_size = sizeof(struct hvsock_sock), -+}; -+ -+#define SS_LISTEN 255 -+ -+static LIST_HEAD(hvsock_bound_list); -+static LIST_HEAD(hvsock_connected_list); -+static DEFINE_MUTEX(hvsock_mutex); -+ -+static bool uuid_equals(uuid_le u1, uuid_le u2) -+{ -+ return !uuid_le_cmp(u1, u2); -+} -+ -+static struct sock *hvsock_find_bound_socket(const struct sockaddr_hv *addr) -+{ -+ struct hvsock_sock *hvsk; -+ -+ list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) { -+ if (uuid_equals(addr->shv_service_id, -+ hvsk->local_addr.shv_service_id)) -+ return hvsock_to_sk(hvsk); -+ } -+ return NULL; -+} -+ -+static struct sock *hvsock_find_connected_socket_by_channel( -+ const struct vmbus_channel 
*channel) -+{ -+ struct hvsock_sock *hvsk; -+ -+ list_for_each_entry(hvsk, &hvsock_connected_list, connected_list) { -+ if (hvsk->channel == channel) -+ return hvsock_to_sk(hvsk); -+ } -+ return NULL; -+} -+ -+static -+void hvsock_enqueue_accept(struct sock *listener, struct sock *connected) -+{ -+ struct hvsock_sock *hvlistener; -+ struct hvsock_sock *hvconnected; -+ -+ hvlistener = sk_to_hvsock(listener); -+ hvconnected = sk_to_hvsock(connected); -+ -+ sock_hold(connected); -+ sock_hold(listener); -+ -+ mutex_lock(&hvlistener->accept_queue_mutex); -+ list_add_tail(&hvconnected->accept_queue, &hvlistener->accept_queue); -+ listener->sk_ack_backlog++; -+ mutex_unlock(&hvlistener->accept_queue_mutex); -+} -+ -+static struct sock *hvsock_dequeue_accept(struct sock *listener) -+{ -+ struct hvsock_sock *hvlistener; -+ struct hvsock_sock *hvconnected; -+ -+ hvlistener = sk_to_hvsock(listener); -+ -+ mutex_lock(&hvlistener->accept_queue_mutex); -+ -+ if (list_empty(&hvlistener->accept_queue)) { -+ mutex_unlock(&hvlistener->accept_queue_mutex); -+ return NULL; -+ } -+ -+ hvconnected = list_entry(hvlistener->accept_queue.next, -+ struct hvsock_sock, accept_queue); -+ -+ list_del_init(&hvconnected->accept_queue); -+ listener->sk_ack_backlog--; -+ -+ mutex_unlock(&hvlistener->accept_queue_mutex); -+ -+ sock_put(listener); -+ /* The caller will need a reference on the connected socket so we let -+ * it call sock_put(). 
-+ */ -+ -+ return hvsock_to_sk(hvconnected); -+} -+ -+static bool hvsock_is_accept_queue_empty(struct sock *sk) -+{ -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ int ret; -+ -+ mutex_lock(&hvsk->accept_queue_mutex); -+ ret = list_empty(&hvsk->accept_queue); -+ mutex_unlock(&hvsk->accept_queue_mutex); -+ -+ return ret; -+} -+ -+static void hvsock_addr_init(struct sockaddr_hv *addr, uuid_le service_id) -+{ -+ memset(addr, 0, sizeof(*addr)); -+ addr->shv_family = AF_HYPERV; -+ addr->shv_service_id = service_id; -+} -+ -+static int hvsock_addr_validate(const struct sockaddr_hv *addr) -+{ -+ if (!addr) -+ return -EFAULT; -+ -+ if (addr->shv_family != AF_HYPERV) -+ return -EAFNOSUPPORT; -+ -+ if (addr->reserved != 0) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static bool hvsock_addr_bound(const struct sockaddr_hv *addr) -+{ -+ return !uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY); -+} -+ -+static int hvsock_addr_cast(const struct sockaddr *addr, size_t len, -+ struct sockaddr_hv **out_addr) -+{ -+ if (len < sizeof(**out_addr)) -+ return -EFAULT; -+ -+ *out_addr = (struct sockaddr_hv *)addr; -+ return hvsock_addr_validate(*out_addr); -+} -+ -+static int __hvsock_do_bind(struct hvsock_sock *hvsk, -+ struct sockaddr_hv *addr) -+{ -+ struct sockaddr_hv hv_addr; -+ int ret = 0; -+ -+ hvsock_addr_init(&hv_addr, addr->shv_service_id); -+ -+ mutex_lock(&hvsock_mutex); -+ -+ if (uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY)) { -+ do { -+ uuid_le_gen(&hv_addr.shv_service_id); -+ } while (hvsock_find_bound_socket(&hv_addr)); -+ } else { -+ if (hvsock_find_bound_socket(&hv_addr)) { -+ ret = -EADDRINUSE; -+ goto out; -+ } -+ } -+ -+ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_id); -+ -+ sock_hold(&hvsk->sk); -+ list_add(&hvsk->bound_list, &hvsock_bound_list); -+out: -+ mutex_unlock(&hvsock_mutex); -+ -+ return ret; -+} -+ -+static int __hvsock_bind(struct sock *sk, struct sockaddr_hv *addr) -+{ -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ int 
ret; -+ -+ if (hvsock_addr_bound(&hvsk->local_addr)) -+ return -EINVAL; -+ -+ switch (sk->sk_socket->type) { -+ case SOCK_STREAM: -+ ret = __hvsock_do_bind(hvsk, addr); -+ break; -+ -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ -+ return ret; -+} -+ -+/* Autobind this socket to the local address if necessary. */ -+static int hvsock_auto_bind(struct hvsock_sock *hvsk) -+{ -+ struct sock *sk = hvsock_to_sk(hvsk); -+ struct sockaddr_hv local_addr; -+ -+ if (hvsock_addr_bound(&hvsk->local_addr)) -+ return 0; -+ hvsock_addr_init(&local_addr, SHV_SERVICE_ID_ANY); -+ return __hvsock_bind(sk, &local_addr); -+} -+ -+static void hvsock_sk_destruct(struct sock *sk) -+{ -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ -+ kfree(hvsk->send); -+ kfree(hvsk->recv); -+ atomic_dec(&total_num_hvsock); -+ -+ if (!channel) -+ return; -+ -+ vmbus_hvsock_device_unregister(channel); -+} -+ -+static void __hvsock_release(struct sock *sk) -+{ -+ struct hvsock_sock *hvsk; -+ struct sock *pending; -+ -+ hvsk = sk_to_hvsock(sk); -+ -+ mutex_lock(&hvsock_mutex); -+ -+ if (!list_empty(&hvsk->bound_list)) { -+ list_del_init(&hvsk->bound_list); -+ sock_put(&hvsk->sk); -+ } -+ -+ if (!list_empty(&hvsk->connected_list)) { -+ list_del_init(&hvsk->connected_list); -+ sock_put(&hvsk->sk); -+ } -+ -+ mutex_unlock(&hvsock_mutex); -+ -+ lock_sock(sk); -+ sock_orphan(sk); -+ sk->sk_shutdown = SHUTDOWN_MASK; -+ -+ /* Clean up any sockets that never were accepted. */ -+ while ((pending = hvsock_dequeue_accept(sk)) != NULL) { -+ __hvsock_release(pending); -+ sock_put(pending); -+ } -+ -+ release_sock(sk); -+ sock_put(sk); -+} -+ -+static int hvsock_release(struct socket *sock) -+{ -+ /* If accept() is interrupted by a signal, the temporary socket -+ * struct's sock->sk is NULL. 
-+ */ -+ if (sock->sk) { -+ __hvsock_release(sock->sk); -+ sock->sk = NULL; -+ } -+ -+ sock->state = SS_FREE; -+ return 0; -+} -+ -+static int hvsock_create(struct net *net, struct socket *sock, -+ gfp_t priority, unsigned short type, -+ struct sock **sk) -+{ -+ struct hvsock_send_buf *send = NULL; -+ struct hvsock_recv_buf *recv = NULL; -+ struct hvsock_sock *hvsk; -+ int ret = -EMFILE; -+ int num_hvsock; -+ -+ num_hvsock = atomic_inc_return(&total_num_hvsock); -+ if (num_hvsock > max_socket_number) -+ goto err; -+ -+ ret = -ENOMEM; -+ send = kmalloc(sizeof(*send), GFP_KERNEL); -+ recv = kmalloc(sizeof(*recv), GFP_KERNEL); -+ if (!send || !recv) -+ goto err; -+ -+ *sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0); -+ if (!*sk) -+ goto err; -+ -+ sock_init_data(sock, *sk); -+ -+ /* (*sk)->sk_type is normally set in sock_init_data, but only if sock -+ * is non-NULL. We make sure that our sockets always have a type by -+ * setting it here if needed. -+ */ -+ if (!sock) -+ (*sk)->sk_type = type; -+ -+ (*sk)->sk_destruct = hvsock_sk_destruct; -+ -+ /* Looks stream-based socket doesn't need this. 
*/ -+ (*sk)->sk_backlog_rcv = NULL; -+ -+ (*sk)->sk_state = 0; -+ sock_reset_flag(*sk, SOCK_DONE); -+ -+ hvsk = sk_to_hvsock(*sk); -+ -+ hvsk->send = send; -+ hvsk->recv = recv; -+ -+ hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY); -+ hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY); -+ -+ INIT_LIST_HEAD(&hvsk->bound_list); -+ INIT_LIST_HEAD(&hvsk->connected_list); -+ -+ INIT_LIST_HEAD(&hvsk->accept_queue); -+ mutex_init(&hvsk->accept_queue_mutex); -+ -+ hvsk->peer_shutdown = 0; -+ -+ hvsk->recv->data_len = 0; -+ hvsk->recv->data_offset = 0; -+ -+ return 0; -+err: -+ atomic_dec(&total_num_hvsock); -+ kfree(send); -+ kfree(recv); -+ *sk = NULL; -+ return ret; -+} -+ -+static int hvsock_bind(struct socket *sock, struct sockaddr *addr, -+ int addr_len) -+{ -+ struct sockaddr_hv *hv_addr; -+ struct sock *sk; -+ int ret; -+ -+ sk = sock->sk; -+ -+ if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0) -+ return -EINVAL; -+ -+ if (!uuid_equals(hv_addr->shv_vm_id, NULL_UUID_LE)) -+ return -EINVAL; -+ -+ lock_sock(sk); -+ ret = __hvsock_bind(sk, hv_addr); -+ release_sock(sk); -+ -+ return ret; -+} -+ -+static int hvsock_getname(struct socket *sock, -+ struct sockaddr *addr, int *addr_len, int peer) -+{ -+ struct sockaddr_hv *hv_addr; -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ int ret; -+ -+ sk = sock->sk; -+ hvsk = sk_to_hvsock(sk); -+ ret = 0; -+ -+ lock_sock(sk); -+ -+ if (peer) { -+ if (sock->state != SS_CONNECTED) { -+ ret = -ENOTCONN; -+ goto out; -+ } -+ hv_addr = &hvsk->remote_addr; -+ } else { -+ hv_addr = &hvsk->local_addr; -+ } -+ -+ __sockaddr_check_size(sizeof(*hv_addr)); -+ -+ memcpy(addr, hv_addr, sizeof(*hv_addr)); -+ *addr_len = sizeof(*hv_addr); -+ -+out: -+ release_sock(sk); -+ return ret; -+} -+ -+static void get_ringbuffer_rw_status(struct vmbus_channel *channel, -+ bool *can_read, bool *can_write) -+{ -+ u32 avl_read_bytes, avl_write_bytes, dummy; -+ -+ if (can_read) { -+ hv_get_ringbuffer_availbytes(&channel->inbound, -+ 
&avl_read_bytes, -+ &dummy); -+ /* 0-size payload means FIN */ -+ *can_read = avl_read_bytes >= HVSOCK_PKT_LEN(0); -+ } -+ -+ if (can_write) { -+ hv_get_ringbuffer_availbytes(&channel->outbound, -+ &dummy, -+ &avl_write_bytes); -+ -+ *can_write = avl_write_bytes > -+ HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD); -+ } -+} -+ -+static size_t get_ringbuffer_writable_bytes(struct vmbus_channel *channel) -+{ -+ u32 avl_write_bytes, dummy; -+ size_t ret; -+ -+ hv_get_ringbuffer_availbytes(&channel->outbound, -+ &dummy, -+ &avl_write_bytes); -+ -+ if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) -+ return 0; -+ -+ /* The ringbuffer mustn't be 100% full, and we should reserve a -+ * zero-length-payload packet for the FIN: see hv_ringbuffer_write() -+ * and hvsock_shutdown(). -+ */ -+ ret = avl_write_bytes - HVSOCK_PKT_LEN(1) - HVSOCK_PKT_LEN(0); -+ -+ return round_down(ret, 8); -+} -+ -+static int hvsock_send_data(struct vmbus_channel *channel, -+ struct hvsock_sock *hvsk, -+ size_t to_write) -+{ -+ hvsk->send->hdr.pkt_type = 1; -+ hvsk->send->hdr.data_size = to_write; -+ return vmbus_sendpacket(channel, &hvsk->send->hdr, -+ sizeof(hvsk->send->hdr) + to_write, -+ 0, VM_PKT_DATA_INBAND, 0); -+} -+ -+static int hvsock_recv_data(struct vmbus_channel *channel, -+ struct hvsock_sock *hvsk, -+ size_t *payload_len) -+{ -+ u32 buffer_actual_len; -+ u64 dummy_req_id; -+ int ret; -+ -+ ret = vmbus_recvpacket(channel, &hvsk->recv->hdr, -+ sizeof(hvsk->recv->hdr) + -+ sizeof(hvsk->recv->buf), -+ &buffer_actual_len, &dummy_req_id); -+ if (ret != 0 || buffer_actual_len <= sizeof(hvsk->recv->hdr)) -+ *payload_len = 0; -+ else -+ *payload_len = hvsk->recv->hdr.data_size; -+ -+ return ret; -+} -+ -+static int hvsock_shutdown(struct socket *sock, int mode) -+{ -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ -+ if (mode < SHUT_RD || mode > SHUT_RDWR) -+ return -EINVAL; -+ /* This maps: -+ * SHUT_RD (0) -> RCV_SHUTDOWN (1) -+ * SHUT_WR (1) -> SEND_SHUTDOWN (2) -+ * SHUT_RDWR (2) -> 
SHUTDOWN_MASK (3) -+ */ -+ ++mode; -+ -+ if (sock->state != SS_CONNECTED) -+ return -ENOTCONN; -+ -+ sock->state = SS_DISCONNECTING; -+ -+ sk = sock->sk; -+ -+ lock_sock(sk); -+ -+ sk->sk_shutdown |= mode; -+ sk->sk_state_change(sk); -+ -+ if (mode & SEND_SHUTDOWN) { -+ hvsk = sk_to_hvsock(sk); -+ /* It can't fail: see get_ringbuffer_writable_bytes(). */ -+ (void)hvsock_send_data(hvsk->channel, hvsk, 0); -+ } -+ -+ release_sock(sk); -+ -+ return 0; -+} -+ -+static unsigned int hvsock_poll(struct file *file, struct socket *sock, -+ poll_table *wait) -+{ -+ struct vmbus_channel *channel; -+ bool can_read, can_write; -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ unsigned int mask; -+ -+ sk = sock->sk; -+ hvsk = sk_to_hvsock(sk); -+ -+ poll_wait(file, sk_sleep(sk), wait); -+ mask = 0; -+ -+ if (sk->sk_err) -+ /* Signify that there has been an error on this socket. */ -+ mask |= POLLERR; -+ -+ /* INET sockets treat local write shutdown and peer write shutdown as a -+ * case of POLLHUP set. -+ */ -+ if ((sk->sk_shutdown == SHUTDOWN_MASK) || -+ ((sk->sk_shutdown & SEND_SHUTDOWN) && -+ (hvsk->peer_shutdown & SEND_SHUTDOWN))) { -+ mask |= POLLHUP; -+ } -+ -+ if (sk->sk_shutdown & RCV_SHUTDOWN || -+ hvsk->peer_shutdown & SEND_SHUTDOWN) { -+ mask |= POLLRDHUP; -+ } -+ -+ lock_sock(sk); -+ -+ /* Listening sockets that have connections in their accept -+ * queue can be read. 
-+ */ -+ if (sk->sk_state == SS_LISTEN && !hvsock_is_accept_queue_empty(sk)) -+ mask |= POLLIN | POLLRDNORM; -+ -+ /* The mutex is to against hvsock_open_connection() */ -+ mutex_lock(&hvsock_mutex); -+ -+ channel = hvsk->channel; -+ if (channel) { -+ /* If there is something in the queue then we can read */ -+ get_ringbuffer_rw_status(channel, &can_read, &can_write); -+ -+ if (!can_read && hvsk->recv->data_len > 0) -+ can_read = true; -+ -+ if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read) -+ mask |= POLLIN | POLLRDNORM; -+ } else { -+ can_read = false; -+ can_write = false; -+ } -+ -+ mutex_unlock(&hvsock_mutex); -+ -+ /* Sockets whose connections have been closed terminated should -+ * also be considered read, and we check the shutdown flag for that. -+ */ -+ if (sk->sk_shutdown & RCV_SHUTDOWN || -+ hvsk->peer_shutdown & SEND_SHUTDOWN) { -+ mask |= POLLIN | POLLRDNORM; -+ } -+ -+ /* Connected sockets that can produce data can be written. */ -+ if (sk->sk_state == SS_CONNECTED && can_write && -+ !(sk->sk_shutdown & SEND_SHUTDOWN)) { -+ /* Remove POLLWRBAND since INET sockets are not setting it. -+ */ -+ mask |= POLLOUT | POLLWRNORM; -+ } -+ -+ /* Simulate INET socket poll behaviors, which sets -+ * POLLOUT|POLLWRNORM when peer is closed and nothing to read, -+ * but local send is not shutdown. -+ */ -+ if (sk->sk_state == SS_UNCONNECTED && -+ !(sk->sk_shutdown & SEND_SHUTDOWN)) -+ mask |= POLLOUT | POLLWRNORM; -+ -+ release_sock(sk); -+ -+ return mask; -+} -+ -+/* This function runs in the tasklet context of process_chn_event() */ -+static void hvsock_on_channel_cb(void *ctx) -+{ -+ struct sock *sk = (struct sock *)ctx; -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ bool can_read, can_write; -+ -+ if (!channel) { -+ WARN_ONCE(1, "NULL channel! 
There is a programming bug.\n"); -+ return; -+ } -+ -+ get_ringbuffer_rw_status(channel, &can_read, &can_write); -+ -+ if (can_read) -+ sk->sk_data_ready(sk); -+ -+ if (can_write) -+ sk->sk_write_space(sk); -+} -+ -+static void hvsock_close_connection(struct vmbus_channel *channel) -+{ -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ -+ mutex_lock(&hvsock_mutex); -+ -+ sk = hvsock_find_connected_socket_by_channel(channel); -+ -+ /* The guest has already closed the connection? */ -+ if (!sk) -+ goto out; -+ -+ sk->sk_state = SS_UNCONNECTED; -+ sock_set_flag(sk, SOCK_DONE); -+ -+ hvsk = sk_to_hvsock(sk); -+ hvsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; -+ -+ sk->sk_state_change(sk); -+out: -+ mutex_unlock(&hvsock_mutex); -+} -+ -+static int hvsock_open_connection(struct vmbus_channel *channel) -+{ -+ struct hvsock_sock *hvsk, *new_hvsk; -+ struct sockaddr_hv hv_addr; -+ struct sock *sk, *new_sk; -+ unsigned char conn_from_host; -+ -+ uuid_le *instance, *service_id; -+ int ret; -+ -+ instance = &channel->offermsg.offer.if_instance; -+ service_id = &channel->offermsg.offer.if_type; -+ -+ /* The first byte != 0 means the host initiated the connection. */ -+ conn_from_host = channel->offermsg.offer.u.pipe.user_def[0]; -+ -+ mutex_lock(&hvsock_mutex); -+ -+ hvsock_addr_init(&hv_addr, conn_from_host ? 
*service_id : *instance); -+ sk = hvsock_find_bound_socket(&hv_addr); -+ -+ if (!sk || (conn_from_host && sk->sk_state != SS_LISTEN) || -+ (!conn_from_host && sk->sk_state != SS_CONNECTING)) { -+ ret = -ENXIO; -+ goto out; -+ } -+ -+ if (conn_from_host) { -+ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { -+ ret = -EMFILE; -+ goto out; -+ } -+ -+ ret = hvsock_create(sock_net(sk), NULL, GFP_KERNEL, -+ sk->sk_type, &new_sk); -+ if (ret != 0) -+ goto out; -+ -+ new_sk->sk_state = SS_CONNECTING; -+ new_hvsk = sk_to_hvsock(new_sk); -+ new_hvsk->channel = channel; -+ hvsock_addr_init(&new_hvsk->local_addr, *service_id); -+ hvsock_addr_init(&new_hvsk->remote_addr, *instance); -+ } else { -+ hvsk = sk_to_hvsock(sk); -+ hvsk->channel = channel; -+ } -+ -+ set_channel_read_state(channel, false); -+ ret = vmbus_open(channel, send_ring_page * PAGE_SIZE, -+ recv_ring_page * PAGE_SIZE, NULL, 0, -+ hvsock_on_channel_cb, conn_from_host ? new_sk : sk); -+ if (ret != 0) { -+ if (conn_from_host) { -+ new_hvsk->channel = NULL; -+ sock_put(new_sk); -+ } else { -+ hvsk->channel = NULL; -+ } -+ goto out; -+ } -+ -+ vmbus_set_chn_rescind_callback(channel, hvsock_close_connection); -+ set_channel_pending_send_size(channel, -+ HVSOCK_PKT_LEN(HVSOCK_SND_THRESHOLD)); -+ -+ if (conn_from_host) { -+ new_sk->sk_state = SS_CONNECTED; -+ -+ sock_hold(&new_hvsk->sk); -+ list_add(&new_hvsk->connected_list, &hvsock_connected_list); -+ -+ hvsock_enqueue_accept(sk, new_sk); -+ } else { -+ sk->sk_state = SS_CONNECTED; -+ sk->sk_socket->state = SS_CONNECTED; -+ -+ sock_hold(&hvsk->sk); -+ list_add(&hvsk->connected_list, &hvsock_connected_list); -+ } -+ -+ sk->sk_state_change(sk); -+out: -+ mutex_unlock(&hvsock_mutex); -+ return ret; -+} -+ -+static void hvsock_connect_timeout(struct work_struct *work) -+{ -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ -+ hvsk = container_of(work, struct hvsock_sock, dwork.work); -+ sk = hvsock_to_sk(hvsk); -+ -+ lock_sock(sk); -+ if ((sk->sk_state == 
SS_CONNECTING) && -+ (sk->sk_shutdown != SHUTDOWN_MASK)) { -+ sk->sk_state = SS_UNCONNECTED; -+ sk->sk_err = ETIMEDOUT; -+ sk->sk_error_report(sk); -+ } -+ release_sock(sk); -+ -+ sock_put(sk); -+} -+ -+static int hvsock_connect_wait(struct socket *sock, -+ int flags, int current_ret) -+{ -+ struct sock *sk = sock->sk; -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ -+ int ret = current_ret; -+ -+ long timeout = 30 * HZ; -+ DEFINE_WAIT(wait); -+ -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -+ -+ while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { -+ if (flags & O_NONBLOCK) { -+ /* If we're not going to block, we schedule a timeout -+ * function to generate a timeout on the connection -+ * attempt, in case the peer doesn't respond in a -+ * timely manner. We hold on to the socket until the -+ * timeout fires. -+ */ -+ sock_hold(sk); -+ INIT_DELAYED_WORK(&hvsk->dwork, -+ hvsock_connect_timeout); -+ schedule_delayed_work(&hvsk->dwork, timeout); -+ -+ /* Skip ahead to preserve error code set above. */ -+ goto out_wait; -+ } -+ -+ release_sock(sk); -+ timeout = schedule_timeout(timeout); -+ lock_sock(sk); -+ -+ if (signal_pending(current)) { -+ ret = sock_intr_errno(timeout); -+ goto out_wait_error; -+ } else if (timeout == 0) { -+ ret = -ETIMEDOUT; -+ goto out_wait_error; -+ } -+ -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -+ } -+ -+ ret = sk->sk_err ? 
-sk->sk_err : 0; -+ -+out_wait_error: -+ if (ret < 0) { -+ sk->sk_state = SS_UNCONNECTED; -+ sock->state = SS_UNCONNECTED; -+ } -+out_wait: -+ finish_wait(sk_sleep(sk), &wait); -+ return ret; -+} -+ -+static int hvsock_connect(struct socket *sock, struct sockaddr *addr, -+ int addr_len, int flags) -+{ -+ struct sockaddr_hv *remote_addr; -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ int ret = 0; -+ -+ sk = sock->sk; -+ hvsk = sk_to_hvsock(sk); -+ -+ lock_sock(sk); -+ -+ switch (sock->state) { -+ case SS_CONNECTED: -+ ret = -EISCONN; -+ goto out; -+ case SS_DISCONNECTING: -+ ret = -EINVAL; -+ goto out; -+ case SS_CONNECTING: -+ /* This continues on so we can move sock into the SS_CONNECTED -+ * state once the connection has completed (at which point err -+ * will be set to zero also). Otherwise, we will either wait -+ * for the connection or return -EALREADY should this be a -+ * non-blocking call. -+ */ -+ ret = -EALREADY; -+ break; -+ default: -+ if ((sk->sk_state == SS_LISTEN) || -+ hvsock_addr_cast(addr, addr_len, &remote_addr) != 0) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ /* Set the remote address that we are connecting to. */ -+ memcpy(&hvsk->remote_addr, remote_addr, -+ sizeof(hvsk->remote_addr)); -+ -+ ret = hvsock_auto_bind(hvsk); -+ if (ret) -+ goto out; -+ -+ sk->sk_state = SS_CONNECTING; -+ -+ ret = vmbus_send_tl_connect_request( -+ &hvsk->local_addr.shv_service_id, -+ &hvsk->remote_addr.shv_service_id); -+ if (ret < 0) -+ goto out; -+ -+ /* Mark sock as connecting and set the error code to in -+ * progress in case this is a non-blocking connect. 
-+ */ -+ sock->state = SS_CONNECTING; -+ ret = -EINPROGRESS; -+ } -+ -+ ret = hvsock_connect_wait(sock, flags, ret); -+out: -+ release_sock(sk); -+ return ret; -+} -+ -+static int hvsock_accept_wait(struct sock *listener, -+ struct socket *newsock, int flags) -+{ -+ struct hvsock_sock *hvconnected; -+ struct sock *connected; -+ -+ DEFINE_WAIT(wait); -+ long timeout; -+ -+ int ret = 0; -+ -+ /* Wait for children sockets to appear; these are the new sockets -+ * created upon connection establishment. -+ */ -+ timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); -+ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); -+ -+ while ((connected = hvsock_dequeue_accept(listener)) == NULL && -+ listener->sk_err == 0) { -+ release_sock(listener); -+ timeout = schedule_timeout(timeout); -+ lock_sock(listener); -+ -+ if (signal_pending(current)) { -+ ret = sock_intr_errno(timeout); -+ goto out_wait; -+ } else if (timeout == 0) { -+ ret = -EAGAIN; -+ goto out_wait; -+ } -+ -+ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); -+ } -+ -+ if (listener->sk_err) -+ ret = -listener->sk_err; -+ -+ if (connected) { -+ lock_sock(connected); -+ hvconnected = sk_to_hvsock(connected); -+ -+ if (ret) { -+ release_sock(connected); -+ sock_put(connected); -+ } else { -+ newsock->state = SS_CONNECTED; -+ sock_graft(connected, newsock); -+ release_sock(connected); -+ sock_put(connected); -+ } -+ } -+ -+out_wait: -+ finish_wait(sk_sleep(listener), &wait); -+ return ret; -+} -+ -+static -+int hvsock_accept(struct socket *sock, struct socket *newsock, int flags) -+{ -+ struct sock *listener; -+ int ret; -+ -+ listener = sock->sk; -+ -+ lock_sock(listener); -+ -+ if (sock->type != SOCK_STREAM) { -+ ret = -EOPNOTSUPP; -+ goto out; -+ } -+ -+ if (listener->sk_state != SS_LISTEN) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ ret = hvsock_accept_wait(listener, newsock, flags); -+out: -+ release_sock(listener); -+ return ret; -+} -+ -+static int hvsock_listen(struct socket 
*sock, int backlog) -+{ -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ int ret = 0; -+ -+ sk = sock->sk; -+ lock_sock(sk); -+ -+ if (sock->type != SOCK_STREAM) { -+ ret = -EOPNOTSUPP; -+ goto out; -+ } -+ -+ if (sock->state != SS_UNCONNECTED) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (backlog <= 0) { -+ ret = -EINVAL; -+ goto out; -+ } -+ /* This is an artificial limit */ -+ if (backlog > 128) -+ backlog = 128; -+ -+ hvsk = sk_to_hvsock(sk); -+ if (!hvsock_addr_bound(&hvsk->local_addr)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ sk->sk_ack_backlog = 0; -+ sk->sk_max_ack_backlog = backlog; -+ sk->sk_state = SS_LISTEN; -+out: -+ release_sock(sk); -+ return ret; -+} -+ -+static -+int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, size_t len) -+{ -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ -+ size_t total_to_write = len; -+ size_t total_written = 0; -+ bool can_write; -+ -+ int ret = 0; -+ -+ DEFINE_WAIT(wait); -+ long timeout; -+ -+ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -+ -+ while (total_to_write > 0) { -+ size_t to_write, max_writable; -+ -+ while (1) { -+ get_ringbuffer_rw_status(channel, NULL, &can_write); -+ -+ if (can_write || sk->sk_err != 0 || -+ (sk->sk_shutdown & SEND_SHUTDOWN) || -+ (hvsk->peer_shutdown & RCV_SHUTDOWN)) -+ break; -+ -+ /* Don't wait for non-blocking sockets. */ -+ if (timeout == 0) { -+ ret = -EAGAIN; -+ goto out_wait; -+ } -+ -+ release_sock(sk); -+ -+ timeout = schedule_timeout(timeout); -+ -+ lock_sock(sk); -+ if (signal_pending(current)) { -+ ret = sock_intr_errno(timeout); -+ goto out_wait; -+ } else if (timeout == 0) { -+ ret = -EAGAIN; -+ goto out_wait; -+ } -+ -+ prepare_to_wait(sk_sleep(sk), &wait, -+ TASK_INTERRUPTIBLE); -+ } -+ -+ /* These checks occur both as part of and after the loop -+ * conditional since we need to check before and after -+ * sleeping. 
-+ */ -+ if (sk->sk_err) { -+ ret = -sk->sk_err; -+ goto out_wait; -+ } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || -+ (hvsk->peer_shutdown & RCV_SHUTDOWN)) { -+ ret = -EPIPE; -+ goto out_wait; -+ } -+ -+ /* Note: that write will only write as many bytes as possible -+ * in the ringbuffer. It is the caller's responsibility to -+ * check how many bytes we actually wrote. -+ */ -+ do { -+ max_writable = get_ringbuffer_writable_bytes(channel); -+ if (max_writable == 0) -+ goto out_wait; -+ -+ to_write = min_t(size_t, HVSOCK_SND_BUF_SZ, -+ total_to_write); -+ if (to_write > max_writable) -+ to_write = max_writable; -+ -+ ret = memcpy_from_msg(hvsk->send->buf, msg, to_write); -+ if (ret != 0) -+ goto out_wait; -+ -+ ret = hvsock_send_data(channel, hvsk, to_write); -+ if (ret != 0) -+ goto out_wait; -+ -+ total_written += to_write; -+ total_to_write -= to_write; -+ } while (total_to_write > 0); -+ } -+ -+out_wait: -+ if (total_written > 0) -+ ret = total_written; -+ -+ finish_wait(sk_sleep(sk), &wait); -+ return ret; -+} -+ -+static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -+{ -+ struct hvsock_sock *hvsk; -+ struct sock *sk; -+ int ret; -+ -+ if (len == 0) -+ return -EINVAL; -+ -+ if (msg->msg_flags & ~MSG_DONTWAIT) { -+ pr_err("%s: unsupported flags=0x%x\n", __func__, -+ msg->msg_flags); -+ return -EOPNOTSUPP; -+ } -+ -+ sk = sock->sk; -+ hvsk = sk_to_hvsock(sk); -+ -+ lock_sock(sk); -+ -+ /* Callers should not provide a destination with stream sockets. */ -+ if (msg->msg_namelen) { -+ ret = -EOPNOTSUPP; -+ goto out; -+ } -+ -+ /* Send data only if both sides are not shutdown in the direction. 
*/ -+ if (sk->sk_shutdown & SEND_SHUTDOWN || -+ hvsk->peer_shutdown & RCV_SHUTDOWN) { -+ ret = -EPIPE; -+ goto out; -+ } -+ -+ if (sk->sk_state != SS_CONNECTED || -+ !hvsock_addr_bound(&hvsk->local_addr)) { -+ ret = -ENOTCONN; -+ goto out; -+ } -+ -+ if (!hvsock_addr_bound(&hvsk->remote_addr)) { -+ ret = -EDESTADDRREQ; -+ goto out; -+ } -+ -+ ret = hvsock_sendmsg_wait(sk, msg, len); -+out: -+ release_sock(sk); -+ -+ /* ret is a bigger-than-0 total_written or a negative err code. */ -+ if (ret == 0) { -+ WARN(1, "unexpected return value of 0\n"); -+ ret = -EIO; -+ } -+ -+ return ret; -+} -+ -+static int hvsock_recvmsg_wait(struct sock *sk, struct msghdr *msg, -+ size_t len, int flags) -+{ -+ struct hvsock_sock *hvsk = sk_to_hvsock(sk); -+ struct vmbus_channel *channel = hvsk->channel; -+ -+ size_t to_read, total_to_read = len; -+ size_t copied = 0; -+ bool can_read; -+ -+ int ret = 0; -+ -+ DEFINE_WAIT(wait); -+ long timeout; -+ -+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -+ -+ while (1) { -+ bool need_refill = hvsk->recv->data_len == 0; -+ -+ if (need_refill) -+ get_ringbuffer_rw_status(channel, &can_read, NULL); -+ else -+ can_read = true; -+ -+ if (can_read) { -+ size_t payload_len; -+ -+ if (need_refill) { -+ ret = hvsock_recv_data(channel, hvsk, -+ &payload_len); -+ if (ret != 0 || -+ payload_len > HVSOCK_RCV_BUF_SZ) { -+ ret = -EIO; -+ goto out_wait; -+ } -+ -+ if (payload_len == 0) { -+ ret = copied; -+ goto out_wait; -+ } -+ -+ hvsk->recv->data_len = payload_len; -+ hvsk->recv->data_offset = 0; -+ } -+ -+ to_read = min_t(size_t, total_to_read, -+ hvsk->recv->data_len); -+ -+ ret = memcpy_to_msg(msg, hvsk->recv->buf + -+ hvsk->recv->data_offset, -+ to_read); -+ if (ret != 0) -+ break; -+ -+ copied += to_read; -+ total_to_read -= to_read; -+ -+ hvsk->recv->data_len -= to_read; -+ -+ if (hvsk->recv->data_len == 0) -+ hvsk->recv->data_offset = 0; -+ else -+ hvsk->recv->data_offset += to_read; 
-+ -+ if (total_to_read == 0) -+ break; -+ } else { -+ if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || -+ (hvsk->peer_shutdown & SEND_SHUTDOWN)) -+ break; -+ -+ /* Don't wait for non-blocking sockets. */ -+ if (timeout == 0) { -+ ret = -EAGAIN; -+ break; -+ } -+ -+ if (copied > 0) -+ break; -+ -+ release_sock(sk); -+ timeout = schedule_timeout(timeout); -+ lock_sock(sk); -+ -+ if (signal_pending(current)) { -+ ret = sock_intr_errno(timeout); -+ break; -+ } else if (timeout == 0) { -+ ret = -EAGAIN; -+ break; -+ } -+ -+ prepare_to_wait(sk_sleep(sk), &wait, -+ TASK_INTERRUPTIBLE); -+ } -+ } -+ -+ if (sk->sk_err) -+ ret = -sk->sk_err; -+ else if (sk->sk_shutdown & RCV_SHUTDOWN) -+ ret = 0; -+ -+ if (copied > 0) { -+ ret = copied; -+ -+ /* If the other side has shutdown for sending and there -+ * is nothing more to read, then we modify the socket -+ * state. -+ */ -+ if ((hvsk->peer_shutdown & SEND_SHUTDOWN) && -+ hvsk->recv->data_len == 0) { -+ get_ringbuffer_rw_status(channel, &can_read, NULL); -+ if (!can_read) { -+ sk->sk_state = SS_UNCONNECTED; -+ sock_set_flag(sk, SOCK_DONE); -+ sk->sk_state_change(sk); -+ } -+ } -+ } -+out_wait: -+ finish_wait(sk_sleep(sk), &wait); -+ return ret; -+} -+ -+static int hvsock_recvmsg(struct socket *sock, struct msghdr *msg, -+ size_t len, int flags) -+{ -+ struct sock *sk = sock->sk; -+ int ret; -+ -+ lock_sock(sk); -+ -+ if (sk->sk_state != SS_CONNECTED) { -+ /* Recvmsg is supposed to return 0 if a peer performs an -+ * orderly shutdown. Differentiate between that case and when a -+ * peer has not connected or a local shutdown occurred with the -+ * SOCK_DONE flag. -+ */ -+ if (sock_flag(sk, SOCK_DONE)) -+ ret = 0; -+ else -+ ret = -ENOTCONN; -+ -+ goto out; -+ } -+ -+ /* We ignore msg->addr_name/len. 
*/ -+ if (flags & ~MSG_DONTWAIT) { -+ pr_err("%s: unsupported flags=0x%x\n", __func__, flags); -+ ret = -EOPNOTSUPP; -+ goto out; -+ } -+ -+ /* We don't check peer_shutdown flag here since peer may actually shut -+ * down, but there can be data in the queue that a local socket can -+ * receive. -+ */ -+ if (sk->sk_shutdown & RCV_SHUTDOWN) { -+ ret = 0; -+ goto out; -+ } -+ -+ /* It is valid on Linux to pass in a zero-length receive buffer. This -+ * is not an error. We may as well bail out now. -+ */ -+ if (!len) { -+ ret = 0; -+ goto out; -+ } -+ -+ ret = hvsock_recvmsg_wait(sk, msg, len, flags); -+out: -+ release_sock(sk); -+ return ret; -+} -+ -+static const struct proto_ops hvsock_ops = { -+ .family = PF_HYPERV, -+ .owner = THIS_MODULE, -+ .release = hvsock_release, -+ .bind = hvsock_bind, -+ .connect = hvsock_connect, -+ .socketpair = sock_no_socketpair, -+ .accept = hvsock_accept, -+ .getname = hvsock_getname, -+ .poll = hvsock_poll, -+ .ioctl = sock_no_ioctl, -+ .listen = hvsock_listen, -+ .shutdown = hvsock_shutdown, -+ .setsockopt = sock_no_setsockopt, -+ .getsockopt = sock_no_getsockopt, -+ .sendmsg = hvsock_sendmsg, -+ .recvmsg = hvsock_recvmsg, -+ .mmap = sock_no_mmap, -+ .sendpage = sock_no_sendpage, -+}; -+ -+static int hvsock_create_sock(struct net *net, struct socket *sock, -+ int protocol, int kern) -+{ -+ struct sock *sk; -+ -+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ -+ if (protocol != 0 && protocol != SHV_PROTO_RAW) -+ return -EPROTONOSUPPORT; -+ -+ switch (sock->type) { -+ case SOCK_STREAM: -+ sock->ops = &hvsock_ops; -+ break; -+ default: -+ return -ESOCKTNOSUPPORT; -+ } -+ -+ sock->state = SS_UNCONNECTED; -+ -+ return hvsock_create(net, sock, GFP_KERNEL, 0, &sk); -+} -+ -+static const struct net_proto_family hvsock_family_ops = { -+ .family = AF_HYPERV, -+ .create = hvsock_create_sock, -+ .owner = THIS_MODULE, -+}; -+ -+static int hvsock_probe(struct hv_device *hdev, -+ const struct hv_vmbus_device_id 
*dev_id) -+{ -+ struct vmbus_channel *channel = hdev->channel; -+ -+ /* We ignore the error return code to suppress the unnecessary -+ * error message in vmbus_probe(): on error the host will rescind -+ * the offer in 30 seconds and we can do cleanup at that time. -+ */ -+ (void)hvsock_open_connection(channel); -+ -+ return 0; -+} -+ -+static int hvsock_remove(struct hv_device *hdev) -+{ -+ struct vmbus_channel *channel = hdev->channel; -+ -+ vmbus_close(channel); -+ -+ return 0; -+} -+ -+/* It's not really used. See vmbus_match() and vmbus_probe(). */ -+static const struct hv_vmbus_device_id id_table[] = { -+ {}, -+}; -+ -+static struct hv_driver hvsock_drv = { -+ .name = "hv_sock", -+ .hvsock = true, -+ .id_table = id_table, -+ .probe = hvsock_probe, -+ .remove = hvsock_remove, -+}; -+ -+static int __init hvsock_init(void) -+{ -+ int ret; -+ -+ if (send_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_SND) -+ send_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_SND; -+ -+ if (recv_ring_page < DEF_RINGBUFFER_PAGES_HVSOCK_RCV) -+ recv_ring_page = DEF_RINGBUFFER_PAGES_HVSOCK_RCV; -+ -+ /* Hyper-V Sockets requires at least VMBus 4.0 */ -+ if ((vmbus_proto_version >> 16) < 4) { -+ pr_err("failed to load: VMBus 4 or later is required\n"); -+ return -ENODEV; -+ } -+ -+ ret = vmbus_driver_register(&hvsock_drv); -+ if (ret) { -+ pr_err("failed to register hv_sock driver\n"); -+ return ret; -+ } -+ -+ ret = proto_register(&hvsock_proto, 0); -+ if (ret) { -+ pr_err("failed to register protocol\n"); -+ goto unreg_hvsock_drv; -+ } -+ -+ ret = sock_register(&hvsock_family_ops); -+ if (ret) { -+ pr_err("failed to register address family\n"); -+ goto unreg_proto; -+ } -+ -+ return 0; -+ -+unreg_proto: -+ proto_unregister(&hvsock_proto); -+unreg_hvsock_drv: -+ vmbus_driver_unregister(&hvsock_drv); -+ return ret; -+} -+ -+static void __exit hvsock_exit(void) -+{ -+ sock_unregister(AF_HYPERV); -+ proto_unregister(&hvsock_proto); -+ vmbus_driver_unregister(&hvsock_drv); -+} -+ 
-+module_init(hvsock_init); -+module_exit(hvsock_exit); -+ -+MODULE_DESCRIPTION("Hyper-V Sockets"); -+MODULE_LICENSE("Dual BSD/GPL"); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch b/kernel/patches-4.4.x/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch deleted file mode 100644 index 5d1a6e7d1..000000000 --- a/kernel/patches-4.4.x/0040-net-add-the-AF_HYPERV-entries-to-family-name-tables.patch +++ /dev/null @@ -1,49 +0,0 @@ -From e5c30fb666d82e76150f0900582fba80260b9830 Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Mon, 21 Mar 2016 02:53:08 -0700 -Subject: [PATCH 40/44] net: add the AF_HYPERV entries to family name tables - -This is for the hv_sock driver, which introduces AF_HYPERV(42). - -Signed-off-by: Dexuan Cui -Cc: "K. Y. Srinivasan" -Cc: Haiyang Zhang -Origin: https://patchwork.ozlabs.org/patch/600009 ---- - net/core/sock.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/core/sock.c b/net/core/sock.c -index ef337bf176f7..1c5f0a2ef836 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -264,7 +264,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , -- "sk_lock-AF_MAX" -+ "sk_lock-AF_HYPERV", "sk_lock-AF_MAX" - }; - static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , -@@ -281,7 +281,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , -- "slock-AF_MAX" -+ "slock-AF_HYPERV", "slock-AF_MAX" - }; - static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", 
"clock-AF_UNIX" , "clock-AF_INET" , -@@ -298,7 +298,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , -- "clock-AF_MAX" -+ "clock-AF_HYPERV", "clock-AF_MAX" - }; - - /* --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch b/kernel/patches-4.4.x/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch deleted file mode 100644 index a7c43c5ea..000000000 --- a/kernel/patches-4.4.x/0041-Drivers-hv-vmbus-fix-the-race-when-querying-updating.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 52ee595fee2bde973189e1425247c60173ae2e3a Mon Sep 17 00:00:00 2001 -From: Dexuan Cui -Date: Sat, 21 May 2016 16:55:50 +0800 -Subject: [PATCH 41/44] Drivers: hv: vmbus: fix the race when querying & - updating the percpu list - -There is a rare race when we remove an entry from the global list -hv_context.percpu_list[cpu] in hv_process_channel_removal() -> -percpu_channel_deq() -> list_del(): at this time, if vmbus_on_event() -> -process_chn_event() -> pcpu_relid2channel() is trying to query the list, -we can get the general protection fault: - -general protection fault: 0000 [#1] SMP -... -RIP: 0010:[] [] vmbus_on_event+0xc4/0x149 - -Similarly, we also have the issue in the code path: vmbus_process_offer() -> -percpu_channel_enq(). - -We can resolve the issue by disabling the tasklet when updating the list. 
- -Reported-by: Rolf Neugebauer -Signed-off-by: Dexuan Cui -Origin: https://github.com/dcui/linux/commit/fbcca73228b9b90911ab30fdf75f532b2b7c07e5 ---- - drivers/hv/channel.c | 1 + - drivers/hv/channel_mgmt.c | 18 ++++++++++++++++-- - 2 files changed, 17 insertions(+), 2 deletions(-) - -diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c -index 4077e7243151..a19aadc4a297 100644 ---- a/drivers/hv/channel.c -+++ b/drivers/hv/channel.c -@@ -594,6 +594,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) - - out: - tasklet_enable(tasklet); -+ tasklet_schedule(tasklet); - - return ret; - } -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index c892db5df665..0a543170eba0 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -21,6 +21,7 @@ - #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - - #include -+#include - #include - #include - #include -@@ -307,12 +308,13 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - { - unsigned long flags; - struct vmbus_channel *primary_channel; -- -- vmbus_release_relid(relid); -+ struct tasklet_struct *tasklet; - - BUG_ON(!channel->rescind); - BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - -+ tasklet = hv_context.event_dpc[channel->target_cpu]; -+ tasklet_disable(tasklet); - if (channel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(channel->target_cpu, -@@ -321,6 +323,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - percpu_channel_deq(channel); - put_cpu(); - } -+ tasklet_enable(tasklet); -+ tasklet_schedule(tasklet); - - if (channel->primary_channel == NULL) { - list_del(&channel->listentry); -@@ -342,6 +346,8 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) - &primary_channel->alloced_cpus_in_node); - - free_channel(channel); -+ -+ vmbus_release_relid(relid); - } - - void vmbus_free_channels(void) -@@ -363,6 +369,7 @@ void vmbus_free_channels(void) - */ - 
static void vmbus_process_offer(struct vmbus_channel *newchannel) - { -+ struct tasklet_struct *tasklet; - struct vmbus_channel *channel; - bool fnew = true; - unsigned long flags; -@@ -409,6 +416,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - - init_vp_index(newchannel, dev_type); - -+ tasklet = hv_context.event_dpc[newchannel->target_cpu]; -+ tasklet_disable(tasklet); - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, -@@ -418,6 +427,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) - percpu_channel_enq(newchannel); - put_cpu(); - } -+ tasklet_enable(tasklet); -+ tasklet_schedule(tasklet); - - /* - * This state is used to indicate a successful open -@@ -469,6 +480,7 @@ err_deq_chan: - list_del(&newchannel->listentry); - mutex_unlock(&vmbus_connection.channel_mutex); - -+ tasklet_disable(tasklet); - if (newchannel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(newchannel->target_cpu, -@@ -477,6 +489,8 @@ err_deq_chan: - percpu_channel_deq(newchannel); - put_cpu(); - } -+ tasklet_enable(tasklet); -+ tasklet_schedule(tasklet); - - err_free_chan: - free_channel(newchannel); --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch b/kernel/patches-4.4.x/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch deleted file mode 100644 index c2206e1b8..000000000 --- a/kernel/patches-4.4.x/0042-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 0f526c73b5a9e6f2ce3e0a74fe6382942b6d74df Mon Sep 17 00:00:00 2001 -From: Rolf Neugebauer -Date: Mon, 23 May 2016 18:55:45 +0100 -Subject: [PATCH 42/44] vmbus: Don't spam the logs with unknown GUIDs - -With Hyper-V sockets device types are introduced on the fly. The pr_info() -then prints a message on every connection, which is way too verbose. 
Since -there doesn't seem to be an easy way to check for registered services, -disable the pr_info() completely. - -Signed-off-by: Rolf Neugebauer ---- - drivers/hv/channel_mgmt.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 0a543170eba0..120ee22c945e 100644 ---- a/drivers/hv/channel_mgmt.c -+++ b/drivers/hv/channel_mgmt.c -@@ -147,7 +147,6 @@ static u16 hv_get_dev_type(const uuid_le *guid) - if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) - return i; - } -- pr_info("Unknown GUID: %pUl\n", guid); - return i; - } - --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0043-fs-add-filp_clone_open-API.patch b/kernel/patches-4.4.x/0043-fs-add-filp_clone_open-API.patch deleted file mode 100644 index 5e9933eb5..000000000 --- a/kernel/patches-4.4.x/0043-fs-add-filp_clone_open-API.patch +++ /dev/null @@ -1,64 +0,0 @@ -From eaf798827fd58aa7d7270f9702883207fcbbf4f2 Mon Sep 17 00:00:00 2001 -From: James Bottomley -Date: Wed, 17 Feb 2016 16:49:38 -0800 -Subject: [PATCH 43/44] fs: add filp_clone_open API - -I need an API that allows me to obtain a clone of the current file -pointer to pass in to an exec handler. I've labelled this as an -internal API because I can't see how it would be useful outside of the -fs subsystem. The use case will be a persistent binfmt_misc handler. 
- -Signed-off-by: James Bottomley -Acked-by: Serge Hallyn -Acked-by: Jan Kara ---- - fs/internal.h | 1 + - fs/open.c | 20 ++++++++++++++++++++ - 2 files changed, 21 insertions(+) - -diff --git a/fs/internal.h b/fs/internal.h -index 71859c4d0b41..c0022708ff3a 100644 ---- a/fs/internal.h -+++ b/fs/internal.h -@@ -108,6 +108,7 @@ extern long do_handle_open(int mountdirfd, - struct file_handle __user *ufh, int open_flag); - extern int open_check_o_direct(struct file *f); - extern int vfs_open(const struct path *, struct file *, const struct cred *); -+extern struct file *filp_clone_open(struct file *); - - /* - * inode.c -diff --git a/fs/open.c b/fs/open.c -index fbc5c7b230b3..94fe386e566d 100644 ---- a/fs/open.c -+++ b/fs/open.c -@@ -1007,6 +1007,26 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, - } - EXPORT_SYMBOL(file_open_root); - -+struct file *filp_clone_open(struct file *oldfile) -+{ -+ struct file *file; -+ int retval; -+ -+ file = get_empty_filp(); -+ if (IS_ERR(file)) -+ return file; -+ -+ file->f_flags = oldfile->f_flags; -+ retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred); -+ if (retval) { -+ put_filp(file); -+ return ERR_PTR(retval); -+ } -+ -+ return file; -+} -+EXPORT_SYMBOL(filp_clone_open); -+ - long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) - { - struct open_flags op; --- -2.14.1 - diff --git a/kernel/patches-4.4.x/0044-binfmt_misc-add-persistent-opened-binary-handler-for.patch b/kernel/patches-4.4.x/0044-binfmt_misc-add-persistent-opened-binary-handler-for.patch deleted file mode 100644 index 9418ccf19..000000000 --- a/kernel/patches-4.4.x/0044-binfmt_misc-add-persistent-opened-binary-handler-for.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 4a5519e5f46c0ae55f8be683a1bdd2363869403b Mon Sep 17 00:00:00 2001 -From: James Bottomley -Date: Wed, 17 Feb 2016 16:51:16 -0800 -Subject: [PATCH 44/44] binfmt_misc: add persistent opened binary handler for - containers - -This patch adds a new 
flag 'F' to the binfmt handlers. If you pass in -'F' the binary that runs the emulation will be opened immediately and -in future, will be cloned from the open file. - -The net effect is that the handler survives both changeroots and mount -namespace changes, making it easy to work with foreign architecture -containers without contaminating the container image with the -emulator. - -Signed-off-by: James Bottomley -Acked-by: Serge Hallyn ---- - fs/binfmt_misc.c | 41 +++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 39 insertions(+), 2 deletions(-) - -diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c -index 78f005f37847..4beb3d9e0001 100644 ---- a/fs/binfmt_misc.c -+++ b/fs/binfmt_misc.c -@@ -26,6 +26,8 @@ - #include - #include - -+#include "internal.h" -+ - #ifdef DEBUG - # define USE_DEBUG 1 - #else -@@ -43,6 +45,7 @@ enum {Enabled, Magic}; - #define MISC_FMT_PRESERVE_ARGV0 (1 << 31) - #define MISC_FMT_OPEN_BINARY (1 << 30) - #define MISC_FMT_CREDENTIALS (1 << 29) -+#define MISC_FMT_OPEN_FILE (1 << 28) - - typedef struct { - struct list_head list; -@@ -54,6 +57,7 @@ typedef struct { - char *interpreter; /* filename of interpreter */ - char *name; - struct dentry *dentry; -+ struct file *interp_file; - } Node; - - static DEFINE_RWLOCK(entries_lock); -@@ -201,7 +205,13 @@ static int load_misc_binary(struct linux_binprm *bprm) - if (retval < 0) - goto error; - -- interp_file = open_exec(iname); -+ if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) { -+ interp_file = filp_clone_open(fmt->interp_file); -+ if (!IS_ERR(interp_file)) -+ deny_write_access(interp_file); -+ } else { -+ interp_file = open_exec(iname); -+ } - retval = PTR_ERR(interp_file); - if (IS_ERR(interp_file)) - goto error; -@@ -285,6 +295,11 @@ static char *check_special_flags(char *sfs, Node *e) - e->flags |= (MISC_FMT_CREDENTIALS | - MISC_FMT_OPEN_BINARY); - break; -+ case 'F': -+ pr_debug("register: flag: F: open interpreter file now\n"); -+ p++; -+ e->flags |= MISC_FMT_OPEN_FILE; -+ 
break; - default: - cont = 0; - } -@@ -543,6 +558,8 @@ static void entry_status(Node *e, char *page) - *dp++ = 'O'; - if (e->flags & MISC_FMT_CREDENTIALS) - *dp++ = 'C'; -+ if (e->flags & MISC_FMT_OPEN_FILE) -+ *dp++ = 'F'; - *dp++ = '\n'; - - if (!test_bit(Magic, &e->flags)) { -@@ -590,6 +607,11 @@ static void kill_node(Node *e) - } - write_unlock(&entries_lock); - -+ if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) { -+ filp_close(e->interp_file, NULL); -+ e->interp_file = NULL; -+ } -+ - if (dentry) { - drop_nlink(d_inode(dentry)); - d_drop(dentry); -@@ -698,6 +720,21 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, - goto out2; - } - -+ if (e->flags & MISC_FMT_OPEN_FILE) { -+ struct file *f; -+ -+ f = open_exec(e->interpreter); -+ if (IS_ERR(f)) { -+ err = PTR_ERR(f); -+ pr_notice("register: failed to install interpreter file %s\n", e->interpreter); -+ simple_release_fs(&bm_mnt, &entry_count); -+ iput(inode); -+ inode = NULL; -+ goto out2; -+ } -+ e->interp_file = f; -+ } -+ - e->dentry = dget(dentry); - inode->i_private = e; - inode->i_fop = &bm_entry_operations; -@@ -716,7 +753,7 @@ out: - - if (err) { - kfree(e); -- return -EINVAL; -+ return err; - } - return count; - } --- -2.14.1 - diff --git a/kernel/patches-4.9.x/0001-tools-build-Add-test-for-sched_getcpu.patch b/kernel/patches-4.9.x/0001-tools-build-Add-test-for-sched_getcpu.patch index ccccaba7c..861d07d5e 100644 --- a/kernel/patches-4.9.x/0001-tools-build-Add-test-for-sched_getcpu.patch +++ b/kernel/patches-4.9.x/0001-tools-build-Add-test-for-sched_getcpu.patch @@ -1,4 +1,4 @@ -From b29fe48363585d8c4d96a979d43482a67dd1481f Mon Sep 17 00:00:00 2001 +From 07cc90bf689d8d1446f19af31f419e96b6aae8d1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Mar 2017 12:55:49 -0300 Subject: [PATCH 01/13] tools build: Add test for sched_getcpu() @@ -146,5 +146,5 @@ index 43899e0d6fa1..c3b180254f91 100644 int is_printable_array(char *p, unsigned int len); -- 
-2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0002-perf-jit-Avoid-returning-garbage-for-a-ret-variable.patch b/kernel/patches-4.9.x/0002-perf-jit-Avoid-returning-garbage-for-a-ret-variable.patch index 242b88cb5..e0960445d 100644 --- a/kernel/patches-4.9.x/0002-perf-jit-Avoid-returning-garbage-for-a-ret-variable.patch +++ b/kernel/patches-4.9.x/0002-perf-jit-Avoid-returning-garbage-for-a-ret-variable.patch @@ -1,4 +1,4 @@ -From 9a4dde19bae9797ea632a414b02f43d27a747cc6 Mon Sep 17 00:00:00 2001 +From c1bc306f2a98bc201c155b7e1e93574ab9bb42f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 13 Oct 2016 17:12:35 -0300 Subject: [PATCH 02/13] perf jit: Avoid returning garbage for a ret variable @@ -66,5 +66,5 @@ index 95f0884aae02..f3ed3c963c71 100644 while ((jr = jit_get_next_entry(jd))) { switch(jr->prefix.id) { -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0003-hv_sock-introduce-Hyper-V-Sockets.patch b/kernel/patches-4.9.x/0003-hv_sock-introduce-Hyper-V-Sockets.patch index db3f53798..9f1455eff 100644 --- a/kernel/patches-4.9.x/0003-hv_sock-introduce-Hyper-V-Sockets.patch +++ b/kernel/patches-4.9.x/0003-hv_sock-introduce-Hyper-V-Sockets.patch @@ -1,4 +1,4 @@ -From 65cc9fceae5c27ea9069c375c5daa7d5e28462ee Mon Sep 17 00:00:00 2001 +From c6eb46c35ff75022c19211cd1236cb0be75456cb Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 23 Jul 2016 01:35:51 +0000 Subject: [PATCH 03/13] hv_sock: introduce Hyper-V Sockets @@ -1787,5 +1787,5 @@ index 000000000000..331d3759f5cb +MODULE_DESCRIPTION("Hyper-V Sockets"); +MODULE_LICENSE("Dual BSD/GPL"); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch b/kernel/patches-4.9.x/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch index 9ab10a1c4..e4e8cf482 100644 --- a/kernel/patches-4.9.x/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch +++ b/kernel/patches-4.9.x/0004-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch @@ -1,4 +1,4 @@ -From 
5b76dda785edf1415657f81fe50962635a54b352 Mon Sep 17 00:00:00 2001 +From 9ba10da496908d0c21d07639e07dba839935522f Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Mon, 23 May 2016 18:55:45 +0100 Subject: [PATCH 04/13] vmbus: Don't spam the logs with unknown GUIDs @@ -26,5 +26,5 @@ index d8bc4b910192..8df02f3ca0b2 100644 } -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0005-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch b/kernel/patches-4.9.x/0005-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch index 3f94c6887..218b35889 100644 --- a/kernel/patches-4.9.x/0005-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch +++ b/kernel/patches-4.9.x/0005-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch @@ -1,4 +1,4 @@ -From d1a4c8e21b750ee6c6c9f28b3498015d65f0a453 Mon Sep 17 00:00:00 2001 +From bcbd97d95f015dde8d730206bc6cfcc58d1457c9 Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sun, 6 Nov 2016 13:14:07 -0800 Subject: [PATCH 05/13] Drivers: hv: utils: Fix the mapping between host @@ -44,5 +44,5 @@ index bcd06306f3e8..e7707747f56d 100644 } -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0006-Drivers-hv-vss-Improve-log-messages.patch b/kernel/patches-4.9.x/0006-Drivers-hv-vss-Improve-log-messages.patch index 158692733..6734b9bb7 100644 --- a/kernel/patches-4.9.x/0006-Drivers-hv-vss-Improve-log-messages.patch +++ b/kernel/patches-4.9.x/0006-Drivers-hv-vss-Improve-log-messages.patch @@ -1,4 +1,4 @@ -From d9646498544d50814410b3e9fdaee0ddcd386c07 Mon Sep 17 00:00:00 2001 +From 513561517efb57638fa9819aa24f715639be6595 Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sun, 6 Nov 2016 13:14:10 -0800 Subject: [PATCH 06/13] Drivers: hv: vss: Improve log messages. 
@@ -101,5 +101,5 @@ index a76e3db0d01f..b1446d51ef45 100644 return 0; } -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0007-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch b/kernel/patches-4.9.x/0007-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch index ca5cf3199..051c6fdb2 100644 --- a/kernel/patches-4.9.x/0007-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch +++ b/kernel/patches-4.9.x/0007-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch @@ -1,4 +1,4 @@ -From 6308212197e4682beb468c40036d846c8cf93dad Mon Sep 17 00:00:00 2001 +From 62ab4bab28c89b21dce9bba98cf1f493aedffa5c Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sun, 6 Nov 2016 13:14:11 -0800 Subject: [PATCH 07/13] Drivers: hv: vss: Operation timeouts should match host @@ -44,5 +44,5 @@ index b1446d51ef45..4e543dbb731a 100644 rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL); if (rc) { -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0008-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch b/kernel/patches-4.9.x/0008-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch index 1d66cc8ff..b1d9ae5a0 100644 --- a/kernel/patches-4.9.x/0008-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch +++ b/kernel/patches-4.9.x/0008-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch @@ -1,4 +1,4 @@ -From 3a796dc6d04e0cfff5e1b553ce14434c8991b73a Mon Sep 17 00:00:00 2001 +From 162dd9275b4ce7ffe7e7e0d3dcdb9662a189309c Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sat, 28 Jan 2017 12:37:17 -0700 Subject: [PATCH 08/13] Drivers: hv: vmbus: Use all supported IC versions to @@ -488,5 +488,5 @@ index 489ad74c1e6e..956acfc93487 100644 void hv_event_tasklet_disable(struct vmbus_channel *channel); void hv_event_tasklet_enable(struct vmbus_channel *channel); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0009-Drivers-hv-Log-the-negotiated-IC-versions.patch b/kernel/patches-4.9.x/0009-Drivers-hv-Log-the-negotiated-IC-versions.patch index 
d118ec006..448cabb49 100644 --- a/kernel/patches-4.9.x/0009-Drivers-hv-Log-the-negotiated-IC-versions.patch +++ b/kernel/patches-4.9.x/0009-Drivers-hv-Log-the-negotiated-IC-versions.patch @@ -1,4 +1,4 @@ -From 34379e682d26011f993a2121ed833699eb413873 Mon Sep 17 00:00:00 2001 +From b7524ea7d09a2b2ff38adc898e1c34a12ca07e6f Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sat, 28 Jan 2017 12:37:18 -0700 Subject: [PATCH 09/13] Drivers: hv: Log the negotiated IC versions. @@ -114,5 +114,5 @@ index f3797c07be10..89440c2eb346 100644 hb_srv_version & 0xFFFF); } -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0010-vmbus-fix-missed-ring-events-on-boot.patch b/kernel/patches-4.9.x/0010-vmbus-fix-missed-ring-events-on-boot.patch index 9aca30603..0bfc5696f 100644 --- a/kernel/patches-4.9.x/0010-vmbus-fix-missed-ring-events-on-boot.patch +++ b/kernel/patches-4.9.x/0010-vmbus-fix-missed-ring-events-on-boot.patch @@ -1,4 +1,4 @@ -From 4579acf1773e70b59674bc8ce03abf265d5a05ba Mon Sep 17 00:00:00 2001 +From 1e50301f804c4d61db165693a184a919939e6153 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 26 Mar 2017 16:42:20 +0800 Subject: [PATCH 10/13] vmbus: fix missed ring events on boot @@ -52,5 +52,5 @@ index e7949b64bfbc..2fe024e86209 100644 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0011-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch b/kernel/patches-4.9.x/0011-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch index 38aab375d..54c6eb4c4 100644 --- a/kernel/patches-4.9.x/0011-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch +++ b/kernel/patches-4.9.x/0011-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch @@ -1,4 +1,4 @@ -From a487d91e06c48b99d8c92835e13a184ced75fe00 Mon Sep 17 00:00:00 2001 +From 89c7fc409cc898f025963d0056b6ddf9a23a6c68 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 29 Mar 2017 18:37:10 +0800 Subject: [PATCH 11/13] vmbus: remove "goto 
error_clean_msglist" in @@ -56,5 +56,5 @@ index 1606e7f08f4b..1caed01954f6 100644 vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); kfree(open_info); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0012-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch b/kernel/patches-4.9.x/0012-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch index 706b5b261..ea7f5d36f 100644 --- a/kernel/patches-4.9.x/0012-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch +++ b/kernel/patches-4.9.x/0012-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch @@ -1,4 +1,4 @@ -From 05f50ab2f943d79d26b2d5ca0db8021699253a08 Mon Sep 17 00:00:00 2001 +From 2981fbdae254370cd9ccc4a344568300e0c59c7a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 24 Mar 2017 20:53:18 +0800 Subject: [PATCH 12/13] vmbus: dynamically enqueue/dequeue the channel on @@ -173,5 +173,5 @@ index 956acfc93487..9ee292b28e41 100644 void vmbus_setevent(struct vmbus_channel *channel); -- -2.14.1 +2.11.1 diff --git a/kernel/patches-4.9.x/0013-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch b/kernel/patches-4.9.x/0013-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch index 2b242ca1f..eb1842ea5 100644 --- a/kernel/patches-4.9.x/0013-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch +++ b/kernel/patches-4.9.x/0013-vmbus-fix-the-missed-signaling-in-hv_signal_on_read.patch @@ -1,4 +1,4 @@ -From d1ded41ea339dfec68868f2311780aa46390f069 Mon Sep 17 00:00:00 2001 +From e4956e317bf21890abd2ffe2b9d66b6a177712f0 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 6 Jul 2017 21:37:11 +0000 Subject: [PATCH 13/13] vmbus: fix the missed signaling in hv_signal_on_read() @@ -43,5 +43,5 @@ index 9ee292b28e41..a87757cf277b 100644 return; -- -2.14.1 +2.11.1 diff --git a/linuxkit.yml b/linuxkit.yml index 89312e8bf..369558ba9 100644 --- a/linuxkit.yml +++ b/linuxkit.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: 
"console=tty0 console=ttyS0 console=ttyAMA0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/projects/compose/compose-dynamic.yml b/projects/compose/compose-dynamic.yml index d374154d0..e566adb21 100644 --- a/projects/compose/compose-dynamic.yml +++ b/projects/compose/compose-dynamic.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/projects/compose/compose-static.yml b/projects/compose/compose-static.yml index 474de2951..cae8d075d 100644 --- a/projects/compose/compose-static.yml +++ b/projects/compose/compose-static.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/projects/etcd/etcd.yml b/projects/etcd/etcd.yml index ceddcf159..b2c8e9254 100644 --- a/projects/etcd/etcd.yml +++ b/projects/etcd/etcd.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 console=tty0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/projects/kubernetes/kube.yml b/projects/kubernetes/kube.yml index 194369def..c9dee7fff 100644 --- a/projects/kubernetes/kube.yml +++ b/projects/kubernetes/kube.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=tty0 console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/projects/logging/examples/logging.yml b/projects/logging/examples/logging.yml index 288f054e6..1adc7bcfd 100644 --- a/projects/logging/examples/logging.yml +++ b/projects/logging/examples/logging.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 console=tty0 page_poison=1" init: - 
linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 # with runc, logwrite, startmemlogd diff --git a/projects/miragesdk/examples/mirage-dhcp.yml b/projects/miragesdk/examples/mirage-dhcp.yml index be4c79baf..21b8784a9 100644 --- a/projects/miragesdk/examples/mirage-dhcp.yml +++ b/projects/miragesdk/examples/mirage-dhcp.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/projects/swarmd/swarmd.yml b/projects/swarmd/swarmd.yml index 6508689b7..d6944c79e 100644 --- a/projects/swarmd/swarmd.yml +++ b/projects/swarmd/swarmd.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/000_build/000_outputs/test.yml b/test/cases/000_build/000_outputs/test.yml index df71b34d0..50318ed93 100644 --- a/test/cases/000_build/000_outputs/test.yml +++ b/test/cases/000_build/000_outputs/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/000_qemu/000_run_kernel/test.yml b/test/cases/010_platforms/000_qemu/000_run_kernel/test.yml index 864fc8d06..7743c9126 100644 --- a/test/cases/010_platforms/000_qemu/000_run_kernel/test.yml +++ b/test/cases/010_platforms/000_qemu/000_run_kernel/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/000_qemu/010_run_iso/test.yml b/test/cases/010_platforms/000_qemu/010_run_iso/test.yml index b1f36888d..119305359 100644 --- a/test/cases/010_platforms/000_qemu/010_run_iso/test.yml +++ 
b/test/cases/010_platforms/000_qemu/010_run_iso/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/000_qemu/020_run_efi/test.yml b/test/cases/010_platforms/000_qemu/020_run_efi/test.yml index 864fc8d06..7743c9126 100644 --- a/test/cases/010_platforms/000_qemu/020_run_efi/test.yml +++ b/test/cases/010_platforms/000_qemu/020_run_efi/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/000_qemu/030_run_qcow/test.yml b/test/cases/010_platforms/000_qemu/030_run_qcow/test.yml index 864fc8d06..7743c9126 100644 --- a/test/cases/010_platforms/000_qemu/030_run_qcow/test.yml +++ b/test/cases/010_platforms/000_qemu/030_run_qcow/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/000_qemu/040_run_raw/test.yml b/test/cases/010_platforms/000_qemu/040_run_raw/test.yml index 864fc8d06..7743c9126 100644 --- a/test/cases/010_platforms/000_qemu/040_run_raw/test.yml +++ b/test/cases/010_platforms/000_qemu/040_run_raw/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/000_qemu/100_container/test.yml b/test/cases/010_platforms/000_qemu/100_container/test.yml index f7f33789a..533a0b9de 100644 --- a/test/cases/010_platforms/000_qemu/100_container/test.yml +++ b/test/cases/010_platforms/000_qemu/100_container/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: 
"console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/010_hyperkit/000_run_kernel/test.yml b/test/cases/010_platforms/010_hyperkit/000_run_kernel/test.yml index 864fc8d06..7743c9126 100644 --- a/test/cases/010_platforms/010_hyperkit/000_run_kernel/test.yml +++ b/test/cases/010_platforms/010_hyperkit/000_run_kernel/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/010_platforms/010_hyperkit/010_acpi/test.yml b/test/cases/010_platforms/010_hyperkit/010_acpi/test.yml index 426c827f5..94050ccf8 100644 --- a/test/cases/010_platforms/010_hyperkit/010_acpi/test.yml +++ b/test/cases/010_platforms/010_hyperkit/010_acpi/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/020_kernel/000_config_4.4.x/test.yml b/test/cases/020_kernel/000_config_4.4.x/test.yml index 8c0e6e8ef..cb8387d40 100644 --- a/test/cases/020_kernel/000_config_4.4.x/test.yml +++ b/test/cases/020_kernel/000_config_4.4.x/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.4.92 + image: linuxkit/kernel:4.4.94 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/020_kernel/001_config_4.9.x/test.yml b/test/cases/020_kernel/001_config_4.9.x/test.yml index c21fa5a0d..f11410479 100644 --- a/test/cases/020_kernel/001_config_4.9.x/test.yml +++ b/test/cases/020_kernel/001_config_4.9.x/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/020_kernel/005_config_4.13.x/test.yml b/test/cases/020_kernel/005_config_4.13.x/test.yml 
index b4a4ce5d4..96350716f 100644 --- a/test/cases/020_kernel/005_config_4.13.x/test.yml +++ b/test/cases/020_kernel/005_config_4.13.x/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.13.6 + image: linuxkit/kernel:4.13.9 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/020_kernel/010_kmod_4.9.x/Dockerfile b/test/cases/020_kernel/010_kmod_4.9.x/Dockerfile index d197571c7..83794714c 100644 --- a/test/cases/020_kernel/010_kmod_4.9.x/Dockerfile +++ b/test/cases/020_kernel/010_kmod_4.9.x/Dockerfile @@ -3,7 +3,7 @@ # In the last stage, it creates a package, which can be used for # testing. -FROM linuxkit/kernel:4.9.56 AS ksrc +FROM linuxkit/kernel:4.9.58 AS ksrc # Extract headers and compile module FROM linuxkit/alpine:ad35b6ddbc70faa07e59a9d7dee7707c08122e8d AS build diff --git a/test/cases/020_kernel/010_kmod_4.9.x/test.sh b/test/cases/020_kernel/010_kmod_4.9.x/test.sh index 1ff4fc3a2..ef6703a3d 100644 --- a/test/cases/020_kernel/010_kmod_4.9.x/test.sh +++ b/test/cases/020_kernel/010_kmod_4.9.x/test.sh @@ -19,7 +19,7 @@ clean_up() { trap clean_up EXIT # Make sure we have the latest kernel image -docker pull linuxkit/kernel:4.9.56 +docker pull linuxkit/kernel:4.9.58 # Build a package docker build -t ${IMAGE_NAME} . 
diff --git a/test/cases/020_kernel/010_kmod_4.9.x/test.yml b/test/cases/020_kernel/010_kmod_4.9.x/test.yml index 7aeb0504f..6fb16ce38 100644 --- a/test/cases/020_kernel/010_kmod_4.9.x/test.yml +++ b/test/cases/020_kernel/010_kmod_4.9.x/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/020_kernel/110_namespace/000_kernel-4.4.x/common.yml b/test/cases/020_kernel/110_namespace/000_kernel-4.4.x/common.yml index 8b60aa8da..809ac635a 100644 --- a/test/cases/020_kernel/110_namespace/000_kernel-4.4.x/common.yml +++ b/test/cases/020_kernel/110_namespace/000_kernel-4.4.x/common.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.4.92 + image: linuxkit/kernel:4.4.94 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/020_kernel/110_namespace/001_kernel-4.9.x/common.yml b/test/cases/020_kernel/110_namespace/001_kernel-4.9.x/common.yml index 2d4a5a194..13b2b2753 100644 --- a/test/cases/020_kernel/110_namespace/001_kernel-4.9.x/common.yml +++ b/test/cases/020_kernel/110_namespace/001_kernel-4.9.x/common.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/020_kernel/110_namespace/005_kernel-4.13.x/common.yml b/test/cases/020_kernel/110_namespace/005_kernel-4.13.x/common.yml index 85cc6b72c..cc695753c 100644 --- a/test/cases/020_kernel/110_namespace/005_kernel-4.13.x/common.yml +++ b/test/cases/020_kernel/110_namespace/005_kernel-4.13.x/common.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.13.6 + image: linuxkit/kernel:4.13.9 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/030_security/000_docker-bench/test.yml 
b/test/cases/030_security/000_docker-bench/test.yml index a123e0986..99f9459ba 100644 --- a/test/cases/030_security/000_docker-bench/test.yml +++ b/test/cases/030_security/000_docker-bench/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/030_security/010_ports/test.yml b/test/cases/030_security/010_ports/test.yml index cfb077a5d..11161f6fe 100644 --- a/test/cases/030_security/010_ports/test.yml +++ b/test/cases/030_security/010_ports/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/002_binfmt/test.yml b/test/cases/040_packages/002_binfmt/test.yml index afc2fc5f8..11fca811d 100644 --- a/test/cases/040_packages/002_binfmt/test.yml +++ b/test/cases/040_packages/002_binfmt/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/003_ca-certificates/test.yml b/test/cases/040_packages/003_ca-certificates/test.yml index 61c02cb9b..00877a995 100644 --- a/test/cases/040_packages/003_ca-certificates/test.yml +++ b/test/cases/040_packages/003_ca-certificates/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/003_containerd/test.yml b/test/cases/040_packages/003_containerd/test.yml index 5c07a4870..b9ea20934 100644 --- a/test/cases/040_packages/003_containerd/test.yml +++ b/test/cases/040_packages/003_containerd/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + 
image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/004_dhcpcd/test.yml b/test/cases/040_packages/004_dhcpcd/test.yml index 66c2dc179..83698d181 100644 --- a/test/cases/040_packages/004_dhcpcd/test.yml +++ b/test/cases/040_packages/004_dhcpcd/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/005_extend/000_ext4/test-create.yml b/test/cases/040_packages/005_extend/000_ext4/test-create.yml index 6b7b8068d..871a9bf8f 100644 --- a/test/cases/040_packages/005_extend/000_ext4/test-create.yml +++ b/test/cases/040_packages/005_extend/000_ext4/test-create.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/005_extend/000_ext4/test.yml b/test/cases/040_packages/005_extend/000_ext4/test.yml index 74cef7525..e307e419b 100644 --- a/test/cases/040_packages/005_extend/000_ext4/test.yml +++ b/test/cases/040_packages/005_extend/000_ext4/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/005_extend/001_btrfs/test-create.yml b/test/cases/040_packages/005_extend/001_btrfs/test-create.yml index de76d8a55..fd38549f7 100644 --- a/test/cases/040_packages/005_extend/001_btrfs/test-create.yml +++ b/test/cases/040_packages/005_extend/001_btrfs/test-create.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git 
a/test/cases/040_packages/005_extend/001_btrfs/test.yml b/test/cases/040_packages/005_extend/001_btrfs/test.yml index 8e438f9c2..9ecd892d9 100644 --- a/test/cases/040_packages/005_extend/001_btrfs/test.yml +++ b/test/cases/040_packages/005_extend/001_btrfs/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/005_extend/002_xfs/test-create.yml b/test/cases/040_packages/005_extend/002_xfs/test-create.yml index d291bfd40..4ee60b0fa 100644 --- a/test/cases/040_packages/005_extend/002_xfs/test-create.yml +++ b/test/cases/040_packages/005_extend/002_xfs/test-create.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/005_extend/002_xfs/test.yml b/test/cases/040_packages/005_extend/002_xfs/test.yml index 45e384d44..e1a08386e 100644 --- a/test/cases/040_packages/005_extend/002_xfs/test.yml +++ b/test/cases/040_packages/005_extend/002_xfs/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/006_format_mount/000_auto/test.yml b/test/cases/040_packages/006_format_mount/000_auto/test.yml index fc8074420..6bf03c069 100644 --- a/test/cases/040_packages/006_format_mount/000_auto/test.yml +++ b/test/cases/040_packages/006_format_mount/000_auto/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/006_format_mount/001_by_label/test.yml b/test/cases/040_packages/006_format_mount/001_by_label/test.yml index 
80f641584..60f3af877 100644 --- a/test/cases/040_packages/006_format_mount/001_by_label/test.yml +++ b/test/cases/040_packages/006_format_mount/001_by_label/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/006_format_mount/003_btrfs/test.yml b/test/cases/040_packages/006_format_mount/003_btrfs/test.yml index 77cfe9b31..fd6fb6e8f 100644 --- a/test/cases/040_packages/006_format_mount/003_btrfs/test.yml +++ b/test/cases/040_packages/006_format_mount/003_btrfs/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/006_format_mount/004_xfs/test.yml b/test/cases/040_packages/006_format_mount/004_xfs/test.yml index 2e195285b..213eff13e 100644 --- a/test/cases/040_packages/006_format_mount/004_xfs/test.yml +++ b/test/cases/040_packages/006_format_mount/004_xfs/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/006_format_mount/010_multiple/test.yml b/test/cases/040_packages/006_format_mount/010_multiple/test.yml index 284f7bec1..502632a57 100644 --- a/test/cases/040_packages/006_format_mount/010_multiple/test.yml +++ b/test/cases/040_packages/006_format_mount/010_multiple/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/013_mkimage/mkimage.yml b/test/cases/040_packages/013_mkimage/mkimage.yml index 2eef73790..783ac5641 100644 --- a/test/cases/040_packages/013_mkimage/mkimage.yml +++ 
b/test/cases/040_packages/013_mkimage/mkimage.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/013_mkimage/run.yml b/test/cases/040_packages/013_mkimage/run.yml index 3a4bf0eca..610a647bb 100644 --- a/test/cases/040_packages/013_mkimage/run.yml +++ b/test/cases/040_packages/013_mkimage/run.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/019_sysctl/test.yml b/test/cases/040_packages/019_sysctl/test.yml index c3a99605f..c4ec8539b 100644 --- a/test/cases/040_packages/019_sysctl/test.yml +++ b/test/cases/040_packages/019_sysctl/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0 page_poison=1" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/cases/040_packages/023_wireguard/test.yml b/test/cases/040_packages/023_wireguard/test.yml index b1096da5f..cddde6fa5 100644 --- a/test/cases/040_packages/023_wireguard/test.yml +++ b/test/cases/040_packages/023_wireguard/test.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/hack/test-ltp.yml b/test/hack/test-ltp.yml index d2d0e2b84..1bf11c685 100644 --- a/test/hack/test-ltp.yml +++ b/test/hack/test-ltp.yml @@ -1,5 +1,5 @@ kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75 diff --git a/test/hack/test.yml b/test/hack/test.yml index 76204d853..5d2535327 100644 --- a/test/hack/test.yml +++ b/test/hack/test.yml @@ -1,7 +1,7 @@ # FIXME: This should use 
the minimal example # We continue to use the kernel-config-test as CI is currently expecting to see a success message kernel: - image: linuxkit/kernel:4.9.56 + image: linuxkit/kernel:4.9.58 cmdline: "console=ttyS0" init: - linuxkit/init:6b3755e47f00d6027321d3fca99a19af6504be75