From e671d4664f33b56e35331441c48efb345d6789ba Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Mon, 20 Mar 2017 13:41:42 +0000 Subject: [PATCH] kernel: Add initial support for 4.10.x kernels Note, this also removes the LTS4.4 build options and replaces it with a KERNEL= build option to select the kernel to build. Signed-off-by: Rolf Neugebauer --- kernel/Dockerfile.4.10 | 54 + kernel/Makefile | 13 +- ...01-hv_sock-introduce-Hyper-V-Sockets.patch | 1790 +++++++++++++++++ ...-Use-all-supported-IC-versions-to-ne.patch | 491 +++++ ...rs-hv-Log-the-negotiated-IC-versions.patch | 117 ++ 5 files changed, 2462 insertions(+), 3 deletions(-) create mode 100644 kernel/Dockerfile.4.10 create mode 100644 kernel/patches-4.10/0001-hv_sock-introduce-Hyper-V-Sockets.patch create mode 100644 kernel/patches-4.10/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch create mode 100644 kernel/patches-4.10/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch diff --git a/kernel/Dockerfile.4.10 b/kernel/Dockerfile.4.10 new file mode 100644 index 000000000..88c4e26fe --- /dev/null +++ b/kernel/Dockerfile.4.10 @@ -0,0 +1,54 @@ +FROM mobylinux/alpine-build-kernel:0e893fbf6fa7638d2f23354de03ea11017bb8065@sha256:3ef3f9d11f0802b759dbd9c43a7706cf0ec37263c99ae90e2b10c29ea85739fa + +ARG KERNEL_VERSION +ARG DEBUG=0 + +ENV KERNEL_SOURCE=https://www.kernel.org/pub/linux/kernel/v4.x/linux-${KERNEL_VERSION}.tar.xz + +RUN curl -fsSL -o linux-${KERNEL_VERSION}.tar.xz ${KERNEL_SOURCE} + +RUN cat linux-${KERNEL_VERSION}.tar.xz | tar --absolute-names -xJ && mv /linux-${KERNEL_VERSION} /linux + +# NOTE: This currently re-uses the 4.9 kernel config +COPY kernel_config /linux/arch/x86/configs/x86_64_defconfig +COPY kernel_config.debug /linux/debug_config + +RUN if [ $DEBUG -ne "0" ]; then \ + sed -i 's/CONFIG_PANIC_ON_OOPS=y/# CONFIG_PANIC_ON_OOPS is not set/' /linux/arch/x86/configs/x86_64_defconfig; \ + cat /linux/debug_config >> /linux/arch/x86/configs/x86_64_defconfig; \ + fi + +# Apply local patches +COPY patches-4.10 /patches +RUN cd /linux && \ + set -e && for patch in /patches/*.patch; do \ + echo "Applying $patch"; \ + patch -p1 < "$patch"; \ + done + +RUN cd /linux && \ + make defconfig && \ + make oldconfig && \ + make -j "$(getconf _NPROCESSORS_ONLN)" KCFLAGS="-fno-pie" +RUN cd /linux && \ + make INSTALL_MOD_PATH=/tmp/kernel-modules modules_install && \ + ( DVER=$(basename $(find /tmp/kernel-modules/lib/modules/ -mindepth 1 -maxdepth 1)) && \ + cd /tmp/kernel-modules/lib/modules/$DVER && \ + rm build source && \ + ln -s /usr/src/linux-headers-$DVER build ) && \ + mkdir -p /tmp/kernel-headers/usr && \ + make INSTALL_HDR_PATH=/tmp/kernel-headers/usr headers_install && \ + ( cd /tmp/kernel-headers && tar cf /kernel-headers.tar usr ) && \ + ( cd /tmp/kernel-modules && tar cf /kernel-modules.tar lib ) && \ + cp vmlinux arch/x86_64/boot/bzImage / + +RUN DVER=$(basename $(find /tmp/kernel-modules/lib/modules/ -mindepth 1 -maxdepth 1)) && \ + dir=/tmp/usr/src/linux-headers-$DVER && \ + mkdir -p $dir && \ + cp /linux/.config $dir && \ + cd /linux && \ + cp -a include "$dir" && \ + mkdir -p "$dir"/arch/x86 && cp -a arch/x86/include "$dir"/arch/x86/ && \ + ( cd /tmp && tar cf /kernel-dev.tar usr/src ) + +RUN printf "KERNEL_SOURCE=${KERNEL_SOURCE}\n" > /kernel-source-info diff --git a/kernel/Makefile b/kernel/Makefile index 145519e33..324f7c2e8 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -18,17 +18,24 @@ all: bzImage tag # frequently. 
# # IMAGE_VERSION is used to determine if a new image should be pushed to hub. -ifdef LTS4.4 +ifeq ($(KERNEL),v4.4) KERNEL_VERSION=4.4.55 -IMAGE_VERSION=$(KERNEL_VERSION)-0 +IMAGE_VERSION=$(KERNEL_VERSION)-1 IMAGE_MAJOR_VERSION=4.4.x DEPS=Dockerfile.4.4 Makefile kernel_config kernel_config.debug kernel_config.4.4 patches-4.4 else -KERNEL_VERSION=4.9.16 +ifeq ($(KERNEL),v4.10) +KERNEL_VERSION=4.10.4 IMAGE_VERSION=$(KERNEL_VERSION)-0 +IMAGE_MAJOR_VERSION=4.10.x +DEPS=Dockerfile.4.10 Makefile kernel_config kernel_config.debug patches-4.10 +else +KERNEL_VERSION=4.9.16 +IMAGE_VERSION=$(KERNEL_VERSION)-1 IMAGE_MAJOR_VERSION=4.9.x DEPS=Dockerfile Makefile kernel_config kernel_config.debug patches-4.9 endif +endif kernel.tag: $(DEPS) BUILD=$$( tar cf - $^ | docker build -f $< --build-arg DEBUG=$(DEBUG) --build-arg KERNEL_VERSION=$(KERNEL_VERSION) -q - ) && [ -n "$$BUILD" ] && echo "Built $$BUILD" && echo "$$BUILD" > $@ diff --git a/kernel/patches-4.10/0001-hv_sock-introduce-Hyper-V-Sockets.patch b/kernel/patches-4.10/0001-hv_sock-introduce-Hyper-V-Sockets.patch new file mode 100644 index 000000000..c5cbea5cc --- /dev/null +++ b/kernel/patches-4.10/0001-hv_sock-introduce-Hyper-V-Sockets.patch @@ -0,0 +1,1790 @@ +From 539ff37d66b41b195368f4df3d4ce6525e5830af Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Thu, 21 Jul 2016 16:04:38 -0600 +Subject: [PATCH 1/3] hv_sock: introduce Hyper-V Sockets + +Hyper-V Sockets (hv_sock) supplies a byte-stream based communication +mechanism between the host and the guest. It's somewhat like TCP over +VMBus, but the transportation layer (VMBus) is much simpler than IP. + +With Hyper-V Sockets, applications between the host and the guest can talk +to each other directly by the traditional BSD-style socket APIs. + +Hyper-V Sockets is only available on new Windows hosts, like Windows Server +2016. More info is in this article "Make your own integration services": +https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service + +The patch implements the necessary support in the guest side by introducing +a new socket address family AF_HYPERV. + +Signed-off-by: Dexuan Cui +Cc: "K. Y. 
Srinivasan" +Cc: Haiyang Zhang +Cc: Vitaly Kuznetsov +Cc: Cathy Avery +(cherry picked from commit 8c902827bfd9c5d47ecbfbe7687e001b74de3930) +--- + MAINTAINERS | 2 + + include/linux/hyperv.h | 13 + + include/linux/socket.h | 4 +- + include/net/af_hvsock.h | 78 +++ + include/uapi/linux/hyperv.h | 23 + + net/Kconfig | 1 + + net/Makefile | 1 + + net/hv_sock/Kconfig | 10 + + net/hv_sock/Makefile | 3 + + net/hv_sock/af_hvsock.c | 1507 +++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 1641 insertions(+), 1 deletion(-) + create mode 100644 include/net/af_hvsock.h + create mode 100644 net/hv_sock/Kconfig + create mode 100644 net/hv_sock/Makefile + create mode 100644 net/hv_sock/af_hvsock.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 527d13759ecc..b5cee81a3e82 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -5982,7 +5982,9 @@ F: drivers/net/hyperv/ + F: drivers/scsi/storvsc_drv.c + F: drivers/uio/uio_hv_generic.c + F: drivers/video/fbdev/hyperv_fb.c ++F: net/hv_sock/ + F: include/linux/hyperv.h ++F: include/net/af_hvsock.h + F: tools/hv/ + F: Documentation/ABI/stable/sysfs-bus-vmbus + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 62679a93e01e..ca26335de49a 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1619,5 +1619,18 @@ static inline void commit_rd_index(struct vmbus_channel *channel) + hv_signal_on_read(channel); + } + ++struct vmpipe_proto_header { ++ u32 pkt_type; ++ u32 data_size; ++}; ++ ++#define HVSOCK_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \ ++ sizeof(struct vmpipe_proto_header)) ++ ++/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */ ++#define PREV_INDICES_LEN (sizeof(u64)) + ++#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ ++ ALIGN((payload_len), 8) + \ ++ PREV_INDICES_LEN) + #endif /* _HYPERV_H */ +diff --git a/include/linux/socket.h b/include/linux/socket.h +index b5cc5a6d7011..0b68b587d6ee 100644 +--- a/include/linux/socket.h ++++ b/include/linux/socket.h +@@ -202,8 +202,9 @@ struct ucred { + #define AF_VSOCK 40 /* vSockets */ + #define AF_KCM 41 /* Kernel Connection Multiplexor*/ + #define AF_QIPCRTR 42 /* Qualcomm IPC Router */ ++#define AF_HYPERV 43 /* Hyper-V Sockets */ + +-#define AF_MAX 43 /* For now.. */ ++#define AF_MAX 44 /* For now.. */ + + /* Protocol families, same as address families. */ + #define PF_UNSPEC AF_UNSPEC +@@ -251,6 +252,7 @@ struct ucred { + #define PF_VSOCK AF_VSOCK + #define PF_KCM AF_KCM + #define PF_QIPCRTR AF_QIPCRTR ++#define PF_HYPERV AF_HYPERV + #define PF_MAX AF_MAX + + /* Maximum queue length specifiable by listen. */ +diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h +new file mode 100644 +index 000000000000..e7a8a3ae08e8 +--- /dev/null ++++ b/include/net/af_hvsock.h +@@ -0,0 +1,78 @@ ++#ifndef __AF_HVSOCK_H__ ++#define __AF_HVSOCK_H__ ++ ++#include ++#include ++#include ++ ++/* The host side's design of the feature requires 5 exact 4KB pages for ++ * recv/send rings respectively -- this is suboptimal considering memory ++ * consumption, however unluckily we have to live with it, before the ++ * host comes up with a better design in the future. ++ */ ++#define PAGE_SIZE_4K 4096 ++#define RINGBUFFER_HVSOCK_RCV_SIZE (PAGE_SIZE_4K * 5) ++#define RINGBUFFER_HVSOCK_SND_SIZE (PAGE_SIZE_4K * 5) ++ ++/* The MTU is 16KB per the host side's design. 
++ * In future, the buffer can be elimiated when we switch to use the coming ++ * new VMBus ringbuffer "in-place consumption" APIs, by which we can ++ * directly copy data from VMBus ringbuffer into the userspace buffer. ++ */ ++#define HVSOCK_MTU_SIZE (1024 * 16) ++struct hvsock_recv_buf { ++ unsigned int data_len; ++ unsigned int data_offset; ++ ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MTU_SIZE]; ++}; ++ ++/* In the VM, actually we can send up to HVSOCK_MTU_SIZE bytes of payload, ++ * but for now let's use a smaller size to minimize the dynamically-allocated ++ * buffer. Note: the buffer can be elimiated in future when we add new VMBus ++ * ringbuffer APIs that allow us to directly copy data from userspace buf to ++ * VMBus ringbuffer. ++ */ ++#define HVSOCK_MAX_SND_SIZE_BY_VM (1024 * 4) ++struct hvsock_send_buf { ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MAX_SND_SIZE_BY_VM]; ++}; ++ ++struct hvsock_sock { ++ /* sk must be the first member. */ ++ struct sock sk; ++ ++ struct sockaddr_hv local_addr; ++ struct sockaddr_hv remote_addr; ++ ++ /* protected by the global hvsock_mutex */ ++ struct list_head bound_list; ++ struct list_head connected_list; ++ ++ struct list_head accept_queue; ++ /* used by enqueue and dequeue */ ++ struct mutex accept_queue_mutex; ++ ++ struct delayed_work dwork; ++ ++ u32 peer_shutdown; ++ ++ struct vmbus_channel *channel; ++ ++ struct hvsock_send_buf *send; ++ struct hvsock_recv_buf *recv; ++}; ++ ++static inline struct hvsock_sock *sk_to_hvsock(struct sock *sk) ++{ ++ return (struct hvsock_sock *)sk; ++} ++ ++static inline struct sock *hvsock_to_sk(struct hvsock_sock *hvsk) ++{ ++ return (struct sock *)hvsk; ++} ++ ++#endif /* __AF_HVSOCK_H__ */ +diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h +index e347b24ef9fb..eb3e44b69a5d 100644 +--- a/include/uapi/linux/hyperv.h ++++ b/include/uapi/linux/hyperv.h +@@ -26,6 +26,7 @@ + #define _UAPI_HYPERV_H + + #include ++#include + + /* + * Framework version for util services. +@@ -396,4 +397,26 @@ struct hv_kvp_ip_msg { + struct hv_kvp_ipaddr_value kvp_ip_val; + } __attribute__((packed)); + ++/* This is the address format of Hyper-V Sockets. ++ * Note: here we just borrow the kernel's built-in type uuid_le. When ++ * an application calls bind() or connect(), the 2 members of struct ++ * sockaddr_hv must be of GUID. ++ * The GUID format differs from the UUID format only in the byte order of ++ * the first 3 fields. 
Refer to: ++ * https://en.wikipedia.org/wiki/Globally_unique_identifier ++ */ ++struct sockaddr_hv { ++ __kernel_sa_family_t shv_family; /* Address family */ ++ u16 reserved; /* Must be Zero */ ++ uuid_le shv_vm_guid; /* VM ID */ ++ uuid_le shv_service_guid; /* Service ID */ ++}; ++ ++#define SHV_VMID_GUEST NULL_UUID_LE ++#define SHV_VMID_HOST NULL_UUID_LE ++ ++#define SHV_SERVICE_ID_ANY NULL_UUID_LE ++ ++#define SHV_PROTO_RAW 1 ++ + #endif /* _UAPI_HYPERV_H */ +diff --git a/net/Kconfig b/net/Kconfig +index a29bb4b41c50..2cdaca32ae5b 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -231,6 +231,7 @@ source "net/dns_resolver/Kconfig" + source "net/batman-adv/Kconfig" + source "net/openvswitch/Kconfig" + source "net/vmw_vsock/Kconfig" ++source "net/hv_sock/Kconfig" + source "net/netlink/Kconfig" + source "net/mpls/Kconfig" + source "net/hsr/Kconfig" +diff --git a/net/Makefile b/net/Makefile +index 4cafaa2b4667..2b357eb81865 100644 +--- a/net/Makefile ++++ b/net/Makefile +@@ -71,6 +71,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ + obj-$(CONFIG_NFC) += nfc/ + obj-$(CONFIG_OPENVSWITCH) += openvswitch/ + obj-$(CONFIG_VSOCKETS) += vmw_vsock/ ++obj-$(CONFIG_HYPERV_SOCK) += hv_sock/ + obj-$(CONFIG_MPLS) += mpls/ + obj-$(CONFIG_HSR) += hsr/ + ifneq ($(CONFIG_NET_SWITCHDEV),) +diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig +new file mode 100644 +index 000000000000..ff84875564d1 +--- /dev/null ++++ b/net/hv_sock/Kconfig +@@ -0,0 +1,10 @@ ++config HYPERV_SOCK ++ tristate "Hyper-V Sockets" ++ depends on HYPERV ++ default m if HYPERV ++ help ++ Hyper-V Sockets is a socket interface for high speed ++ communication between Linux guest and Hyper-V host over VMBus. ++ ++ To compile this driver as a module, choose M here: the module ++ will be called hv_sock. +diff --git a/net/hv_sock/Makefile b/net/hv_sock/Makefile +new file mode 100644 +index 000000000000..716c01230129 +--- /dev/null ++++ b/net/hv_sock/Makefile +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_HYPERV_SOCK) += hv_sock.o ++ ++hv_sock-y += af_hvsock.o +diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c +new file mode 100644 +index 000000000000..331d3759f5cb +--- /dev/null ++++ b/net/hv_sock/af_hvsock.c +@@ -0,0 +1,1507 @@ ++/* ++ * Hyper-V Sockets -- a socket-based communication channel between the ++ * Hyper-V host and the virtual machines running on it. ++ * ++ * Copyright (c) 2016 Microsoft Corporation. ++ * ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote ++ * products derived from this software without specific prior written ++ * permission. ++ * ++ * Alternatively, this software may be distributed under the terms of the ++ * GNU General Public License ("GPL") version 2 as published by the Free ++ * Software Foundation. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ++ * POSSIBILITY OF SUCH DAMAGE. ++ */ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++ ++static struct proto hvsock_proto = { ++ .name = "HV_SOCK", ++ .owner = THIS_MODULE, ++ .obj_size = sizeof(struct hvsock_sock), ++}; ++ ++#define SS_LISTEN 255 ++ ++#define HVSOCK_CONNECT_TIMEOUT (30 * HZ) ++ ++/* This is an artificial limit */ ++#define HVSOCK_MAX_BACKLOG 128 ++ ++static LIST_HEAD(hvsock_bound_list); ++static LIST_HEAD(hvsock_connected_list); ++static DEFINE_MUTEX(hvsock_mutex); ++ ++static struct sock *hvsock_find_bound_socket(const struct sockaddr_hv *addr) ++{ ++ struct hvsock_sock *hvsk; ++ ++ list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) { ++ if (!uuid_le_cmp(addr->shv_service_guid, ++ hvsk->local_addr.shv_service_guid)) ++ return hvsock_to_sk(hvsk); ++ } ++ return NULL; ++} ++ ++static struct sock *hvsock_find_connected_socket_by_channel( ++ const struct vmbus_channel *channel) ++{ ++ struct hvsock_sock *hvsk; ++ ++ list_for_each_entry(hvsk, &hvsock_connected_list, connected_list) { ++ if (hvsk->channel == channel) ++ return hvsock_to_sk(hvsk); ++ } ++ return NULL; ++} ++ ++static void hvsock_enqueue_accept(struct sock *listener, ++ struct sock *connected) ++{ ++ struct hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; ++ ++ hvlistener = sk_to_hvsock(listener); ++ hvconnected = sk_to_hvsock(connected); ++ ++ sock_hold(connected); ++ sock_hold(listener); ++ ++ mutex_lock(&hvlistener->accept_queue_mutex); ++ list_add_tail(&hvconnected->accept_queue, &hvlistener->accept_queue); ++ listener->sk_ack_backlog++; ++ mutex_unlock(&hvlistener->accept_queue_mutex); ++} ++ ++static struct sock *hvsock_dequeue_accept(struct sock *listener) ++{ ++ struct hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; ++ ++ hvlistener = sk_to_hvsock(listener); ++ ++ mutex_lock(&hvlistener->accept_queue_mutex); ++ ++ if (list_empty(&hvlistener->accept_queue)) { ++ mutex_unlock(&hvlistener->accept_queue_mutex); ++ return NULL; ++ } ++ ++ hvconnected = list_entry(hvlistener->accept_queue.next, ++ struct hvsock_sock, accept_queue); ++ ++ list_del_init(&hvconnected->accept_queue); ++ listener->sk_ack_backlog--; ++ ++ mutex_unlock(&hvlistener->accept_queue_mutex); ++ ++ sock_put(listener); ++ /* The caller will need a reference on the connected socket so we let ++ * it call sock_put(). 
++ */ ++ ++ return hvsock_to_sk(hvconnected); ++} ++ ++static bool hvsock_is_accept_queue_empty(struct sock *sk) ++{ ++ struct hvsock_sock *hvsk = sk_to_hvsock(sk); ++ int ret; ++ ++ mutex_lock(&hvsk->accept_queue_mutex); ++ ret = list_empty(&hvsk->accept_queue); ++ mutex_unlock(&hvsk->accept_queue_mutex); ++ ++ return ret; ++} ++ ++static void hvsock_addr_init(struct sockaddr_hv *addr, uuid_le service_id) ++{ ++ memset(addr, 0, sizeof(*addr)); ++ addr->shv_family = AF_HYPERV; ++ addr->shv_service_guid = service_id; ++} ++ ++static int hvsock_addr_validate(const struct sockaddr_hv *addr) ++{ ++ if (!addr) ++ return -EFAULT; ++ ++ if (addr->shv_family != AF_HYPERV) ++ return -EAFNOSUPPORT; ++ ++ if (addr->reserved != 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static bool hvsock_addr_bound(const struct sockaddr_hv *addr) ++{ ++ return !!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY); ++} ++ ++static int hvsock_addr_cast(const struct sockaddr *addr, size_t len, ++ struct sockaddr_hv **out_addr) ++{ ++ if (len < sizeof(**out_addr)) ++ return -EFAULT; ++ ++ *out_addr = (struct sockaddr_hv *)addr; ++ return hvsock_addr_validate(*out_addr); ++} ++ ++static int __hvsock_do_bind(struct hvsock_sock *hvsk, ++ struct sockaddr_hv *addr) ++{ ++ struct sockaddr_hv hv_addr; ++ int ret = 0; ++ ++ hvsock_addr_init(&hv_addr, addr->shv_service_guid); ++ ++ mutex_lock(&hvsock_mutex); ++ ++ if (!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY)) { ++ do { ++ uuid_le_gen(&hv_addr.shv_service_guid); ++ } while (hvsock_find_bound_socket(&hv_addr)); ++ } else { ++ if (hvsock_find_bound_socket(&hv_addr)) { ++ ret = -EADDRINUSE; ++ goto out; ++ } ++ } ++ ++ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_guid); ++ ++ sock_hold(&hvsk->sk); ++ list_add(&hvsk->bound_list, &hvsock_bound_list); ++out: ++ mutex_unlock(&hvsock_mutex); ++ ++ return ret; ++} ++ ++static int __hvsock_bind(struct sock *sk, struct sockaddr_hv *addr) ++{ ++ struct hvsock_sock *hvsk = sk_to_hvsock(sk); ++ int ret; ++ ++ if (hvsock_addr_bound(&hvsk->local_addr)) ++ return -EINVAL; ++ ++ switch (sk->sk_socket->type) { ++ case SOCK_STREAM: ++ ret = __hvsock_do_bind(hvsk, addr); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ ++ return ret; ++} ++ ++/* Autobind this socket to the local address if necessary. */ ++static int hvsock_auto_bind(struct hvsock_sock *hvsk) ++{ ++ struct sock *sk = hvsock_to_sk(hvsk); ++ struct sockaddr_hv local_addr; ++ ++ if (hvsock_addr_bound(&hvsk->local_addr)) ++ return 0; ++ hvsock_addr_init(&local_addr, SHV_SERVICE_ID_ANY); ++ return __hvsock_bind(sk, &local_addr); ++} ++ ++static void hvsock_sk_destruct(struct sock *sk) ++{ ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; ++ ++ hvsk = sk_to_hvsock(sk); ++ vfree(hvsk->send); ++ vfree(hvsk->recv); ++ ++ channel = hvsk->channel; ++ if (!channel) ++ return; ++ ++ vmbus_hvsock_device_unregister(channel); ++} ++ ++static void __hvsock_release(struct sock *sk) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *pending; ++ ++ hvsk = sk_to_hvsock(sk); ++ ++ mutex_lock(&hvsock_mutex); ++ ++ if (!list_empty(&hvsk->bound_list)) { ++ list_del_init(&hvsk->bound_list); ++ sock_put(&hvsk->sk); ++ } ++ ++ if (!list_empty(&hvsk->connected_list)) { ++ list_del_init(&hvsk->connected_list); ++ sock_put(&hvsk->sk); ++ } ++ ++ mutex_unlock(&hvsock_mutex); ++ ++ lock_sock(sk); ++ sock_orphan(sk); ++ sk->sk_shutdown = SHUTDOWN_MASK; ++ ++ /* Clean up any sockets that never were accepted. 
*/ ++ while ((pending = hvsock_dequeue_accept(sk)) != NULL) { ++ __hvsock_release(pending); ++ sock_put(pending); ++ } ++ ++ release_sock(sk); ++ sock_put(sk); ++} ++ ++static int hvsock_release(struct socket *sock) ++{ ++ /* If accept() is interrupted by a signal, the temporary socket ++ * struct's sock->sk is NULL. ++ */ ++ if (sock->sk) { ++ __hvsock_release(sock->sk); ++ sock->sk = NULL; ++ } ++ ++ sock->state = SS_FREE; ++ return 0; ++} ++ ++static struct sock *hvsock_create(struct net *net, struct socket *sock, ++ gfp_t priority, unsigned short type) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ ++ sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0); ++ if (!sk) ++ return NULL; ++ ++ sock_init_data(sock, sk); ++ ++ /* sk->sk_type is normally set in sock_init_data, but only if sock ++ * is non-NULL. We make sure that our sockets always have a type by ++ * setting it here if needed. ++ */ ++ if (!sock) ++ sk->sk_type = type; ++ ++ sk->sk_destruct = hvsock_sk_destruct; ++ ++ /* Looks stream-based socket doesn't need this. */ ++ sk->sk_backlog_rcv = NULL; ++ ++ sk->sk_state = 0; ++ sock_reset_flag(sk, SOCK_DONE); ++ ++ hvsk = sk_to_hvsock(sk); ++ ++ hvsk->send = NULL; ++ hvsk->recv = NULL; ++ ++ hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY); ++ hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY); ++ ++ INIT_LIST_HEAD(&hvsk->bound_list); ++ INIT_LIST_HEAD(&hvsk->connected_list); ++ ++ INIT_LIST_HEAD(&hvsk->accept_queue); ++ mutex_init(&hvsk->accept_queue_mutex); ++ ++ hvsk->peer_shutdown = 0; ++ ++ return sk; ++} ++ ++static int hvsock_bind(struct socket *sock, struct sockaddr *addr, ++ int addr_len) ++{ ++ struct sockaddr_hv *hv_addr; ++ struct sock *sk; ++ int ret; ++ ++ sk = sock->sk; ++ ++ if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0) ++ return -EINVAL; ++ ++ if (uuid_le_cmp(hv_addr->shv_vm_guid, NULL_UUID_LE)) ++ return -EINVAL; ++ ++ lock_sock(sk); ++ ret = __hvsock_bind(sk, hv_addr); ++ release_sock(sk); ++ ++ return ret; ++} ++ ++static int hvsock_getname(struct socket *sock, ++ struct sockaddr *addr, int *addr_len, int peer) ++{ ++ struct sockaddr_hv *hv_addr; ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret; ++ ++ sk = sock->sk; ++ hvsk = sk_to_hvsock(sk); ++ ret = 0; ++ ++ lock_sock(sk); ++ ++ if (peer) { ++ if (sock->state != SS_CONNECTED) { ++ ret = -ENOTCONN; ++ goto out; ++ } ++ hv_addr = &hvsk->remote_addr; ++ } else { ++ hv_addr = &hvsk->local_addr; ++ } ++ ++ __sockaddr_check_size(sizeof(*hv_addr)); ++ ++ memcpy(addr, hv_addr, sizeof(*hv_addr)); ++ *addr_len = sizeof(*hv_addr); ++ ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static void get_ringbuffer_rw_status(struct vmbus_channel *channel, ++ bool *can_read, bool *can_write) ++{ ++ u32 avl_read_bytes, avl_write_bytes, dummy; ++ ++ if (can_read) { ++ hv_get_ringbuffer_availbytes(&channel->inbound, ++ &avl_read_bytes, ++ &dummy); ++ /* 0-size payload means FIN */ ++ *can_read = avl_read_bytes >= HVSOCK_PKT_LEN(0); ++ } ++ ++ if (can_write) { ++ hv_get_ringbuffer_availbytes(&channel->outbound, ++ &dummy, ++ &avl_write_bytes); ++ ++ /* We only write if there is enough space */ ++ *can_write = avl_write_bytes > HVSOCK_PKT_LEN(PAGE_SIZE_4K); ++ } ++} ++ ++static size_t get_ringbuffer_writable_bytes(struct vmbus_channel *channel) ++{ ++ u32 avl_write_bytes, dummy; ++ size_t ret; ++ ++ hv_get_ringbuffer_availbytes(&channel->outbound, ++ &dummy, ++ &avl_write_bytes); ++ ++ /* The ringbuffer mustn't be 100% full, and we should reserve a ++ * zero-length-payload packet for 
the FIN: see hv_ringbuffer_write() ++ * and hvsock_shutdown(). ++ */ ++ if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) ++ return 0; ++ ret = avl_write_bytes - HVSOCK_PKT_LEN(1) - HVSOCK_PKT_LEN(0); ++ ++ return round_down(ret, 8); ++} ++ ++static int hvsock_get_send_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->send = vmalloc(sizeof(*hvsk->send)); ++ return hvsk->send ? 0 : -ENOMEM; ++} ++ ++static void hvsock_put_send_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->send); ++ hvsk->send = NULL; ++} ++ ++static int hvsock_send_data(struct vmbus_channel *channel, ++ struct hvsock_sock *hvsk, ++ size_t to_write) ++{ ++ hvsk->send->hdr.pkt_type = 1; ++ hvsk->send->hdr.data_size = to_write; ++ return vmbus_sendpacket(channel, &hvsk->send->hdr, ++ sizeof(hvsk->send->hdr) + to_write, ++ 0, VM_PKT_DATA_INBAND, 0); ++} ++ ++static int hvsock_get_recv_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->recv = vmalloc(sizeof(*hvsk->recv)); ++ return hvsk->recv ? 0 : -ENOMEM; ++} ++ ++static void hvsock_put_recv_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->recv); ++ hvsk->recv = NULL; ++} ++ ++static int hvsock_recv_data(struct vmbus_channel *channel, ++ struct hvsock_sock *hvsk, ++ size_t *payload_len) ++{ ++ u32 buffer_actual_len; ++ u64 dummy_req_id; ++ int ret; ++ ++ ret = vmbus_recvpacket(channel, &hvsk->recv->hdr, ++ sizeof(hvsk->recv->hdr) + ++ sizeof(hvsk->recv->buf), ++ &buffer_actual_len, &dummy_req_id); ++ if (ret != 0 || buffer_actual_len <= sizeof(hvsk->recv->hdr)) ++ *payload_len = 0; ++ else ++ *payload_len = hvsk->recv->hdr.data_size; ++ ++ return ret; ++} ++ ++static int hvsock_shutdown(struct socket *sock, int mode) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret = 0; ++ ++ if (mode < SHUT_RD || mode > SHUT_RDWR) ++ return -EINVAL; ++ /* This maps: ++ * SHUT_RD (0) -> RCV_SHUTDOWN (1) ++ * SHUT_WR (1) -> SEND_SHUTDOWN (2) ++ * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) ++ */ ++ ++mode; ++ ++ if (sock->state != SS_CONNECTED) ++ return -ENOTCONN; ++ ++ sock->state = SS_DISCONNECTING; ++ ++ sk = sock->sk; ++ ++ lock_sock(sk); ++ ++ sk->sk_shutdown |= mode; ++ sk->sk_state_change(sk); ++ ++ if (mode & SEND_SHUTDOWN) { ++ hvsk = sk_to_hvsock(sk); ++ ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) ++ goto out; ++ ++ /* It can't fail: see get_ringbuffer_writable_bytes(). */ ++ (void)hvsock_send_data(hvsk->channel, hvsk, 0); ++ ++ hvsock_put_send_buf(hvsk); ++ } ++ ++out: ++ release_sock(sk); ++ ++ return ret; ++} ++ ++static unsigned int hvsock_poll(struct file *file, struct socket *sock, ++ poll_table *wait) ++{ ++ struct vmbus_channel *channel; ++ bool can_read, can_write; ++ struct hvsock_sock *hvsk; ++ unsigned int mask; ++ struct sock *sk; ++ ++ sk = sock->sk; ++ hvsk = sk_to_hvsock(sk); ++ ++ poll_wait(file, sk_sleep(sk), wait); ++ mask = 0; ++ ++ if (sk->sk_err) ++ /* Signify that there has been an error on this socket. */ ++ mask |= POLLERR; ++ ++ /* INET sockets treat local write shutdown and peer write shutdown as a ++ * case of POLLHUP set. ++ */ ++ if ((sk->sk_shutdown == SHUTDOWN_MASK) || ++ ((sk->sk_shutdown & SEND_SHUTDOWN) && ++ (hvsk->peer_shutdown & SEND_SHUTDOWN))) { ++ mask |= POLLHUP; ++ } ++ ++ if (sk->sk_shutdown & RCV_SHUTDOWN || ++ hvsk->peer_shutdown & SEND_SHUTDOWN) { ++ mask |= POLLRDHUP; ++ } ++ ++ lock_sock(sk); ++ ++ /* Listening sockets that have connections in their accept ++ * queue can be read. 
++ */ ++ if (sk->sk_state == SS_LISTEN && !hvsock_is_accept_queue_empty(sk)) ++ mask |= POLLIN | POLLRDNORM; ++ ++ /* The mutex is to against hvsock_open_connection() */ ++ mutex_lock(&hvsock_mutex); ++ ++ channel = hvsk->channel; ++ if (channel) { ++ /* If there is something in the queue then we can read */ ++ get_ringbuffer_rw_status(channel, &can_read, &can_write); ++ ++ if (!can_read && hvsk->recv) ++ can_read = true; ++ ++ if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read) ++ mask |= POLLIN | POLLRDNORM; ++ } else { ++ can_write = false; ++ } ++ ++ mutex_unlock(&hvsock_mutex); ++ ++ /* Sockets whose connections have been closed terminated should ++ * also be considered read, and we check the shutdown flag for that. ++ */ ++ if (sk->sk_shutdown & RCV_SHUTDOWN || ++ hvsk->peer_shutdown & SEND_SHUTDOWN) { ++ mask |= POLLIN | POLLRDNORM; ++ } ++ ++ /* Connected sockets that can produce data can be written. */ ++ if (sk->sk_state == SS_CONNECTED && can_write && ++ !(sk->sk_shutdown & SEND_SHUTDOWN)) { ++ /* Remove POLLWRBAND since INET sockets are not setting it. ++ */ ++ mask |= POLLOUT | POLLWRNORM; ++ } ++ ++ /* Simulate INET socket poll behaviors, which sets ++ * POLLOUT|POLLWRNORM when peer is closed and nothing to read, ++ * but local send is not shutdown. ++ */ ++ if (sk->sk_state == SS_UNCONNECTED && ++ !(sk->sk_shutdown & SEND_SHUTDOWN)) ++ mask |= POLLOUT | POLLWRNORM; ++ ++ release_sock(sk); ++ ++ return mask; ++} ++ ++/* This function runs in the tasklet context of process_chn_event() */ ++static void hvsock_on_channel_cb(void *ctx) ++{ ++ struct sock *sk = (struct sock *)ctx; ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; ++ bool can_read, can_write; ++ ++ hvsk = sk_to_hvsock(sk); ++ channel = hvsk->channel; ++ BUG_ON(!channel); ++ ++ get_ringbuffer_rw_status(channel, &can_read, &can_write); ++ ++ if (can_read) ++ sk->sk_data_ready(sk); ++ ++ if (can_write) ++ sk->sk_write_space(sk); ++} ++ ++static void hvsock_close_connection(struct vmbus_channel *channel) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ ++ mutex_lock(&hvsock_mutex); ++ ++ sk = hvsock_find_connected_socket_by_channel(channel); ++ ++ /* The guest has already closed the connection? */ ++ if (!sk) ++ goto out; ++ ++ sk->sk_state = SS_UNCONNECTED; ++ sock_set_flag(sk, SOCK_DONE); ++ ++ hvsk = sk_to_hvsock(sk); ++ hvsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; ++ ++ sk->sk_state_change(sk); ++out: ++ mutex_unlock(&hvsock_mutex); ++} ++ ++static int hvsock_open_connection(struct vmbus_channel *channel) ++{ ++ struct hvsock_sock *hvsk = NULL, *new_hvsk = NULL; ++ uuid_le *instance, *service_id; ++ unsigned char conn_from_host; ++ struct sockaddr_hv hv_addr; ++ struct sock *sk, *new_sk = NULL; ++ int ret; ++ ++ instance = &channel->offermsg.offer.if_instance; ++ service_id = &channel->offermsg.offer.if_type; ++ ++ /* The first byte != 0 means the host initiated the connection. */ ++ conn_from_host = channel->offermsg.offer.u.pipe.user_def[0]; ++ ++ mutex_lock(&hvsock_mutex); ++ ++ hvsock_addr_init(&hv_addr, conn_from_host ? 
*service_id : *instance); ++ sk = hvsock_find_bound_socket(&hv_addr); ++ ++ if (!sk || (conn_from_host && sk->sk_state != SS_LISTEN) || ++ (!conn_from_host && sk->sk_state != SS_CONNECTING)) { ++ ret = -ENXIO; ++ goto out; ++ } ++ ++ if (conn_from_host) { ++ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { ++ ret = -ECONNREFUSED; ++ goto out; ++ } ++ ++ new_sk = hvsock_create(sock_net(sk), NULL, GFP_KERNEL, ++ sk->sk_type); ++ if (!new_sk) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ new_sk->sk_state = SS_CONNECTING; ++ new_hvsk = sk_to_hvsock(new_sk); ++ new_hvsk->channel = channel; ++ hvsock_addr_init(&new_hvsk->local_addr, *service_id); ++ hvsock_addr_init(&new_hvsk->remote_addr, *instance); ++ } else { ++ hvsk = sk_to_hvsock(sk); ++ hvsk->channel = channel; ++ } ++ ++ set_channel_read_state(channel, false); ++ ret = vmbus_open(channel, RINGBUFFER_HVSOCK_SND_SIZE, ++ RINGBUFFER_HVSOCK_RCV_SIZE, NULL, 0, ++ hvsock_on_channel_cb, conn_from_host ? new_sk : sk); ++ if (ret != 0) { ++ if (conn_from_host) { ++ new_hvsk->channel = NULL; ++ sock_put(new_sk); ++ } else { ++ hvsk->channel = NULL; ++ } ++ goto out; ++ } ++ ++ vmbus_set_chn_rescind_callback(channel, hvsock_close_connection); ++ ++ /* see get_ringbuffer_rw_status() */ ++ set_channel_pending_send_size(channel, ++ HVSOCK_PKT_LEN(PAGE_SIZE_4K) + 1); ++ ++ if (conn_from_host) { ++ new_sk->sk_state = SS_CONNECTED; ++ ++ sock_hold(&new_hvsk->sk); ++ list_add(&new_hvsk->connected_list, &hvsock_connected_list); ++ ++ hvsock_enqueue_accept(sk, new_sk); ++ } else { ++ sk->sk_state = SS_CONNECTED; ++ sk->sk_socket->state = SS_CONNECTED; ++ ++ sock_hold(&hvsk->sk); ++ list_add(&hvsk->connected_list, &hvsock_connected_list); ++ } ++ ++ sk->sk_state_change(sk); ++out: ++ mutex_unlock(&hvsock_mutex); ++ return ret; ++} ++ ++static void hvsock_connect_timeout(struct work_struct *work) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ ++ hvsk = container_of(work, struct hvsock_sock, dwork.work); ++ sk = hvsock_to_sk(hvsk); ++ ++ lock_sock(sk); ++ if ((sk->sk_state == SS_CONNECTING) && ++ (sk->sk_shutdown != SHUTDOWN_MASK)) { ++ sk->sk_state = SS_UNCONNECTED; ++ sk->sk_err = ETIMEDOUT; ++ sk->sk_error_report(sk); ++ } ++ release_sock(sk); ++ ++ sock_put(sk); ++} ++ ++static int hvsock_connect_wait(struct socket *sock, ++ int flags, int current_ret) ++{ ++ struct sock *sk = sock->sk; ++ struct hvsock_sock *hvsk; ++ int ret = current_ret; ++ DEFINE_WAIT(wait); ++ long timeout; ++ ++ hvsk = sk_to_hvsock(sk); ++ timeout = HVSOCK_CONNECT_TIMEOUT; ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ ++ while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { ++ if (flags & O_NONBLOCK) { ++ /* If we're not going to block, we schedule a timeout ++ * function to generate a timeout on the connection ++ * attempt, in case the peer doesn't respond in a ++ * timely manner. We hold on to the socket until the ++ * timeout fires. ++ */ ++ sock_hold(sk); ++ INIT_DELAYED_WORK(&hvsk->dwork, ++ hvsock_connect_timeout); ++ schedule_delayed_work(&hvsk->dwork, timeout); ++ ++ /* Skip ahead to preserve error code set above. */ ++ goto out_wait; ++ } ++ ++ release_sock(sk); ++ timeout = schedule_timeout(timeout); ++ lock_sock(sk); ++ ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ goto out_wait_error; ++ } else if (timeout == 0) { ++ ret = -ETIMEDOUT; ++ goto out_wait_error; ++ } ++ ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ } ++ ++ ret = sk->sk_err ? 
-sk->sk_err : 0; ++ ++out_wait_error: ++ if (ret < 0) { ++ sk->sk_state = SS_UNCONNECTED; ++ sock->state = SS_UNCONNECTED; ++ } ++out_wait: ++ finish_wait(sk_sleep(sk), &wait); ++ return ret; ++} ++ ++static int hvsock_connect(struct socket *sock, struct sockaddr *addr, ++ int addr_len, int flags) ++{ ++ struct sockaddr_hv *remote_addr; ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret = 0; ++ ++ sk = sock->sk; ++ hvsk = sk_to_hvsock(sk); ++ ++ lock_sock(sk); ++ ++ switch (sock->state) { ++ case SS_CONNECTED: ++ ret = -EISCONN; ++ goto out; ++ case SS_DISCONNECTING: ++ ret = -EINVAL; ++ goto out; ++ case SS_CONNECTING: ++ /* This continues on so we can move sock into the SS_CONNECTED ++ * state once the connection has completed (at which point err ++ * will be set to zero also). Otherwise, we will either wait ++ * for the connection or return -EALREADY should this be a ++ * non-blocking call. ++ */ ++ ret = -EALREADY; ++ break; ++ default: ++ if ((sk->sk_state == SS_LISTEN) || ++ hvsock_addr_cast(addr, addr_len, &remote_addr) != 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* Set the remote address that we are connecting to. */ ++ memcpy(&hvsk->remote_addr, remote_addr, ++ sizeof(hvsk->remote_addr)); ++ ++ ret = hvsock_auto_bind(hvsk); ++ if (ret) ++ goto out; ++ ++ sk->sk_state = SS_CONNECTING; ++ ++ ret = vmbus_send_tl_connect_request( ++ &hvsk->local_addr.shv_service_guid, ++ &hvsk->remote_addr.shv_service_guid); ++ if (ret < 0) ++ goto out; ++ ++ /* Mark sock as connecting and set the error code to in ++ * progress in case this is a non-blocking connect. ++ */ ++ sock->state = SS_CONNECTING; ++ ret = -EINPROGRESS; ++ } ++ ++ ret = hvsock_connect_wait(sock, flags, ret); ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static int hvsock_accept_wait(struct sock *listener, ++ struct socket *newsock, int flags) ++{ ++ struct hvsock_sock *hvconnected; ++ struct sock *connected; ++ ++ DEFINE_WAIT(wait); ++ long timeout; ++ ++ int ret = 0; ++ ++ /* Wait for children sockets to appear; these are the new sockets ++ * created upon connection establishment. 
++ */ ++ timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); ++ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); ++ ++ while ((connected = hvsock_dequeue_accept(listener)) == NULL && ++ listener->sk_err == 0) { ++ release_sock(listener); ++ timeout = schedule_timeout(timeout); ++ lock_sock(listener); ++ ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ goto out_wait; ++ } else if (timeout == 0) { ++ ret = -EAGAIN; ++ goto out_wait; ++ } ++ ++ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); ++ } ++ ++ if (listener->sk_err) ++ ret = -listener->sk_err; ++ ++ if (connected) { ++ lock_sock(connected); ++ hvconnected = sk_to_hvsock(connected); ++ ++ if (!ret) { ++ newsock->state = SS_CONNECTED; ++ sock_graft(connected, newsock); ++ } ++ release_sock(connected); ++ sock_put(connected); ++ } ++ ++out_wait: ++ finish_wait(sk_sleep(listener), &wait); ++ return ret; ++} ++ ++static int hvsock_accept(struct socket *sock, struct socket *newsock, ++ int flags) ++{ ++ struct sock *listener; ++ int ret; ++ ++ listener = sock->sk; ++ ++ lock_sock(listener); ++ ++ if (sock->type != SOCK_STREAM) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ if (listener->sk_state != SS_LISTEN) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ ret = hvsock_accept_wait(listener, newsock, flags); ++out: ++ release_sock(listener); ++ return ret; ++} ++ ++static int hvsock_listen(struct socket *sock, int backlog) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret = 0; ++ ++ sk = sock->sk; ++ lock_sock(sk); ++ ++ if (sock->type != SOCK_STREAM) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ if (sock->state != SS_UNCONNECTED) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (backlog <= 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ if (backlog > HVSOCK_MAX_BACKLOG) ++ backlog = HVSOCK_MAX_BACKLOG; ++ ++ hvsk = sk_to_hvsock(sk); ++ if (!hvsock_addr_bound(&hvsk->local_addr)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ sk->sk_ack_backlog = 0; ++ sk->sk_max_ack_backlog = backlog; ++ sk->sk_state = SS_LISTEN; ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, ++ size_t len) ++{ ++ struct hvsock_sock *hvsk = sk_to_hvsock(sk); ++ struct vmbus_channel *channel; ++ size_t total_to_write = len; ++ size_t total_written = 0; ++ DEFINE_WAIT(wait); ++ bool can_write; ++ long timeout; ++ int ret = -EIO; ++ ++ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; ++ ++ while (total_to_write > 0) { ++ size_t to_write, max_writable; ++ ++ while (1) { ++ get_ringbuffer_rw_status(channel, NULL, &can_write); ++ ++ if (can_write || sk->sk_err != 0 || ++ (sk->sk_shutdown & SEND_SHUTDOWN) || ++ (hvsk->peer_shutdown & RCV_SHUTDOWN)) ++ break; ++ ++ /* Don't wait for non-blocking sockets. */ ++ if (timeout == 0) { ++ ret = -EAGAIN; ++ goto out_wait; ++ } ++ ++ release_sock(sk); ++ ++ timeout = schedule_timeout(timeout); ++ ++ lock_sock(sk); ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ goto out_wait; ++ } else if (timeout == 0) { ++ ret = -EAGAIN; ++ goto out_wait; ++ } ++ ++ prepare_to_wait(sk_sleep(sk), &wait, ++ TASK_INTERRUPTIBLE); ++ } ++ ++ /* These checks occur both as part of and after the loop ++ * conditional since we need to check before and after ++ * sleeping. 
++ */ ++ if (sk->sk_err) { ++ ret = -sk->sk_err; ++ goto out_wait; ++ } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || ++ (hvsk->peer_shutdown & RCV_SHUTDOWN)) { ++ ret = -EPIPE; ++ goto out_wait; ++ } ++ ++ /* Note: that write will only write as many bytes as possible ++ * in the ringbuffer. It is the caller's responsibility to ++ * check how many bytes we actually wrote. ++ */ ++ do { ++ max_writable = get_ringbuffer_writable_bytes(channel); ++ if (max_writable == 0) ++ goto out_wait; ++ ++ to_write = min_t(size_t, sizeof(hvsk->send->buf), ++ total_to_write); ++ if (to_write > max_writable) ++ to_write = max_writable; ++ ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) ++ goto out_wait; ++ ++ ret = memcpy_from_msg(hvsk->send->buf, msg, to_write); ++ if (ret != 0) { ++ hvsock_put_send_buf(hvsk); ++ goto out_wait; ++ } ++ ++ ret = hvsock_send_data(channel, hvsk, to_write); ++ hvsock_put_send_buf(hvsk); ++ if (ret != 0) ++ goto out_wait; ++ ++ total_written += to_write; ++ total_to_write -= to_write; ++ } while (total_to_write > 0); ++ } ++ ++out_wait: ++ if (total_written > 0) ++ ret = total_written; ++ ++ finish_wait(sk_sleep(sk), &wait); ++ return ret; ++} ++ ++static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, ++ size_t len) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ if (msg->msg_flags & ~MSG_DONTWAIT) ++ return -EOPNOTSUPP; ++ ++ sk = sock->sk; ++ hvsk = sk_to_hvsock(sk); ++ ++ lock_sock(sk); ++ ++ /* Callers should not provide a destination with stream sockets. */ ++ if (msg->msg_namelen) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ /* Send data only if both sides are not shutdown in the direction. */ ++ if (sk->sk_shutdown & SEND_SHUTDOWN || ++ hvsk->peer_shutdown & RCV_SHUTDOWN) { ++ ret = -EPIPE; ++ goto out; ++ } ++ ++ if (sk->sk_state != SS_CONNECTED || ++ !hvsock_addr_bound(&hvsk->local_addr)) { ++ ret = -ENOTCONN; ++ goto out; ++ } ++ ++ if (!hvsock_addr_bound(&hvsk->remote_addr)) { ++ ret = -EDESTADDRREQ; ++ goto out; ++ } ++ ++ ret = hvsock_sendmsg_wait(sk, msg, len); ++out: ++ release_sock(sk); ++ ++ /* ret should be a bigger-than-0 total_written or a negative err ++ * code. 
++ */ ++ BUG_ON(ret == 0); ++ ++ return ret; ++} ++ ++static int hvsock_recvmsg_wait(struct sock *sk, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ struct hvsock_sock *hvsk = sk_to_hvsock(sk); ++ size_t to_read, total_to_read = len; ++ struct vmbus_channel *channel; ++ DEFINE_WAIT(wait); ++ size_t copied = 0; ++ bool can_read; ++ long timeout; ++ int ret = 0; ++ ++ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; ++ ++ while (1) { ++ bool need_refill = !hvsk->recv; ++ ++ if (need_refill) { ++ if (hvsk->peer_shutdown & SEND_SHUTDOWN) ++ can_read = false; ++ else ++ get_ringbuffer_rw_status(channel, &can_read, ++ NULL); ++ } else { ++ can_read = true; ++ } ++ ++ if (can_read) { ++ size_t payload_len; ++ ++ if (need_refill) { ++ ret = hvsock_get_recv_buf(hvsk); ++ if (ret < 0) { ++ if (copied > 0) ++ ret = copied; ++ goto out_wait; ++ } ++ ++ ret = hvsock_recv_data(channel, hvsk, ++ &payload_len); ++ if (ret != 0 || ++ payload_len > sizeof(hvsk->recv->buf)) { ++ ret = -EIO; ++ hvsock_put_recv_buf(hvsk); ++ goto out_wait; ++ } ++ ++ if (payload_len == 0) { ++ ret = copied; ++ hvsock_put_recv_buf(hvsk); ++ hvsk->peer_shutdown |= SEND_SHUTDOWN; ++ break; ++ } ++ ++ hvsk->recv->data_len = payload_len; ++ hvsk->recv->data_offset = 0; ++ } ++ ++ to_read = min_t(size_t, total_to_read, ++ hvsk->recv->data_len); ++ ++ ret = memcpy_to_msg(msg, hvsk->recv->buf + ++ hvsk->recv->data_offset, ++ to_read); ++ if (ret != 0) ++ break; ++ ++ copied += to_read; ++ total_to_read -= to_read; ++ ++ hvsk->recv->data_len -= to_read; ++ ++ if (hvsk->recv->data_len == 0) ++ hvsock_put_recv_buf(hvsk); ++ else ++ hvsk->recv->data_offset += to_read; ++ ++ if (total_to_read == 0) ++ break; ++ } else { ++ if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || ++ (hvsk->peer_shutdown & SEND_SHUTDOWN)) ++ break; ++ ++ /* Don't wait for non-blocking sockets. */ ++ if (timeout == 0) { ++ ret = -EAGAIN; ++ break; ++ } ++ ++ if (copied > 0) ++ break; ++ ++ release_sock(sk); ++ timeout = schedule_timeout(timeout); ++ lock_sock(sk); ++ ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ break; ++ } else if (timeout == 0) { ++ ret = -EAGAIN; ++ break; ++ } ++ ++ prepare_to_wait(sk_sleep(sk), &wait, ++ TASK_INTERRUPTIBLE); ++ } ++ } ++ ++ if (sk->sk_err) ++ ret = -sk->sk_err; ++ else if (sk->sk_shutdown & RCV_SHUTDOWN) ++ ret = 0; ++ ++ if (copied > 0) ++ ret = copied; ++out_wait: ++ finish_wait(sk_sleep(sk), &wait); ++ return ret; ++} ++ ++static int hvsock_recvmsg(struct socket *sock, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ struct sock *sk = sock->sk; ++ int ret; ++ ++ lock_sock(sk); ++ ++ if (sk->sk_state != SS_CONNECTED) { ++ /* Recvmsg is supposed to return 0 if a peer performs an ++ * orderly shutdown. Differentiate between that case and when a ++ * peer has not connected or a local shutdown occurred with the ++ * SOCK_DONE flag. ++ */ ++ if (sock_flag(sk, SOCK_DONE)) ++ ret = 0; ++ else ++ ret = -ENOTCONN; ++ ++ goto out; ++ } ++ ++ /* We ignore msg->addr_name/len. */ ++ if (flags & ~MSG_DONTWAIT) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ /* We don't check peer_shutdown flag here since peer may actually shut ++ * down, but there can be data in the queue that a local socket can ++ * receive. ++ */ ++ if (sk->sk_shutdown & RCV_SHUTDOWN) { ++ ret = 0; ++ goto out; ++ } ++ ++ /* It is valid on Linux to pass in a zero-length receive buffer. This ++ * is not an error. We may as well bail out now. 
++ */ ++ if (!len) { ++ ret = 0; ++ goto out; ++ } ++ ++ ret = hvsock_recvmsg_wait(sk, msg, len, flags); ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static const struct proto_ops hvsock_ops = { ++ .family = PF_HYPERV, ++ .owner = THIS_MODULE, ++ .release = hvsock_release, ++ .bind = hvsock_bind, ++ .connect = hvsock_connect, ++ .socketpair = sock_no_socketpair, ++ .accept = hvsock_accept, ++ .getname = hvsock_getname, ++ .poll = hvsock_poll, ++ .ioctl = sock_no_ioctl, ++ .listen = hvsock_listen, ++ .shutdown = hvsock_shutdown, ++ .setsockopt = sock_no_setsockopt, ++ .getsockopt = sock_no_getsockopt, ++ .sendmsg = hvsock_sendmsg, ++ .recvmsg = hvsock_recvmsg, ++ .mmap = sock_no_mmap, ++ .sendpage = sock_no_sendpage, ++}; ++ ++static int hvsock_create_sock(struct net *net, struct socket *sock, ++ int protocol, int kern) ++{ ++ struct sock *sk; ++ ++ if (protocol != 0 && protocol != SHV_PROTO_RAW) ++ return -EPROTONOSUPPORT; ++ ++ switch (sock->type) { ++ case SOCK_STREAM: ++ sock->ops = &hvsock_ops; ++ break; ++ default: ++ return -ESOCKTNOSUPPORT; ++ } ++ ++ sock->state = SS_UNCONNECTED; ++ ++ sk = hvsock_create(net, sock, GFP_KERNEL, 0); ++ return sk ? 0 : -ENOMEM; ++} ++ ++static const struct net_proto_family hvsock_family_ops = { ++ .family = AF_HYPERV, ++ .create = hvsock_create_sock, ++ .owner = THIS_MODULE, ++}; ++ ++static int hvsock_probe(struct hv_device *hdev, ++ const struct hv_vmbus_device_id *dev_id) ++{ ++ struct vmbus_channel *channel = hdev->channel; ++ ++ /* We ignore the error return code to suppress the unnecessary ++ * error message in vmbus_probe(): on error the host will rescind ++ * the offer in 30 seconds and we can do cleanup at that time. ++ */ ++ (void)hvsock_open_connection(channel); ++ ++ return 0; ++} ++ ++static int hvsock_remove(struct hv_device *hdev) ++{ ++ struct vmbus_channel *channel = hdev->channel; ++ ++ vmbus_close(channel); ++ ++ return 0; ++} ++ ++/* It's not really used. See vmbus_match() and vmbus_probe(). 
*/ ++static const struct hv_vmbus_device_id id_table[] = { ++ {}, ++}; ++ ++static struct hv_driver hvsock_drv = { ++ .name = "hv_sock", ++ .hvsock = true, ++ .id_table = id_table, ++ .probe = hvsock_probe, ++ .remove = hvsock_remove, ++}; ++ ++static int __init hvsock_init(void) ++{ ++ int ret; ++ ++ if (vmbus_proto_version < VERSION_WIN10) ++ return -ENODEV; ++ ++ ret = vmbus_driver_register(&hvsock_drv); ++ if (ret) { ++ pr_err("failed to register hv_sock driver\n"); ++ return ret; ++ } ++ ++ ret = proto_register(&hvsock_proto, 0); ++ if (ret) { ++ pr_err("failed to register protocol\n"); ++ goto unreg_hvsock_drv; ++ } ++ ++ ret = sock_register(&hvsock_family_ops); ++ if (ret) { ++ pr_err("failed to register address family\n"); ++ goto unreg_proto; ++ } ++ ++ return 0; ++ ++unreg_proto: ++ proto_unregister(&hvsock_proto); ++unreg_hvsock_drv: ++ vmbus_driver_unregister(&hvsock_drv); ++ return ret; ++} ++ ++static void __exit hvsock_exit(void) ++{ ++ sock_unregister(AF_HYPERV); ++ proto_unregister(&hvsock_proto); ++ vmbus_driver_unregister(&hvsock_drv); ++} ++ ++module_init(hvsock_init); ++module_exit(hvsock_exit); ++ ++MODULE_DESCRIPTION("Hyper-V Sockets"); ++MODULE_LICENSE("Dual BSD/GPL"); +-- +2.11.0 + diff --git a/kernel/patches-4.10/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch b/kernel/patches-4.10/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch new file mode 100644 index 000000000..4abdb7199 --- /dev/null +++ b/kernel/patches-4.10/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch @@ -0,0 +1,491 @@ +From d4e5be67dc204637a5fc1cf96e8ab0135253e979 Mon Sep 17 00:00:00 2001 +From: Alex Ng +Date: Sat, 28 Jan 2017 12:37:17 -0700 +Subject: [PATCH 2/3] Drivers: hv: vmbus: Use all supported IC versions to + negotiate + +Previously, we were assuming that each IC protocol version was tied to a +specific host version. For example, some Windows 10 preview hosts only +support v3 TimeSync even though driver assumes v4 is supported by all +Windows 10 hosts. + +The guest will stop trying to negotiate even though older supported +versions may still be offered by the host. + +Make IC version negotiation more robust by going through all versions +that are supported by the guest. + +Fixes: 3da0401b4d0e ("Drivers: hv: utils: Fix the mapping between host +version and protocol to use") + +Reported-by: Rolf Neugebauer +Signed-off-by: Alex Ng +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit a1656454131880980bc3a5313c8bf66ef5990c91) +--- + drivers/hv/channel_mgmt.c | 80 +++++++++++++++++++++++++++------------- + drivers/hv/hv_fcopy.c | 20 +++++++--- + drivers/hv/hv_kvp.c | 41 +++++++++------------ + drivers/hv/hv_snapshot.c | 18 +++++++-- + drivers/hv/hv_util.c | 94 +++++++++++++++++++++++++---------------------- + include/linux/hyperv.h | 7 ++-- + 6 files changed, 154 insertions(+), 106 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 0af7e39006c8..b42d69c05ebb 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -203,33 +203,34 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel) + * @buf: Raw buffer channel data + * + * @icmsghdrp is of type &struct icmsg_hdr. +- * @negop is of type &struct icmsg_negotiate. + * Set up and fill in default negotiate response message. + * +- * The fw_version specifies the framework version that +- * we can support and srv_version specifies the service +- * version we can support. 
++ * The fw_version and fw_vercnt specifies the framework version that ++ * we can support. ++ * ++ * The srv_version and srv_vercnt specifies the service ++ * versions we can support. ++ * ++ * Versions are given in decreasing order. ++ * ++ * nego_fw_version and nego_srv_version store the selected protocol versions. + * + * Mainly used by Hyper-V drivers. + */ + bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, +- struct icmsg_negotiate *negop, u8 *buf, +- int fw_version, int srv_version) ++ u8 *buf, const int *fw_version, int fw_vercnt, ++ const int *srv_version, int srv_vercnt, ++ int *nego_fw_version, int *nego_srv_version) + { + int icframe_major, icframe_minor; + int icmsg_major, icmsg_minor; + int fw_major, fw_minor; + int srv_major, srv_minor; +- int i; ++ int i, j; + bool found_match = false; ++ struct icmsg_negotiate *negop; + + icmsghdrp->icmsgsize = 0x10; +- fw_major = (fw_version >> 16); +- fw_minor = (fw_version & 0xFFFF); +- +- srv_major = (srv_version >> 16); +- srv_minor = (srv_version & 0xFFFF); +- + negop = (struct icmsg_negotiate *)&buf[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; +@@ -245,13 +246,22 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, + * support. + */ + +- for (i = 0; i < negop->icframe_vercnt; i++) { +- if ((negop->icversion_data[i].major == fw_major) && +- (negop->icversion_data[i].minor == fw_minor)) { +- icframe_major = negop->icversion_data[i].major; +- icframe_minor = negop->icversion_data[i].minor; +- found_match = true; ++ for (i = 0; i < fw_vercnt; i++) { ++ fw_major = (fw_version[i] >> 16); ++ fw_minor = (fw_version[i] & 0xFFFF); ++ ++ for (j = 0; j < negop->icframe_vercnt; j++) { ++ if ((negop->icversion_data[j].major == fw_major) && ++ (negop->icversion_data[j].minor == fw_minor)) { ++ icframe_major = negop->icversion_data[j].major; ++ icframe_minor = negop->icversion_data[j].minor; ++ found_match = true; ++ break; ++ } + } ++ ++ if (found_match) ++ break; + } + + if (!found_match) +@@ -259,14 +269,26 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, + + found_match = false; + +- for (i = negop->icframe_vercnt; +- (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) { +- if ((negop->icversion_data[i].major == srv_major) && +- (negop->icversion_data[i].minor == srv_minor)) { +- icmsg_major = negop->icversion_data[i].major; +- icmsg_minor = negop->icversion_data[i].minor; +- found_match = true; ++ for (i = 0; i < srv_vercnt; i++) { ++ srv_major = (srv_version[i] >> 16); ++ srv_minor = (srv_version[i] & 0xFFFF); ++ ++ for (j = negop->icframe_vercnt; ++ (j < negop->icframe_vercnt + negop->icmsg_vercnt); ++ j++) { ++ ++ if ((negop->icversion_data[j].major == srv_major) && ++ (negop->icversion_data[j].minor == srv_minor)) { ++ ++ icmsg_major = negop->icversion_data[j].major; ++ icmsg_minor = negop->icversion_data[j].minor; ++ found_match = true; ++ break; ++ } + } ++ ++ if (found_match) ++ break; + } + + /* +@@ -283,6 +305,12 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, + negop->icmsg_vercnt = 1; + } + ++ if (nego_fw_version) ++ *nego_fw_version = (icframe_major << 16) | icframe_minor; ++ ++ if (nego_srv_version) ++ *nego_srv_version = (icmsg_major << 16) | icmsg_minor; ++ + negop->icversion_data[0].major = icframe_major; + negop->icversion_data[0].minor = icframe_minor; + negop->icversion_data[1].major = icmsg_major; +diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c +index e47d8c9db03a..0a315e6aa589 100644 +--- a/drivers/hv/hv_fcopy.c ++++ b/drivers/hv/hv_fcopy.c 
+@@ -31,6 +31,16 @@ + #define WIN8_SRV_MINOR 1 + #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) + ++#define FCOPY_VER_COUNT 1 ++static const int fcopy_versions[] = { ++ WIN8_SRV_VERSION ++}; ++ ++#define FW_VER_COUNT 1 ++static const int fw_versions[] = { ++ UTIL_FW_VERSION ++}; ++ + /* + * Global state maintained for transaction that is being processed. + * For a class of integration services, including the "file copy service", +@@ -228,8 +238,6 @@ void hv_fcopy_onchannelcallback(void *context) + u64 requestid; + struct hv_fcopy_hdr *fcopy_msg; + struct icmsg_hdr *icmsghdr; +- struct icmsg_negotiate *negop = NULL; +- int util_fw_version; + int fcopy_srv_version; + + if (fcopy_transaction.state > HVUTIL_READY) +@@ -243,10 +251,10 @@ void hv_fcopy_onchannelcallback(void *context) + icmsghdr = (struct icmsg_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr)]; + if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- util_fw_version = UTIL_FW_VERSION; +- fcopy_srv_version = WIN8_SRV_VERSION; +- vmbus_prep_negotiate_resp(icmsghdr, negop, recv_buffer, +- util_fw_version, fcopy_srv_version); ++ vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, ++ fw_versions, FW_VER_COUNT, ++ fcopy_versions, FCOPY_VER_COUNT, ++ NULL, &fcopy_srv_version); + } else { + fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c +index 3abfc5983c97..2cc670442f6c 100644 +--- a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -46,6 +46,19 @@ + #define WIN8_SRV_MINOR 0 + #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) + ++#define KVP_VER_COUNT 3 ++static const int kvp_versions[] = { ++ WIN8_SRV_VERSION, ++ WIN7_SRV_VERSION, ++ WS2008_SRV_VERSION ++}; ++ ++#define FW_VER_COUNT 2 ++static const int fw_versions[] = { ++ UTIL_FW_VERSION, ++ UTIL_WS2K8_FW_VERSION ++}; ++ + /* + * Global state maintained for transaction that is being processed. For a class + * of integration services, including the "KVP service", the specified protocol +@@ -610,8 +623,6 @@ void hv_kvp_onchannelcallback(void *context) + struct hv_kvp_msg *kvp_msg; + + struct icmsg_hdr *icmsghdrp; +- struct icmsg_negotiate *negop = NULL; +- int util_fw_version; + int kvp_srv_version; + static enum {NEGO_NOT_STARTED, + NEGO_IN_PROGRESS, +@@ -640,28 +651,10 @@ void hv_kvp_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- /* +- * Based on the host, select appropriate +- * framework and service versions we will +- * negotiate. 
+- */ +- switch (vmbus_proto_version) { +- case (VERSION_WS2008): +- util_fw_version = UTIL_WS2K8_FW_VERSION; +- kvp_srv_version = WS2008_SRV_VERSION; +- break; +- case (VERSION_WIN7): +- util_fw_version = UTIL_FW_VERSION; +- kvp_srv_version = WIN7_SRV_VERSION; +- break; +- default: +- util_fw_version = UTIL_FW_VERSION; +- kvp_srv_version = WIN8_SRV_VERSION; +- } +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- recv_buffer, util_fw_version, +- kvp_srv_version); +- ++ vmbus_prep_negotiate_resp(icmsghdrp, ++ recv_buffer, fw_versions, FW_VER_COUNT, ++ kvp_versions, KVP_VER_COUNT, ++ NULL, &kvp_srv_version); + } else { + kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c +index 4e543dbb731a..d14f10b924a0 100644 +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -31,6 +31,16 @@ + #define VSS_MINOR 0 + #define VSS_VERSION (VSS_MAJOR << 16 | VSS_MINOR) + ++#define VSS_VER_COUNT 1 ++static const int vss_versions[] = { ++ VSS_VERSION ++}; ++ ++#define FW_VER_COUNT 1 ++static const int fw_versions[] = { ++ UTIL_FW_VERSION ++}; ++ + /* + * Timeout values are based on expecations from host + */ +@@ -297,7 +307,6 @@ void hv_vss_onchannelcallback(void *context) + + + struct icmsg_hdr *icmsghdrp; +- struct icmsg_negotiate *negop = NULL; + + if (vss_transaction.state > HVUTIL_READY) + return; +@@ -310,9 +319,10 @@ void hv_vss_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- recv_buffer, UTIL_FW_VERSION, +- VSS_VERSION); ++ vmbus_prep_negotiate_resp(icmsghdrp, ++ recv_buffer, fw_versions, FW_VER_COUNT, ++ vss_versions, VSS_VER_COUNT, ++ NULL, NULL); + } else { + vss_msg = (struct hv_vss_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c +index e7707747f56d..f3797c07be10 100644 +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -57,7 +57,31 @@ + static int sd_srv_version; + static int ts_srv_version; + static int hb_srv_version; +-static int util_fw_version; ++ ++#define SD_VER_COUNT 2 ++static const int sd_versions[] = { ++ SD_VERSION, ++ SD_VERSION_1 ++}; ++ ++#define TS_VER_COUNT 3 ++static const int ts_versions[] = { ++ TS_VERSION, ++ TS_VERSION_3, ++ TS_VERSION_1 ++}; ++ ++#define HB_VER_COUNT 2 ++static const int hb_versions[] = { ++ HB_VERSION, ++ HB_VERSION_1 ++}; ++ ++#define FW_VER_COUNT 2 ++static const int fw_versions[] = { ++ UTIL_FW_VERSION, ++ UTIL_WS2K8_FW_VERSION ++}; + + static void shutdown_onchannelcallback(void *context); + static struct hv_util_service util_shutdown = { +@@ -118,7 +142,6 @@ static void shutdown_onchannelcallback(void *context) + struct shutdown_msg_data *shutdown_msg; + + struct icmsg_hdr *icmsghdrp; +- struct icmsg_negotiate *negop = NULL; + + vmbus_recvpacket(channel, shut_txf_buf, + PAGE_SIZE, &recvlen, &requestid); +@@ -128,9 +151,14 @@ static void shutdown_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- shut_txf_buf, util_fw_version, +- sd_srv_version); ++ if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf, ++ fw_versions, FW_VER_COUNT, ++ sd_versions, SD_VER_COUNT, ++ NULL, &sd_srv_version)) { ++ pr_info("Shutdown IC version %d.%d\n", ++ sd_srv_version >> 16, ++ sd_srv_version & 0xFFFF); ++ } + } else { + shutdown_msg = + (struct 
shutdown_msg_data *)&shut_txf_buf[ +@@ -253,7 +281,6 @@ static void timesync_onchannelcallback(void *context) + struct ictimesync_data *timedatap; + struct ictimesync_ref_data *refdata; + u8 *time_txf_buf = util_timesynch.recv_buffer; +- struct icmsg_negotiate *negop = NULL; + + vmbus_recvpacket(channel, time_txf_buf, + PAGE_SIZE, &recvlen, &requestid); +@@ -263,12 +290,14 @@ static void timesync_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- time_txf_buf, +- util_fw_version, +- ts_srv_version); +- pr_info("Using TimeSync version %d.%d\n", +- ts_srv_version >> 16, ts_srv_version & 0xFFFF); ++ if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf, ++ fw_versions, FW_VER_COUNT, ++ ts_versions, TS_VER_COUNT, ++ NULL, &ts_srv_version)) { ++ pr_info("TimeSync version %d.%d\n", ++ ts_srv_version >> 16, ++ ts_srv_version & 0xFFFF); ++ } + } else { + if (ts_srv_version > TS_VERSION_3) { + refdata = (struct ictimesync_ref_data *) +@@ -312,7 +341,6 @@ static void heartbeat_onchannelcallback(void *context) + struct icmsg_hdr *icmsghdrp; + struct heartbeat_msg_data *heartbeat_msg; + u8 *hbeat_txf_buf = util_heartbeat.recv_buffer; +- struct icmsg_negotiate *negop = NULL; + + while (1) { + +@@ -326,9 +354,16 @@ static void heartbeat_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- hbeat_txf_buf, util_fw_version, +- hb_srv_version); ++ if (vmbus_prep_negotiate_resp(icmsghdrp, ++ hbeat_txf_buf, ++ fw_versions, FW_VER_COUNT, ++ hb_versions, HB_VER_COUNT, ++ NULL, &hb_srv_version)) { ++ ++ pr_info("Heartbeat version %d.%d\n", ++ hb_srv_version >> 16, ++ hb_srv_version & 0xFFFF); ++ } + } else { + heartbeat_msg = + (struct heartbeat_msg_data *)&hbeat_txf_buf[ +@@ -378,33 +413,6 @@ static int util_probe(struct hv_device *dev, + + hv_set_drvdata(dev, srv); + +- /* +- * Based on the host; initialize the framework and +- * service version numbers we will negotiate. 
+- */ +- switch (vmbus_proto_version) { +- case (VERSION_WS2008): +- util_fw_version = UTIL_WS2K8_FW_VERSION; +- sd_srv_version = SD_VERSION_1; +- ts_srv_version = TS_VERSION_1; +- hb_srv_version = HB_VERSION_1; +- break; +- case VERSION_WIN7: +- case VERSION_WIN8: +- case VERSION_WIN8_1: +- util_fw_version = UTIL_FW_VERSION; +- sd_srv_version = SD_VERSION; +- ts_srv_version = TS_VERSION_3; +- hb_srv_version = HB_VERSION; +- break; +- case VERSION_WIN10: +- default: +- util_fw_version = UTIL_FW_VERSION; +- sd_srv_version = SD_VERSION; +- ts_srv_version = TS_VERSION; +- hb_srv_version = HB_VERSION; +- } +- + ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, + srv->util_cb, dev->channel); + if (ret) +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index ca26335de49a..41e5ed87f833 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1459,9 +1459,10 @@ struct hyperv_service_callback { + }; + + #define MAX_SRV_VER 0x7ffffff +-extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, +- struct icmsg_negotiate *, u8 *, int, +- int); ++extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, ++ const int *fw_version, int fw_vercnt, ++ const int *srv_version, int srv_vercnt, ++ int *nego_fw_version, int *nego_srv_version); + + void hv_event_tasklet_disable(struct vmbus_channel *channel); + void hv_event_tasklet_enable(struct vmbus_channel *channel); +-- +2.11.0 + diff --git a/kernel/patches-4.10/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch b/kernel/patches-4.10/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch new file mode 100644 index 000000000..827ad0c43 --- /dev/null +++ b/kernel/patches-4.10/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch @@ -0,0 +1,117 @@ +From 632449112ef8796bbc874a19648ca537bfec11e9 Mon Sep 17 00:00:00 2001 +From: Alex Ng +Date: Sat, 28 Jan 2017 12:37:18 -0700 +Subject: [PATCH 3/3] Drivers: hv: Log the negotiated IC versions. + +Log the negotiated IC versions. + +Signed-off-by: Alex Ng +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +(cherry picked from commit 1274a690f6b2bd2b37447c47e3062afa8aa43f93) +--- + drivers/hv/hv_fcopy.c | 9 +++++++-- + drivers/hv/hv_kvp.c | 8 ++++++-- + drivers/hv/hv_snapshot.c | 11 ++++++++--- + drivers/hv/hv_util.c | 4 ++-- + 4 files changed, 23 insertions(+), 9 deletions(-) + +diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c +index 0a315e6aa589..9aee6014339d 100644 +--- a/drivers/hv/hv_fcopy.c ++++ b/drivers/hv/hv_fcopy.c +@@ -251,10 +251,15 @@ void hv_fcopy_onchannelcallback(void *context) + icmsghdr = (struct icmsg_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr)]; + if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, ++ if (vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, + fw_versions, FW_VER_COUNT, + fcopy_versions, FCOPY_VER_COUNT, +- NULL, &fcopy_srv_version); ++ NULL, &fcopy_srv_version)) { ++ ++ pr_info("FCopy IC version %d.%d\n", ++ fcopy_srv_version >> 16, ++ fcopy_srv_version & 0xFFFF); ++ } + } else { + fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c +index 2cc670442f6c..de263712e247 100644 +--- a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -651,10 +651,14 @@ void hv_kvp_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, ++ if (vmbus_prep_negotiate_resp(icmsghdrp, + recv_buffer, fw_versions, FW_VER_COUNT, + kvp_versions, KVP_VER_COUNT, +- NULL, &kvp_srv_version); ++ NULL, &kvp_srv_version)) { ++ pr_info("KVP IC version %d.%d\n", ++ kvp_srv_version >> 16, ++ kvp_srv_version & 0xFFFF); ++ } + } else { + kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c +index d14f10b924a0..bcc03f0748d6 100644 +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -304,7 +304,7 @@ void hv_vss_onchannelcallback(void *context) + u32 recvlen; + u64 requestid; + struct hv_vss_msg *vss_msg; +- ++ int vss_srv_version; + + struct icmsg_hdr *icmsghdrp; + +@@ -319,10 +319,15 @@ void hv_vss_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, ++ if (vmbus_prep_negotiate_resp(icmsghdrp, + recv_buffer, fw_versions, FW_VER_COUNT, + vss_versions, VSS_VER_COUNT, +- NULL, NULL); ++ NULL, &vss_srv_version)) { ++ ++ pr_info("VSS IC version %d.%d\n", ++ vss_srv_version >> 16, ++ vss_srv_version & 0xFFFF); ++ } + } else { + vss_msg = (struct hv_vss_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c +index f3797c07be10..89440c2eb346 100644 +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -294,7 +294,7 @@ static void timesync_onchannelcallback(void *context) + fw_versions, FW_VER_COUNT, + ts_versions, TS_VER_COUNT, + NULL, &ts_srv_version)) { +- pr_info("TimeSync version %d.%d\n", ++ pr_info("TimeSync IC version %d.%d\n", + ts_srv_version >> 16, + ts_srv_version & 0xFFFF); + } +@@ -360,7 +360,7 @@ static void heartbeat_onchannelcallback(void *context) + hb_versions, HB_VER_COUNT, + NULL, &hb_srv_version)) { + +- pr_info("Heartbeat version %d.%d\n", ++ pr_info("Heartbeat IC version %d.%d\n", + hb_srv_version >> 16, + hb_srv_version & 0xFFFF); + } +-- +2.11.0 +
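
Editor's note (illustration only, not part of the patch files above): after patches 0002 and 0003, an integration-service driver negotiates by passing vmbus_prep_negotiate_resp arrays of supported framework and service versions, ordered newest first, and can ask for the negotiated versions back so it can log them. The sketch below is a minimal caller-side example mirroring the hv_kvp/hv_fcopy pattern shown in the hunks above; the "foo" service, FOO_VERSION, FOO_VERSION_1 and the foo_* identifiers are hypothetical names invented for illustration, while vmbus_prep_negotiate_resp, UTIL_FW_VERSION, UTIL_WS2K8_FW_VERSION, ICMSGTYPE_NEGOTIATE and pr_info are the real symbols used by the drivers changed in these patches.

    /* Illustrative only -- mirrors the calling convention introduced by patch 0002. */
    #define FOO_VER_COUNT 2
    static const int foo_versions[] = {     /* service versions, newest first */
            FOO_VERSION,
            FOO_VERSION_1
    };

    #define FW_VER_COUNT 2
    static const int fw_versions[] = {      /* framework versions, newest first */
            UTIL_FW_VERSION,
            UTIL_WS2K8_FW_VERSION
    };

    static int foo_srv_version;

    /* Inside the channel callback, once icmsghdrp points at the received header: */
    if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
            /*
             * The helper walks both arrays against the versions offered by the
             * host, picks the first (i.e. highest) mutually supported framework
             * and service version, and on success stores the negotiated service
             * version so the driver can log it (patch 0003).
             */
            if (vmbus_prep_negotiate_resp(icmsghdrp, recv_buffer,
                                          fw_versions, FW_VER_COUNT,
                                          foo_versions, FOO_VER_COUNT,
                                          NULL, &foo_srv_version)) {
                    pr_info("Foo IC version %d.%d\n",
                            foo_srv_version >> 16,
                            foo_srv_version & 0xFFFF);
            }
    }

This replaces the old scheme, removed by patch 0002, in which each driver switched on vmbus_proto_version to pick a single framework/service version before calling the helper.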