diff --git a/projects/kernel-config/.gitignore b/projects/kernel-config/.gitignore new file mode 100644 index 000000000..1bc4b71ae --- /dev/null +++ b/projects/kernel-config/.gitignore @@ -0,0 +1 @@ +check-kernel-config.sh diff --git a/projects/kernel-config/Dockerfile b/projects/kernel-config/Dockerfile index 067c2c0f5..346a29443 100644 --- a/projects/kernel-config/Dockerfile +++ b/projects/kernel-config/Dockerfile @@ -13,7 +13,7 @@ RUN cat linux-${KERNEL_VERSION}.tar.xz | tar --absolute-names -xJ && mv /linux- RUN mkdir /config COPY kernel_config.* /config/ -COPY makeconfig.sh /config +COPY *.sh /config/ # Apply local patches COPY patches-${KERNEL_SERIES} /patches @@ -24,6 +24,7 @@ RUN set -e && for patch in /patches/*.patch; do \ done RUN /config/makeconfig.sh ${ARCH} ${KERNEL_SERIES} +RUN /config/check-kernel-config.sh /linux/.config RUN mkdir /out RUN printf "KERNEL_SOURCE=${KERNEL_SOURCE}\n" > /out/kernel-source-info diff --git a/projects/kernel-config/Makefile b/projects/kernel-config/Makefile index 5e666adaf..a06c6be66 100644 --- a/projects/kernel-config/Makefile +++ b/projects/kernel-config/Makefile @@ -35,6 +35,7 @@ sign: # build_4.9.x_dbg and adds "_dbg" to the hub image name. define kernel build_$(2)$(3): Dockerfile Makefile $(wildcard patches-$(2)/*) $(wildcard kernel_config.$(2)*) kernel_config.base kernel_config.$(ARCH) + cp ../../test/pkg/kernel-config/check-kernel-config.sh . docker pull linuxkit/$(IMAGE):$(1)$(3)-$(HASH) || \ docker build \ --build-arg KERNEL_VERSION=$(1) \ @@ -42,6 +43,7 @@ build_$(2)$(3): Dockerfile Makefile $(wildcard patches-$(2)/*) $(wildcard kernel --build-arg ARCH=$(ARCH) \ --build-arg DEBUG=$(3) \ --no-cache -t linuxkit/$(IMAGE):$(1)$(3)-$(HASH) . 
+ -rm check-kernel-config.sh push_$(2)$(3): build_$(2)$(3) docker pull linuxkit/$(IMAGE):$(1)$(3)-$(HASH) || \ @@ -64,8 +66,9 @@ endef # Build Targets # Debug targets only for latest stable and LTS stable # -$(eval $(call kernel,4.10.14,4.10.x)) -$(eval $(call kernel,4.10.14,4.10.x,_dbg)) -$(eval $(call kernel,4.9.26,4.9.x)) -$(eval $(call kernel,4.9.26,4.9.x,_dbg)) -$(eval $(call kernel,4.4.66,4.4.x)) +$(eval $(call kernel,4.11.2,4.11.x)) +$(eval $(call kernel,4.11.2,4.11.x,_dbg)) +$(eval $(call kernel,4.10.17,4.10.x)) +$(eval $(call kernel,4.10.17,4.10.x,_dbg)) +$(eval $(call kernel,4.9.29,4.9.x)) +$(eval $(call kernel,4.9.29,4.9.x,_dbg)) diff --git a/projects/kernel-config/kcimport b/projects/kernel-config/kcimport new file mode 100755 index 000000000..8772b5a11 --- /dev/null +++ b/projects/kernel-config/kcimport @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import sys +import subprocess +import os.path + +import kconfiglib + +KERNEL_VERSION="4.11.x" + +LINUXKIT_KERNEL=os.path.expanduser("~/packages/linuxkit/kernel") + +def collect_config(f): + config = {} + for line in f: + line = line.strip() + if line.startswith("# CONFIG_"): + opt = line[len("# CONFIG_"):-len(" is not set")] + value = "n" + elif line.startswith("CONFIG_"): + [opt, value] = line.strip().split("=", 1) # split on the first '=' only: values may contain '=' + opt = opt[len("CONFIG_"):] + else: + continue + + config[opt] = value + return config + +with open(os.path.join(LINUXKIT_KERNEL, "kernel_config-%s" % KERNEL_VERSION)) as f: + config = collect_config(f) + +opts = sorted(config.keys()) + +klib = kconfiglib.Config(sys.argv[1]) +# this needs to be `make defconfig` +klib.load_config(".config") + +ours = kconfiglib.Config(sys.argv[1]) +ours.load_config(os.path.join(LINUXKIT_KERNEL, "kernel_config-%s" % KERNEL_VERSION)) + +arch = [] +generic = [] +for o in opts: + # our hyperv patch stuff + if o in ("AF_KCM", "HYPERV_SOCK", "VIRTIO_VSOCKETS", "VIRTIO_VSOCKETS_COMMON", "HYPERV_VSOCKETS"): + generic.append(o) + continue + + sym = klib.get_symbol(o) + if sym
is None: + print("symbol %s unknown" % o) + sys.exit(1) + + oursym = ours.get_symbol(o) + if oursym is None: # check the symbol looked up in our config, not the defconfig one again + print("symbol %s unknown" % o) + sys.exit(1) + + # don't render invisible symbols + if oursym.get_visibility() == 'n': + continue + + # If defconfig of this symbol matches our value, we don't need to track it: + value = config[o] + if value == sym.get_value(): + continue + + if sym.get_def_locations()[0][0].startswith("arch"): + arch.append(o) + else: + generic.append(o) + +def render_diff(diff): + for o in sorted(diff): + value = config[o] + if value == "n": + print("# CONFIG_%s is not set" % o) + else: + print("CONFIG_%s=%s" % (o, value)) + +print("ARCH\n\n") +render_diff(arch) +print("\n\nGENRIC\n\n") +render_diff(generic) diff --git a/projects/kernel-config/kernel_config.4.10.x b/projects/kernel-config/kernel_config.4.10.x new file mode 100644 index 000000000..5c8abf3d7 --- /dev/null +++ b/projects/kernel-config/kernel_config.4.10.x @@ -0,0 +1,8 @@ +CONFIG_CRYPTO_CMAC=y +CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_HYPERV_SOCK=y +CONFIG_LWTUNNEL_BPF=y +# CONFIG_NET_VENDOR_SYNOPSYS is not set +# CONFIG_SYNC_FILE is not set +# CONFIG_TIMER_STATS is not set +CONFIG_UPROBE_EVENT=y diff --git a/projects/kernel-config/kernel_config.4.11.x b/projects/kernel-config/kernel_config.4.11.x new file mode 100644 index 000000000..589957bbe --- /dev/null +++ b/projects/kernel-config/kernel_config.4.11.x @@ -0,0 +1,7 @@ +CONFIG_HYPERV_VSOCKETS=y +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_PTP_1588_CLOCK_KVM=y +# CONFIG_R8169 is not set +# CONFIG_RCU_TRACE is not set diff --git a/projects/kernel-config/kernel_config.4.9.x b/projects/kernel-config/kernel_config.4.9.x new file mode 100644 index 000000000..cc89671e7 --- /dev/null +++ b/projects/kernel-config/kernel_config.4.9.x @@ -0,0 +1,7 @@ +CONFIG_CRYPTO_CMAC=y +# CONFIG_DEVKMEM is not set +CONFIG_HYPERV_SOCK=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +# 
CONFIG_NET_VENDOR_SYNOPSYS is not set +# CONFIG_TIMER_STATS is not set +CONFIG_UPROBE_EVENT=y diff --git a/projects/kernel-config/kernel_config.base b/projects/kernel-config/kernel_config.base index a08b2077d..2b420ec29 100644 --- a/projects/kernel-config/kernel_config.base +++ b/projects/kernel-config/kernel_config.base @@ -1,24 +1,834 @@ +CONFIG_8139CP=y +# CONFIG_8139TOO is not set +CONFIG_9P_FS=y +CONFIG_9P_FSCACHE=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_ACPI_APEI=y +CONFIG_ACPI_APEI_GHES=y +CONFIG_ACPI_HED=y +CONFIG_ACPI_PROCESSOR_AGGREGATOR=y +# CONFIG_ACPI_REV_OVERRIDE_POSSIBLE is not set +CONFIG_ACPI_SBS=y +CONFIG_ACPI_WMI=y +# CONFIG_AF_KCM is not set +# CONFIG_AGP is not set +# CONFIG_AMIGA_PARTITION is not set +CONFIG_ASYMMETRIC_KEY_TYPE=y +CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +CONFIG_ATA_GENERIC=y +CONFIG_ATA_OVER_ETH=y +# CONFIG_ATA_VERBOSE_ERROR is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +CONFIG_BALLOON_COMPACTION=y +CONFIG_BIG_KEYS=y CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_CRYPTOLOOP=y +CONFIG_BLK_DEV_INTEGRITY=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_NVME=y +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BONDING=y +CONFIG_BPF_JIT=y CONFIG_BPF_SYSCALL=y +CONFIG_BRIDGE=y +CONFIG_BRIDGE_EBT_802_3=y +CONFIG_BRIDGE_EBT_AMONG=y +CONFIG_BRIDGE_EBT_ARP=y +CONFIG_BRIDGE_EBT_ARPREPLY=y +CONFIG_BRIDGE_EBT_BROUTE=y +CONFIG_BRIDGE_EBT_DNAT=y +CONFIG_BRIDGE_EBT_IP=y +CONFIG_BRIDGE_EBT_IP6=y +CONFIG_BRIDGE_EBT_LIMIT=y +CONFIG_BRIDGE_EBT_LOG=y +CONFIG_BRIDGE_EBT_MARK=y +CONFIG_BRIDGE_EBT_MARK_T=y +CONFIG_BRIDGE_EBT_NFLOG=y +CONFIG_BRIDGE_EBT_PKTTYPE=y +CONFIG_BRIDGE_EBT_REDIRECT=y +CONFIG_BRIDGE_EBT_SNAT=y +CONFIG_BRIDGE_EBT_STP=y +CONFIG_BRIDGE_EBT_T_FILTER=y +CONFIG_BRIDGE_EBT_T_NAT=y +CONFIG_BRIDGE_EBT_VLAN=y +CONFIG_BRIDGE_IGMP_SNOOPING=y +CONFIG_BRIDGE_NETFILTER=y +CONFIG_BRIDGE_NF_EBTABLES=y +CONFIG_BRIDGE_VLAN_FILTERING=y +# 
CONFIG_BSD_DISKLABEL is not set +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_CACHEFILES=y +# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_CFS_BANDWIDTH=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_NET_PRIO=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_PIDS=y -CONFIG_CGROUPS=y -CONFIG_CGROUP_WRITEBACK=y CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CIFS=y +CONFIG_CIFS_DEBUG=y +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_FSCACHE=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_SMB2=y +CONFIG_CIFS_XATTR=y +CONFIG_CLS_U32_MARK=y +CONFIG_CLS_U32_PERF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CRC_ITU_T=y +CONFIG_CRC_T10DIF=y +CONFIG_CROSS_COMPILE="" +CONFIG_CRYPTO_842=y +CONFIG_CRYPTO_AES_NI_INTEL=y +CONFIG_CRYPTO_AES_X86_64=y +CONFIG_CRYPTO_ANSI_CPRNG=y +CONFIG_CRYPTO_ANUBIS=y +CONFIG_CRYPTO_BLOWFISH=y +CONFIG_CRYPTO_BLOWFISH_X86_64=y +CONFIG_CRYPTO_CAMELLIA=y +CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=y +CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=y +CONFIG_CRYPTO_CAMELLIA_X86_64=y +CONFIG_CRYPTO_CAST5=y +CONFIG_CRYPTO_CAST5_AVX_X86_64=y +CONFIG_CRYPTO_CAST6=y +CONFIG_CRYPTO_CAST6_AVX_X86_64=y +CONFIG_CRYPTO_CHACHA20=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_CHACHA20_X86_64=y +CONFIG_CRYPTO_CRC32=y +CONFIG_CRYPTO_CRC32C_INTEL=y +CONFIG_CRYPTO_CRC32_PCLMUL=y +CONFIG_CRYPTO_CRCT10DIF=y +CONFIG_CRYPTO_CRYPTD=y +CONFIG_CRYPTO_CTS=y +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_DES3_EDE_X86_64=y +CONFIG_CRYPTO_DEV_PADLOCK=y +CONFIG_CRYPTO_DEV_PADLOCK_AES=y +CONFIG_CRYPTO_DEV_PADLOCK_SHA=y +CONFIG_CRYPTO_ECB=y +CONFIG_CRYPTO_FCRYPT=y +CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=y +CONFIG_CRYPTO_KEYWRAP=y +CONFIG_CRYPTO_KHAZAD=y 
+CONFIG_CRYPTO_LRW=y +CONFIG_CRYPTO_LZ4=y +CONFIG_CRYPTO_LZ4HC=y +CONFIG_CRYPTO_LZO=y +CONFIG_CRYPTO_MD4=y +CONFIG_CRYPTO_MICHAEL_MIC=y +CONFIG_CRYPTO_PCBC=y +CONFIG_CRYPTO_POLY1305=y +CONFIG_CRYPTO_POLY1305_X86_64=y +CONFIG_CRYPTO_RMD128=y +CONFIG_CRYPTO_RMD160=y +CONFIG_CRYPTO_RMD256=y +CONFIG_CRYPTO_RMD320=y +CONFIG_CRYPTO_RSA=y +CONFIG_CRYPTO_SALSA20=y +CONFIG_CRYPTO_SALSA20_X86_64=y +CONFIG_CRYPTO_SEED=y +CONFIG_CRYPTO_SERPENT=y +CONFIG_CRYPTO_SERPENT_AVX2_X86_64=y +CONFIG_CRYPTO_SERPENT_AVX_X86_64=y +CONFIG_CRYPTO_SERPENT_SSE2_X86_64=y +CONFIG_CRYPTO_SHA1_SSSE3=y +CONFIG_CRYPTO_SHA256_SSSE3=y +CONFIG_CRYPTO_SHA512=y +CONFIG_CRYPTO_SHA512_SSSE3=y +CONFIG_CRYPTO_TEA=y +CONFIG_CRYPTO_TGR192=y +CONFIG_CRYPTO_TWOFISH=y +CONFIG_CRYPTO_TWOFISH_AVX_X86_64=y +CONFIG_CRYPTO_TWOFISH_X86_64=y +CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=y +CONFIG_CRYPTO_USER=y +CONFIG_CRYPTO_USER_API_AEAD=y +CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_CRYPTO_VMAC=y +CONFIG_CRYPTO_WP512=y +CONFIG_CRYPTO_XCBC=y +CONFIG_CRYPTO_XTS=y +CONFIG_CUSE=y +CONFIG_DEBUG_CREDENTIALS=y +# CONFIG_DEBUG_DEVRES is not set +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_SPLIT=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_NOTIFIERS=y +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_DEADLINE=y +CONFIG_DEFAULT_HOSTNAME="(none)" +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y +# CONFIG_DEVTMPFS_MOUNT is not set +# CONFIG_DMADEVICES is not set +CONFIG_DMI_SYSFS=y +CONFIG_DM_CRYPT=y +# CONFIG_DM_MIRROR is not set +CONFIG_DM_SNAPSHOT=y +CONFIG_DM_THIN_PROVISIONING=y +# CONFIG_DM_ZERO is not set +# CONFIG_DRM is not set +CONFIG_DUMMY=y +CONFIG_DYNAMIC_FTRACE=y +# CONFIG_E100 is not set +# CONFIG_EDAC is not set +# CONFIG_EFIVAR_FS is not set +CONFIG_EFI_VARS_PSTORE=y +# CONFIG_ENABLE_MUST_CHECK is not set 
+CONFIG_ENA_ETHERNET=y +CONFIG_ENCRYPTED_KEYS=y CONFIG_EXPERT=y -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_IKCONFIG_PROC=y +CONFIG_EXTRA_FIRMWARE="" +CONFIG_FANOTIFY=y +CONFIG_FAT_DEFAULT_IOCHARSET="utf8" +# CONFIG_FB_EFI is not set +CONFIG_FB_HYPERV=y +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set +CONFIG_FB_VESA=y +# CONFIG_FDDI is not set +# CONFIG_FRAME_POINTER is not set +CONFIG_FRAME_WARN=1024 +CONFIG_FSCACHE=y +CONFIG_FSCACHE_STATS=y +CONFIG_FS_ENCRYPTION=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_GRAPH_TRACER=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_FUNCTION_TRACER=y +CONFIG_FUSE_FS=y +CONFIG_FUSION=y +CONFIG_FUSION_MAX_SGE=128 +CONFIG_FUSION_SPI=y +CONFIG_GACT_PROB=y +CONFIG_GENERIC_PHY=y +CONFIG_GENEVE=y +# CONFIG_HAMRADIO is not set +CONFIG_HANGCHECK_TIMER=y +CONFIG_HARDENED_USERCOPY=y +# CONFIG_HIBERNATION is not set +# CONFIG_HIDRAW is not set +# CONFIG_HID_A4TECH is not set +# CONFIG_HID_APPLE is not set +# CONFIG_HID_BELKIN is not set +# CONFIG_HID_CHERRY is not set +# CONFIG_HID_CHICONY is not set +# CONFIG_HID_CYPRESS is not set +# CONFIG_HID_EZKEY is not set +# CONFIG_HID_GYRATION is not set +# CONFIG_HID_KENSINGTON is not set +# CONFIG_HID_LOGITECH is not set +# CONFIG_HID_MICROSOFT is not set +# CONFIG_HID_MONTEREY is not set +# CONFIG_HID_PANTHERLORD is not set +# CONFIG_HID_PETALYNX is not set +# CONFIG_HID_SAMSUNG is not set +# CONFIG_HID_SUNPLUS is not set +# CONFIG_HID_TOPSEED is not set +CONFIG_HOTPLUG_PCI_PCIE=y +CONFIG_HOTPLUG_PCI_SHPC=y +CONFIG_HPET_MMAP=y +CONFIG_HPET_MMAP_DEFAULT=y +CONFIG_HVC_XEN=y +CONFIG_HVC_XEN_FRONTEND=y +CONFIG_HW_RANDOM_AMD=y +CONFIG_HW_RANDOM_INTEL=y +CONFIG_HW_RANDOM_TIMERIOMEM=y +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HYPERV=y +CONFIG_HYPERV_BALLOON=y +CONFIG_HYPERV_KEYBOARD=y +CONFIG_HYPERV_NET=y +CONFIG_HYPERV_STORAGE=y +CONFIG_HYPERV_UTILS=y +CONFIG_HZ_100=y +# CONFIG_HZ_1000 is not set +CONFIG_I2C_CHARDEV=y +# CONFIG_I2C_I801 is not set +CONFIG_I2C_MUX=y +CONFIG_IGB=y 
+CONFIG_IGBVF=y +CONFIG_IGB_HWMON=y CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_INET6_IPCOMP=y +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y +CONFIG_INET_AH=y +CONFIG_INET_DIAG=y +CONFIG_INET_ESP=y +CONFIG_INET_IPCOMP=y +CONFIG_INET_UDP_DIAG=y +CONFIG_INET_XFRM_MODE_BEET=y +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_INPUT_ATLAS_BTNS=y +CONFIG_INPUT_JOYDEV=y +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_PCSPKR=y +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_XEN_KBDDEV_FRONTEND=y +CONFIG_INTEL_IDLE=y +CONFIG_INTEL_IPS=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_IO_STRICT_DEVMEM=y +CONFIG_IP6_NF_MATCH_AH=y +CONFIG_IP6_NF_MATCH_EUI64=y +CONFIG_IP6_NF_MATCH_FRAG=y +CONFIG_IP6_NF_MATCH_HL=y +CONFIG_IP6_NF_MATCH_MH=y +CONFIG_IP6_NF_MATCH_OPTS=y +CONFIG_IP6_NF_MATCH_RPFILTER=y +CONFIG_IP6_NF_MATCH_RT=y +CONFIG_IP6_NF_NAT=y +CONFIG_IP6_NF_RAW=y +CONFIG_IP6_NF_SECURITY=y +CONFIG_IP6_NF_TARGET_HL=y +CONFIG_IP6_NF_TARGET_MASQUERADE=y +CONFIG_IP6_NF_TARGET_NPT=y +CONFIG_IP6_NF_TARGET_SYNPROXY=y +CONFIG_IPV6_GRE=y +CONFIG_IPV6_ILA=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_SIT_6RD=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_VTI=y +CONFIG_IPVLAN=y +CONFIG_IP_FIB_TRIE_STATS=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_RPFILTER=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_TARGET_CLUSTERIP=y +CONFIG_IP_NF_TARGET_ECN=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IP_NF_TARGET_TTL=y +# CONFIG_IP_PNP_BOOTP is not set +# 
CONFIG_IP_PNP_RARP is not set +CONFIG_IP_SET=y +CONFIG_IP_SET_BITMAP_IP=y +CONFIG_IP_SET_BITMAP_IPMAC=y +CONFIG_IP_SET_BITMAP_PORT=y +CONFIG_IP_SET_HASH_IP=y +CONFIG_IP_SET_HASH_IPPORT=y +CONFIG_IP_SET_HASH_IPPORTIP=y +CONFIG_IP_SET_HASH_IPPORTNET=y +CONFIG_IP_SET_HASH_NET=y +CONFIG_IP_SET_HASH_NETIFACE=y +CONFIG_IP_SET_HASH_NETPORT=y +CONFIG_IP_SET_LIST_SET=y +CONFIG_IP_SET_MAX=256 +CONFIG_IP_VS=y +CONFIG_IP_VS_DEBUG=y +CONFIG_IP_VS_DH=y +CONFIG_IP_VS_FO=y +CONFIG_IP_VS_FTP=y +CONFIG_IP_VS_IPV6=y +CONFIG_IP_VS_LBLC=y +CONFIG_IP_VS_LBLCR=y +CONFIG_IP_VS_LC=y +CONFIG_IP_VS_NFCT=y +CONFIG_IP_VS_NQ=y +CONFIG_IP_VS_OVF=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_SCTP=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_RR=y +CONFIG_IP_VS_SED=y +CONFIG_IP_VS_SH=y +CONFIG_IP_VS_SH_TAB_BITS=8 +CONFIG_IP_VS_TAB_BITS=12 +CONFIG_IP_VS_WLC=y +CONFIG_IP_VS_WRR=y +CONFIG_IXGB=y +CONFIG_IXGBE=y +CONFIG_IXGBEVF=y +CONFIG_IXGBE_HWMON=y +# CONFIG_KARMA_PARTITION is not set +CONFIG_KEY_DH_OPERATIONS=y +CONFIG_KSM=y +CONFIG_L2TP=y +CONFIG_LIBCRC32C=y CONFIG_LOCALVERSION="-linuxkit" -CONFIG_MEMCG_SWAP_ENABLED=y -CONFIG_MEMCG_SWAP=y +CONFIG_LOCKUP_DETECTOR=y +# CONFIG_LOGO is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_LPC_ICH=y +CONFIG_LPC_SCH=y +CONFIG_LWTUNNEL=y +# CONFIG_MACINTOSH_DRIVERS is not set +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +# CONFIG_MAC_PARTITION is not set CONFIG_MEMCG=y -CONFIG_MODULES=y -CONFIG_NAMESPACES=y +CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_SWAP_ENABLED=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_MFD_SM501=y +CONFIG_MFD_VX855=y +CONFIG_MFD_WL1273_CORE=y +# CONFIG_MINIX_SUBPARTITION is not set +CONFIG_MMIOTRACE=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +CONFIG_MPLS=y +CONFIG_MXM_WMI=y +# CONFIG_NETCONSOLE is not set +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK_ACCT=y +CONFIG_NETFILTER_NETLINK_GLUE_CT=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_XT_CONNMARK=y +CONFIG_NETFILTER_XT_MARK=y 
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_CGROUP=y +CONFIG_NETFILTER_XT_MATCH_CLUSTER=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=y +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CPU=y +CONFIG_NETFILTER_XT_MATCH_DCCP=y +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=y +CONFIG_NETFILTER_XT_MATCH_DSCP=y +CONFIG_NETFILTER_XT_MATCH_ECN=y +CONFIG_NETFILTER_XT_MATCH_ESP=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_HL=y +CONFIG_NETFILTER_XT_MATCH_IPCOMP=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_IPVS=y +CONFIG_NETFILTER_XT_MATCH_L2TP=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y +CONFIG_NETFILTER_XT_MATCH_NFACCT=y +CONFIG_NETFILTER_XT_MATCH_OSF=y +CONFIG_NETFILTER_XT_MATCH_OWNER=y +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_RATEEST=y +CONFIG_NETFILTER_XT_MATCH_REALM=y +CONFIG_NETFILTER_XT_MATCH_RECENT=y +CONFIG_NETFILTER_XT_MATCH_SCTP=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NETFILTER_XT_NAT=y +CONFIG_NETFILTER_XT_SET=y +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NETFILTER_XT_TARGET_DSCP=y +CONFIG_NETFILTER_XT_TARGET_HL=y +CONFIG_NETFILTER_XT_TARGET_HMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_LOG=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NETMAP=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y 
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=y +CONFIG_NETFILTER_XT_TARGET_RATEEST=y +CONFIG_NETFILTER_XT_TARGET_REDIRECT=y +# CONFIG_NETFILTER_XT_TARGET_SECMARK is not set +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=y +CONFIG_NETFILTER_XT_TARGET_TEE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETLINK_DIAG=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_CSUM=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_ACT_IPT=y +CONFIG_NET_ACT_MIRRED=y +CONFIG_NET_ACT_NAT=y +CONFIG_NET_ACT_PEDIT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_SIMP=y +CONFIG_NET_ACT_SKBEDIT=y +# CONFIG_NET_CADENCE is not set +CONFIG_NET_CLS_BASIC=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_FLOW=y +CONFIG_NET_CLS_FW=y +CONFIG_NET_CLS_IND=y +CONFIG_NET_CLS_MATCHALL=y +CONFIG_NET_CLS_ROUTE4=y +CONFIG_NET_CLS_RSVP=y +CONFIG_NET_CLS_RSVP6=y +CONFIG_NET_CLS_TCINDEX=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH_CMP=y +CONFIG_NET_EMATCH_IPSET=y +CONFIG_NET_EMATCH_META=y +CONFIG_NET_EMATCH_NBYTE=y +CONFIG_NET_EMATCH_TEXT=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_FOU=y +CONFIG_NET_FOU_IP_TUNNELS=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPIP=y +CONFIG_NET_IPVTI=y +CONFIG_NET_KEY=y +CONFIG_NET_KEY_MIGRATE=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_MPLS_GSO=y +# CONFIG_NET_PACKET_ENGINE is not set +CONFIG_NET_SCH_CBQ=y +CONFIG_NET_SCH_CHOKE=y +CONFIG_NET_SCH_DRR=y +CONFIG_NET_SCH_DSMARK=y +CONFIG_NET_SCH_GRED=y +CONFIG_NET_SCH_HFSC=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCH_MQPRIO=y +CONFIG_NET_SCH_MULTIQ=y +CONFIG_NET_SCH_NETEM=y +CONFIG_NET_SCH_PRIO=y +CONFIG_NET_SCH_QFQ=y +CONFIG_NET_SCH_RED=y +CONFIG_NET_SCH_SFB=y +CONFIG_NET_SCH_SFQ=y +CONFIG_NET_SCH_TBF=y +CONFIG_NET_SCH_TEQL=y +CONFIG_NET_SWITCHDEV=y +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_ALTEON is not set +# 
CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_VENDOR_CAVIUM is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EXAR is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_I825XX is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NEW_LEDS is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +CONFIG_NFSD_V4=y +CONFIG_NFS_FSCACHE=y +# CONFIG_NFS_V2 is not set +# CONFIG_NFS_V3_ACL is not set +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" +CONFIG_NFS_V4_2=y +CONFIG_NFT_BRIDGE_META=y +CONFIG_NFT_BRIDGE_REJECT=y +CONFIG_NFT_CHAIN_NAT_IPV4=y +CONFIG_NFT_CHAIN_NAT_IPV6=y +CONFIG_NFT_CHAIN_ROUTE_IPV4=y +CONFIG_NFT_CHAIN_ROUTE_IPV6=y +CONFIG_NFT_COMPAT=y +CONFIG_NFT_COUNTER=y +CONFIG_NFT_CT=y +CONFIG_NFT_DUP_IPV4=y +CONFIG_NFT_DUP_IPV6=y +CONFIG_NFT_DUP_NETDEV=y +CONFIG_NFT_EXTHDR=y 
+CONFIG_NFT_FWD_NETDEV=y +CONFIG_NFT_HASH=y +CONFIG_NFT_LIMIT=y +CONFIG_NFT_LOG=y +CONFIG_NFT_MASQ=y +CONFIG_NFT_MASQ_IPV4=y +CONFIG_NFT_MASQ_IPV6=y +CONFIG_NFT_META=y +CONFIG_NFT_NAT=y +CONFIG_NFT_QUEUE=y +CONFIG_NFT_REDIR=y +CONFIG_NFT_REDIR_IPV4=y +CONFIG_NFT_REDIR_IPV6=y +CONFIG_NFT_REJECT=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +# CONFIG_NF_CONNTRACK_SECMARK is not set +CONFIG_NF_CONNTRACK_SNMP=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CT_NETLINK_HELPER=y +CONFIG_NF_CT_NETLINK_TIMEOUT=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_DUP_IPV4=y +CONFIG_NF_DUP_IPV6=y +CONFIG_NF_DUP_NETDEV=y +CONFIG_NF_LOG_ARP=y +CONFIG_NF_LOG_BRIDGE=y +CONFIG_NF_LOG_IPV4=y +CONFIG_NF_LOG_IPV6=y +CONFIG_NF_NAT_IPV4=y +CONFIG_NF_NAT_IPV6=y +CONFIG_NF_NAT_MASQUERADE_IPV4=y +CONFIG_NF_NAT_MASQUERADE_IPV6=y +CONFIG_NF_NAT_REDIRECT=y +CONFIG_NF_NAT_SNMP_BASIC=y +CONFIG_NF_TABLES=y +CONFIG_NF_TABLES_ARP=y +CONFIG_NF_TABLES_BRIDGE=y +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NLMON=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NOZOMI=y +CONFIG_NTFS_FS=y +CONFIG_OPENVSWITCH=y +CONFIG_OPENVSWITCH_GENEVE=y +CONFIG_OPENVSWITCH_GRE=y +CONFIG_OPENVSWITCH_VXLAN=y +# CONFIG_OSF_PARTITION is not set +CONFIG_OVERLAY_FS=y +CONFIG_PACKET_DIAG=y +CONFIG_PAGE_EXTENSION=y +CONFIG_PAGE_POISONING=y +CONFIG_PAGE_POISONING_NO_SANITY=y +CONFIG_PAGE_POISONING_ZERO=y CONFIG_PANIC_ON_OOPS=y +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_SCH is not set +CONFIG_PATA_SIS=y +# CONFIG_PCCARD is not set +# CONFIG_PCIEAER is not set +CONFIG_PCI_IOV=y +CONFIG_PCI_STUB=y +CONFIG_PERSISTENT_KEYRINGS=y +# CONFIG_PHYLIB is not 
set +CONFIG_PKCS7_MESSAGE_PARSER=y +# CONFIG_PM_DEBUG is not set +CONFIG_PM_DEVFREQ=y +# CONFIG_PNP_DEBUG_MESSAGES is not set +CONFIG_PPP=y +CONFIG_PPPOE=y +CONFIG_PPPOL2TP=y +CONFIG_PPP_ASYNC=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_MPPE=y +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_SYNC_TTY=y +CONFIG_PPTP=y +CONFIG_PRINT_QUOTA_WARNING=y +CONFIG_PROC_CHILDREN=y +# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set +CONFIG_PSTORE=y +CONFIG_PSTORE_ZLIB_COMPRESS=y +# CONFIG_QFMT_V2 is not set +# CONFIG_RAS is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZ4 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_XZ is not set +CONFIG_RESET_CONTROLLER=y +# CONFIG_RFKILL is not set +# CONFIG_ROOT_NFS is not set +CONFIG_RPCSEC_GSS_KRB5=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +CONFIG_RTC_SYSTOHC_DEVICE="rtc0" +CONFIG_RT_GROUP_SCHED=y +CONFIG_SATA_MV=y +CONFIG_SATA_NV=y +# CONFIG_SATA_PMP is not set +CONFIG_SATA_PROMISE=y +CONFIG_SATA_SIL=y +CONFIG_SATA_SIS=y +CONFIG_SATA_SVW=y +CONFIG_SATA_ULI=y +CONFIG_SATA_VIA=y +CONFIG_SATA_VITESSE=y +# CONFIG_SCHEDSTATS is not set +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_DEBUG=y +# CONFIG_SCSI_CONSTANTS is not set +CONFIG_SCSI_LOWLEVEL=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_DMESG_RESTRICT=y +CONFIG_SECURITY_NETWORK_XFRM=y +CONFIG_SECURITY_PATH=y +# CONFIG_SECURITY_SELINUX is not set +CONFIG_SECURITY_YAMA=y +# CONFIG_SERIAL_8250_EXTENDED is not set +# CONFIG_SERIAL_8250_MID is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_SERIO_PCIPS2=y +CONFIG_SERIO_RAW=y +# CONFIG_SGI_PARTITION is not set +CONFIG_SLAB=y +CONFIG_SLAB_FREELIST_RANDOM=y +# CONFIG_SLUB is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_SOUND is not set +CONFIG_STACK_TRACER=y +CONFIG_STRICT_DEVMEM=y +# CONFIG_SUN_PARTITION is not set +# CONFIG_SUSPEND is not set +# CONFIG_TASK_XACCT is not set +# CONFIG_TCP_CONG_ADVANCED is 
not set +# CONFIG_THERMAL_GOV_USER_SPACE is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_THERMAL_WRITABLE_TRIPS is not set +# CONFIG_TMPFS_POSIX_ACL is not set +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y +CONFIG_TUN=y +CONFIG_UBSAN=y +CONFIG_UDF_FS=y +CONFIG_UEVENT_HELPER_PATH="" +# CONFIG_UNIXWARE_DISKLABEL is not set +CONFIG_UNIX_DIAG=y +# CONFIG_USB_SUPPORT is not set # CONFIG_USELIB is not set CONFIG_USER_NS=y +CONFIG_VETH=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +# CONFIG_VGA_ARB is not set +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_PCI_LEGACY=y +CONFIG_VIRTIO_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y +CONFIG_VLAN_8021Q=y +CONFIG_VMWARE_PVSCSI=y +CONFIG_VMXNET3=y +CONFIG_VSOCKETS=y +CONFIG_VXLAN=y +# CONFIG_WATCHDOG is not set +# CONFIG_WIRELESS is not set +# CONFIG_WLAN is not set +CONFIG_WQ_WATCHDOG=y +CONFIG_X509_CERTIFICATE_PARSER=y +CONFIG_X86_P4_CLOCKMOD=y +CONFIG_X86_PCC_CPUFREQ=y +CONFIG_X86_POWERNOW_K8=y +CONFIG_XENFS=y +CONFIG_XEN_ACPI_PROCESSOR=y +CONFIG_XEN_BALLOON=y +CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y +CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT=512 +CONFIG_XEN_BLKDEV_FRONTEND=y +CONFIG_XEN_COMPAT_XENFS=y +CONFIG_XEN_DEV_EVTCHN=y +CONFIG_XEN_FBDEV_FRONTEND=y +CONFIG_XEN_GNTDEV=y +CONFIG_XEN_GRANT_DEV_ALLOC=y +CONFIG_XEN_NETDEV_FRONTEND=y +CONFIG_XEN_PCIDEV_FRONTEND=y +CONFIG_XEN_SCRUB_PAGES=y +CONFIG_XEN_SCSI_FRONTEND=y +CONFIG_XEN_SYS_HYPERVISOR=y +CONFIG_XFRM_MIGRATE=y +CONFIG_XFRM_STATISTICS=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFS_FS=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_QUOTA=y diff --git a/projects/kernel-config/kernel_config.x86 b/projects/kernel-config/kernel_config.x86 index 06a94e48b..31f19a7a2 100644 --- a/projects/kernel-config/kernel_config.x86 +++ b/projects/kernel-config/kernel_config.x86 @@ -1 +1,33 @@ -CONFIG_64BIT=y +# 
CONFIG_CALGARY_IOMMU is not set +# CONFIG_CC_STACKPROTECTOR_NONE is not set +CONFIG_CC_STACKPROTECTOR_STRONG=y +# CONFIG_CRASH_DUMP is not set +# CONFIG_DEBUG_BOOT_PARAMS is not set +# CONFIG_EARLY_PRINTK_DBGP is not set +CONFIG_EFI_STUB=y +CONFIG_HYPERVISOR_GUEST=y +# CONFIG_KEXEC is not set +CONFIG_KVM_GUEST=y +# CONFIG_LEGACY_VSYSCALL_EMULATE is not set +CONFIG_LEGACY_VSYSCALL_NONE=y +# CONFIG_MODIFY_LDT_SYSCALL is not set +CONFIG_MTRR_SANITIZER=y +CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 +CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 +CONFIG_NR_CPUS=128 +# CONFIG_NUMA is not set +CONFIG_OPROFILE=y +CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_RANDOMIZE_BASE=y +CONFIG_RANDOMIZE_MEMORY=y +CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa +# CONFIG_SCHED_SMT is not set +# CONFIG_VIRTUALIZATION is not set +# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set +# CONFIG_X86_EXTENDED_PLATFORM is not set +# CONFIG_X86_MCE is not set +CONFIG_XEN=y +CONFIG_XEN_512GB=y +CONFIG_XEN_PVH=y diff --git a/projects/kernel-config/kernel_config.x86.4.10.x b/projects/kernel-config/kernel_config.x86.4.10.x new file mode 100644 index 000000000..18cd0f5c8 --- /dev/null +++ b/projects/kernel-config/kernel_config.x86.4.10.x @@ -0,0 +1 @@ +CONFIG_DEBUG_SET_MODULE_RONX=y diff --git a/projects/kernel-config/kernel_config.x86.4.9.x b/projects/kernel-config/kernel_config.x86.4.9.x new file mode 100644 index 000000000..18cd0f5c8 --- /dev/null +++ b/projects/kernel-config/kernel_config.x86.4.9.x @@ -0,0 +1 @@ +CONFIG_DEBUG_SET_MODULE_RONX=y diff --git a/projects/kernel-config/makeconfig.sh b/projects/kernel-config/makeconfig.sh index c7c92d3d6..98e675133 100755 --- a/projects/kernel-config/makeconfig.sh +++ b/projects/kernel-config/makeconfig.sh @@ -6,33 +6,43 @@ ARCH=$1 KERNEL_SERIES=$2 DEBUG=$3 -defconfig=defconfig -if [ "${ARCH}" == "x86" ]; then - defconfig=x86_64_defconfig -fi -configpath="/linux/arch/${ARCH}/configs/${defconfig}" +cd /linux && make defconfig 
-cp /config/kernel_config.base "$configpath" - -function append_config() +function merge_config() { config=$1 - - if [ -f "$config" ]; then - cat "$config" >> "$configpath" + if [ ! -f "$config" ]; then + return fi + + # A slightly more intelligent merge algorithm: rather than just catting + # files together (and getting random results), let's explicitly delete the + # old setting, and then insert our new one. + while read -r line; do + if echo "${line}" | grep "is not set" >/dev/null; then + cfg=$(echo "${line/ is not set/}" | cut -c3-) + else + cfg=$(echo "${line}" | cut -f1 -d=) + fi + + sed -i -e "/${cfg} is not set/d" -e "/${cfg}=/d" /linux/.config + echo "${line}" >> /linux/.config + done < "$config" } -append_config "/config/kernel_config.${ARCH}" -append_config "/config/kernel_config.${KERNEL_SERIES}" -append_config "/config/kernel_config.${ARCH}.${KERNEL_SERIES}" +cd /linux && make defconfig && make oldconfig + +merge_config "/config/kernel_config.base" +merge_config "/config/kernel_config.${ARCH}" +merge_config "/config/kernel_config.${KERNEL_SERIES}" +merge_config "/config/kernel_config.${ARCH}.${KERNEL_SERIES}" if [ -n "${DEBUG}" ]; then sed -i sed -i 's/CONFIG_PANIC_ON_OOPS=y/# CONFIG_PANIC_ON_OOPS is not set/' /linux/arch/x86/configs/x86_64_defconfig - append_config "/config/kernel_config.debug" + merge_config "/config/kernel_config.debug" fi -cd /linux && make defconfig && make oldconfig +cd /linux && make oldconfig # Let's make sure things are the way we want, i.e. every option we explicitly # set is set the same way in the resulting config. @@ -42,7 +52,18 @@ function check_config() while read line; do if [ -n "${DEBUG}" ] && [ "$line" == "CONFIG_PANIC_ON_OOPS=y" ]; then continue; fi - grep "^${line}$" /linux/.config >/dev/null || (echo "$line set incorrectly" && false) + value="$(grep "^${line}$" /linux/.config || true)" + + # It's okay for the merging script to have simply not listed values we + # require to be unset. 
+ if echo "${line}" | grep "is not set" >/dev/null && [ "$value" = "" ]; then + continue + fi + if [ "${value}" = "${line}" ]; then + continue + fi + + echo "$line set incorrectly" && false done < $1 } diff --git a/projects/kernel-config/patches-4.10.x/0001-hv_sock-introduce-Hyper-V-Sockets.patch b/projects/kernel-config/patches-4.10.x/0001-hv_sock-introduce-Hyper-V-Sockets.patch index d5116859c..08c29b4f7 100644 --- a/projects/kernel-config/patches-4.10.x/0001-hv_sock-introduce-Hyper-V-Sockets.patch +++ b/projects/kernel-config/patches-4.10.x/0001-hv_sock-introduce-Hyper-V-Sockets.patch @@ -1,7 +1,7 @@ -From dd08e20ca2c64c1d4d6ee282e949eb5e89f77512 Mon Sep 17 00:00:00 2001 +From 63fab9ed419e243582d1f7047c7382565b11cd4e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 21 Jul 2016 16:04:38 -0600 -Subject: [PATCH 1/7] hv_sock: introduce Hyper-V Sockets +Subject: [PATCH 1/6] hv_sock: introduce Hyper-V Sockets Hyper-V Sockets (hv_sock) supplies a byte-stream based communication mechanism between the host and the guest. 
It's somewhat like TCP over @@ -1787,5 +1787,5 @@ index 000000000000..331d3759f5cb +MODULE_DESCRIPTION("Hyper-V Sockets"); +MODULE_LICENSE("Dual BSD/GPL"); -- -2.11.0 +2.12.2 diff --git a/projects/kernel-config/patches-4.10.x/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch b/projects/kernel-config/patches-4.10.x/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch index e60299653..b7c06bb3c 100644 --- a/projects/kernel-config/patches-4.10.x/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch +++ b/projects/kernel-config/patches-4.10.x/0002-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch @@ -1,7 +1,7 @@ -From 6d21f35a580779c88a2f251395097b082574f4cc Mon Sep 17 00:00:00 2001 +From d9aa9429204658ccfc3563b99ea555207d3ce455 Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sat, 28 Jan 2017 12:37:17 -0700 -Subject: [PATCH 2/7] Drivers: hv: vmbus: Use all supported IC versions to +Subject: [PATCH 2/6] Drivers: hv: vmbus: Use all supported IC versions to negotiate Previously, we were assuming that each IC protocol version was tied to a @@ -488,5 +488,5 @@ index ca26335de49a..41e5ed87f833 100644 void hv_event_tasklet_disable(struct vmbus_channel *channel); void hv_event_tasklet_enable(struct vmbus_channel *channel); -- -2.11.0 +2.12.2 diff --git a/projects/kernel-config/patches-4.10.x/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch b/projects/kernel-config/patches-4.10.x/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch index 9d03ee32d..6e4376fa7 100644 --- a/projects/kernel-config/patches-4.10.x/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch +++ b/projects/kernel-config/patches-4.10.x/0003-Drivers-hv-Log-the-negotiated-IC-versions.patch @@ -1,7 +1,7 @@ -From fbdcc24ca6ecc490da8254110a85aa33325137b9 Mon Sep 17 00:00:00 2001 +From 3ed1bbf668019a7fdc4ec8e2f398552db9bd6b7e Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sat, 28 Jan 2017 12:37:18 -0700 -Subject: [PATCH 3/7] Drivers: hv: Log the negotiated IC versions. 
+Subject: [PATCH 3/6] Drivers: hv: Log the negotiated IC versions. Log the negotiated IC versions. @@ -114,5 +114,5 @@ index f3797c07be10..89440c2eb346 100644 hb_srv_version & 0xFFFF); } -- -2.11.0 +2.12.2 diff --git a/projects/kernel-config/patches-4.10.x/0004-vmbus-fix-missed-ring-events-on-boot.patch b/projects/kernel-config/patches-4.10.x/0004-vmbus-fix-missed-ring-events-on-boot.patch index 41a8ef495..3e4d2b503 100644 --- a/projects/kernel-config/patches-4.10.x/0004-vmbus-fix-missed-ring-events-on-boot.patch +++ b/projects/kernel-config/patches-4.10.x/0004-vmbus-fix-missed-ring-events-on-boot.patch @@ -1,7 +1,7 @@ -From 64b212adc820690cb54855b79e30e23329aecc5a Mon Sep 17 00:00:00 2001 +From 6a0b3a8e2da9cb321723fbba5bb0c1783f99e985 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sun, 26 Mar 2017 16:42:20 +0800 -Subject: [PATCH 4/7] vmbus: fix missed ring events on boot +Subject: [PATCH 4/6] vmbus: fix missed ring events on boot During initialization, the channel initialization code schedules the tasklet to scan the VMBUS receive event page (i.e. 
simulates an @@ -52,5 +52,5 @@ index feca5d2e7b25..3fda63bf60ab 100644 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) -- -2.11.0 +2.12.2 diff --git a/projects/kernel-config/patches-4.10.x/0005-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch b/projects/kernel-config/patches-4.10.x/0005-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch index a0db1c0ef..da1bd9096 100644 --- a/projects/kernel-config/patches-4.10.x/0005-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch +++ b/projects/kernel-config/patches-4.10.x/0005-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch @@ -1,7 +1,7 @@ -From 0c089ad42dfc3a845ce6a21a1f78dd1704cbbaed Mon Sep 17 00:00:00 2001 +From ddfa35f8ed42cd04e98144489a08ab9e56106529 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 29 Mar 2017 18:37:10 +0800 -Subject: [PATCH 5/7] vmbus: remove "goto error_clean_msglist" in vmbus_open() +Subject: [PATCH 5/6] vmbus: remove "goto error_clean_msglist" in vmbus_open() This is just a cleanup patch to simplify the code a little. No semantic change. 
@@ -55,5 +55,5 @@ index 1606e7f08f4b..1caed01954f6 100644 vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); kfree(open_info); -- -2.11.0 +2.12.2 diff --git a/projects/kernel-config/patches-4.10.x/0006-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch b/projects/kernel-config/patches-4.10.x/0006-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch index 6a8162015..dc6285bb1 100644 --- a/projects/kernel-config/patches-4.10.x/0006-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch +++ b/projects/kernel-config/patches-4.10.x/0006-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch @@ -1,7 +1,7 @@ -From 67f5a959cea3dea2f5773cf7b690337dde421f9b Mon Sep 17 00:00:00 2001 +From 939735a0c5e6a26d202fff4a636dab6c0e4f0b5e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 24 Mar 2017 20:53:18 +0800 -Subject: [PATCH 6/7] vmbus: dynamically enqueue/dequeue the channel on +Subject: [PATCH 6/6] vmbus: dynamically enqueue/dequeue the channel on vmbus_open/close Signed-off-by: Dexuan Cui @@ -173,5 +173,5 @@ index 41e5ed87f833..0792d16f4b3e 100644 void vmbus_setevent(struct vmbus_channel *channel); -- -2.11.0 +2.12.2 diff --git a/projects/kernel-config/patches-4.10.x/0007-scsi-storvsc-Workaround-for-virtual-DVD-SCSI-version.patch b/projects/kernel-config/patches-4.10.x/0007-scsi-storvsc-Workaround-for-virtual-DVD-SCSI-version.patch deleted file mode 100644 index b1c89be76..000000000 --- a/projects/kernel-config/patches-4.10.x/0007-scsi-storvsc-Workaround-for-virtual-DVD-SCSI-version.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 6064d8207f1a62352f2476d09b8d6c6fba572359 Mon Sep 17 00:00:00 2001 -From: Stephen Hemminger -Date: Tue, 7 Mar 2017 09:15:53 -0800 -Subject: [PATCH 7/7] scsi: storvsc: Workaround for virtual DVD SCSI version - -Hyper-V host emulation of SCSI for virtual DVD device reports SCSI -version 0 (UNKNOWN) but is still capable of supporting REPORTLUN. 
- -Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11 -successfully with virtual DVD ROM device. What happens is that the SCSI -scan process falls back to doing sequential probing by INQUIRY. But the -storvsc driver has a previous workaround that masks/blocks all errors -reports from INQUIRY (or MODE_SENSE) commands. This workaround causes -the scan to then populate a full set of bogus LUN's on the target and -then sends kernel spinning off into a death spiral doing block reads on -the non-existent LUNs. - -By setting the correct blacklist flags, the target with the DVD device -is scanned with REPORTLUN and that works correctly. - -Patch needs to go in current 4.11, it is safe but not necessary in older -kernels. - -Signed-off-by: Stephen Hemminger -Reviewed-by: K. Y. Srinivasan -Reviewed-by: Christoph Hellwig -Signed-off-by: Martin K. Petersen -(cherry picked from commit f1c635b439a5c01776fe3a25b1e2dc546ea82e6f) ---- - drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++---------- - 1 file changed, 17 insertions(+), 10 deletions(-) - -diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c -index 7be04fc0d0e7..6f5d173ea9ff 100644 ---- a/drivers/scsi/storvsc_drv.c -+++ b/drivers/scsi/storvsc_drv.c -@@ -400,8 +400,6 @@ MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels") - */ - static int storvsc_timeout = 180; - --static int msft_blist_flags = BLIST_TRY_VPD_PAGES; -- - #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) - static struct scsi_transport_template *fc_transport_template; - #endif -@@ -1283,6 +1281,22 @@ static int storvsc_do_io(struct hv_device *device, - return ret; - } - -+static int storvsc_device_alloc(struct scsi_device *sdevice) -+{ -+ /* -+ * Set blist flag to permit the reading of the VPD pages even when -+ * the target may claim SPC-2 compliance. MSFT targets currently -+ * claim SPC-2 compliance while they implement post SPC-2 features. -+ * With this flag we can correctly handle WRITE_SAME_16 issues. 
-+ * -+ * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but -+ * still supports REPORT LUN. -+ */ -+ sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES; -+ -+ return 0; -+} -+ - static int storvsc_device_configure(struct scsi_device *sdevice) - { - -@@ -1298,14 +1312,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice) - sdevice->no_write_same = 1; - - /* -- * Add blist flags to permit the reading of the VPD pages even when -- * the target may claim SPC-2 compliance. MSFT targets currently -- * claim SPC-2 compliance while they implement post SPC-2 features. -- * With this patch we can correctly handle WRITE_SAME_16 issues. -- */ -- sdevice->sdev_bflags |= msft_blist_flags; -- -- /* - * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3 - * if the device is a MSFT virtual device. If the host is - * WIN10 or newer, allow write_same. -@@ -1569,6 +1575,7 @@ static struct scsi_host_template scsi_driver = { - .eh_host_reset_handler = storvsc_host_reset_handler, - .proc_name = "storvsc_host", - .eh_timed_out = storvsc_eh_timed_out, -+ .slave_alloc = storvsc_device_alloc, - .slave_configure = storvsc_device_configure, - .cmd_per_lun = 255, - .this_id = -1, --- -2.11.0 - diff --git a/projects/kernel-config/patches-4.11.x/0001-vmbus-introduce-in-place-packet-iterator.patch b/projects/kernel-config/patches-4.11.x/0001-vmbus-introduce-in-place-packet-iterator.patch new file mode 100644 index 000000000..26e3fc800 --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0001-vmbus-introduce-in-place-packet-iterator.patch @@ -0,0 +1,369 @@ +From a1b252ef7f38e04a2f5c8218adab6c1689fc3418 Mon Sep 17 00:00:00 2001 +From: stephen hemminger +Date: Mon, 27 Feb 2017 10:26:48 -0800 +Subject: [PATCH 1/9] vmbus: introduce in-place packet iterator + +This is mostly just a refactoring of previous functions +(get_pkt_next_raw, put_pkt_raw and commit_rd_index) to make it easier +to use for other drivers and NAPI. 
+ +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. Miller +(cherry picked from commit f3dd3f4797652c311df9c074436d420f1ad3566e) +--- + drivers/hv/ring_buffer.c | 94 +++++++++++++++++++++++++++++++++++++++++++- + drivers/net/hyperv/netvsc.c | 34 +++++----------- + include/linux/hyperv.h | 96 ++++++++++++++------------------------------- + 3 files changed, 133 insertions(+), 91 deletions(-) + +diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c +index 87799e81af97..c3f1a9e33cef 100644 +--- a/drivers/hv/ring_buffer.c ++++ b/drivers/hv/ring_buffer.c +@@ -32,6 +32,8 @@ + + #include "hyperv_vmbus.h" + ++#define VMBUS_PKT_TRAILER 8 ++ + /* + * When we write to the ring buffer, check if the host needs to + * be signaled. Here is the details of this protocol: +@@ -336,6 +338,12 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, + return 0; + } + ++static inline void ++init_cached_read_index(struct hv_ring_buffer_info *rbi) ++{ ++ rbi->cached_read_index = rbi->ring_buffer->read_index; ++} ++ + int hv_ringbuffer_read(struct vmbus_channel *channel, + void *buffer, u32 buflen, u32 *buffer_actual_len, + u64 *requestid, bool raw) +@@ -366,7 +374,8 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, + return ret; + } + +- init_cached_read_index(channel); ++ init_cached_read_index(inring_info); ++ + next_read_location = hv_get_next_read_location(inring_info); + next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc, + sizeof(desc), +@@ -410,3 +419,86 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, + + return ret; + } ++ ++/* ++ * Determine number of bytes available in ring buffer after ++ * the current iterator (priv_read_index) location. ++ * ++ * This is similar to hv_get_bytes_to_read but with private ++ * read index instead. 
++ */ ++static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) ++{ ++ u32 priv_read_loc = rbi->priv_read_index; ++ u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); ++ ++ if (write_loc >= priv_read_loc) ++ return write_loc - priv_read_loc; ++ else ++ return (rbi->ring_datasize - priv_read_loc) + write_loc; ++} ++ ++/* ++ * Get first vmbus packet from ring buffer after read_index ++ * ++ * If ring buffer is empty, returns NULL and no other action needed. ++ */ ++struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel) ++{ ++ struct hv_ring_buffer_info *rbi = &channel->inbound; ++ ++ /* set state for later hv_signal_on_read() */ ++ init_cached_read_index(rbi); ++ ++ if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor)) ++ return NULL; ++ ++ return hv_get_ring_buffer(rbi) + rbi->priv_read_index; ++} ++EXPORT_SYMBOL_GPL(hv_pkt_iter_first); ++ ++/* ++ * Get next vmbus packet from ring buffer. ++ * ++ * Advances the current location (priv_read_index) and checks for more ++ * data. If the end of the ring buffer is reached, then return NULL. ++ */ ++struct vmpacket_descriptor * ++__hv_pkt_iter_next(struct vmbus_channel *channel, ++ const struct vmpacket_descriptor *desc) ++{ ++ struct hv_ring_buffer_info *rbi = &channel->inbound; ++ u32 packetlen = desc->len8 << 3; ++ u32 dsize = rbi->ring_datasize; ++ ++ /* bump offset to next potential packet */ ++ rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER; ++ if (rbi->priv_read_index >= dsize) ++ rbi->priv_read_index -= dsize; ++ ++ /* more data? */ ++ if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor)) ++ return NULL; ++ else ++ return hv_get_ring_buffer(rbi) + rbi->priv_read_index; ++} ++EXPORT_SYMBOL_GPL(__hv_pkt_iter_next); ++ ++/* ++ * Update host ring buffer after iterating over packets. 
++ */ ++void hv_pkt_iter_close(struct vmbus_channel *channel) ++{ ++ struct hv_ring_buffer_info *rbi = &channel->inbound; ++ ++ /* ++ * Make sure all reads are done before we update the read index since ++ * the writer may start writing to the read area once the read index ++ * is updated. ++ */ ++ virt_rmb(); ++ rbi->ring_buffer->read_index = rbi->priv_read_index; ++ ++ hv_signal_on_read(channel); ++} ++EXPORT_SYMBOL_GPL(hv_pkt_iter_close); +diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c +index 15ef713d96c0..ab9fe48ec133 100644 +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -646,14 +646,11 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, + static void netvsc_send_completion(struct netvsc_device *net_device, + struct vmbus_channel *incoming_channel, + struct hv_device *device, +- struct vmpacket_descriptor *packet) ++ const struct vmpacket_descriptor *desc) + { +- struct nvsp_message *nvsp_packet; ++ struct nvsp_message *nvsp_packet = hv_pkt_data(desc); + struct net_device *ndev = hv_get_drvdata(device); + +- nvsp_packet = (struct nvsp_message *)((unsigned long)packet + +- (packet->offset8 << 3)); +- + switch (nvsp_packet->hdr.msg_type) { + case NVSP_MSG_TYPE_INIT_COMPLETE: + case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: +@@ -667,7 +664,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device, + + case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: + netvsc_send_tx_complete(net_device, incoming_channel, +- device, packet); ++ device, desc); + break; + + default: +@@ -1070,9 +1067,11 @@ static void netvsc_receive(struct net_device *ndev, + struct net_device_context *net_device_ctx, + struct hv_device *device, + struct vmbus_channel *channel, +- struct vmtransfer_page_packet_header *vmxferpage_packet, ++ const struct vmpacket_descriptor *desc, + struct nvsp_message *nvsp) + { ++ const struct vmtransfer_page_packet_header *vmxferpage_packet ++ = container_of(desc, const struct 
vmtransfer_page_packet_header, d); + char *recv_buf = net_device->recv_buf; + u32 status = NVSP_STAT_SUCCESS; + int i; +@@ -1180,12 +1179,10 @@ static void netvsc_process_raw_pkt(struct hv_device *device, + struct netvsc_device *net_device, + struct net_device *ndev, + u64 request_id, +- struct vmpacket_descriptor *desc) ++ const struct vmpacket_descriptor *desc) + { + struct net_device_context *net_device_ctx = netdev_priv(ndev); +- struct nvsp_message *nvmsg +- = (struct nvsp_message *)((unsigned long)desc +- + (desc->offset8 << 3)); ++ struct nvsp_message *nvmsg = hv_pkt_data(desc); + + switch (desc->type) { + case VM_PKT_COMP: +@@ -1194,9 +1191,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, + + case VM_PKT_DATA_USING_XFER_PAGES: + netvsc_receive(ndev, net_device, net_device_ctx, +- device, channel, +- (struct vmtransfer_page_packet_header *)desc, +- nvmsg); ++ device, channel, desc, nvmsg); + break; + + case VM_PKT_DATA_INBAND: +@@ -1218,7 +1213,6 @@ void netvsc_channel_cb(void *context) + struct netvsc_device *net_device; + struct vmpacket_descriptor *desc; + struct net_device *ndev; +- bool need_to_commit = false; + + if (channel->primary_channel != NULL) + device = channel->primary_channel->device_obj; +@@ -1237,20 +1231,12 @@ void netvsc_channel_cb(void *context) + netvsc_channel_idle(net_device, q_idx))) + return; + +- /* commit_rd_index() -> hv_signal_on_read() needs this. 
*/ +- init_cached_read_index(channel); +- +- while ((desc = get_next_pkt_raw(channel)) != NULL) { ++ foreach_vmbus_pkt(desc, channel) { + netvsc_process_raw_pkt(device, channel, net_device, + ndev, desc->trans_id, desc); + +- put_pkt_raw(channel, desc); +- need_to_commit = true; + } + +- if (need_to_commit) +- commit_rd_index(channel); +- + netvsc_chk_recv_comp(net_device, channel, q_idx); + } + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 970771a5f739..0c170a3f0d8b 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1508,14 +1508,6 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) + return; + } + +-static inline void +-init_cached_read_index(struct vmbus_channel *channel) +-{ +- struct hv_ring_buffer_info *rbi = &channel->inbound; +- +- rbi->cached_read_index = rbi->ring_buffer->read_index; +-} +- + /* + * Mask off host interrupt callback notifications + */ +@@ -1549,76 +1541,48 @@ static inline u32 hv_end_read(struct hv_ring_buffer_info *rbi) + /* + * An API to support in-place processing of incoming VMBUS packets. + */ +-#define VMBUS_PKT_TRAILER 8 + +-static inline struct vmpacket_descriptor * +-get_next_pkt_raw(struct vmbus_channel *channel) ++/* Get data payload associated with descriptor */ ++static inline void *hv_pkt_data(const struct vmpacket_descriptor *desc) + { +- struct hv_ring_buffer_info *ring_info = &channel->inbound; +- u32 priv_read_loc = ring_info->priv_read_index; +- void *ring_buffer = hv_get_ring_buffer(ring_info); +- u32 dsize = ring_info->ring_datasize; +- /* +- * delta is the difference between what is available to read and +- * what was already consumed in place. We commit read index after +- * the whole batch is processed. +- */ +- u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? 
+- priv_read_loc - ring_info->ring_buffer->read_index : +- (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; +- u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); +- +- if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) +- return NULL; +- +- return ring_buffer + priv_read_loc; ++ return (void *)((unsigned long)desc + (desc->offset8 << 3)); + } + +-/* +- * A helper function to step through packets "in-place" +- * This API is to be called after each successful call +- * get_next_pkt_raw(). +- */ +-static inline void put_pkt_raw(struct vmbus_channel *channel, +- struct vmpacket_descriptor *desc) ++/* Get data size associated with descriptor */ ++static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc) + { +- struct hv_ring_buffer_info *ring_info = &channel->inbound; +- u32 packetlen = desc->len8 << 3; +- u32 dsize = ring_info->ring_datasize; +- +- /* +- * Include the packet trailer. +- */ +- ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; +- ring_info->priv_read_index %= dsize; ++ return (desc->len8 << 3) - (desc->offset8 << 3); + } + ++ ++struct vmpacket_descriptor * ++hv_pkt_iter_first(struct vmbus_channel *channel); ++ ++struct vmpacket_descriptor * ++__hv_pkt_iter_next(struct vmbus_channel *channel, ++ const struct vmpacket_descriptor *pkt); ++ ++void hv_pkt_iter_close(struct vmbus_channel *channel); ++ + /* +- * This call commits the read index and potentially signals the host. +- * Here is the pattern for using the "in-place" consumption APIs: +- * +- * init_cached_read_index(); +- * +- * while (get_next_pkt_raw() { +- * process the packet "in-place"; +- * put_pkt_raw(); +- * } +- * if (packets processed in place) +- * commit_rd_index(); ++ * Get next packet descriptor from iterator ++ * If at end of list, return NULL and update host. 
+ */ +-static inline void commit_rd_index(struct vmbus_channel *channel) ++static inline struct vmpacket_descriptor * ++hv_pkt_iter_next(struct vmbus_channel *channel, ++ const struct vmpacket_descriptor *pkt) + { +- struct hv_ring_buffer_info *ring_info = &channel->inbound; +- /* +- * Make sure all reads are done before we update the read index since +- * the writer may start writing to the read area once the read index +- * is updated. +- */ +- virt_rmb(); +- ring_info->ring_buffer->read_index = ring_info->priv_read_index; ++ struct vmpacket_descriptor *nxt; ++ ++ nxt = __hv_pkt_iter_next(channel, pkt); ++ if (!nxt) ++ hv_pkt_iter_close(channel); + +- hv_signal_on_read(channel); ++ return nxt; + } + ++#define foreach_vmbus_pkt(pkt, channel) \ ++ for (pkt = hv_pkt_iter_first(channel); pkt; \ ++ pkt = hv_pkt_iter_next(channel, pkt)) + + #endif /* _HYPERV_H */ +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0002-vmbus-vmbus_open-reset-onchannel_callback-on-error.patch b/projects/kernel-config/patches-4.11.x/0002-vmbus-vmbus_open-reset-onchannel_callback-on-error.patch new file mode 100644 index 000000000..edf09fb59 --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0002-vmbus-vmbus_open-reset-onchannel_callback-on-error.patch @@ -0,0 +1,34 @@ +From 0cf4173a76cf771d4d4406f48c59fcb85b5a5400 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Fri, 5 May 2017 16:57:12 -0600 +Subject: [PATCH 2/9] vmbus: vmbus_open(): reset onchannel_callback on error + +No real issue is observed without the patch, but let's add this +just in case. + +Signed-off-by: Dexuan Cui +Cc: K. Y. 
Srinivasan +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit c248b14174e1337c1461f9b13a573ad90a136e1c) +--- + drivers/hv/channel.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 321b8833fa6f..628d6fde1887 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -220,6 +220,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + get_order(send_ringbuffer_size + recv_ringbuffer_size)); + error_set_chnstate: + newchannel->state = CHANNEL_OPEN_STATE; ++ newchannel->onchannel_callback = NULL; ++ newchannel->channel_callback_context = NULL; + return err; + } + EXPORT_SYMBOL_GPL(vmbus_open); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0003-vmbus-add-the-matching-tasklet_enable-in-vmbus_close.patch b/projects/kernel-config/patches-4.11.x/0003-vmbus-add-the-matching-tasklet_enable-in-vmbus_close.patch new file mode 100644 index 000000000..36499d3de --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0003-vmbus-add-the-matching-tasklet_enable-in-vmbus_close.patch @@ -0,0 +1,42 @@ +From d1d1e90f2106c7048feab6a6da955fc2ed521896 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Fri, 5 May 2017 16:57:15 -0600 +Subject: [PATCH 3/9] vmbus: add the matching tasklet_enable() in + vmbus_close_internal() + +If we disable a tasklet that is scheduled but hasn't started to run, +the tasklet has no chance to run any longer, so later we'll hang +in free_channel() -> tasklet_kill(), because the TASKLET_STATE_SCHED +can't be cleared in tasklet_action(). + +With the patch, before free_channel() -> tasklet_kill() returns, the +tasklet still has a chance to run with a NULL channel->onchannel_callback, +which will be ignored safely, e.g. by vmbus_on_event(). + +Fixes: dad72a1d2844 ("vmbus: remove hv_event_tasklet_disable/enable") + +Signed-off-by: Dexuan Cui +Cc: K. Y. 
Srinivasan +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit 008d8d8bc0c86473a8549a365bee9a479243e412) +--- + drivers/hv/channel.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 628d6fde1887..7cd2bd9fd1f1 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -608,6 +608,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) + get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); + + out: ++ tasklet_enable(&channel->callback_event); + return ret; + } + +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0004-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch b/projects/kernel-config/patches-4.11.x/0004-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch new file mode 100644 index 000000000..6129baae4 --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0004-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch @@ -0,0 +1,62 @@ +From 3fe617c0af23272071c320b295fb987f69f0ac77 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Fri, 5 May 2017 16:57:20 -0600 +Subject: [PATCH 4/9] vmbus: remove "goto error_clean_msglist" in vmbus_open() + +This is just a cleanup patch to simplify the code a little. +No semantic change. + +Signed-off-by: Dexuan Cui +Cc: K. Y. 
Srinivasan +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit 4713066c11b2396eafd2873cbed7bdd72d1571eb) +--- + drivers/hv/channel.c | 18 +++++++----------- + 1 file changed, 7 insertions(+), 11 deletions(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 7cd2bd9fd1f1..db5e6f8730d2 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -180,17 +180,18 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + ret = vmbus_post_msg(open_msg, + sizeof(struct vmbus_channel_open_channel), true); + +- if (ret != 0) { +- err = ret; +- goto error_clean_msglist; +- } +- +- wait_for_completion(&open_info->waitevent); ++ if (ret == 0) ++ wait_for_completion(&open_info->waitevent); + + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&open_info->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + ++ if (ret != 0) { ++ err = ret; ++ goto error_free_gpadl; ++ } ++ + if (newchannel->rescind) { + err = -ENODEV; + goto error_free_gpadl; +@@ -205,11 +206,6 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + kfree(open_info); + return 0; + +-error_clean_msglist: +- spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); +- list_del(&open_info->msglistentry); +- spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +- + error_free_gpadl: + vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); + kfree(open_info); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0005-vmbus-dynamically-enqueue-dequeue-a-channel-on-vmbus.patch b/projects/kernel-config/patches-4.11.x/0005-vmbus-dynamically-enqueue-dequeue-a-channel-on-vmbus.patch new file mode 100644 index 000000000..0bc1b09c0 --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0005-vmbus-dynamically-enqueue-dequeue-a-channel-on-vmbus.patch @@ -0,0 +1,189 @@ 
+From 3809a76e095d648d7d99cb1822ce17844a22ad2a Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Fri, 5 May 2017 16:57:23 -0600 +Subject: [PATCH 5/9] vmbus: dynamically enqueue/dequeue a channel on + vmbus_open/close +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +A just-closed channel may have a pending interrupt, and later when a new +channel with the same channel ID is not being fully initialized, the +pending interrupt of the previous channel with the same channel ID can run +the channel callback on the new channel data structure, causing a crash +of NULL pointer dereferencing. + +Normally it’s pretty hard to reproduce the race condition, but it can +indeed happen with specially-designed hv_sock stress test cases. + +Signed-off-by: Dexuan Cui +Reported-by: Rolf Neugebauer +Tested-by: Rolf Neugebauer +Cc: K. Y. Srinivasan +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit 1df677b35ff010d0def33f5420773015815cf843) +--- + drivers/hv/channel.c | 12 +++++++++--- + drivers/hv/channel_mgmt.c | 50 +++++++++++++++++++++-------------------------- + include/linux/hyperv.h | 3 +++ + 3 files changed, 34 insertions(+), 31 deletions(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index db5e6f8730d2..f288e506fba0 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -177,6 +177,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + &vmbus_connection.chn_msg_list); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + ++ hv_percpu_channel_enq(newchannel); ++ + ret = vmbus_post_msg(open_msg, + sizeof(struct vmbus_channel_open_channel), true); + +@@ -189,23 +191,25 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + + if (ret != 0) { + err = ret; +- goto error_free_gpadl; ++ goto error_deq_channel; + } + + if (newchannel->rescind) { + err = 
-ENODEV; +- goto error_free_gpadl; ++ goto error_deq_channel; + } + + if (open_info->response.open_result.status) { + err = -EAGAIN; +- goto error_free_gpadl; ++ goto error_deq_channel; + } + + newchannel->state = CHANNEL_OPENED_STATE; + kfree(open_info); + return 0; + ++error_deq_channel: ++ hv_percpu_channel_deq(newchannel); + error_free_gpadl: + vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); + kfree(open_info); +@@ -551,6 +555,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) + goto out; + } + ++ hv_percpu_channel_deq(channel); ++ + channel->state = CHANNEL_OPEN_STATE; + channel->sc_creation_callback = NULL; + /* Stop callback and cancel the timer asap */ +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index fbcb06352308..c5a01a4d589e 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -363,6 +363,17 @@ static void percpu_channel_enq(void *arg) + list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list); + } + ++void hv_percpu_channel_enq(struct vmbus_channel *channel) ++{ ++ if (channel->target_cpu != get_cpu()) ++ smp_call_function_single(channel->target_cpu, ++ percpu_channel_enq, channel, true); ++ else ++ percpu_channel_enq(channel); ++ ++ put_cpu(); ++} ++ + static void percpu_channel_deq(void *arg) + { + struct vmbus_channel *channel = arg; +@@ -370,6 +381,17 @@ static void percpu_channel_deq(void *arg) + list_del_rcu(&channel->percpu_list); + } + ++void hv_percpu_channel_deq(struct vmbus_channel *channel) ++{ ++ if (channel->target_cpu != get_cpu()) ++ smp_call_function_single(channel->target_cpu, ++ percpu_channel_deq, channel, true); ++ else ++ percpu_channel_deq(channel); ++ ++ put_cpu(); ++} ++ + + static void vmbus_release_relid(u32 relid) + { +@@ -390,15 +412,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) + BUG_ON(!channel->rescind); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + +- if (channel->target_cpu != 
get_cpu()) { +- put_cpu(); +- smp_call_function_single(channel->target_cpu, +- percpu_channel_deq, channel, true); +- } else { +- percpu_channel_deq(channel); +- put_cpu(); +- } +- + if (channel->primary_channel == NULL) { + list_del(&channel->listentry); + +@@ -491,16 +504,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + + init_vp_index(newchannel, dev_type); + +- if (newchannel->target_cpu != get_cpu()) { +- put_cpu(); +- smp_call_function_single(newchannel->target_cpu, +- percpu_channel_enq, +- newchannel, true); +- } else { +- percpu_channel_enq(newchannel); +- put_cpu(); +- } +- + /* + * This state is used to indicate a successful open + * so that when we do close the channel normally, we +@@ -549,15 +552,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + list_del(&newchannel->listentry); + mutex_unlock(&vmbus_connection.channel_mutex); + +- if (newchannel->target_cpu != get_cpu()) { +- put_cpu(); +- smp_call_function_single(newchannel->target_cpu, +- percpu_channel_deq, newchannel, true); +- } else { +- percpu_channel_deq(newchannel); +- put_cpu(); +- } +- + vmbus_release_relid(newchannel->offermsg.child_relid); + + err_free_chan: +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 0c170a3f0d8b..ba93b7e4a972 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1437,6 +1437,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, + const int *srv_version, int srv_vercnt, + int *nego_fw_version, int *nego_srv_version); + ++void hv_percpu_channel_enq(struct vmbus_channel *channel); ++void hv_percpu_channel_deq(struct vmbus_channel *channel); ++ + void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); + + void vmbus_setevent(struct vmbus_channel *channel); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0006-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch 
b/projects/kernel-config/patches-4.11.x/0006-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch new file mode 100644 index 000000000..58f2b1871 --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0006-hv_sock-implements-Hyper-V-transport-for-Virtual-Soc.patch @@ -0,0 +1,934 @@ +From 189479e4a60fcdb92439aebd7a116f4ba0eede0d Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Fri, 5 May 2017 16:57:26 -0600 +Subject: [PATCH 6/9] hv_sock: implements Hyper-V transport for Virtual Sockets + (AF_VSOCK) + +Hyper-V Sockets (hv_sock) supplies a byte-stream based communication +mechanism between the host and the guest. It uses VMBus ringbuffer as the +transportation layer. + +With hv_sock, applications between the host (Windows 10, Windows Server +2016 or newer) and the guest can talk with each other using the traditional +socket APIs. + +More info about Hyper-V Sockets is available here: + +"Make your own integration services": +https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-guide/make-integration-service + +The patch implements the necessary support in Linux guest by introducing a new +vsock transport for AF_VSOCK. + +Signed-off-by: Dexuan Cui +Cc: K. Y. 
Srinivasan +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Cc: Andy King +Cc: Dmitry Torokhov +Cc: George Zhang +Cc: Jorgen Hansen +Cc: Reilly Grant +Cc: Asias He +Cc: Stefan Hajnoczi +Cc: Vitaly Kuznetsov +Cc: Cathy Avery +Cc: Rolf Neugebauer +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit 3476be340d2ff777609fca3e763da0292acbfc45) +--- + MAINTAINERS | 1 + + net/vmw_vsock/Kconfig | 12 + + net/vmw_vsock/Makefile | 3 + + net/vmw_vsock/hyperv_transport.c | 829 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 845 insertions(+) + create mode 100644 net/vmw_vsock/hyperv_transport.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 38d3e4ed7208..53bf52ce3173 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -6077,6 +6077,7 @@ F: drivers/net/hyperv/ + F: drivers/scsi/storvsc_drv.c + F: drivers/uio/uio_hv_generic.c + F: drivers/video/fbdev/hyperv_fb.c ++F: net/vmw_vsock/hyperv_transport.c + F: include/linux/hyperv.h + F: tools/hv/ + F: Documentation/ABI/stable/sysfs-bus-vmbus +diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig +index 8831e7c42167..a24369d175fd 100644 +--- a/net/vmw_vsock/Kconfig ++++ b/net/vmw_vsock/Kconfig +@@ -46,3 +46,15 @@ config VIRTIO_VSOCKETS_COMMON + This option is selected by any driver which needs to access + the virtio_vsock. The module will be called + vmw_vsock_virtio_transport_common. ++ ++config HYPERV_VSOCKETS ++ tristate "Hyper-V transport for Virtual Sockets" ++ depends on VSOCKETS && HYPERV ++ help ++ This module implements a Hyper-V transport for Virtual Sockets. ++ ++ Enable this transport if your Virtual Machine host supports Virtual ++ Sockets over Hyper-V VMBus. ++ ++ To compile this driver as a module, choose M here: the module will be ++ called hv_sock. If unsure, say N. 
+diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile +index bc27c70e0e59..f70f3e70ce9e 100644 +--- a/net/vmw_vsock/Makefile ++++ b/net/vmw_vsock/Makefile +@@ -2,6 +2,7 @@ obj-$(CONFIG_VSOCKETS) += vsock.o + obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o + obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o ++obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o + + vsock-y += af_vsock.o vsock_addr.o + +@@ -11,3 +12,5 @@ vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmw_vsock_virtio_transport-y += virtio_transport.o + + vmw_vsock_virtio_transport_common-y += virtio_transport_common.o ++ ++hv_sock-y += hyperv_transport.o +diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c +new file mode 100644 +index 000000000000..fd89bf357617 +--- /dev/null ++++ b/net/vmw_vsock/hyperv_transport.c +@@ -0,0 +1,829 @@ ++/* ++ * Hyper-V transport for vsock ++ * ++ * Hyper-V Sockets supplies a byte-stream based communication mechanism ++ * between the host and the VM. This driver implements the necessary ++ * support in the VM by introducing the new vsock transport. ++ * ++ * Copyright (c) 2017, Microsoft Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* The host side's design of the feature requires 6 exact 4KB pages for ++ * recv/send rings respectively -- this is suboptimal considering memory ++ * consumption, however unluckily we have to live with it, before the ++ * host comes up with a better design in the future. ++ */ ++#define PAGE_SIZE_4K 4096 ++#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6) ++#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6) ++ ++/* The MTU is 16KB per the host side's design */ ++#define HVS_MTU_SIZE (1024 * 16) ++ ++struct vmpipe_proto_header { ++ u32 pkt_type; ++ u32 data_size; ++}; ++ ++/* For recv, we use the VMBus in-place packet iterator APIs to directly copy ++ * data from the ringbuffer into the userspace buffer. ++ */ ++struct hvs_recv_buf { ++ /* The header before the payload data */ ++ struct vmpipe_proto_header hdr; ++ ++ /* The payload */ ++ u8 data[HVS_MTU_SIZE]; ++}; ++ ++/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use ++ * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated ++ * buffer, because tests show there is no significant performance difference. ++ * ++ * Note: the buffer can be eliminated in the future when we add new VMBus ++ * ringbuffer APIs that allow us to directly copy data from userspace buffer ++ * to VMBus ringbuffer. ++ */ ++#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header)) ++ ++struct hvs_send_buf { ++ /* The header before the payload data */ ++ struct vmpipe_proto_header hdr; ++ ++ /* The payload */ ++ u8 data[HVS_SEND_BUF_SIZE]; ++}; ++ ++#define HVS_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \ ++ sizeof(struct vmpipe_proto_header)) ++ ++/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and ++ * __hv_pkt_iter_next(). 
++ */ ++#define VMBUS_PKT_TRAILER (sizeof(u64)) ++ ++#define HVS_PKT_LEN(payload_len) (HVS_HEADER_LEN + \ ++ ALIGN((payload_len), 8) + \ ++ VMBUS_PKT_TRAILER) ++ ++/* Per-socket state (accessed via vsk->trans) */ ++struct hvsock { ++ struct vsock_sock *vsk; ++ ++ uuid_le vm_srv_id; ++ uuid_le host_srv_id; ++ ++ struct vmbus_channel *chan; ++ struct vmpacket_descriptor *recv_desc; ++ ++ /* The length of the payload not delivered to userland yet */ ++ u32 recv_data_len; ++ /* The offset of the payload */ ++ u32 recv_data_off; ++}; ++ ++/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is ++ * (see struct sockaddr_vm). Note: cid is not really used here: ++ * when we write apps to connect to the host, we can only use VMADDR_CID_ANY ++ * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we ++ * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY ++ * as the local cid. ++ * ++ * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV: ++ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user- ++ * guide/make-integration-service, and the endpoint is with ++ * the below sockaddr: ++ * ++ * struct SOCKADDR_HV ++ * { ++ * ADDRESS_FAMILY Family; ++ * USHORT Reserved; ++ * GUID VmId; ++ * GUID ServiceId; ++ * }; ++ * Note: VmID is not used by Linux VM and actually it isn't transmitted via ++ * VMBus, because here it's obvious the host and the VM can easily identify ++ * each other. Though the VmID is useful on the host, especially in the case ++ * of Windows container, Linux VM doesn't need it at all. ++ * ++ * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit ++ * the available GUID space of SOCKADDR_HV so that we can create a mapping ++ * between AF_VSOCK port and SOCKADDR_HV Service GUID. 
The rule of writing ++ * Hyper-V Sockets apps on the host and in Linux VM is: ++ * ++ **************************************************************************** ++ * the only valid Service GUIDs, from the perspectives of both the host and * ++ * Linux VM, that can be connected by the other end, must conform to this * ++ * format: -facb-11e6-bd58-64006a7986d3, and the "port" must be in * ++ * this range [0, 0x7FFFFFFF]. * ++ **************************************************************************** ++ * ++ * When we write apps on the host to connect(), the GUID ServiceID is used. ++ * When we write apps in Linux VM to connect(), we only need to specify the ++ * port and the driver will form the GUID and use that to request the host. ++ * ++ * From the perspective of Linux VM: ++ * 1. the local ephemeral port (i.e. the local auto-bound port when we call ++ * connect() without explicit bind()) is generated by __vsock_bind_stream(), ++ * and the range is [1024, 0xFFFFFFFF). ++ * 2. the remote ephemeral port (i.e. the auto-generated remote port for ++ * a connect request initiated by the host's connect()) is generated by ++ * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF). 
++ */ ++ ++#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF) ++#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT ++#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT ++#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) ++ ++/* 00000000-facb-11e6-bd58-64006a7986d3 */ ++static const uuid_le srv_id_template = ++ UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, ++ 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3); ++ ++static inline bool is_valid_srv_id(const uuid_le *id) ++{ ++ return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4); ++} ++ ++static inline unsigned int get_port_by_srv_id(const uuid_le *svr_id) ++{ ++ return *((unsigned int *)svr_id); ++} ++ ++static inline void hvs_addr_init(struct sockaddr_vm *addr, ++ const uuid_le *svr_id) ++{ ++ unsigned int port = get_port_by_srv_id(svr_id); ++ ++ vsock_addr_init(addr, VMADDR_CID_ANY, port); ++} ++ ++static inline void hvs_remote_addr_init(struct sockaddr_vm *remote, ++ struct sockaddr_vm *local) ++{ ++ static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; ++ struct sock *sk; ++ ++ vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY); ++ ++ while (1) { ++ /* Wrap around ? */ ++ if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT || ++ host_ephemeral_port == VMADDR_PORT_ANY) ++ host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; ++ ++ remote->svm_port = host_ephemeral_port++; ++ ++ sk = vsock_find_connected_socket(remote, local); ++ if (!sk) { ++ /* Found an available ephemeral port */ ++ return; ++ } ++ ++ /* Release refcnt got in vsock_find_connected_socket */ ++ sock_put(sk); ++ } ++} ++ ++static bool hvs_channel_readable(struct vmbus_channel *chan) ++{ ++ u32 readable = hv_get_bytes_to_read(&chan->inbound); ++ ++ /* 0-size payload means FIN */ ++ return readable >= HVS_PKT_LEN(0); ++} ++ ++static int hvs_channel_readable_payload(struct vmbus_channel *chan) ++{ ++ u32 readable = hv_get_bytes_to_read(&chan->inbound); ++ ++ if (readable > HVS_PKT_LEN(0)) { ++ /* At least we have 1 byte to read. 
We don't need to return ++ * the exact readable bytes: see vsock_stream_recvmsg() -> ++ * vsock_stream_has_data(). ++ */ ++ return 1; ++ } ++ ++ if (readable == HVS_PKT_LEN(0)) { ++ /* 0-size payload means FIN */ ++ return 0; ++ } ++ ++ /* No payload or FIN */ ++ return -1; ++} ++ ++static inline size_t hvs_channel_writable_bytes(struct vmbus_channel *chan) ++{ ++ u32 writeable = hv_get_bytes_to_write(&chan->outbound); ++ size_t ret; ++ ++ /* The ringbuffer mustn't be 100% full, and we should reserve a ++ * zero-length-payload packet for the FIN: see hv_ringbuffer_write() ++ * and hvs_shutdown(). ++ */ ++ if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0)) ++ return 0; ++ ++ ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0); ++ ++ return round_down(ret, 8); ++} ++ ++static int hvs_send_data(struct vmbus_channel *chan, ++ struct hvs_send_buf *send_buf, size_t to_write) ++{ ++ send_buf->hdr.pkt_type = 1; ++ send_buf->hdr.data_size = to_write; ++ return vmbus_sendpacket(chan, &send_buf->hdr, ++ sizeof(send_buf->hdr) + to_write, ++ 0, VM_PKT_DATA_INBAND, 0); ++} ++ ++static void hvs_channel_cb(void *ctx) ++{ ++ struct sock *sk = (struct sock *)ctx; ++ struct vsock_sock *vsk = vsock_sk(sk); ++ struct hvsock *hvs = vsk->trans; ++ struct vmbus_channel *chan = hvs->chan; ++ ++ if (hvs_channel_readable(chan)) ++ sk->sk_data_ready(sk); ++ ++ /* Mark it writable only if there is enough space */ ++ if (hvs_channel_writable_bytes(chan) >= HVS_SEND_BUF_SIZE) ++ sk->sk_write_space(sk); ++} ++ ++static void hvs_close_connection(struct vmbus_channel *chan) ++{ ++ struct sock *sk = get_per_channel_state(chan); ++ struct vsock_sock *vsk = vsock_sk(sk); ++ ++ sk->sk_state = SS_UNCONNECTED; ++ sock_set_flag(sk, SOCK_DONE); ++ vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; ++ ++ sk->sk_state_change(sk); ++} ++ ++static void hvs_open_connection(struct vmbus_channel *chan) ++{ ++ uuid_le *if_instance, *if_type; ++ unsigned char conn_from_host; ++ ++ struct sockaddr_vm addr; ++ struct 
sock *sk, *new = NULL; ++ struct vsock_sock *vnew; ++ struct hvsock *hvs, *hvs_new; ++ int ret; ++ ++ if_type = &chan->offermsg.offer.if_type; ++ if_instance = &chan->offermsg.offer.if_instance; ++ conn_from_host = chan->offermsg.offer.u.pipe.user_def[0]; ++ ++ /* The host or the VM should only listen on a port in ++ * [0, MAX_LISTEN_PORT] ++ */ ++ if (!is_valid_srv_id(if_type) || ++ get_port_by_srv_id(if_type) > MAX_LISTEN_PORT) ++ return; ++ ++ hvs_addr_init(&addr, conn_from_host ? if_type : if_instance); ++ sk = vsock_find_bound_socket(&addr); ++ if (!sk) ++ return; ++ ++ if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) || ++ (!conn_from_host && sk->sk_state != SS_CONNECTING)) ++ goto out; ++ ++ if (conn_from_host) { ++ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) ++ goto out; ++ ++ new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, ++ sk->sk_type, 0); ++ if (!new) ++ goto out; ++ ++ new->sk_state = SS_CONNECTING; ++ vnew = vsock_sk(new); ++ hvs_new = vnew->trans; ++ hvs_new->chan = chan; ++ } else { ++ hvs = vsock_sk(sk)->trans; ++ hvs->chan = chan; ++ } ++ ++ set_channel_read_mode(chan, HV_CALL_DIRECT); ++ ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE, ++ RINGBUFFER_HVS_RCV_SIZE, NULL, 0, ++ hvs_channel_cb, conn_from_host ? new : sk); ++ if (ret != 0) { ++ if (conn_from_host) { ++ hvs_new->chan = NULL; ++ sock_put(new); ++ } else { ++ hvs->chan = NULL; ++ } ++ goto out; ++ } ++ ++ set_per_channel_state(chan, conn_from_host ? 
new : sk); ++ vmbus_set_chn_rescind_callback(chan, hvs_close_connection); ++ ++ /* See hvs_channel_cb() and hvs_notify_poll_out() */ ++ set_channel_pending_send_size(chan, ++ HVS_PKT_LEN(HVS_SEND_BUF_SIZE) + 1); ++ ++ if (conn_from_host) { ++ new->sk_state = SS_CONNECTED; ++ sk->sk_ack_backlog++; ++ ++ hvs_addr_init(&vnew->local_addr, if_type); ++ hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); ++ ++ hvs_new->vm_srv_id = *if_type; ++ hvs_new->host_srv_id = *if_instance; ++ ++ vsock_insert_connected(vnew); ++ vsock_enqueue_accept(sk, new); ++ } else { ++ sk->sk_state = SS_CONNECTED; ++ sk->sk_socket->state = SS_CONNECTED; ++ ++ vsock_insert_connected(vsock_sk(sk)); ++ } ++ ++ sk->sk_state_change(sk); ++ ++out: ++ /* Release refcnt obtained when we called vsock_find_bound_socket() */ ++ sock_put(sk); ++} ++ ++static u32 hvs_get_local_cid(void) ++{ ++ return VMADDR_CID_ANY; ++} ++ ++static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk) ++{ ++ struct hvsock *hvs; ++ ++ hvs = kzalloc(sizeof(*hvs), GFP_KERNEL); ++ if (!hvs) ++ return -ENOMEM; ++ ++ vsk->trans = hvs; ++ hvs->vsk = vsk; ++ ++ return 0; ++} ++ ++static int hvs_connect(struct vsock_sock *vsk) ++{ ++ struct hvsock *h = vsk->trans; ++ ++ h->vm_srv_id = srv_id_template; ++ h->host_srv_id = srv_id_template; ++ ++ *((u32 *)&h->vm_srv_id) = vsk->local_addr.svm_port; ++ *((u32 *)&h->host_srv_id) = vsk->remote_addr.svm_port; ++ ++ return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id); ++} ++ ++static int hvs_shutdown(struct vsock_sock *vsk, int mode) ++{ ++ struct vmpipe_proto_header hdr; ++ struct hvs_send_buf *send_buf; ++ struct hvsock *hvs; ++ ++ if (!(mode & SEND_SHUTDOWN)) ++ return 0; ++ ++ hvs = vsk->trans; ++ ++ send_buf = (struct hvs_send_buf *)&hdr; ++ ++ /* It can't fail: see hvs_channel_writable_bytes(). 
*/ ++ (void)hvs_send_data(hvs->chan, send_buf, 0); ++ ++ return 0; ++} ++ ++static void hvs_release(struct vsock_sock *vsk) ++{ ++ struct hvsock *hvs = vsk->trans; ++ struct vmbus_channel *chan = hvs->chan; ++ ++ if (chan) ++ hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); ++ ++ vsock_remove_sock(vsk); ++} ++ ++static void hvs_destruct(struct vsock_sock *vsk) ++{ ++ struct hvsock *hvs = vsk->trans; ++ struct vmbus_channel *chan = hvs->chan; ++ ++ if (chan) ++ vmbus_hvsock_device_unregister(chan); ++ ++ kfree(hvs); ++} ++ ++static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static int hvs_dgram_enqueue(struct vsock_sock *vsk, ++ struct sockaddr_vm *remote, struct msghdr *msg, ++ size_t dgram_len) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static bool hvs_dgram_allow(u32 cid, u32 port) ++{ ++ return false; ++} ++ ++static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ struct hvsock *hvs = vsk->trans; ++ bool need_refill = !hvs->recv_desc; ++ struct hvs_recv_buf *recv_buf; ++ u32 payload_len, to_read; ++ int ret; ++ ++ if (flags & MSG_PEEK) ++ return -EOPNOTSUPP; ++ ++ if (need_refill) { ++ hvs->recv_desc = hv_pkt_iter_first(hvs->chan); ++ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); ++ ++ payload_len = recv_buf->hdr.data_size; ++ if (payload_len == 0 || payload_len > HVS_MTU_SIZE) ++ return -EIO; ++ ++ hvs->recv_data_len = payload_len; ++ hvs->recv_data_off = 0; ++ } else { ++ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); ++ } ++ ++ to_read = min_t(u32, len, hvs->recv_data_len); ++ ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read); ++ if (ret != 0) ++ return ret; ++ ++ hvs->recv_data_len -= to_read; ++ ++ if (hvs->recv_data_len == 0) ++ hvs->recv_desc = hv_pkt_iter_next(hvs->chan, 
hvs->recv_desc); ++ else ++ hvs->recv_data_off += to_read; ++ ++ return to_read; ++} ++ ++static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg, ++ size_t len) ++{ ++ struct hvsock *hvs = vsk->trans; ++ struct vmbus_channel *chan = hvs->chan; ++ struct hvs_send_buf *send_buf; ++ size_t to_write, max_writable, ret; ++ ++ BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K); ++ ++ send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL); ++ if (!send_buf) ++ return -ENOMEM; ++ ++ max_writable = hvs_channel_writable_bytes(chan); ++ to_write = min_t(size_t, len, max_writable); ++ to_write = min_t(size_t, to_write, HVS_SEND_BUF_SIZE); ++ ++ ret = memcpy_from_msg(send_buf->data, msg, to_write); ++ if (ret < 0) ++ goto out; ++ ++ ret = hvs_send_data(hvs->chan, send_buf, to_write); ++ if (ret < 0) ++ goto out; ++ ++ ret = to_write; ++out: ++ kfree(send_buf); ++ return ret; ++} ++ ++static s64 hvs_stream_has_data(struct vsock_sock *vsk) ++{ ++ struct hvsock *hvs = vsk->trans; ++ s64 ret; ++ ++ switch (hvs_channel_readable_payload(hvs->chan)) { ++ case 1: ++ ret = 1; ++ break; ++ case 0: ++ vsk->peer_shutdown |= SEND_SHUTDOWN; ++ ret = 0; ++ break; ++ default: /* -1 */ ++ ret = 0; ++ break; ++ } ++ ++ return ret; ++} ++ ++static s64 hvs_stream_has_space(struct vsock_sock *vsk) ++{ ++ struct hvsock *hvs = vsk->trans; ++ ++ return hvs_channel_writable_bytes(hvs->chan); ++} ++ ++static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk) ++{ ++ return HVS_MTU_SIZE + 1; ++} ++ ++static bool hvs_stream_is_active(struct vsock_sock *vsk) ++{ ++ struct hvsock *hvs = vsk->trans; ++ ++ return hvs->chan != NULL; ++} ++ ++static bool hvs_stream_allow(u32 cid, u32 port) ++{ ++ static const u32 valid_cids[] = { ++ VMADDR_CID_ANY, ++ VMADDR_CID_HOST, ++ }; ++ int i; ++ ++ /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is ++ * reserved as ephemeral ports, which are used as the host's ports ++ * when the host initiates connections. 
++ */ ++ if (port > MAX_HOST_LISTEN_PORT) ++ return false; ++ ++ for (i = 0; i < ARRAY_SIZE(valid_cids); i++) { ++ if (cid == valid_cids[i]) ++ return true; ++ } ++ ++ return false; ++} ++ ++static ++int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable) ++{ ++ struct hvsock *hvs = vsk->trans; ++ ++ *readable = hvs_channel_readable(hvs->chan); ++ return 0; ++} ++ ++static ++int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable) ++{ ++ /* Report writable only if there is enough space */ ++ *writable = hvs_stream_has_space(vsk) >= HVS_SEND_BUF_SIZE; ++ ++ return 0; ++} ++ ++static ++int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target, ++ struct vsock_transport_recv_notify_data *d) ++{ ++ return 0; ++} ++ ++static ++int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target, ++ struct vsock_transport_recv_notify_data *d) ++{ ++ return 0; ++} ++ ++static ++int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target, ++ struct vsock_transport_recv_notify_data *d) ++{ ++ return 0; ++} ++ ++static ++int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target, ++ ssize_t copied, bool data_read, ++ struct vsock_transport_recv_notify_data *d) ++{ ++ return 0; ++} ++ ++static ++int hvs_notify_send_init(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *d) ++{ ++ return 0; ++} ++ ++static ++int hvs_notify_send_pre_block(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *d) ++{ ++ return 0; ++} ++ ++static ++int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk, ++ struct vsock_transport_send_notify_data *d) ++{ ++ return 0; ++} ++ ++static ++int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, ++ struct vsock_transport_send_notify_data *d) ++{ ++ return 0; ++} ++ ++static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val) ++{ ++ /* Ignored. 
*/ ++} ++ ++static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val) ++{ ++ /* Ignored. */ ++} ++ ++static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val) ++{ ++ /* Ignored. */ ++} ++ ++static u64 hvs_get_buffer_size(struct vsock_sock *vsk) ++{ ++ return -ENOPROTOOPT; ++} ++ ++static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk) ++{ ++ return -ENOPROTOOPT; ++} ++ ++static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk) ++{ ++ return -ENOPROTOOPT; ++} ++ ++static struct vsock_transport hvs_transport = { ++ .get_local_cid = hvs_get_local_cid, ++ ++ .init = hvs_sock_init, ++ .destruct = hvs_destruct, ++ .release = hvs_release, ++ .connect = hvs_connect, ++ .shutdown = hvs_shutdown, ++ ++ .dgram_bind = hvs_dgram_bind, ++ .dgram_dequeue = hvs_dgram_dequeue, ++ .dgram_enqueue = hvs_dgram_enqueue, ++ .dgram_allow = hvs_dgram_allow, ++ ++ .stream_dequeue = hvs_stream_dequeue, ++ .stream_enqueue = hvs_stream_enqueue, ++ .stream_has_data = hvs_stream_has_data, ++ .stream_has_space = hvs_stream_has_space, ++ .stream_rcvhiwat = hvs_stream_rcvhiwat, ++ .stream_is_active = hvs_stream_is_active, ++ .stream_allow = hvs_stream_allow, ++ ++ .notify_poll_in = hvs_notify_poll_in, ++ .notify_poll_out = hvs_notify_poll_out, ++ .notify_recv_init = hvs_notify_recv_init, ++ .notify_recv_pre_block = hvs_notify_recv_pre_block, ++ .notify_recv_pre_dequeue = hvs_notify_recv_pre_dequeue, ++ .notify_recv_post_dequeue = hvs_notify_recv_post_dequeue, ++ .notify_send_init = hvs_notify_send_init, ++ .notify_send_pre_block = hvs_notify_send_pre_block, ++ .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue, ++ .notify_send_post_enqueue = hvs_notify_send_post_enqueue, ++ ++ .set_buffer_size = hvs_set_buffer_size, ++ .set_min_buffer_size = hvs_set_min_buffer_size, ++ .set_max_buffer_size = hvs_set_max_buffer_size, ++ .get_buffer_size = hvs_get_buffer_size, ++ .get_min_buffer_size = hvs_get_min_buffer_size, ++ .get_max_buffer_size = hvs_get_max_buffer_size, ++}; ++ 
++static int hvs_probe(struct hv_device *hdev, ++ const struct hv_vmbus_device_id *dev_id) ++{ ++ struct vmbus_channel *chan = hdev->channel; ++ ++ hvs_open_connection(chan); ++ ++ /* Always return success to suppress the unnecessary error message ++ * in vmbus_probe(): on error the host will rescind the device in ++ * 30 seconds and we can do cleanup at that time in ++ * vmbus_onoffer_rescind(). ++ */ ++ return 0; ++} ++ ++static int hvs_remove(struct hv_device *hdev) ++{ ++ struct vmbus_channel *chan = hdev->channel; ++ ++ vmbus_close(chan); ++ ++ return 0; ++} ++ ++/* This isn't really used. See vmbus_match() and vmbus_probe() */ ++static const struct hv_vmbus_device_id id_table[] = { ++ {}, ++}; ++ ++static struct hv_driver hvs_drv = { ++ .name = "hv_sock", ++ .hvsock = true, ++ .id_table = id_table, ++ .probe = hvs_probe, ++ .remove = hvs_remove, ++}; ++ ++static int __init hvs_init(void) ++{ ++ int ret; ++ ++ if (vmbus_proto_version < VERSION_WIN10) ++ return -ENODEV; ++ ++ ret = vmbus_driver_register(&hvs_drv); ++ if (ret != 0) ++ return ret; ++ ++ ret = vsock_core_init(&hvs_transport); ++ if (ret) { ++ vmbus_driver_unregister(&hvs_drv); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void __exit hvs_exit(void) ++{ ++ vsock_core_exit(); ++ vmbus_driver_unregister(&hvs_drv); ++} ++ ++module_init(hvs_init); ++module_exit(hvs_exit); ++ ++MODULE_DESCRIPTION("Hyper-V sockets"); ++MODULE_VERSION("1.0.0"); ++MODULE_LICENSE("GPL"); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0007-VMCI-only-try-to-load-on-VMware-hypervisor.patch b/projects/kernel-config/patches-4.11.x/0007-VMCI-only-try-to-load-on-VMware-hypervisor.patch new file mode 100644 index 000000000..e836331aa --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0007-VMCI-only-try-to-load-on-VMware-hypervisor.patch @@ -0,0 +1,64 @@ +From f467ed8a4c51eed215f9f8db32cad8f240a59a2e Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Fri, 5 May 2017 16:57:29 -0600 +Subject: [PATCH 7/9] 
VMCI: only try to load on VMware hypervisor + +Without the patch, vmw_vsock_vmci_transport.ko and vmw_vmci.ko can +automatically load when an application creates an AF_VSOCK socket. + +This is the expected good behavior on VMware hypervisor, but as we +are adding hv_sock.ko (i.e. Hyper-V transport for AF_VSOCK), we should +make sure vmw_vsock_vmci_transport.ko can't load on Hyper-V, otherwise +there is a -EBUSY conflict when both vmw_vsock_vmci_transport.ko and +hv_sock.ko try to call vsock_core_init(). + +On the other hand, hv_sock.ko can only load on Hyper-V, because it +depends on hv_vmbus.ko, which detects Hyper-V in hv_acpi_init(). + +KVM's vsock_virtio_transport doesn't have the issue because it doesn't +define MODULE_ALIAS_NETPROTO(PF_VSOCK). + +Signed-off-by: Dexuan Cui +Cc: Alok Kataria +Cc: Andy King +Cc: Adit Ranadive +Cc: George Zhang +Cc: Jorgen Hansen +Cc: K. Y. Srinivasan +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit b5566b1b6e5cb19b381590587f841f950caabe4d) +--- + drivers/misc/vmw_vmci/vmci_driver.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c +index d7eaf1eb11e7..1789ea71ff5d 100644 +--- a/drivers/misc/vmw_vmci/vmci_driver.c ++++ b/drivers/misc/vmw_vmci/vmci_driver.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include "vmci_driver.h" + #include "vmci_event.h" +@@ -58,6 +59,13 @@ static int __init vmci_drv_init(void) + int vmci_err; + int error; + ++ /* ++ * Check if we are running on VMware's hypervisor and bail out ++ * if we are not.
++ */ ++ if (x86_hyper != &x86_hyper_vmware) ++ return -ENODEV; ++ + vmci_err = vmci_event_init(); + if (vmci_err < VMCI_SUCCESS) { + pr_err("Failed to initialize VMCIEvent (result=%d)\n", +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0008-hv_sock-add-the-support-of-auto-loading.patch b/projects/kernel-config/patches-4.11.x/0008-hv_sock-add-the-support-of-auto-loading.patch new file mode 100644 index 000000000..646f45b13 --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0008-hv_sock-add-the-support-of-auto-loading.patch @@ -0,0 +1,30 @@ +From 1ee84d4b4cceac4c7f42573476c1253b06e8cdaf Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Fri, 5 May 2017 16:57:35 -0600 +Subject: [PATCH 8/9] hv_sock: add the support of auto-loading + +After we disable VMWare virtual sockets driver's auto-loading on Hyper-V, +we can enable hv_sock's auto-loading now. + +Signed-off-by: Dexuan Cui +Cc: K. Y. Srinivasan +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit 6f1aa69011356ff95ed6c57400095e5f2d9eb900) +--- + net/vmw_vsock/hyperv_transport.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c +index fd89bf357617..f465b0b662df 100644 +--- a/net/vmw_vsock/hyperv_transport.c ++++ b/net/vmw_vsock/hyperv_transport.c +@@ -827,3 +827,4 @@ module_exit(hvs_exit); + MODULE_DESCRIPTION("Hyper-V sockets"); + MODULE_VERSION("1.0.0"); + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_NETPROTO(PF_VSOCK); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.11.x/0009-hvsock-fix-a-race-in-hvs_stream_dequeue.patch b/projects/kernel-config/patches-4.11.x/0009-hvsock-fix-a-race-in-hvs_stream_dequeue.patch new file mode 100644 index 000000000..84b5e836e --- /dev/null +++ b/projects/kernel-config/patches-4.11.x/0009-hvsock-fix-a-race-in-hvs_stream_dequeue.patch @@ -0,0 +1,113 @@ +From 
d0e6020dd2b25f8880b8d25305c356cf4a22f12d Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Tue, 16 May 2017 22:14:03 +0800 +Subject: [PATCH 9/9] hvsock: fix a race in hvs_stream_dequeue() + +If hv_pkt_iter_next() returns a non-NULL pointer, we must update +the recv_data_len/data_off info, otherwise the received data will +be silently dropped, and let's fix hvs_stream_has_data() accordingly. + +Thank Rolf for finding this! + +Reported-by: Rolf Neugebauer +Signed-off-by: Dexuan Cui +Origin: https://github.com/dcui/linux/commits/decui/hv_sock/v4.11/20170511 +(cherry picked from commit 83c8635b893bbc0b5b329c632cea0382d5479763) +--- + net/vmw_vsock/hyperv_transport.c | 50 +++++++++++++++++++++++++++++----------- + 1 file changed, 36 insertions(+), 14 deletions(-) + +diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c +index f465b0b662df..30154836acd0 100644 +--- a/net/vmw_vsock/hyperv_transport.c ++++ b/net/vmw_vsock/hyperv_transport.c +@@ -476,13 +476,33 @@ static bool hvs_dgram_allow(u32 cid, u32 port) + return false; + } + ++static int hvs_update_recv_data(struct hvsock *hvs) ++{ ++ struct hvs_recv_buf *recv_buf; ++ u32 payload_len; ++ ++ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); ++ payload_len = recv_buf->hdr.data_size; ++ ++ if (payload_len > HVS_MTU_SIZE) ++ return -EIO; ++ ++ if (payload_len == 0) ++ hvs->vsk->peer_shutdown |= SEND_SHUTDOWN; ++ ++ hvs->recv_data_len = payload_len; ++ hvs->recv_data_off = 0; ++ ++ return 0; ++} ++ + static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, + size_t len, int flags) + { + struct hvsock *hvs = vsk->trans; + bool need_refill = !hvs->recv_desc; + struct hvs_recv_buf *recv_buf; +- u32 payload_len, to_read; ++ u32 to_read; + int ret; + + if (flags & MSG_PEEK) +@@ -490,29 +510,28 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, + + if (need_refill) { + hvs->recv_desc = hv_pkt_iter_first(hvs->chan); +- recv_buf = (struct 
hvs_recv_buf *)(hvs->recv_desc + 1); +- +- payload_len = recv_buf->hdr.data_size; +- if (payload_len == 0 || payload_len > HVS_MTU_SIZE) +- return -EIO; +- +- hvs->recv_data_len = payload_len; +- hvs->recv_data_off = 0; +- } else { +- recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); ++ ret = hvs_update_recv_data(hvs); ++ if (ret) ++ return ret; + } + ++ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); + to_read = min_t(u32, len, hvs->recv_data_len); + ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read); + if (ret != 0) + return ret; + + hvs->recv_data_len -= to_read; +- +- if (hvs->recv_data_len == 0) ++ if (hvs->recv_data_len == 0) { + hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc); +- else ++ if (hvs->recv_desc) { ++ ret = hvs_update_recv_data(hvs); ++ if (ret) ++ return ret; ++ } ++ } else { + hvs->recv_data_off += to_read; ++ } + + return to_read; + } +@@ -554,6 +573,9 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) + struct hvsock *hvs = vsk->trans; + s64 ret; + ++ if (hvs->recv_data_len > 0) ++ return 1; ++ + switch (hvs_channel_readable_payload(hvs->chan)) { + case 1: + ret = 1; +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0001-hv_sock-introduce-Hyper-V-Sockets.patch b/projects/kernel-config/patches-4.9.x/0001-hv_sock-introduce-Hyper-V-Sockets.patch new file mode 100644 index 000000000..d8965bcfe --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0001-hv_sock-introduce-Hyper-V-Sockets.patch @@ -0,0 +1,1791 @@ +From 8c4e99c8b5686efa70ca46aea0ec6ba7de489c86 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Sat, 23 Jul 2016 01:35:51 +0000 +Subject: [PATCH 01/10] hv_sock: introduce Hyper-V Sockets + +Hyper-V Sockets (hv_sock) supplies a byte-stream based communication +mechanism between the host and the guest. It's somewhat like TCP over +VMBus, but the transportation layer (VMBus) is much simpler than IP. 
+ +With Hyper-V Sockets, applications between the host and the guest can talk +to each other directly by the traditional BSD-style socket APIs. + +Hyper-V Sockets is only available on new Windows hosts, like Windows Server +2016. More info is in this article "Make your own integration services": +https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service + +The patch implements the necessary support in the guest side by introducing +a new socket address family AF_HYPERV. + +Signed-off-by: Dexuan Cui +Cc: "K. Y. Srinivasan" +Cc: Haiyang Zhang +Cc: Vitaly Kuznetsov +Cc: Cathy Avery +Cc: Olaf Hering +Origin: https://patchwork.kernel.org/patch/9244467/ +--- + MAINTAINERS | 2 + + include/linux/hyperv.h | 13 + + include/linux/socket.h | 4 +- + include/net/af_hvsock.h | 78 +++ + include/uapi/linux/hyperv.h | 23 + + net/Kconfig | 1 + + net/Makefile | 1 + + net/hv_sock/Kconfig | 10 + + net/hv_sock/Makefile | 3 + + net/hv_sock/af_hvsock.c | 1507 +++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 1641 insertions(+), 1 deletion(-) + create mode 100644 include/net/af_hvsock.h + create mode 100644 net/hv_sock/Kconfig + create mode 100644 net/hv_sock/Makefile + create mode 100644 net/hv_sock/af_hvsock.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 63cefa62324c..e64920219d88 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -5853,7 +5853,9 @@ F: drivers/pci/host/pci-hyperv.c + F: drivers/net/hyperv/ + F: drivers/scsi/storvsc_drv.c + F: drivers/video/fbdev/hyperv_fb.c ++F: net/hv_sock/ + F: include/linux/hyperv.h ++F: include/net/af_hvsock.h + F: tools/hv/ + F: Documentation/ABI/stable/sysfs-bus-vmbus + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index d596a076da11..489ad74c1e6e 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1613,5 +1613,18 @@ static inline void commit_rd_index(struct vmbus_channel *channel) + hv_signal_on_read(channel); + } + ++struct vmpipe_proto_header { ++ u32 pkt_type; 
++ u32 data_size; ++}; ++ ++#define HVSOCK_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \ ++ sizeof(struct vmpipe_proto_header)) ++ ++/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */ ++#define PREV_INDICES_LEN (sizeof(u64)) + ++#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \ ++ ALIGN((payload_len), 8) + \ ++ PREV_INDICES_LEN) + #endif /* _HYPERV_H */ +diff --git a/include/linux/socket.h b/include/linux/socket.h +index b5cc5a6d7011..0b68b587d6ee 100644 +--- a/include/linux/socket.h ++++ b/include/linux/socket.h +@@ -202,8 +202,9 @@ struct ucred { + #define AF_VSOCK 40 /* vSockets */ + #define AF_KCM 41 /* Kernel Connection Multiplexor*/ + #define AF_QIPCRTR 42 /* Qualcomm IPC Router */ ++#define AF_HYPERV 43 /* Hyper-V Sockets */ + +-#define AF_MAX 43 /* For now.. */ ++#define AF_MAX 44 /* For now.. */ + + /* Protocol families, same as address families. */ + #define PF_UNSPEC AF_UNSPEC +@@ -251,6 +252,7 @@ struct ucred { + #define PF_VSOCK AF_VSOCK + #define PF_KCM AF_KCM + #define PF_QIPCRTR AF_QIPCRTR ++#define PF_HYPERV AF_HYPERV + #define PF_MAX AF_MAX + + /* Maximum queue length specifiable by listen. */ +diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h +new file mode 100644 +index 000000000000..e7a8a3ae08e8 +--- /dev/null ++++ b/include/net/af_hvsock.h +@@ -0,0 +1,78 @@ ++#ifndef __AF_HVSOCK_H__ ++#define __AF_HVSOCK_H__ ++ ++#include ++#include ++#include ++ ++/* The host side's design of the feature requires 5 exact 4KB pages for ++ * recv/send rings respectively -- this is suboptimal considering memory ++ * consumption, however unluckily we have to live with it, before the ++ * host comes up with a better design in the future. ++ */ ++#define PAGE_SIZE_4K 4096 ++#define RINGBUFFER_HVSOCK_RCV_SIZE (PAGE_SIZE_4K * 5) ++#define RINGBUFFER_HVSOCK_SND_SIZE (PAGE_SIZE_4K * 5) ++ ++/* The MTU is 16KB per the host side's design. 
++ * In future, the buffer can be eliminated when we switch to use the coming ++ * new VMBus ringbuffer "in-place consumption" APIs, by which we can ++ * directly copy data from VMBus ringbuffer into the userspace buffer. ++ */ ++#define HVSOCK_MTU_SIZE (1024 * 16) ++struct hvsock_recv_buf { ++ unsigned int data_len; ++ unsigned int data_offset; ++ ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MTU_SIZE]; ++}; ++ ++/* In the VM, actually we can send up to HVSOCK_MTU_SIZE bytes of payload, ++ * but for now let's use a smaller size to minimize the dynamically-allocated ++ * buffer. Note: the buffer can be eliminated in future when we add new VMBus ++ * ringbuffer APIs that allow us to directly copy data from userspace buf to ++ * VMBus ringbuffer. ++ */ ++#define HVSOCK_MAX_SND_SIZE_BY_VM (1024 * 4) ++struct hvsock_send_buf { ++ struct vmpipe_proto_header hdr; ++ u8 buf[HVSOCK_MAX_SND_SIZE_BY_VM]; ++}; ++ ++struct hvsock_sock { ++ /* sk must be the first member. */ ++ struct sock sk; ++ ++ struct sockaddr_hv local_addr; ++ struct sockaddr_hv remote_addr; ++ ++ /* protected by the global hvsock_mutex */ ++ struct list_head bound_list; ++ struct list_head connected_list; ++ ++ struct list_head accept_queue; ++ /* used by enqueue and dequeue */ ++ struct mutex accept_queue_mutex; ++ ++ struct delayed_work dwork; ++ ++ u32 peer_shutdown; ++ ++ struct vmbus_channel *channel; ++ ++ struct hvsock_send_buf *send; ++ struct hvsock_recv_buf *recv; ++}; ++ ++static inline struct hvsock_sock *sk_to_hvsock(struct sock *sk) ++{ ++ return (struct hvsock_sock *)sk; ++} ++ ++static inline struct sock *hvsock_to_sk(struct hvsock_sock *hvsk) ++{ ++ return (struct sock *)hvsk; ++} ++ ++#endif /* __AF_HVSOCK_H__ */ +diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h +index e347b24ef9fb..eb3e44b69a5d 100644 +--- a/include/uapi/linux/hyperv.h ++++ b/include/uapi/linux/hyperv.h +@@ -26,6 +26,7 @@ + #define _UAPI_HYPERV_H + + #include ++#include + + /* + * Framework 
version for util services. +@@ -396,4 +397,26 @@ struct hv_kvp_ip_msg { + struct hv_kvp_ipaddr_value kvp_ip_val; + } __attribute__((packed)); + ++/* This is the address format of Hyper-V Sockets. ++ * Note: here we just borrow the kernel's built-in type uuid_le. When ++ * an application calls bind() or connect(), the 2 members of struct ++ * sockaddr_hv must be of GUID. ++ * The GUID format differs from the UUID format only in the byte order of ++ * the first 3 fields. Refer to: ++ * https://en.wikipedia.org/wiki/Globally_unique_identifier ++ */ ++struct sockaddr_hv { ++ __kernel_sa_family_t shv_family; /* Address family */ ++ u16 reserved; /* Must be Zero */ ++ uuid_le shv_vm_guid; /* VM ID */ ++ uuid_le shv_service_guid; /* Service ID */ ++}; ++ ++#define SHV_VMID_GUEST NULL_UUID_LE ++#define SHV_VMID_HOST NULL_UUID_LE ++ ++#define SHV_SERVICE_ID_ANY NULL_UUID_LE ++ ++#define SHV_PROTO_RAW 1 ++ + #endif /* _UAPI_HYPERV_H */ +diff --git a/net/Kconfig b/net/Kconfig +index 7b6cd340b72b..a9be6907a620 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -231,6 +231,7 @@ source "net/dns_resolver/Kconfig" + source "net/batman-adv/Kconfig" + source "net/openvswitch/Kconfig" + source "net/vmw_vsock/Kconfig" ++source "net/hv_sock/Kconfig" + source "net/netlink/Kconfig" + source "net/mpls/Kconfig" + source "net/hsr/Kconfig" +diff --git a/net/Makefile b/net/Makefile +index 4cafaa2b4667..2b357eb81865 100644 +--- a/net/Makefile ++++ b/net/Makefile +@@ -71,6 +71,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ + obj-$(CONFIG_NFC) += nfc/ + obj-$(CONFIG_OPENVSWITCH) += openvswitch/ + obj-$(CONFIG_VSOCKETS) += vmw_vsock/ ++obj-$(CONFIG_HYPERV_SOCK) += hv_sock/ + obj-$(CONFIG_MPLS) += mpls/ + obj-$(CONFIG_HSR) += hsr/ + ifneq ($(CONFIG_NET_SWITCHDEV),) +diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig +new file mode 100644 +index 000000000000..ff84875564d1 +--- /dev/null ++++ b/net/hv_sock/Kconfig +@@ -0,0 +1,10 @@ ++config HYPERV_SOCK ++ tristate "Hyper-V Sockets" ++ depends on 
HYPERV ++ default m if HYPERV ++ help ++ Hyper-V Sockets is a socket interface for high speed ++ communication between Linux guest and Hyper-V host over VMBus. ++ ++ To compile this driver as a module, choose M here: the module ++ will be called hv_sock. +diff --git a/net/hv_sock/Makefile b/net/hv_sock/Makefile +new file mode 100644 +index 000000000000..716c01230129 +--- /dev/null ++++ b/net/hv_sock/Makefile +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_HYPERV_SOCK) += hv_sock.o ++ ++hv_sock-y += af_hvsock.o +diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c +new file mode 100644 +index 000000000000..331d3759f5cb +--- /dev/null ++++ b/net/hv_sock/af_hvsock.c +@@ -0,0 +1,1507 @@ ++/* ++ * Hyper-V Sockets -- a socket-based communication channel between the ++ * Hyper-V host and the virtual machines running on it. ++ * ++ * Copyright (c) 2016 Microsoft Corporation. ++ * ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote ++ * products derived from this software without specific prior written ++ * permission. ++ * ++ * Alternatively, this software may be distributed under the terms of the ++ * GNU General Public License ("GPL") version 2 as published by the Free ++ * Software Foundation. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR ++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ++ * POSSIBILITY OF SUCH DAMAGE. ++ */ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++ ++static struct proto hvsock_proto = { ++ .name = "HV_SOCK", ++ .owner = THIS_MODULE, ++ .obj_size = sizeof(struct hvsock_sock), ++}; ++ ++#define SS_LISTEN 255 ++ ++#define HVSOCK_CONNECT_TIMEOUT (30 * HZ) ++ ++/* This is an artificial limit */ ++#define HVSOCK_MAX_BACKLOG 128 ++ ++static LIST_HEAD(hvsock_bound_list); ++static LIST_HEAD(hvsock_connected_list); ++static DEFINE_MUTEX(hvsock_mutex); ++ ++static struct sock *hvsock_find_bound_socket(const struct sockaddr_hv *addr) ++{ ++ struct hvsock_sock *hvsk; ++ ++ list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) { ++ if (!uuid_le_cmp(addr->shv_service_guid, ++ hvsk->local_addr.shv_service_guid)) ++ return hvsock_to_sk(hvsk); ++ } ++ return NULL; ++} ++ ++static struct sock *hvsock_find_connected_socket_by_channel( ++ const struct vmbus_channel *channel) ++{ ++ struct hvsock_sock *hvsk; ++ ++ list_for_each_entry(hvsk, &hvsock_connected_list, connected_list) { ++ if (hvsk->channel == channel) ++ return hvsock_to_sk(hvsk); ++ } ++ return NULL; ++} ++ ++static void hvsock_enqueue_accept(struct sock *listener, ++ struct sock *connected) ++{ ++ struct 
hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; ++ ++ hvlistener = sk_to_hvsock(listener); ++ hvconnected = sk_to_hvsock(connected); ++ ++ sock_hold(connected); ++ sock_hold(listener); ++ ++ mutex_lock(&hvlistener->accept_queue_mutex); ++ list_add_tail(&hvconnected->accept_queue, &hvlistener->accept_queue); ++ listener->sk_ack_backlog++; ++ mutex_unlock(&hvlistener->accept_queue_mutex); ++} ++ ++static struct sock *hvsock_dequeue_accept(struct sock *listener) ++{ ++ struct hvsock_sock *hvconnected; ++ struct hvsock_sock *hvlistener; ++ ++ hvlistener = sk_to_hvsock(listener); ++ ++ mutex_lock(&hvlistener->accept_queue_mutex); ++ ++ if (list_empty(&hvlistener->accept_queue)) { ++ mutex_unlock(&hvlistener->accept_queue_mutex); ++ return NULL; ++ } ++ ++ hvconnected = list_entry(hvlistener->accept_queue.next, ++ struct hvsock_sock, accept_queue); ++ ++ list_del_init(&hvconnected->accept_queue); ++ listener->sk_ack_backlog--; ++ ++ mutex_unlock(&hvlistener->accept_queue_mutex); ++ ++ sock_put(listener); ++ /* The caller will need a reference on the connected socket so we let ++ * it call sock_put(). 
++ */ ++ ++ return hvsock_to_sk(hvconnected); ++} ++ ++static bool hvsock_is_accept_queue_empty(struct sock *sk) ++{ ++ struct hvsock_sock *hvsk = sk_to_hvsock(sk); ++ int ret; ++ ++ mutex_lock(&hvsk->accept_queue_mutex); ++ ret = list_empty(&hvsk->accept_queue); ++ mutex_unlock(&hvsk->accept_queue_mutex); ++ ++ return ret; ++} ++ ++static void hvsock_addr_init(struct sockaddr_hv *addr, uuid_le service_id) ++{ ++ memset(addr, 0, sizeof(*addr)); ++ addr->shv_family = AF_HYPERV; ++ addr->shv_service_guid = service_id; ++} ++ ++static int hvsock_addr_validate(const struct sockaddr_hv *addr) ++{ ++ if (!addr) ++ return -EFAULT; ++ ++ if (addr->shv_family != AF_HYPERV) ++ return -EAFNOSUPPORT; ++ ++ if (addr->reserved != 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static bool hvsock_addr_bound(const struct sockaddr_hv *addr) ++{ ++ return !!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY); ++} ++ ++static int hvsock_addr_cast(const struct sockaddr *addr, size_t len, ++ struct sockaddr_hv **out_addr) ++{ ++ if (len < sizeof(**out_addr)) ++ return -EFAULT; ++ ++ *out_addr = (struct sockaddr_hv *)addr; ++ return hvsock_addr_validate(*out_addr); ++} ++ ++static int __hvsock_do_bind(struct hvsock_sock *hvsk, ++ struct sockaddr_hv *addr) ++{ ++ struct sockaddr_hv hv_addr; ++ int ret = 0; ++ ++ hvsock_addr_init(&hv_addr, addr->shv_service_guid); ++ ++ mutex_lock(&hvsock_mutex); ++ ++ if (!uuid_le_cmp(addr->shv_service_guid, SHV_SERVICE_ID_ANY)) { ++ do { ++ uuid_le_gen(&hv_addr.shv_service_guid); ++ } while (hvsock_find_bound_socket(&hv_addr)); ++ } else { ++ if (hvsock_find_bound_socket(&hv_addr)) { ++ ret = -EADDRINUSE; ++ goto out; ++ } ++ } ++ ++ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_guid); ++ ++ sock_hold(&hvsk->sk); ++ list_add(&hvsk->bound_list, &hvsock_bound_list); ++out: ++ mutex_unlock(&hvsock_mutex); ++ ++ return ret; ++} ++ ++static int __hvsock_bind(struct sock *sk, struct sockaddr_hv *addr) ++{ ++ struct hvsock_sock *hvsk = 
sk_to_hvsock(sk); ++ int ret; ++ ++ if (hvsock_addr_bound(&hvsk->local_addr)) ++ return -EINVAL; ++ ++ switch (sk->sk_socket->type) { ++ case SOCK_STREAM: ++ ret = __hvsock_do_bind(hvsk, addr); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ ++ return ret; ++} ++ ++/* Autobind this socket to the local address if necessary. */ ++static int hvsock_auto_bind(struct hvsock_sock *hvsk) ++{ ++ struct sock *sk = hvsock_to_sk(hvsk); ++ struct sockaddr_hv local_addr; ++ ++ if (hvsock_addr_bound(&hvsk->local_addr)) ++ return 0; ++ hvsock_addr_init(&local_addr, SHV_SERVICE_ID_ANY); ++ return __hvsock_bind(sk, &local_addr); ++} ++ ++static void hvsock_sk_destruct(struct sock *sk) ++{ ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; ++ ++ hvsk = sk_to_hvsock(sk); ++ vfree(hvsk->send); ++ vfree(hvsk->recv); ++ ++ channel = hvsk->channel; ++ if (!channel) ++ return; ++ ++ vmbus_hvsock_device_unregister(channel); ++} ++ ++static void __hvsock_release(struct sock *sk) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *pending; ++ ++ hvsk = sk_to_hvsock(sk); ++ ++ mutex_lock(&hvsock_mutex); ++ ++ if (!list_empty(&hvsk->bound_list)) { ++ list_del_init(&hvsk->bound_list); ++ sock_put(&hvsk->sk); ++ } ++ ++ if (!list_empty(&hvsk->connected_list)) { ++ list_del_init(&hvsk->connected_list); ++ sock_put(&hvsk->sk); ++ } ++ ++ mutex_unlock(&hvsock_mutex); ++ ++ lock_sock(sk); ++ sock_orphan(sk); ++ sk->sk_shutdown = SHUTDOWN_MASK; ++ ++ /* Clean up any sockets that never were accepted. */ ++ while ((pending = hvsock_dequeue_accept(sk)) != NULL) { ++ __hvsock_release(pending); ++ sock_put(pending); ++ } ++ ++ release_sock(sk); ++ sock_put(sk); ++} ++ ++static int hvsock_release(struct socket *sock) ++{ ++ /* If accept() is interrupted by a signal, the temporary socket ++ * struct's sock->sk is NULL. 
++ */ ++ if (sock->sk) { ++ __hvsock_release(sock->sk); ++ sock->sk = NULL; ++ } ++ ++ sock->state = SS_FREE; ++ return 0; ++} ++ ++static struct sock *hvsock_create(struct net *net, struct socket *sock, ++ gfp_t priority, unsigned short type) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ ++ sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0); ++ if (!sk) ++ return NULL; ++ ++ sock_init_data(sock, sk); ++ ++ /* sk->sk_type is normally set in sock_init_data, but only if sock ++ * is non-NULL. We make sure that our sockets always have a type by ++ * setting it here if needed. ++ */ ++ if (!sock) ++ sk->sk_type = type; ++ ++ sk->sk_destruct = hvsock_sk_destruct; ++ ++ /* Looks stream-based socket doesn't need this. */ ++ sk->sk_backlog_rcv = NULL; ++ ++ sk->sk_state = 0; ++ sock_reset_flag(sk, SOCK_DONE); ++ ++ hvsk = sk_to_hvsock(sk); ++ ++ hvsk->send = NULL; ++ hvsk->recv = NULL; ++ ++ hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY); ++ hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY); ++ ++ INIT_LIST_HEAD(&hvsk->bound_list); ++ INIT_LIST_HEAD(&hvsk->connected_list); ++ ++ INIT_LIST_HEAD(&hvsk->accept_queue); ++ mutex_init(&hvsk->accept_queue_mutex); ++ ++ hvsk->peer_shutdown = 0; ++ ++ return sk; ++} ++ ++static int hvsock_bind(struct socket *sock, struct sockaddr *addr, ++ int addr_len) ++{ ++ struct sockaddr_hv *hv_addr; ++ struct sock *sk; ++ int ret; ++ ++ sk = sock->sk; ++ ++ if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0) ++ return -EINVAL; ++ ++ if (uuid_le_cmp(hv_addr->shv_vm_guid, NULL_UUID_LE)) ++ return -EINVAL; ++ ++ lock_sock(sk); ++ ret = __hvsock_bind(sk, hv_addr); ++ release_sock(sk); ++ ++ return ret; ++} ++ ++static int hvsock_getname(struct socket *sock, ++ struct sockaddr *addr, int *addr_len, int peer) ++{ ++ struct sockaddr_hv *hv_addr; ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret; ++ ++ sk = sock->sk; ++ hvsk = sk_to_hvsock(sk); ++ ret = 0; ++ ++ lock_sock(sk); ++ ++ if (peer) { ++ if 
(sock->state != SS_CONNECTED) { ++ ret = -ENOTCONN; ++ goto out; ++ } ++ hv_addr = &hvsk->remote_addr; ++ } else { ++ hv_addr = &hvsk->local_addr; ++ } ++ ++ __sockaddr_check_size(sizeof(*hv_addr)); ++ ++ memcpy(addr, hv_addr, sizeof(*hv_addr)); ++ *addr_len = sizeof(*hv_addr); ++ ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static void get_ringbuffer_rw_status(struct vmbus_channel *channel, ++ bool *can_read, bool *can_write) ++{ ++ u32 avl_read_bytes, avl_write_bytes, dummy; ++ ++ if (can_read) { ++ hv_get_ringbuffer_availbytes(&channel->inbound, ++ &avl_read_bytes, ++ &dummy); ++ /* 0-size payload means FIN */ ++ *can_read = avl_read_bytes >= HVSOCK_PKT_LEN(0); ++ } ++ ++ if (can_write) { ++ hv_get_ringbuffer_availbytes(&channel->outbound, ++ &dummy, ++ &avl_write_bytes); ++ ++ /* We only write if there is enough space */ ++ *can_write = avl_write_bytes > HVSOCK_PKT_LEN(PAGE_SIZE_4K); ++ } ++} ++ ++static size_t get_ringbuffer_writable_bytes(struct vmbus_channel *channel) ++{ ++ u32 avl_write_bytes, dummy; ++ size_t ret; ++ ++ hv_get_ringbuffer_availbytes(&channel->outbound, ++ &dummy, ++ &avl_write_bytes); ++ ++ /* The ringbuffer mustn't be 100% full, and we should reserve a ++ * zero-length-payload packet for the FIN: see hv_ringbuffer_write() ++ * and hvsock_shutdown(). ++ */ ++ if (avl_write_bytes < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) ++ return 0; ++ ret = avl_write_bytes - HVSOCK_PKT_LEN(1) - HVSOCK_PKT_LEN(0); ++ ++ return round_down(ret, 8); ++} ++ ++static int hvsock_get_send_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->send = vmalloc(sizeof(*hvsk->send)); ++ return hvsk->send ? 
0 : -ENOMEM; ++} ++ ++static void hvsock_put_send_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->send); ++ hvsk->send = NULL; ++} ++ ++static int hvsock_send_data(struct vmbus_channel *channel, ++ struct hvsock_sock *hvsk, ++ size_t to_write) ++{ ++ hvsk->send->hdr.pkt_type = 1; ++ hvsk->send->hdr.data_size = to_write; ++ return vmbus_sendpacket(channel, &hvsk->send->hdr, ++ sizeof(hvsk->send->hdr) + to_write, ++ 0, VM_PKT_DATA_INBAND, 0); ++} ++ ++static int hvsock_get_recv_buf(struct hvsock_sock *hvsk) ++{ ++ hvsk->recv = vmalloc(sizeof(*hvsk->recv)); ++ return hvsk->recv ? 0 : -ENOMEM; ++} ++ ++static void hvsock_put_recv_buf(struct hvsock_sock *hvsk) ++{ ++ vfree(hvsk->recv); ++ hvsk->recv = NULL; ++} ++ ++static int hvsock_recv_data(struct vmbus_channel *channel, ++ struct hvsock_sock *hvsk, ++ size_t *payload_len) ++{ ++ u32 buffer_actual_len; ++ u64 dummy_req_id; ++ int ret; ++ ++ ret = vmbus_recvpacket(channel, &hvsk->recv->hdr, ++ sizeof(hvsk->recv->hdr) + ++ sizeof(hvsk->recv->buf), ++ &buffer_actual_len, &dummy_req_id); ++ if (ret != 0 || buffer_actual_len <= sizeof(hvsk->recv->hdr)) ++ *payload_len = 0; ++ else ++ *payload_len = hvsk->recv->hdr.data_size; ++ ++ return ret; ++} ++ ++static int hvsock_shutdown(struct socket *sock, int mode) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret = 0; ++ ++ if (mode < SHUT_RD || mode > SHUT_RDWR) ++ return -EINVAL; ++ /* This maps: ++ * SHUT_RD (0) -> RCV_SHUTDOWN (1) ++ * SHUT_WR (1) -> SEND_SHUTDOWN (2) ++ * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) ++ */ ++ ++mode; ++ ++ if (sock->state != SS_CONNECTED) ++ return -ENOTCONN; ++ ++ sock->state = SS_DISCONNECTING; ++ ++ sk = sock->sk; ++ ++ lock_sock(sk); ++ ++ sk->sk_shutdown |= mode; ++ sk->sk_state_change(sk); ++ ++ if (mode & SEND_SHUTDOWN) { ++ hvsk = sk_to_hvsock(sk); ++ ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) ++ goto out; ++ ++ /* It can't fail: see get_ringbuffer_writable_bytes(). 
*/ ++ (void)hvsock_send_data(hvsk->channel, hvsk, 0); ++ ++ hvsock_put_send_buf(hvsk); ++ } ++ ++out: ++ release_sock(sk); ++ ++ return ret; ++} ++ ++static unsigned int hvsock_poll(struct file *file, struct socket *sock, ++ poll_table *wait) ++{ ++ struct vmbus_channel *channel; ++ bool can_read, can_write; ++ struct hvsock_sock *hvsk; ++ unsigned int mask; ++ struct sock *sk; ++ ++ sk = sock->sk; ++ hvsk = sk_to_hvsock(sk); ++ ++ poll_wait(file, sk_sleep(sk), wait); ++ mask = 0; ++ ++ if (sk->sk_err) ++ /* Signify that there has been an error on this socket. */ ++ mask |= POLLERR; ++ ++ /* INET sockets treat local write shutdown and peer write shutdown as a ++ * case of POLLHUP set. ++ */ ++ if ((sk->sk_shutdown == SHUTDOWN_MASK) || ++ ((sk->sk_shutdown & SEND_SHUTDOWN) && ++ (hvsk->peer_shutdown & SEND_SHUTDOWN))) { ++ mask |= POLLHUP; ++ } ++ ++ if (sk->sk_shutdown & RCV_SHUTDOWN || ++ hvsk->peer_shutdown & SEND_SHUTDOWN) { ++ mask |= POLLRDHUP; ++ } ++ ++ lock_sock(sk); ++ ++ /* Listening sockets that have connections in their accept ++ * queue can be read. ++ */ ++ if (sk->sk_state == SS_LISTEN && !hvsock_is_accept_queue_empty(sk)) ++ mask |= POLLIN | POLLRDNORM; ++ ++ /* The mutex is to against hvsock_open_connection() */ ++ mutex_lock(&hvsock_mutex); ++ ++ channel = hvsk->channel; ++ if (channel) { ++ /* If there is something in the queue then we can read */ ++ get_ringbuffer_rw_status(channel, &can_read, &can_write); ++ ++ if (!can_read && hvsk->recv) ++ can_read = true; ++ ++ if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read) ++ mask |= POLLIN | POLLRDNORM; ++ } else { ++ can_write = false; ++ } ++ ++ mutex_unlock(&hvsock_mutex); ++ ++ /* Sockets whose connections have been closed terminated should ++ * also be considered read, and we check the shutdown flag for that. ++ */ ++ if (sk->sk_shutdown & RCV_SHUTDOWN || ++ hvsk->peer_shutdown & SEND_SHUTDOWN) { ++ mask |= POLLIN | POLLRDNORM; ++ } ++ ++ /* Connected sockets that can produce data can be written. 
*/ ++ if (sk->sk_state == SS_CONNECTED && can_write && ++ !(sk->sk_shutdown & SEND_SHUTDOWN)) { ++ /* Remove POLLWRBAND since INET sockets are not setting it. ++ */ ++ mask |= POLLOUT | POLLWRNORM; ++ } ++ ++ /* Simulate INET socket poll behaviors, which sets ++ * POLLOUT|POLLWRNORM when peer is closed and nothing to read, ++ * but local send is not shutdown. ++ */ ++ if (sk->sk_state == SS_UNCONNECTED && ++ !(sk->sk_shutdown & SEND_SHUTDOWN)) ++ mask |= POLLOUT | POLLWRNORM; ++ ++ release_sock(sk); ++ ++ return mask; ++} ++ ++/* This function runs in the tasklet context of process_chn_event() */ ++static void hvsock_on_channel_cb(void *ctx) ++{ ++ struct sock *sk = (struct sock *)ctx; ++ struct vmbus_channel *channel; ++ struct hvsock_sock *hvsk; ++ bool can_read, can_write; ++ ++ hvsk = sk_to_hvsock(sk); ++ channel = hvsk->channel; ++ BUG_ON(!channel); ++ ++ get_ringbuffer_rw_status(channel, &can_read, &can_write); ++ ++ if (can_read) ++ sk->sk_data_ready(sk); ++ ++ if (can_write) ++ sk->sk_write_space(sk); ++} ++ ++static void hvsock_close_connection(struct vmbus_channel *channel) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ ++ mutex_lock(&hvsock_mutex); ++ ++ sk = hvsock_find_connected_socket_by_channel(channel); ++ ++ /* The guest has already closed the connection? 
*/ ++ if (!sk) ++ goto out; ++ ++ sk->sk_state = SS_UNCONNECTED; ++ sock_set_flag(sk, SOCK_DONE); ++ ++ hvsk = sk_to_hvsock(sk); ++ hvsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; ++ ++ sk->sk_state_change(sk); ++out: ++ mutex_unlock(&hvsock_mutex); ++} ++ ++static int hvsock_open_connection(struct vmbus_channel *channel) ++{ ++ struct hvsock_sock *hvsk = NULL, *new_hvsk = NULL; ++ uuid_le *instance, *service_id; ++ unsigned char conn_from_host; ++ struct sockaddr_hv hv_addr; ++ struct sock *sk, *new_sk = NULL; ++ int ret; ++ ++ instance = &channel->offermsg.offer.if_instance; ++ service_id = &channel->offermsg.offer.if_type; ++ ++ /* The first byte != 0 means the host initiated the connection. */ ++ conn_from_host = channel->offermsg.offer.u.pipe.user_def[0]; ++ ++ mutex_lock(&hvsock_mutex); ++ ++ hvsock_addr_init(&hv_addr, conn_from_host ? *service_id : *instance); ++ sk = hvsock_find_bound_socket(&hv_addr); ++ ++ if (!sk || (conn_from_host && sk->sk_state != SS_LISTEN) || ++ (!conn_from_host && sk->sk_state != SS_CONNECTING)) { ++ ret = -ENXIO; ++ goto out; ++ } ++ ++ if (conn_from_host) { ++ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { ++ ret = -ECONNREFUSED; ++ goto out; ++ } ++ ++ new_sk = hvsock_create(sock_net(sk), NULL, GFP_KERNEL, ++ sk->sk_type); ++ if (!new_sk) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ new_sk->sk_state = SS_CONNECTING; ++ new_hvsk = sk_to_hvsock(new_sk); ++ new_hvsk->channel = channel; ++ hvsock_addr_init(&new_hvsk->local_addr, *service_id); ++ hvsock_addr_init(&new_hvsk->remote_addr, *instance); ++ } else { ++ hvsk = sk_to_hvsock(sk); ++ hvsk->channel = channel; ++ } ++ ++ set_channel_read_state(channel, false); ++ ret = vmbus_open(channel, RINGBUFFER_HVSOCK_SND_SIZE, ++ RINGBUFFER_HVSOCK_RCV_SIZE, NULL, 0, ++ hvsock_on_channel_cb, conn_from_host ? 
new_sk : sk); ++ if (ret != 0) { ++ if (conn_from_host) { ++ new_hvsk->channel = NULL; ++ sock_put(new_sk); ++ } else { ++ hvsk->channel = NULL; ++ } ++ goto out; ++ } ++ ++ vmbus_set_chn_rescind_callback(channel, hvsock_close_connection); ++ ++ /* see get_ringbuffer_rw_status() */ ++ set_channel_pending_send_size(channel, ++ HVSOCK_PKT_LEN(PAGE_SIZE_4K) + 1); ++ ++ if (conn_from_host) { ++ new_sk->sk_state = SS_CONNECTED; ++ ++ sock_hold(&new_hvsk->sk); ++ list_add(&new_hvsk->connected_list, &hvsock_connected_list); ++ ++ hvsock_enqueue_accept(sk, new_sk); ++ } else { ++ sk->sk_state = SS_CONNECTED; ++ sk->sk_socket->state = SS_CONNECTED; ++ ++ sock_hold(&hvsk->sk); ++ list_add(&hvsk->connected_list, &hvsock_connected_list); ++ } ++ ++ sk->sk_state_change(sk); ++out: ++ mutex_unlock(&hvsock_mutex); ++ return ret; ++} ++ ++static void hvsock_connect_timeout(struct work_struct *work) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ ++ hvsk = container_of(work, struct hvsock_sock, dwork.work); ++ sk = hvsock_to_sk(hvsk); ++ ++ lock_sock(sk); ++ if ((sk->sk_state == SS_CONNECTING) && ++ (sk->sk_shutdown != SHUTDOWN_MASK)) { ++ sk->sk_state = SS_UNCONNECTED; ++ sk->sk_err = ETIMEDOUT; ++ sk->sk_error_report(sk); ++ } ++ release_sock(sk); ++ ++ sock_put(sk); ++} ++ ++static int hvsock_connect_wait(struct socket *sock, ++ int flags, int current_ret) ++{ ++ struct sock *sk = sock->sk; ++ struct hvsock_sock *hvsk; ++ int ret = current_ret; ++ DEFINE_WAIT(wait); ++ long timeout; ++ ++ hvsk = sk_to_hvsock(sk); ++ timeout = HVSOCK_CONNECT_TIMEOUT; ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ ++ while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { ++ if (flags & O_NONBLOCK) { ++ /* If we're not going to block, we schedule a timeout ++ * function to generate a timeout on the connection ++ * attempt, in case the peer doesn't respond in a ++ * timely manner. We hold on to the socket until the ++ * timeout fires. 
++ */ ++ sock_hold(sk); ++ INIT_DELAYED_WORK(&hvsk->dwork, ++ hvsock_connect_timeout); ++ schedule_delayed_work(&hvsk->dwork, timeout); ++ ++ /* Skip ahead to preserve error code set above. */ ++ goto out_wait; ++ } ++ ++ release_sock(sk); ++ timeout = schedule_timeout(timeout); ++ lock_sock(sk); ++ ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ goto out_wait_error; ++ } else if (timeout == 0) { ++ ret = -ETIMEDOUT; ++ goto out_wait_error; ++ } ++ ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ } ++ ++ ret = sk->sk_err ? -sk->sk_err : 0; ++ ++out_wait_error: ++ if (ret < 0) { ++ sk->sk_state = SS_UNCONNECTED; ++ sock->state = SS_UNCONNECTED; ++ } ++out_wait: ++ finish_wait(sk_sleep(sk), &wait); ++ return ret; ++} ++ ++static int hvsock_connect(struct socket *sock, struct sockaddr *addr, ++ int addr_len, int flags) ++{ ++ struct sockaddr_hv *remote_addr; ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret = 0; ++ ++ sk = sock->sk; ++ hvsk = sk_to_hvsock(sk); ++ ++ lock_sock(sk); ++ ++ switch (sock->state) { ++ case SS_CONNECTED: ++ ret = -EISCONN; ++ goto out; ++ case SS_DISCONNECTING: ++ ret = -EINVAL; ++ goto out; ++ case SS_CONNECTING: ++ /* This continues on so we can move sock into the SS_CONNECTED ++ * state once the connection has completed (at which point err ++ * will be set to zero also). Otherwise, we will either wait ++ * for the connection or return -EALREADY should this be a ++ * non-blocking call. ++ */ ++ ret = -EALREADY; ++ break; ++ default: ++ if ((sk->sk_state == SS_LISTEN) || ++ hvsock_addr_cast(addr, addr_len, &remote_addr) != 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* Set the remote address that we are connecting to. 
*/ ++ memcpy(&hvsk->remote_addr, remote_addr, ++ sizeof(hvsk->remote_addr)); ++ ++ ret = hvsock_auto_bind(hvsk); ++ if (ret) ++ goto out; ++ ++ sk->sk_state = SS_CONNECTING; ++ ++ ret = vmbus_send_tl_connect_request( ++ &hvsk->local_addr.shv_service_guid, ++ &hvsk->remote_addr.shv_service_guid); ++ if (ret < 0) ++ goto out; ++ ++ /* Mark sock as connecting and set the error code to in ++ * progress in case this is a non-blocking connect. ++ */ ++ sock->state = SS_CONNECTING; ++ ret = -EINPROGRESS; ++ } ++ ++ ret = hvsock_connect_wait(sock, flags, ret); ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static int hvsock_accept_wait(struct sock *listener, ++ struct socket *newsock, int flags) ++{ ++ struct hvsock_sock *hvconnected; ++ struct sock *connected; ++ ++ DEFINE_WAIT(wait); ++ long timeout; ++ ++ int ret = 0; ++ ++ /* Wait for children sockets to appear; these are the new sockets ++ * created upon connection establishment. ++ */ ++ timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); ++ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); ++ ++ while ((connected = hvsock_dequeue_accept(listener)) == NULL && ++ listener->sk_err == 0) { ++ release_sock(listener); ++ timeout = schedule_timeout(timeout); ++ lock_sock(listener); ++ ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ goto out_wait; ++ } else if (timeout == 0) { ++ ret = -EAGAIN; ++ goto out_wait; ++ } ++ ++ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); ++ } ++ ++ if (listener->sk_err) ++ ret = -listener->sk_err; ++ ++ if (connected) { ++ lock_sock(connected); ++ hvconnected = sk_to_hvsock(connected); ++ ++ if (!ret) { ++ newsock->state = SS_CONNECTED; ++ sock_graft(connected, newsock); ++ } ++ release_sock(connected); ++ sock_put(connected); ++ } ++ ++out_wait: ++ finish_wait(sk_sleep(listener), &wait); ++ return ret; ++} ++ ++static int hvsock_accept(struct socket *sock, struct socket *newsock, ++ int flags) ++{ ++ struct sock *listener; ++ int 
ret; ++ ++ listener = sock->sk; ++ ++ lock_sock(listener); ++ ++ if (sock->type != SOCK_STREAM) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ if (listener->sk_state != SS_LISTEN) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ ret = hvsock_accept_wait(listener, newsock, flags); ++out: ++ release_sock(listener); ++ return ret; ++} ++ ++static int hvsock_listen(struct socket *sock, int backlog) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret = 0; ++ ++ sk = sock->sk; ++ lock_sock(sk); ++ ++ if (sock->type != SOCK_STREAM) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ if (sock->state != SS_UNCONNECTED) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (backlog <= 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ if (backlog > HVSOCK_MAX_BACKLOG) ++ backlog = HVSOCK_MAX_BACKLOG; ++ ++ hvsk = sk_to_hvsock(sk); ++ if (!hvsock_addr_bound(&hvsk->local_addr)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ sk->sk_ack_backlog = 0; ++ sk->sk_max_ack_backlog = backlog; ++ sk->sk_state = SS_LISTEN; ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static int hvsock_sendmsg_wait(struct sock *sk, struct msghdr *msg, ++ size_t len) ++{ ++ struct hvsock_sock *hvsk = sk_to_hvsock(sk); ++ struct vmbus_channel *channel; ++ size_t total_to_write = len; ++ size_t total_written = 0; ++ DEFINE_WAIT(wait); ++ bool can_write; ++ long timeout; ++ int ret = -EIO; ++ ++ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; ++ ++ while (total_to_write > 0) { ++ size_t to_write, max_writable; ++ ++ while (1) { ++ get_ringbuffer_rw_status(channel, NULL, &can_write); ++ ++ if (can_write || sk->sk_err != 0 || ++ (sk->sk_shutdown & SEND_SHUTDOWN) || ++ (hvsk->peer_shutdown & RCV_SHUTDOWN)) ++ break; ++ ++ /* Don't wait for non-blocking sockets. 
*/ ++ if (timeout == 0) { ++ ret = -EAGAIN; ++ goto out_wait; ++ } ++ ++ release_sock(sk); ++ ++ timeout = schedule_timeout(timeout); ++ ++ lock_sock(sk); ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ goto out_wait; ++ } else if (timeout == 0) { ++ ret = -EAGAIN; ++ goto out_wait; ++ } ++ ++ prepare_to_wait(sk_sleep(sk), &wait, ++ TASK_INTERRUPTIBLE); ++ } ++ ++ /* These checks occur both as part of and after the loop ++ * conditional since we need to check before and after ++ * sleeping. ++ */ ++ if (sk->sk_err) { ++ ret = -sk->sk_err; ++ goto out_wait; ++ } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || ++ (hvsk->peer_shutdown & RCV_SHUTDOWN)) { ++ ret = -EPIPE; ++ goto out_wait; ++ } ++ ++ /* Note: that write will only write as many bytes as possible ++ * in the ringbuffer. It is the caller's responsibility to ++ * check how many bytes we actually wrote. ++ */ ++ do { ++ max_writable = get_ringbuffer_writable_bytes(channel); ++ if (max_writable == 0) ++ goto out_wait; ++ ++ to_write = min_t(size_t, sizeof(hvsk->send->buf), ++ total_to_write); ++ if (to_write > max_writable) ++ to_write = max_writable; ++ ++ ret = hvsock_get_send_buf(hvsk); ++ if (ret < 0) ++ goto out_wait; ++ ++ ret = memcpy_from_msg(hvsk->send->buf, msg, to_write); ++ if (ret != 0) { ++ hvsock_put_send_buf(hvsk); ++ goto out_wait; ++ } ++ ++ ret = hvsock_send_data(channel, hvsk, to_write); ++ hvsock_put_send_buf(hvsk); ++ if (ret != 0) ++ goto out_wait; ++ ++ total_written += to_write; ++ total_to_write -= to_write; ++ } while (total_to_write > 0); ++ } ++ ++out_wait: ++ if (total_written > 0) ++ ret = total_written; ++ ++ finish_wait(sk_sleep(sk), &wait); ++ return ret; ++} ++ ++static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, ++ size_t len) ++{ ++ struct hvsock_sock *hvsk; ++ struct sock *sk; ++ int ret; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ if (msg->msg_flags & ~MSG_DONTWAIT) ++ return -EOPNOTSUPP; ++ ++ sk = sock->sk; ++ hvsk = 
sk_to_hvsock(sk); ++ ++ lock_sock(sk); ++ ++ /* Callers should not provide a destination with stream sockets. */ ++ if (msg->msg_namelen) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ /* Send data only if both sides are not shutdown in the direction. */ ++ if (sk->sk_shutdown & SEND_SHUTDOWN || ++ hvsk->peer_shutdown & RCV_SHUTDOWN) { ++ ret = -EPIPE; ++ goto out; ++ } ++ ++ if (sk->sk_state != SS_CONNECTED || ++ !hvsock_addr_bound(&hvsk->local_addr)) { ++ ret = -ENOTCONN; ++ goto out; ++ } ++ ++ if (!hvsock_addr_bound(&hvsk->remote_addr)) { ++ ret = -EDESTADDRREQ; ++ goto out; ++ } ++ ++ ret = hvsock_sendmsg_wait(sk, msg, len); ++out: ++ release_sock(sk); ++ ++ /* ret should be a bigger-than-0 total_written or a negative err ++ * code. ++ */ ++ BUG_ON(ret == 0); ++ ++ return ret; ++} ++ ++static int hvsock_recvmsg_wait(struct sock *sk, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ struct hvsock_sock *hvsk = sk_to_hvsock(sk); ++ size_t to_read, total_to_read = len; ++ struct vmbus_channel *channel; ++ DEFINE_WAIT(wait); ++ size_t copied = 0; ++ bool can_read; ++ long timeout; ++ int ret = 0; ++ ++ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); ++ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ++ channel = hvsk->channel; ++ ++ while (1) { ++ bool need_refill = !hvsk->recv; ++ ++ if (need_refill) { ++ if (hvsk->peer_shutdown & SEND_SHUTDOWN) ++ can_read = false; ++ else ++ get_ringbuffer_rw_status(channel, &can_read, ++ NULL); ++ } else { ++ can_read = true; ++ } ++ ++ if (can_read) { ++ size_t payload_len; ++ ++ if (need_refill) { ++ ret = hvsock_get_recv_buf(hvsk); ++ if (ret < 0) { ++ if (copied > 0) ++ ret = copied; ++ goto out_wait; ++ } ++ ++ ret = hvsock_recv_data(channel, hvsk, ++ &payload_len); ++ if (ret != 0 || ++ payload_len > sizeof(hvsk->recv->buf)) { ++ ret = -EIO; ++ hvsock_put_recv_buf(hvsk); ++ goto out_wait; ++ } ++ ++ if (payload_len == 0) { ++ ret = copied; ++ hvsock_put_recv_buf(hvsk); ++ hvsk->peer_shutdown |= 
SEND_SHUTDOWN; ++ break; ++ } ++ ++ hvsk->recv->data_len = payload_len; ++ hvsk->recv->data_offset = 0; ++ } ++ ++ to_read = min_t(size_t, total_to_read, ++ hvsk->recv->data_len); ++ ++ ret = memcpy_to_msg(msg, hvsk->recv->buf + ++ hvsk->recv->data_offset, ++ to_read); ++ if (ret != 0) ++ break; ++ ++ copied += to_read; ++ total_to_read -= to_read; ++ ++ hvsk->recv->data_len -= to_read; ++ ++ if (hvsk->recv->data_len == 0) ++ hvsock_put_recv_buf(hvsk); ++ else ++ hvsk->recv->data_offset += to_read; ++ ++ if (total_to_read == 0) ++ break; ++ } else { ++ if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || ++ (hvsk->peer_shutdown & SEND_SHUTDOWN)) ++ break; ++ ++ /* Don't wait for non-blocking sockets. */ ++ if (timeout == 0) { ++ ret = -EAGAIN; ++ break; ++ } ++ ++ if (copied > 0) ++ break; ++ ++ release_sock(sk); ++ timeout = schedule_timeout(timeout); ++ lock_sock(sk); ++ ++ if (signal_pending(current)) { ++ ret = sock_intr_errno(timeout); ++ break; ++ } else if (timeout == 0) { ++ ret = -EAGAIN; ++ break; ++ } ++ ++ prepare_to_wait(sk_sleep(sk), &wait, ++ TASK_INTERRUPTIBLE); ++ } ++ } ++ ++ if (sk->sk_err) ++ ret = -sk->sk_err; ++ else if (sk->sk_shutdown & RCV_SHUTDOWN) ++ ret = 0; ++ ++ if (copied > 0) ++ ret = copied; ++out_wait: ++ finish_wait(sk_sleep(sk), &wait); ++ return ret; ++} ++ ++static int hvsock_recvmsg(struct socket *sock, struct msghdr *msg, ++ size_t len, int flags) ++{ ++ struct sock *sk = sock->sk; ++ int ret; ++ ++ lock_sock(sk); ++ ++ if (sk->sk_state != SS_CONNECTED) { ++ /* Recvmsg is supposed to return 0 if a peer performs an ++ * orderly shutdown. Differentiate between that case and when a ++ * peer has not connected or a local shutdown occurred with the ++ * SOCK_DONE flag. ++ */ ++ if (sock_flag(sk, SOCK_DONE)) ++ ret = 0; ++ else ++ ret = -ENOTCONN; ++ ++ goto out; ++ } ++ ++ /* We ignore msg->addr_name/len. 
*/ ++ if (flags & ~MSG_DONTWAIT) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ /* We don't check peer_shutdown flag here since peer may actually shut ++ * down, but there can be data in the queue that a local socket can ++ * receive. ++ */ ++ if (sk->sk_shutdown & RCV_SHUTDOWN) { ++ ret = 0; ++ goto out; ++ } ++ ++ /* It is valid on Linux to pass in a zero-length receive buffer. This ++ * is not an error. We may as well bail out now. ++ */ ++ if (!len) { ++ ret = 0; ++ goto out; ++ } ++ ++ ret = hvsock_recvmsg_wait(sk, msg, len, flags); ++out: ++ release_sock(sk); ++ return ret; ++} ++ ++static const struct proto_ops hvsock_ops = { ++ .family = PF_HYPERV, ++ .owner = THIS_MODULE, ++ .release = hvsock_release, ++ .bind = hvsock_bind, ++ .connect = hvsock_connect, ++ .socketpair = sock_no_socketpair, ++ .accept = hvsock_accept, ++ .getname = hvsock_getname, ++ .poll = hvsock_poll, ++ .ioctl = sock_no_ioctl, ++ .listen = hvsock_listen, ++ .shutdown = hvsock_shutdown, ++ .setsockopt = sock_no_setsockopt, ++ .getsockopt = sock_no_getsockopt, ++ .sendmsg = hvsock_sendmsg, ++ .recvmsg = hvsock_recvmsg, ++ .mmap = sock_no_mmap, ++ .sendpage = sock_no_sendpage, ++}; ++ ++static int hvsock_create_sock(struct net *net, struct socket *sock, ++ int protocol, int kern) ++{ ++ struct sock *sk; ++ ++ if (protocol != 0 && protocol != SHV_PROTO_RAW) ++ return -EPROTONOSUPPORT; ++ ++ switch (sock->type) { ++ case SOCK_STREAM: ++ sock->ops = &hvsock_ops; ++ break; ++ default: ++ return -ESOCKTNOSUPPORT; ++ } ++ ++ sock->state = SS_UNCONNECTED; ++ ++ sk = hvsock_create(net, sock, GFP_KERNEL, 0); ++ return sk ? 
0 : -ENOMEM; ++} ++ ++static const struct net_proto_family hvsock_family_ops = { ++ .family = AF_HYPERV, ++ .create = hvsock_create_sock, ++ .owner = THIS_MODULE, ++}; ++ ++static int hvsock_probe(struct hv_device *hdev, ++ const struct hv_vmbus_device_id *dev_id) ++{ ++ struct vmbus_channel *channel = hdev->channel; ++ ++ /* We ignore the error return code to suppress the unnecessary ++ * error message in vmbus_probe(): on error the host will rescind ++ * the offer in 30 seconds and we can do cleanup at that time. ++ */ ++ (void)hvsock_open_connection(channel); ++ ++ return 0; ++} ++ ++static int hvsock_remove(struct hv_device *hdev) ++{ ++ struct vmbus_channel *channel = hdev->channel; ++ ++ vmbus_close(channel); ++ ++ return 0; ++} ++ ++/* It's not really used. See vmbus_match() and vmbus_probe(). */ ++static const struct hv_vmbus_device_id id_table[] = { ++ {}, ++}; ++ ++static struct hv_driver hvsock_drv = { ++ .name = "hv_sock", ++ .hvsock = true, ++ .id_table = id_table, ++ .probe = hvsock_probe, ++ .remove = hvsock_remove, ++}; ++ ++static int __init hvsock_init(void) ++{ ++ int ret; ++ ++ if (vmbus_proto_version < VERSION_WIN10) ++ return -ENODEV; ++ ++ ret = vmbus_driver_register(&hvsock_drv); ++ if (ret) { ++ pr_err("failed to register hv_sock driver\n"); ++ return ret; ++ } ++ ++ ret = proto_register(&hvsock_proto, 0); ++ if (ret) { ++ pr_err("failed to register protocol\n"); ++ goto unreg_hvsock_drv; ++ } ++ ++ ret = sock_register(&hvsock_family_ops); ++ if (ret) { ++ pr_err("failed to register address family\n"); ++ goto unreg_proto; ++ } ++ ++ return 0; ++ ++unreg_proto: ++ proto_unregister(&hvsock_proto); ++unreg_hvsock_drv: ++ vmbus_driver_unregister(&hvsock_drv); ++ return ret; ++} ++ ++static void __exit hvsock_exit(void) ++{ ++ sock_unregister(AF_HYPERV); ++ proto_unregister(&hvsock_proto); ++ vmbus_driver_unregister(&hvsock_drv); ++} ++ ++module_init(hvsock_init); ++module_exit(hvsock_exit); ++ ++MODULE_DESCRIPTION("Hyper-V Sockets"); 
++MODULE_LICENSE("Dual BSD/GPL"); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0002-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch b/projects/kernel-config/patches-4.9.x/0002-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch new file mode 100644 index 000000000..6e5f202bf --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0002-vmbus-Don-t-spam-the-logs-with-unknown-GUIDs.patch @@ -0,0 +1,30 @@ +From 43b90bff8334b8d4df02f1832df2510ede671bd1 Mon Sep 17 00:00:00 2001 +From: Rolf Neugebauer +Date: Mon, 23 May 2016 18:55:45 +0100 +Subject: [PATCH 02/10] vmbus: Don't spam the logs with unknown GUIDs + +With Hyper-V sockets device types are introduced on the fly. The pr_info() +then prints a message on every connection, which is way too verbose. Since +there doesn't seem to be an easy way to check for registered services, +disable the pr_info() completely. + +Signed-off-by: Rolf Neugebauer +--- + drivers/hv/channel_mgmt.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index d8bc4b910192..8df02f3ca0b2 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -192,7 +192,6 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel) + if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) + return i; + } +- pr_info("Unknown GUID: %pUl\n", guid); + return i; + } + +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0003-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch b/projects/kernel-config/patches-4.9.x/0003-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch new file mode 100644 index 000000000..daac42739 --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0003-Drivers-hv-utils-Fix-the-mapping-between-host-versio.patch @@ -0,0 +1,48 @@ +From 83b5940282a6afe82fa26fd4cab93e88d705ec86 Mon Sep 17 00:00:00 2001 +From: Alex Ng +Date: Sun, 6 Nov 2016 13:14:07 -0800 +Subject: [PATCH 03/10] Drivers: hv: utils: Fix the mapping between host + 
version and protocol to use + +We should intentionally declare the protocols to use for every known host +and default to using the latest protocol if the host is unknown or new. + +Signed-off-by: Alex Ng +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git +(cherry picked from commit 3da0401b4d0e17aea7526db0235d98fa535d903e) +--- + drivers/hv/hv_util.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c +index bcd06306f3e8..e7707747f56d 100644 +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -389,16 +389,19 @@ static int util_probe(struct hv_device *dev, + ts_srv_version = TS_VERSION_1; + hb_srv_version = HB_VERSION_1; + break; +- case(VERSION_WIN10): ++ case VERSION_WIN7: ++ case VERSION_WIN8: ++ case VERSION_WIN8_1: + util_fw_version = UTIL_FW_VERSION; + sd_srv_version = SD_VERSION; +- ts_srv_version = TS_VERSION; ++ ts_srv_version = TS_VERSION_3; + hb_srv_version = HB_VERSION; + break; ++ case VERSION_WIN10: + default: + util_fw_version = UTIL_FW_VERSION; + sd_srv_version = SD_VERSION; +- ts_srv_version = TS_VERSION_3; ++ ts_srv_version = TS_VERSION; + hb_srv_version = HB_VERSION; + } + +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0004-Drivers-hv-vss-Improve-log-messages.patch b/projects/kernel-config/patches-4.9.x/0004-Drivers-hv-vss-Improve-log-messages.patch new file mode 100644 index 000000000..28006f276 --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0004-Drivers-hv-vss-Improve-log-messages.patch @@ -0,0 +1,105 @@ +From b370e69e9fce43cfeec7704adb70adbbc9f81806 Mon Sep 17 00:00:00 2001 +From: Alex Ng +Date: Sun, 6 Nov 2016 13:14:10 -0800 +Subject: [PATCH 04/10] Drivers: hv: vss: Improve log messages. + +Adding log messages to help troubleshoot error cases and transaction +handling. + +Signed-off-by: Alex Ng +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git +(cherry picked from commit 23d2cc0c29eb0e7c6fe4cac88098306c31c40208) +--- + drivers/hv/hv_snapshot.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c +index a76e3db0d01f..b1446d51ef45 100644 +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -121,7 +121,7 @@ static int vss_handle_handshake(struct hv_vss_msg *vss_msg) + default: + return -EINVAL; + } +- pr_debug("VSS: userspace daemon ver. %d connected\n", dm_reg_value); ++ pr_info("VSS: userspace daemon ver. %d connected\n", dm_reg_value); + return 0; + } + +@@ -129,8 +129,10 @@ static int vss_on_msg(void *msg, int len) + { + struct hv_vss_msg *vss_msg = (struct hv_vss_msg *)msg; + +- if (len != sizeof(*vss_msg)) ++ if (len != sizeof(*vss_msg)) { ++ pr_debug("VSS: Message size does not match length\n"); + return -EINVAL; ++ } + + if (vss_msg->vss_hdr.operation == VSS_OP_REGISTER || + vss_msg->vss_hdr.operation == VSS_OP_REGISTER1) { +@@ -138,8 +140,11 @@ static int vss_on_msg(void *msg, int len) + * Don't process registration messages if we're in the middle + * of a transaction processing. + */ +- if (vss_transaction.state > HVUTIL_READY) ++ if (vss_transaction.state > HVUTIL_READY) { ++ pr_debug("VSS: Got unexpected registration request\n"); + return -EINVAL; ++ } ++ + return vss_handle_handshake(vss_msg); + } else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) { + vss_transaction.state = HVUTIL_USERSPACE_RECV; +@@ -156,7 +161,7 @@ static int vss_on_msg(void *msg, int len) + } + } else { + /* This is a spurious call! */ +- pr_warn("VSS: Transaction not active\n"); ++ pr_debug("VSS: Transaction not active\n"); + return -EINVAL; + } + return 0; +@@ -169,8 +174,10 @@ static void vss_send_op(void) + struct hv_vss_msg *vss_msg; + + /* The transaction state is wrong. 
*/ +- if (vss_transaction.state != HVUTIL_HOSTMSG_RECEIVED) ++ if (vss_transaction.state != HVUTIL_HOSTMSG_RECEIVED) { ++ pr_debug("VSS: Unexpected attempt to send to daemon\n"); + return; ++ } + + vss_msg = kzalloc(sizeof(*vss_msg), GFP_KERNEL); + if (!vss_msg) +@@ -211,9 +218,13 @@ static void vss_handle_request(struct work_struct *dummy) + case VSS_OP_HOT_BACKUP: + if (vss_transaction.state < HVUTIL_READY) { + /* Userspace is not registered yet */ ++ pr_debug("VSS: Not ready for request.\n"); + vss_respond_to_host(HV_E_FAIL); + return; + } ++ ++ pr_debug("VSS: Received request for op code: %d\n", ++ vss_transaction.msg->vss_hdr.operation); + vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED; + vss_send_op(); + return; +@@ -356,8 +367,10 @@ hv_vss_init(struct hv_util_service *srv) + + hvt = hvutil_transport_init(vss_devname, CN_VSS_IDX, CN_VSS_VAL, + vss_on_msg, vss_on_reset); +- if (!hvt) ++ if (!hvt) { ++ pr_warn("VSS: Failed to initialize transport\n"); + return -EFAULT; ++ } + + return 0; + } +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0005-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch b/projects/kernel-config/patches-4.9.x/0005-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch new file mode 100644 index 000000000..bdb640ee6 --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0005-Drivers-hv-vss-Operation-timeouts-should-match-host-.patch @@ -0,0 +1,48 @@ +From a33a84158f9f16747d3f6608b7c4e9f1769802e0 Mon Sep 17 00:00:00 2001 +From: Alex Ng +Date: Sun, 6 Nov 2016 13:14:11 -0800 +Subject: [PATCH 05/10] Drivers: hv: vss: Operation timeouts should match host + expectation + +Increase the timeout of backup operations. When system is under I/O load, +it needs more time to freeze. These timeout values should also match the +host timeout values more closely. + +Signed-off-by: Alex Ng +Signed-off-by: K. Y. 
Srinivasan +Signed-off-by: Greg Kroah-Hartman +Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git +(cherry picked from commit b357fd3908c1191f2f56e38aa77f2aecdae18bc8) +--- + drivers/hv/hv_snapshot.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c +index b1446d51ef45..4e543dbb731a 100644 +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -31,7 +31,10 @@ + #define VSS_MINOR 0 + #define VSS_VERSION (VSS_MAJOR << 16 | VSS_MINOR) + +-#define VSS_USERSPACE_TIMEOUT (msecs_to_jiffies(10 * 1000)) ++/* ++ * Timeout values are based on expecations from host ++ */ ++#define VSS_FREEZE_TIMEOUT (15 * 60) + + /* + * Global state maintained for transaction that is being processed. For a class +@@ -187,7 +190,8 @@ static void vss_send_op(void) + + vss_transaction.state = HVUTIL_USERSPACE_REQ; + +- schedule_delayed_work(&vss_timeout_work, VSS_USERSPACE_TIMEOUT); ++ schedule_delayed_work(&vss_timeout_work, op == VSS_OP_FREEZE ? ++ VSS_FREEZE_TIMEOUT * HZ : HV_UTIL_TIMEOUT * HZ); + + rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL); + if (rc) { +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0006-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch b/projects/kernel-config/patches-4.9.x/0006-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch new file mode 100644 index 000000000..eab9773df --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0006-Drivers-hv-vmbus-Use-all-supported-IC-versions-to-ne.patch @@ -0,0 +1,492 @@ +From 18bbea8bb178648186dfcf70f57f6ddfd7ca1151 Mon Sep 17 00:00:00 2001 +From: Alex Ng +Date: Sat, 28 Jan 2017 12:37:17 -0700 +Subject: [PATCH 06/10] Drivers: hv: vmbus: Use all supported IC versions to + negotiate + +Previously, we were assuming that each IC protocol version was tied to a +specific host version. 
For example, some Windows 10 preview hosts only +support v3 TimeSync even though driver assumes v4 is supported by all +Windows 10 hosts. + +The guest will stop trying to negotiate even though older supported +versions may still be offered by the host. + +Make IC version negotiation more robust by going through all versions +that are supported by the guest. + +Fixes: 3da0401b4d0e ("Drivers: hv: utils: Fix the mapping between host +version and protocol to use") + +Reported-by: Rolf Neugebauer +Signed-off-by: Alex Ng +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git +(cherry picked from commit a1656454131880980bc3a5313c8bf66ef5990c91) +--- + drivers/hv/channel_mgmt.c | 80 +++++++++++++++++++++++++++------------- + drivers/hv/hv_fcopy.c | 20 +++++++--- + drivers/hv/hv_kvp.c | 41 +++++++++------------ + drivers/hv/hv_snapshot.c | 18 +++++++-- + drivers/hv/hv_util.c | 94 +++++++++++++++++++++++++---------------------- + include/linux/hyperv.h | 7 ++-- + 6 files changed, 154 insertions(+), 106 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 8df02f3ca0b2..e7949b64bfbc 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -202,33 +202,34 @@ static u16 hv_get_dev_type(const struct vmbus_channel *channel) + * @buf: Raw buffer channel data + * + * @icmsghdrp is of type &struct icmsg_hdr. +- * @negop is of type &struct icmsg_negotiate. + * Set up and fill in default negotiate response message. + * +- * The fw_version specifies the framework version that +- * we can support and srv_version specifies the service +- * version we can support. ++ * The fw_version and fw_vercnt specifies the framework version that ++ * we can support. ++ * ++ * The srv_version and srv_vercnt specifies the service ++ * versions we can support. ++ * ++ * Versions are given in decreasing order. 
++ * ++ * nego_fw_version and nego_srv_version store the selected protocol versions. + * + * Mainly used by Hyper-V drivers. + */ + bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, +- struct icmsg_negotiate *negop, u8 *buf, +- int fw_version, int srv_version) ++ u8 *buf, const int *fw_version, int fw_vercnt, ++ const int *srv_version, int srv_vercnt, ++ int *nego_fw_version, int *nego_srv_version) + { + int icframe_major, icframe_minor; + int icmsg_major, icmsg_minor; + int fw_major, fw_minor; + int srv_major, srv_minor; +- int i; ++ int i, j; + bool found_match = false; ++ struct icmsg_negotiate *negop; + + icmsghdrp->icmsgsize = 0x10; +- fw_major = (fw_version >> 16); +- fw_minor = (fw_version & 0xFFFF); +- +- srv_major = (srv_version >> 16); +- srv_minor = (srv_version & 0xFFFF); +- + negop = (struct icmsg_negotiate *)&buf[ + sizeof(struct vmbuspipe_hdr) + + sizeof(struct icmsg_hdr)]; +@@ -244,13 +245,22 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, + * support. 
+ */ + +- for (i = 0; i < negop->icframe_vercnt; i++) { +- if ((negop->icversion_data[i].major == fw_major) && +- (negop->icversion_data[i].minor == fw_minor)) { +- icframe_major = negop->icversion_data[i].major; +- icframe_minor = negop->icversion_data[i].minor; +- found_match = true; ++ for (i = 0; i < fw_vercnt; i++) { ++ fw_major = (fw_version[i] >> 16); ++ fw_minor = (fw_version[i] & 0xFFFF); ++ ++ for (j = 0; j < negop->icframe_vercnt; j++) { ++ if ((negop->icversion_data[j].major == fw_major) && ++ (negop->icversion_data[j].minor == fw_minor)) { ++ icframe_major = negop->icversion_data[j].major; ++ icframe_minor = negop->icversion_data[j].minor; ++ found_match = true; ++ break; ++ } + } ++ ++ if (found_match) ++ break; + } + + if (!found_match) +@@ -258,14 +268,26 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, + + found_match = false; + +- for (i = negop->icframe_vercnt; +- (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) { +- if ((negop->icversion_data[i].major == srv_major) && +- (negop->icversion_data[i].minor == srv_minor)) { +- icmsg_major = negop->icversion_data[i].major; +- icmsg_minor = negop->icversion_data[i].minor; +- found_match = true; ++ for (i = 0; i < srv_vercnt; i++) { ++ srv_major = (srv_version[i] >> 16); ++ srv_minor = (srv_version[i] & 0xFFFF); ++ ++ for (j = negop->icframe_vercnt; ++ (j < negop->icframe_vercnt + negop->icmsg_vercnt); ++ j++) { ++ ++ if ((negop->icversion_data[j].major == srv_major) && ++ (negop->icversion_data[j].minor == srv_minor)) { ++ ++ icmsg_major = negop->icversion_data[j].major; ++ icmsg_minor = negop->icversion_data[j].minor; ++ found_match = true; ++ break; ++ } + } ++ ++ if (found_match) ++ break; + } + + /* +@@ -282,6 +304,12 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, + negop->icmsg_vercnt = 1; + } + ++ if (nego_fw_version) ++ *nego_fw_version = (icframe_major << 16) | icframe_minor; ++ ++ if (nego_srv_version) ++ *nego_srv_version = (icmsg_major << 16) | 
icmsg_minor; ++ + negop->icversion_data[0].major = icframe_major; + negop->icversion_data[0].minor = icframe_minor; + negop->icversion_data[1].major = icmsg_major; +diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c +index e47d8c9db03a..0a315e6aa589 100644 +--- a/drivers/hv/hv_fcopy.c ++++ b/drivers/hv/hv_fcopy.c +@@ -31,6 +31,16 @@ + #define WIN8_SRV_MINOR 1 + #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) + ++#define FCOPY_VER_COUNT 1 ++static const int fcopy_versions[] = { ++ WIN8_SRV_VERSION ++}; ++ ++#define FW_VER_COUNT 1 ++static const int fw_versions[] = { ++ UTIL_FW_VERSION ++}; ++ + /* + * Global state maintained for transaction that is being processed. + * For a class of integration services, including the "file copy service", +@@ -228,8 +238,6 @@ void hv_fcopy_onchannelcallback(void *context) + u64 requestid; + struct hv_fcopy_hdr *fcopy_msg; + struct icmsg_hdr *icmsghdr; +- struct icmsg_negotiate *negop = NULL; +- int util_fw_version; + int fcopy_srv_version; + + if (fcopy_transaction.state > HVUTIL_READY) +@@ -243,10 +251,10 @@ void hv_fcopy_onchannelcallback(void *context) + icmsghdr = (struct icmsg_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr)]; + if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- util_fw_version = UTIL_FW_VERSION; +- fcopy_srv_version = WIN8_SRV_VERSION; +- vmbus_prep_negotiate_resp(icmsghdr, negop, recv_buffer, +- util_fw_version, fcopy_srv_version); ++ vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, ++ fw_versions, FW_VER_COUNT, ++ fcopy_versions, FCOPY_VER_COUNT, ++ NULL, &fcopy_srv_version); + } else { + fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c +index 3abfc5983c97..2cc670442f6c 100644 +--- a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -46,6 +46,19 @@ + #define WIN8_SRV_MINOR 0 + #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) + ++#define KVP_VER_COUNT 3 ++static const int 
kvp_versions[] = { ++ WIN8_SRV_VERSION, ++ WIN7_SRV_VERSION, ++ WS2008_SRV_VERSION ++}; ++ ++#define FW_VER_COUNT 2 ++static const int fw_versions[] = { ++ UTIL_FW_VERSION, ++ UTIL_WS2K8_FW_VERSION ++}; ++ + /* + * Global state maintained for transaction that is being processed. For a class + * of integration services, including the "KVP service", the specified protocol +@@ -610,8 +623,6 @@ void hv_kvp_onchannelcallback(void *context) + struct hv_kvp_msg *kvp_msg; + + struct icmsg_hdr *icmsghdrp; +- struct icmsg_negotiate *negop = NULL; +- int util_fw_version; + int kvp_srv_version; + static enum {NEGO_NOT_STARTED, + NEGO_IN_PROGRESS, +@@ -640,28 +651,10 @@ void hv_kvp_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- /* +- * Based on the host, select appropriate +- * framework and service versions we will +- * negotiate. +- */ +- switch (vmbus_proto_version) { +- case (VERSION_WS2008): +- util_fw_version = UTIL_WS2K8_FW_VERSION; +- kvp_srv_version = WS2008_SRV_VERSION; +- break; +- case (VERSION_WIN7): +- util_fw_version = UTIL_FW_VERSION; +- kvp_srv_version = WIN7_SRV_VERSION; +- break; +- default: +- util_fw_version = UTIL_FW_VERSION; +- kvp_srv_version = WIN8_SRV_VERSION; +- } +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- recv_buffer, util_fw_version, +- kvp_srv_version); +- ++ vmbus_prep_negotiate_resp(icmsghdrp, ++ recv_buffer, fw_versions, FW_VER_COUNT, ++ kvp_versions, KVP_VER_COUNT, ++ NULL, &kvp_srv_version); + } else { + kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c +index 4e543dbb731a..d14f10b924a0 100644 +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -31,6 +31,16 @@ + #define VSS_MINOR 0 + #define VSS_VERSION (VSS_MAJOR << 16 | VSS_MINOR) + ++#define VSS_VER_COUNT 1 ++static const int vss_versions[] = { ++ VSS_VERSION ++}; ++ ++#define FW_VER_COUNT 1 ++static 
const int fw_versions[] = { ++ UTIL_FW_VERSION ++}; ++ + /* + * Timeout values are based on expecations from host + */ +@@ -297,7 +307,6 @@ void hv_vss_onchannelcallback(void *context) + + + struct icmsg_hdr *icmsghdrp; +- struct icmsg_negotiate *negop = NULL; + + if (vss_transaction.state > HVUTIL_READY) + return; +@@ -310,9 +319,10 @@ void hv_vss_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- recv_buffer, UTIL_FW_VERSION, +- VSS_VERSION); ++ vmbus_prep_negotiate_resp(icmsghdrp, ++ recv_buffer, fw_versions, FW_VER_COUNT, ++ vss_versions, VSS_VER_COUNT, ++ NULL, NULL); + } else { + vss_msg = (struct hv_vss_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c +index e7707747f56d..f3797c07be10 100644 +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -57,7 +57,31 @@ + static int sd_srv_version; + static int ts_srv_version; + static int hb_srv_version; +-static int util_fw_version; ++ ++#define SD_VER_COUNT 2 ++static const int sd_versions[] = { ++ SD_VERSION, ++ SD_VERSION_1 ++}; ++ ++#define TS_VER_COUNT 3 ++static const int ts_versions[] = { ++ TS_VERSION, ++ TS_VERSION_3, ++ TS_VERSION_1 ++}; ++ ++#define HB_VER_COUNT 2 ++static const int hb_versions[] = { ++ HB_VERSION, ++ HB_VERSION_1 ++}; ++ ++#define FW_VER_COUNT 2 ++static const int fw_versions[] = { ++ UTIL_FW_VERSION, ++ UTIL_WS2K8_FW_VERSION ++}; + + static void shutdown_onchannelcallback(void *context); + static struct hv_util_service util_shutdown = { +@@ -118,7 +142,6 @@ static void shutdown_onchannelcallback(void *context) + struct shutdown_msg_data *shutdown_msg; + + struct icmsg_hdr *icmsghdrp; +- struct icmsg_negotiate *negop = NULL; + + vmbus_recvpacket(channel, shut_txf_buf, + PAGE_SIZE, &recvlen, &requestid); +@@ -128,9 +151,14 @@ static void shutdown_onchannelcallback(void *context) + sizeof(struct 
vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- shut_txf_buf, util_fw_version, +- sd_srv_version); ++ if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf, ++ fw_versions, FW_VER_COUNT, ++ sd_versions, SD_VER_COUNT, ++ NULL, &sd_srv_version)) { ++ pr_info("Shutdown IC version %d.%d\n", ++ sd_srv_version >> 16, ++ sd_srv_version & 0xFFFF); ++ } + } else { + shutdown_msg = + (struct shutdown_msg_data *)&shut_txf_buf[ +@@ -253,7 +281,6 @@ static void timesync_onchannelcallback(void *context) + struct ictimesync_data *timedatap; + struct ictimesync_ref_data *refdata; + u8 *time_txf_buf = util_timesynch.recv_buffer; +- struct icmsg_negotiate *negop = NULL; + + vmbus_recvpacket(channel, time_txf_buf, + PAGE_SIZE, &recvlen, &requestid); +@@ -263,12 +290,14 @@ static void timesync_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- time_txf_buf, +- util_fw_version, +- ts_srv_version); +- pr_info("Using TimeSync version %d.%d\n", +- ts_srv_version >> 16, ts_srv_version & 0xFFFF); ++ if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf, ++ fw_versions, FW_VER_COUNT, ++ ts_versions, TS_VER_COUNT, ++ NULL, &ts_srv_version)) { ++ pr_info("TimeSync version %d.%d\n", ++ ts_srv_version >> 16, ++ ts_srv_version & 0xFFFF); ++ } + } else { + if (ts_srv_version > TS_VERSION_3) { + refdata = (struct ictimesync_ref_data *) +@@ -312,7 +341,6 @@ static void heartbeat_onchannelcallback(void *context) + struct icmsg_hdr *icmsghdrp; + struct heartbeat_msg_data *heartbeat_msg; + u8 *hbeat_txf_buf = util_heartbeat.recv_buffer; +- struct icmsg_negotiate *negop = NULL; + + while (1) { + +@@ -326,9 +354,16 @@ static void heartbeat_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, negop, +- 
hbeat_txf_buf, util_fw_version, +- hb_srv_version); ++ if (vmbus_prep_negotiate_resp(icmsghdrp, ++ hbeat_txf_buf, ++ fw_versions, FW_VER_COUNT, ++ hb_versions, HB_VER_COUNT, ++ NULL, &hb_srv_version)) { ++ ++ pr_info("Heartbeat version %d.%d\n", ++ hb_srv_version >> 16, ++ hb_srv_version & 0xFFFF); ++ } + } else { + heartbeat_msg = + (struct heartbeat_msg_data *)&hbeat_txf_buf[ +@@ -378,33 +413,6 @@ static int util_probe(struct hv_device *dev, + + hv_set_drvdata(dev, srv); + +- /* +- * Based on the host; initialize the framework and +- * service version numbers we will negotiate. +- */ +- switch (vmbus_proto_version) { +- case (VERSION_WS2008): +- util_fw_version = UTIL_WS2K8_FW_VERSION; +- sd_srv_version = SD_VERSION_1; +- ts_srv_version = TS_VERSION_1; +- hb_srv_version = HB_VERSION_1; +- break; +- case VERSION_WIN7: +- case VERSION_WIN8: +- case VERSION_WIN8_1: +- util_fw_version = UTIL_FW_VERSION; +- sd_srv_version = SD_VERSION; +- ts_srv_version = TS_VERSION_3; +- hb_srv_version = HB_VERSION; +- break; +- case VERSION_WIN10: +- default: +- util_fw_version = UTIL_FW_VERSION; +- sd_srv_version = SD_VERSION; +- ts_srv_version = TS_VERSION; +- hb_srv_version = HB_VERSION; +- } +- + ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, + srv->util_cb, dev->channel); + if (ret) +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 489ad74c1e6e..956acfc93487 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1453,9 +1453,10 @@ struct hyperv_service_callback { + }; + + #define MAX_SRV_VER 0x7ffffff +-extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, +- struct icmsg_negotiate *, u8 *, int, +- int); ++extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, ++ const int *fw_version, int fw_vercnt, ++ const int *srv_version, int srv_vercnt, ++ int *nego_fw_version, int *nego_srv_version); + + void hv_event_tasklet_disable(struct vmbus_channel *channel); + void hv_event_tasklet_enable(struct 
vmbus_channel *channel); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0007-Drivers-hv-Log-the-negotiated-IC-versions.patch b/projects/kernel-config/patches-4.9.x/0007-Drivers-hv-Log-the-negotiated-IC-versions.patch new file mode 100644 index 000000000..121604178 --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0007-Drivers-hv-Log-the-negotiated-IC-versions.patch @@ -0,0 +1,118 @@ +From 32a6b98612216b754fb89fc3a69b13acdd566eb8 Mon Sep 17 00:00:00 2001 +From: Alex Ng +Date: Sat, 28 Jan 2017 12:37:18 -0700 +Subject: [PATCH 07/10] Drivers: hv: Log the negotiated IC versions. + +Log the negotiated IC versions. + +Signed-off-by: Alex Ng +Signed-off-by: K. Y. Srinivasan +Signed-off-by: Greg Kroah-Hartman +Origin: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git +(cherry picked from commit 1274a690f6b2bd2b37447c47e3062afa8aa43f93) +--- + drivers/hv/hv_fcopy.c | 9 +++++++-- + drivers/hv/hv_kvp.c | 8 ++++++-- + drivers/hv/hv_snapshot.c | 11 ++++++++--- + drivers/hv/hv_util.c | 4 ++-- + 4 files changed, 23 insertions(+), 9 deletions(-) + +diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c +index 0a315e6aa589..9aee6014339d 100644 +--- a/drivers/hv/hv_fcopy.c ++++ b/drivers/hv/hv_fcopy.c +@@ -251,10 +251,15 @@ void hv_fcopy_onchannelcallback(void *context) + icmsghdr = (struct icmsg_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr)]; + if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, ++ if (vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, + fw_versions, FW_VER_COUNT, + fcopy_versions, FCOPY_VER_COUNT, +- NULL, &fcopy_srv_version); ++ NULL, &fcopy_srv_version)) { ++ ++ pr_info("FCopy IC version %d.%d\n", ++ fcopy_srv_version >> 16, ++ fcopy_srv_version & 0xFFFF); ++ } + } else { + fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c +index 2cc670442f6c..de263712e247 100644 +--- 
a/drivers/hv/hv_kvp.c ++++ b/drivers/hv/hv_kvp.c +@@ -651,10 +651,14 @@ void hv_kvp_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, ++ if (vmbus_prep_negotiate_resp(icmsghdrp, + recv_buffer, fw_versions, FW_VER_COUNT, + kvp_versions, KVP_VER_COUNT, +- NULL, &kvp_srv_version); ++ NULL, &kvp_srv_version)) { ++ pr_info("KVP IC version %d.%d\n", ++ kvp_srv_version >> 16, ++ kvp_srv_version & 0xFFFF); ++ } + } else { + kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c +index d14f10b924a0..bcc03f0748d6 100644 +--- a/drivers/hv/hv_snapshot.c ++++ b/drivers/hv/hv_snapshot.c +@@ -304,7 +304,7 @@ void hv_vss_onchannelcallback(void *context) + u32 recvlen; + u64 requestid; + struct hv_vss_msg *vss_msg; +- ++ int vss_srv_version; + + struct icmsg_hdr *icmsghdrp; + +@@ -319,10 +319,15 @@ void hv_vss_onchannelcallback(void *context) + sizeof(struct vmbuspipe_hdr)]; + + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { +- vmbus_prep_negotiate_resp(icmsghdrp, ++ if (vmbus_prep_negotiate_resp(icmsghdrp, + recv_buffer, fw_versions, FW_VER_COUNT, + vss_versions, VSS_VER_COUNT, +- NULL, NULL); ++ NULL, &vss_srv_version)) { ++ ++ pr_info("VSS IC version %d.%d\n", ++ vss_srv_version >> 16, ++ vss_srv_version & 0xFFFF); ++ } + } else { + vss_msg = (struct hv_vss_msg *)&recv_buffer[ + sizeof(struct vmbuspipe_hdr) + +diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c +index f3797c07be10..89440c2eb346 100644 +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -294,7 +294,7 @@ static void timesync_onchannelcallback(void *context) + fw_versions, FW_VER_COUNT, + ts_versions, TS_VER_COUNT, + NULL, &ts_srv_version)) { +- pr_info("TimeSync version %d.%d\n", ++ pr_info("TimeSync IC version %d.%d\n", + ts_srv_version >> 16, + ts_srv_version & 0xFFFF); + } +@@ -360,7 +360,7 @@ 
static void heartbeat_onchannelcallback(void *context) + hb_versions, HB_VER_COUNT, + NULL, &hb_srv_version)) { + +- pr_info("Heartbeat version %d.%d\n", ++ pr_info("Heartbeat IC version %d.%d\n", + hb_srv_version >> 16, + hb_srv_version & 0xFFFF); + } +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0008-vmbus-fix-missed-ring-events-on-boot.patch b/projects/kernel-config/patches-4.9.x/0008-vmbus-fix-missed-ring-events-on-boot.patch new file mode 100644 index 000000000..1d78d5dfa --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0008-vmbus-fix-missed-ring-events-on-boot.patch @@ -0,0 +1,56 @@ +From d7c92ad3b3d30f4d790a651070d71fa8a0e5923f Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Sun, 26 Mar 2017 16:42:20 +0800 +Subject: [PATCH 08/10] vmbus: fix missed ring events on boot + +During initialization, the channel initialization code schedules the +tasklet to scan the VMBUS receive event page (i.e. simulates an +interrupt). The problem was that it invokes the tasklet on a different +CPU from where it normally runs and therefore if an event is present, +it will clear the bit but not find the associated channel. + +This can lead to missed events, typically stuck tasks, during bootup +when sub channels are being initialized. Typically seen as stuck +boot with 8 or more CPU's. + +This patch is not necessary for upstream (4.11 and later) since +commit 631e63a9f346 ("vmbus: change to per channel tasklet"). +This changed vmbus code to get rid of common tasklet which +caused the problem. 
+ +Cc: stable@vger.kernel.org +Fixes: 638fea33aee8 ("Drivers: hv: vmbus: fix the race when querying & updating the percpu list") +Signed-off-by: Stephen Hemminger +Origin: git@github.com:dcui/linux.git +(cherry picked from commit 5cf3a72a111cecc7da759542c56560ce509159d7) +--- + drivers/hv/channel_mgmt.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index e7949b64bfbc..2fe024e86209 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -388,8 +388,17 @@ void hv_event_tasklet_enable(struct vmbus_channel *channel) + tasklet = hv_context.event_dpc[channel->target_cpu]; + tasklet_enable(tasklet); + +- /* In case there is any pending event */ +- tasklet_schedule(tasklet); ++ /* ++ * In case there is any pending event schedule a rescan ++ * but must be on the correct CPU for the channel. ++ */ ++ if (channel->target_cpu == get_cpu()) ++ tasklet_schedule(tasklet); ++ else ++ smp_call_function_single(channel->target_cpu, ++ (smp_call_func_t)tasklet_schedule, ++ tasklet, false); ++ put_cpu(); + } + + void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0009-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch b/projects/kernel-config/patches-4.9.x/0009-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch new file mode 100644 index 000000000..2b68cbe15 --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0009-vmbus-remove-goto-error_clean_msglist-in-vmbus_open.patch @@ -0,0 +1,60 @@ +From 2e1b50493f08cc2b0c40b02caff57a9b33c25687 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui +Date: Wed, 29 Mar 2017 18:37:10 +0800 +Subject: [PATCH 09/10] vmbus: remove "goto error_clean_msglist" in + vmbus_open() + +This is just a cleanup patch to simplify the code a little. +No semantic change. 
+ +Signed-off-by: Dexuan Cui +Origin: git@github.com:dcui/linux.git +(cherry picked from commit 2c89f21cbdfd39299482cd6068094097a45f13b3) +--- + drivers/hv/channel.c | 18 +++++++----------- + 1 file changed, 7 insertions(+), 11 deletions(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 1606e7f08f4b..1caed01954f6 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -184,17 +184,18 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + ret = vmbus_post_msg(open_msg, + sizeof(struct vmbus_channel_open_channel), true); + +- if (ret != 0) { +- err = ret; +- goto error_clean_msglist; +- } +- +- wait_for_completion(&open_info->waitevent); ++ if (ret == 0) ++ wait_for_completion(&open_info->waitevent); + + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); + list_del(&open_info->msglistentry); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + ++ if (ret != 0) { ++ err = ret; ++ goto error_free_gpadl; ++ } ++ + if (newchannel->rescind) { + err = -ENODEV; + goto error_free_gpadl; +@@ -209,11 +210,6 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + kfree(open_info); + return 0; + +-error_clean_msglist: +- spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); +- list_del(&open_info->msglistentry); +- spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); +- + error_free_gpadl: + vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); + kfree(open_info); +-- +2.12.2 + diff --git a/projects/kernel-config/patches-4.9.x/0010-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch b/projects/kernel-config/patches-4.9.x/0010-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch new file mode 100644 index 000000000..4bc62046b --- /dev/null +++ b/projects/kernel-config/patches-4.9.x/0010-vmbus-dynamically-enqueue-dequeue-the-channel-on-vmb.patch @@ -0,0 +1,177 @@ +From 93a84f69a13df0a695747daccd5c8d95b4880736 Mon Sep 17 
00:00:00 2001 +From: Dexuan Cui +Date: Fri, 24 Mar 2017 20:53:18 +0800 +Subject: [PATCH 10/10] vmbus: dynamically enqueue/dequeue the channel on + vmbus_open/close + +Signed-off-by: Dexuan Cui +Origin: git@github.com:dcui/linux.git +(cherry picked from commit bee4910daa4aed57ce60d2e2350e3cc120c383ca) +--- + drivers/hv/channel.c | 16 ++++++++++--- + drivers/hv/channel_mgmt.c | 58 ++++++++++++++++++++--------------------------- + include/linux/hyperv.h | 3 +++ + 3 files changed, 40 insertions(+), 37 deletions(-) + +diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c +index 1caed01954f6..5bbcc964dbf7 100644 +--- a/drivers/hv/channel.c ++++ b/drivers/hv/channel.c +@@ -181,6 +181,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + &vmbus_connection.chn_msg_list); + spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); + ++ hv_event_tasklet_disable(newchannel); ++ hv_percpu_channel_enq(newchannel); ++ hv_event_tasklet_enable(newchannel); ++ + ret = vmbus_post_msg(open_msg, + sizeof(struct vmbus_channel_open_channel), true); + +@@ -193,23 +197,27 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, + + if (ret != 0) { + err = ret; +- goto error_free_gpadl; ++ goto error_deq_channel; + } + + if (newchannel->rescind) { + err = -ENODEV; +- goto error_free_gpadl; ++ goto error_deq_channel; + } + + if (open_info->response.open_result.status) { + err = -EAGAIN; +- goto error_free_gpadl; ++ goto error_deq_channel; + } + + newchannel->state = CHANNEL_OPENED_STATE; + kfree(open_info); + return 0; + ++error_deq_channel: ++ hv_event_tasklet_disable(newchannel); ++ hv_percpu_channel_deq(newchannel); ++ hv_event_tasklet_enable(newchannel); + error_free_gpadl: + vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle); + kfree(open_info); +@@ -555,6 +563,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) + goto out; + } + ++ hv_percpu_channel_deq(channel); ++ + channel->state = 
CHANNEL_OPEN_STATE; + channel->sc_creation_callback = NULL; + /* Stop callback and cancel the timer asap */ +diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c +index 2fe024e86209..b2bdcfb49144 100644 +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -375,6 +375,30 @@ static void vmbus_release_relid(u32 relid) + true); + } + ++void hv_percpu_channel_enq(struct vmbus_channel *channel) ++{ ++ if (channel->target_cpu != get_cpu()) ++ smp_call_function_single(channel->target_cpu, ++ percpu_channel_enq, ++ channel, true); ++ else ++ percpu_channel_enq(channel); ++ put_cpu(); ++ ++} ++ ++void hv_percpu_channel_deq(struct vmbus_channel *channel) ++{ ++ if (channel->target_cpu != get_cpu()) ++ smp_call_function_single(channel->target_cpu, ++ percpu_channel_deq, ++ channel, true); ++ else ++ percpu_channel_deq(channel); ++ put_cpu(); ++ ++} ++ + void hv_event_tasklet_disable(struct vmbus_channel *channel) + { + struct tasklet_struct *tasklet; +@@ -409,17 +433,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) + BUG_ON(!channel->rescind); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + +- hv_event_tasklet_disable(channel); +- if (channel->target_cpu != get_cpu()) { +- put_cpu(); +- smp_call_function_single(channel->target_cpu, +- percpu_channel_deq, channel, true); +- } else { +- percpu_channel_deq(channel); +- put_cpu(); +- } +- hv_event_tasklet_enable(channel); +- + if (channel->primary_channel == NULL) { + list_del(&channel->listentry); + +@@ -512,18 +525,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + + init_vp_index(newchannel, dev_type); + +- hv_event_tasklet_disable(newchannel); +- if (newchannel->target_cpu != get_cpu()) { +- put_cpu(); +- smp_call_function_single(newchannel->target_cpu, +- percpu_channel_enq, +- newchannel, true); +- } else { +- percpu_channel_enq(newchannel); +- put_cpu(); +- } +- hv_event_tasklet_enable(newchannel); +- + /* + * This state is 
used to indicate a successful open + * so that when we do close the channel normally, we +@@ -572,17 +573,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) + list_del(&newchannel->listentry); + mutex_unlock(&vmbus_connection.channel_mutex); + +- hv_event_tasklet_disable(newchannel); +- if (newchannel->target_cpu != get_cpu()) { +- put_cpu(); +- smp_call_function_single(newchannel->target_cpu, +- percpu_channel_deq, newchannel, true); +- } else { +- percpu_channel_deq(newchannel); +- put_cpu(); +- } +- hv_event_tasklet_enable(newchannel); +- + vmbus_release_relid(newchannel->offermsg.child_relid); + + err_free_chan: +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index 956acfc93487..9ee292b28e41 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1461,6 +1461,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, + void hv_event_tasklet_disable(struct vmbus_channel *channel); + void hv_event_tasklet_enable(struct vmbus_channel *channel); + ++void hv_percpu_channel_enq(struct vmbus_channel *channel); ++void hv_percpu_channel_deq(struct vmbus_channel *channel); ++ + void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); + + void vmbus_setevent(struct vmbus_channel *channel); +-- +2.12.2 + diff --git a/test/pkg/kernel-config/check-kernel-config.sh b/test/pkg/kernel-config/check-kernel-config.sh index 28dec1563..a05026769 100755 --- a/test/pkg/kernel-config/check-kernel-config.sh +++ b/test/pkg/kernel-config/check-kernel-config.sh @@ -2,10 +2,14 @@ set -e -echo "starting kernel config sanity test with /proc/config.gz" +echo "starting kernel config sanity test with ${1:-/proc/config.gz}" -# decompress /proc/config.gz from the host -UNZIPPED_CONFIG=$(zcat /proc/config.gz) +if [ -n "$1" ]; then + UNZIPPED_CONFIG=$(cat "$1") +else + # decompress /proc/config.gz from the host + UNZIPPED_CONFIG=$(zcat /proc/config.gz) +fi kernelVersion="$(uname -r)" 
kernelMajor="${kernelVersion%%.*}"