kernel: Fix Hyper-V socket support in 4.11 kernels

In order to enable Hyper-V sockets, the CONFIG_HYPERV_VSOCKETS
option must be set. This is different to the older kernel patches.

In order for the Hyper-V socket code to compile, f3dd3f4797652c311df
("vmbus: introduce in-place packet iterator") needed cherry-picking.

Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
This commit is contained in:
Rolf Neugebauer 2017-05-09 19:05:49 +01:00
parent b39e477c10
commit 5d88623faf
9 changed files with 392 additions and 23 deletions

View File

@ -1331,7 +1331,7 @@ CONFIG_OPENVSWITCH_GENEVE=y
CONFIG_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS=y
CONFIG_VIRTIO_VSOCKETS_COMMON=y
# CONFIG_HYPERV_VSOCKETS is not set
CONFIG_HYPERV_VSOCKETS=y
CONFIG_NETLINK_DIAG=y
CONFIG_MPLS=y
CONFIG_NET_MPLS_GSO=y

View File

@ -0,0 +1,369 @@
From 008801f1023737f7e6a8c3c2cb1feb2f2d63982f Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Mon, 27 Feb 2017 10:26:48 -0800
Subject: [PATCH 1/8] vmbus: introduce in-place packet iterator
This is mostly just a refactoring of previous functions
(get_pkt_next_raw, put_pkt_raw and commit_rd_index) to make it easier
to use for other drivers and NAPI.
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit f3dd3f4797652c311df9c074436d420f1ad3566e)
---
drivers/hv/ring_buffer.c | 94 +++++++++++++++++++++++++++++++++++++++++++-
drivers/net/hyperv/netvsc.c | 34 +++++-----------
include/linux/hyperv.h | 96 ++++++++++++++-------------------------------
3 files changed, 133 insertions(+), 91 deletions(-)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 87799e81af97..c3f1a9e33cef 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -32,6 +32,8 @@
#include "hyperv_vmbus.h"
+#define VMBUS_PKT_TRAILER 8
+
/*
* When we write to the ring buffer, check if the host needs to
* be signaled. Here is the details of this protocol:
@@ -336,6 +338,12 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
return 0;
}
+static inline void
+init_cached_read_index(struct hv_ring_buffer_info *rbi)
+{
+ rbi->cached_read_index = rbi->ring_buffer->read_index;
+}
+
int hv_ringbuffer_read(struct vmbus_channel *channel,
void *buffer, u32 buflen, u32 *buffer_actual_len,
u64 *requestid, bool raw)
@@ -366,7 +374,8 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
return ret;
}
- init_cached_read_index(channel);
+ init_cached_read_index(inring_info);
+
next_read_location = hv_get_next_read_location(inring_info);
next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
sizeof(desc),
@@ -410,3 +419,86 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
return ret;
}
+
+/*
+ * Determine number of bytes available in ring buffer after
+ * the current iterator (priv_read_index) location.
+ *
+ * This is similar to hv_get_bytes_to_read but with private
+ * read index instead.
+ */
+static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
+{
+ u32 priv_read_loc = rbi->priv_read_index;
+ u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index);
+
+ if (write_loc >= priv_read_loc)
+ return write_loc - priv_read_loc;
+ else
+ return (rbi->ring_datasize - priv_read_loc) + write_loc;
+}
+
+/*
+ * Get first vmbus packet from ring buffer after read_index
+ *
+ * If ring buffer is empty, returns NULL and no other action needed.
+ */
+struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
+{
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+ /* set state for later hv_signal_on_read() */
+ init_cached_read_index(rbi);
+
+ if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+ return NULL;
+
+ return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+}
+EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
+
+/*
+ * Get next vmbus packet from ring buffer.
+ *
+ * Advances the current location (priv_read_index) and checks for more
+ * data. If the end of the ring buffer is reached, then return NULL.
+ */
+struct vmpacket_descriptor *
+__hv_pkt_iter_next(struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *desc)
+{
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+ u32 packetlen = desc->len8 << 3;
+ u32 dsize = rbi->ring_datasize;
+
+ /* bump offset to next potential packet */
+ rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
+ if (rbi->priv_read_index >= dsize)
+ rbi->priv_read_index -= dsize;
+
+ /* more data? */
+ if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+ return NULL;
+ else
+ return hv_get_ring_buffer(rbi) + rbi->priv_read_index;
+}
+EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
+
+/*
+ * Update host ring buffer after iterating over packets.
+ */
+void hv_pkt_iter_close(struct vmbus_channel *channel)
+{
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+ /*
+ * Make sure all reads are done before we update the read index since
+ * the writer may start writing to the read area once the read index
+ * is updated.
+ */
+ virt_rmb();
+ rbi->ring_buffer->read_index = rbi->priv_read_index;
+
+ hv_signal_on_read(channel);
+}
+EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 15ef713d96c0..ab9fe48ec133 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -646,14 +646,11 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
static void netvsc_send_completion(struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
struct hv_device *device,
- struct vmpacket_descriptor *packet)
+ const struct vmpacket_descriptor *desc)
{
- struct nvsp_message *nvsp_packet;
+ struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
struct net_device *ndev = hv_get_drvdata(device);
- nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
- (packet->offset8 << 3));
-
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
@@ -667,7 +664,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
netvsc_send_tx_complete(net_device, incoming_channel,
- device, packet);
+ device, desc);
break;
default:
@@ -1070,9 +1067,11 @@ static void netvsc_receive(struct net_device *ndev,
struct net_device_context *net_device_ctx,
struct hv_device *device,
struct vmbus_channel *channel,
- struct vmtransfer_page_packet_header *vmxferpage_packet,
+ const struct vmpacket_descriptor *desc,
struct nvsp_message *nvsp)
{
+ const struct vmtransfer_page_packet_header *vmxferpage_packet
+ = container_of(desc, const struct vmtransfer_page_packet_header, d);
char *recv_buf = net_device->recv_buf;
u32 status = NVSP_STAT_SUCCESS;
int i;
@@ -1180,12 +1179,10 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
struct netvsc_device *net_device,
struct net_device *ndev,
u64 request_id,
- struct vmpacket_descriptor *desc)
+ const struct vmpacket_descriptor *desc)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct nvsp_message *nvmsg
- = (struct nvsp_message *)((unsigned long)desc
- + (desc->offset8 << 3));
+ struct nvsp_message *nvmsg = hv_pkt_data(desc);
switch (desc->type) {
case VM_PKT_COMP:
@@ -1194,9 +1191,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
case VM_PKT_DATA_USING_XFER_PAGES:
netvsc_receive(ndev, net_device, net_device_ctx,
- device, channel,
- (struct vmtransfer_page_packet_header *)desc,
- nvmsg);
+ device, channel, desc, nvmsg);
break;
case VM_PKT_DATA_INBAND:
@@ -1218,7 +1213,6 @@ void netvsc_channel_cb(void *context)
struct netvsc_device *net_device;
struct vmpacket_descriptor *desc;
struct net_device *ndev;
- bool need_to_commit = false;
if (channel->primary_channel != NULL)
device = channel->primary_channel->device_obj;
@@ -1237,20 +1231,12 @@ void netvsc_channel_cb(void *context)
netvsc_channel_idle(net_device, q_idx)))
return;
- /* commit_rd_index() -> hv_signal_on_read() needs this. */
- init_cached_read_index(channel);
-
- while ((desc = get_next_pkt_raw(channel)) != NULL) {
+ foreach_vmbus_pkt(desc, channel) {
netvsc_process_raw_pkt(device, channel, net_device,
ndev, desc->trans_id, desc);
- put_pkt_raw(channel, desc);
- need_to_commit = true;
}
- if (need_to_commit)
- commit_rd_index(channel);
-
netvsc_chk_recv_comp(net_device, channel, q_idx);
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 970771a5f739..0c170a3f0d8b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1508,14 +1508,6 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel)
return;
}
-static inline void
-init_cached_read_index(struct vmbus_channel *channel)
-{
- struct hv_ring_buffer_info *rbi = &channel->inbound;
-
- rbi->cached_read_index = rbi->ring_buffer->read_index;
-}
-
/*
* Mask off host interrupt callback notifications
*/
@@ -1549,76 +1541,48 @@ static inline u32 hv_end_read(struct hv_ring_buffer_info *rbi)
/*
* An API to support in-place processing of incoming VMBUS packets.
*/
-#define VMBUS_PKT_TRAILER 8
-static inline struct vmpacket_descriptor *
-get_next_pkt_raw(struct vmbus_channel *channel)
+/* Get data payload associated with descriptor */
+static inline void *hv_pkt_data(const struct vmpacket_descriptor *desc)
{
- struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 priv_read_loc = ring_info->priv_read_index;
- void *ring_buffer = hv_get_ring_buffer(ring_info);
- u32 dsize = ring_info->ring_datasize;
- /*
- * delta is the difference between what is available to read and
- * what was already consumed in place. We commit read index after
- * the whole batch is processed.
- */
- u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ?
- priv_read_loc - ring_info->ring_buffer->read_index :
- (dsize - ring_info->ring_buffer->read_index) + priv_read_loc;
- u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
-
- if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
- return NULL;
-
- return ring_buffer + priv_read_loc;
+ return (void *)((unsigned long)desc + (desc->offset8 << 3));
}
-/*
- * A helper function to step through packets "in-place"
- * This API is to be called after each successful call
- * get_next_pkt_raw().
- */
-static inline void put_pkt_raw(struct vmbus_channel *channel,
- struct vmpacket_descriptor *desc)
+/* Get data size associated with descriptor */
+static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc)
{
- struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 packetlen = desc->len8 << 3;
- u32 dsize = ring_info->ring_datasize;
-
- /*
- * Include the packet trailer.
- */
- ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
- ring_info->priv_read_index %= dsize;
+ return (desc->len8 << 3) - (desc->offset8 << 3);
}
+
+struct vmpacket_descriptor *
+hv_pkt_iter_first(struct vmbus_channel *channel);
+
+struct vmpacket_descriptor *
+__hv_pkt_iter_next(struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *pkt);
+
+void hv_pkt_iter_close(struct vmbus_channel *channel);
+
/*
- * This call commits the read index and potentially signals the host.
- * Here is the pattern for using the "in-place" consumption APIs:
- *
- * init_cached_read_index();
- *
- * while (get_next_pkt_raw() {
- * process the packet "in-place";
- * put_pkt_raw();
- * }
- * if (packets processed in place)
- * commit_rd_index();
+ * Get next packet descriptor from iterator
+ * If at end of list, return NULL and update host.
*/
-static inline void commit_rd_index(struct vmbus_channel *channel)
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next(struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *pkt)
{
- struct hv_ring_buffer_info *ring_info = &channel->inbound;
- /*
- * Make sure all reads are done before we update the read index since
- * the writer may start writing to the read area once the read index
- * is updated.
- */
- virt_rmb();
- ring_info->ring_buffer->read_index = ring_info->priv_read_index;
+ struct vmpacket_descriptor *nxt;
+
+ nxt = __hv_pkt_iter_next(channel, pkt);
+ if (!nxt)
+ hv_pkt_iter_close(channel);
- hv_signal_on_read(channel);
+ return nxt;
}
+#define foreach_vmbus_pkt(pkt, channel) \
+ for (pkt = hv_pkt_iter_first(channel); pkt; \
+ pkt = hv_pkt_iter_next(channel, pkt))
#endif /* _HYPERV_H */
--
2.12.2

View File

@ -1,7 +1,7 @@
From b7dea38997e358b0f6505432a07647f250e36f0c Mon Sep 17 00:00:00 2001
From 103dab94d97a63a9b7c2be36b668134903ad64f0 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 5 May 2017 16:57:12 -0600
Subject: [PATCH 1/7] vmbus: vmbus_open(): reset onchannel_callback on error
Subject: [PATCH 2/8] vmbus: vmbus_open(): reset onchannel_callback on error
No real issue is observed without the patch, but let's add this
just in case.
@ -30,5 +30,5 @@ index 321b8833fa6f..628d6fde1887 100644
}
EXPORT_SYMBOL_GPL(vmbus_open);
--
2.11.1
2.12.2

View File

@ -1,7 +1,7 @@
From ab40355a804892b8977854220a07f055a14c8f5c Mon Sep 17 00:00:00 2001
From ef0add3b77376a938f5de3f46337e5d23a1582f8 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 5 May 2017 16:57:15 -0600
Subject: [PATCH 2/7] vmbus: add the matching tasklet_enable() in
Subject: [PATCH 3/8] vmbus: add the matching tasklet_enable() in
vmbus_close_internal()
If we disable a tasklet that is scheduled but hasn't started to run,
@ -38,5 +38,5 @@ index 628d6fde1887..7cd2bd9fd1f1 100644
}
--
2.11.1
2.12.2

View File

@ -1,7 +1,7 @@
From dade92a41fb20bc4224f412f4fb6b9de34f48c56 Mon Sep 17 00:00:00 2001
From dc0bd14f20b08b4bec097949ad0d4ec421d75ebf Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 5 May 2017 16:57:20 -0600
Subject: [PATCH 3/7] vmbus: remove "goto error_clean_msglist" in vmbus_open()
Subject: [PATCH 4/8] vmbus: remove "goto error_clean_msglist" in vmbus_open()
This is just a cleanup patch to simplify the code a little.
No semantic change.
@ -58,5 +58,5 @@ index 7cd2bd9fd1f1..db5e6f8730d2 100644
vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
kfree(open_info);
--
2.11.1
2.12.2

View File

@ -1,7 +1,7 @@
From cc8ae5575e4e77b88c134f82161341a4d4f9b094 Mon Sep 17 00:00:00 2001
From a0345dea9dae2e1b1762350b257348a8928302b7 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 5 May 2017 16:57:23 -0600
Subject: [PATCH 4/7] vmbus: dynamically enqueue/dequeue a channel on
Subject: [PATCH 5/8] vmbus: dynamically enqueue/dequeue a channel on
vmbus_open/close
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@ -171,7 +171,7 @@ index fbcb06352308..c5a01a4d589e 100644
err_free_chan:
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 970771a5f739..a8bae2caa69f 100644
index 0c170a3f0d8b..ba93b7e4a972 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1437,6 +1437,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
@ -185,5 +185,5 @@ index 970771a5f739..a8bae2caa69f 100644
void vmbus_setevent(struct vmbus_channel *channel);
--
2.11.1
2.12.2

View File

@ -1,7 +1,7 @@
From aa172b579ab1228c9d4364d2d21ede58927648f4 Mon Sep 17 00:00:00 2001
From 1698f465f360c5c659075e25bc337710e7633260 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 5 May 2017 16:57:26 -0600
Subject: [PATCH 5/7] hv_sock: implements Hyper-V transport for Virtual Sockets
Subject: [PATCH 6/8] hv_sock: implements Hyper-V transport for Virtual Sockets
(AF_VSOCK)
Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
@ -930,5 +930,5 @@ index 000000000000..fd89bf357617
+MODULE_VERSION("1.0.0");
+MODULE_LICENSE("GPL");
--
2.11.1
2.12.2

View File

@ -1,7 +1,7 @@
From 56e37d5939cd7da4e7b9dd8a699e11f0b004c999 Mon Sep 17 00:00:00 2001
From 95b72c1f5f513ff818bb9c483671000154bf8526 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 5 May 2017 16:57:29 -0600
Subject: [PATCH 6/7] VMCI: only try to load on VMware hypervisor
Subject: [PATCH 7/8] VMCI: only try to load on VMware hypervisor
Without the patch, vmw_vsock_vmci_transport.ko and vmw_vmci.ko can
automatically load when an application creates an AF_VSOCK socket.
@ -60,5 +60,5 @@ index d7eaf1eb11e7..1789ea71ff5d 100644
if (vmci_err < VMCI_SUCCESS) {
pr_err("Failed to initialize VMCIEvent (result=%d)\n",
--
2.11.1
2.12.2

View File

@ -1,7 +1,7 @@
From 6b2c2ccb78ab847ac1db6009abc9cc4954a562ef Mon Sep 17 00:00:00 2001
From 92fd02ce7581b945c3e44258ca745b32bdb177fa Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 5 May 2017 16:57:35 -0600
Subject: [PATCH 7/7] hv_sock: add the support of auto-loading
Subject: [PATCH 8/8] hv_sock: add the support of auto-loading
After we disable VMWare virtual sockets driver's auto-loading on Hyper-V,
we can enable hv_sock's auto-loading now.
@ -26,5 +26,5 @@ index fd89bf357617..f465b0b662df 100644
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
--
2.11.1
2.12.2