mirror of
				https://github.com/linuxkit/linuxkit.git
				synced 2025-11-04 08:55:13 +00:00 
			
		
		
		
	Merge pull request #1785 from rneugeba/kern-v4.11
kernel: Add initial support for the 4.11 kernel
This commit is contained in:
		@@ -62,6 +62,8 @@ endef
 | 
			
		||||
# Build Targets
 | 
			
		||||
# Debug targets only for latest stable and LTS stable
 | 
			
		||||
#
 | 
			
		||||
$(eval $(call kernel,4.11,4.11.x))
 | 
			
		||||
$(eval $(call kernel,4.11,4.11.x,_dbg))
 | 
			
		||||
$(eval $(call kernel,4.10.15,4.10.x))
 | 
			
		||||
$(eval $(call kernel,4.10.15,4.10.x,_dbg))
 | 
			
		||||
$(eval $(call kernel,4.9.27,4.9.x))
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										3810
									
								
								kernel/kernel_config-4.11.x
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3810
									
								
								kernel/kernel_config-4.11.x
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -0,0 +1,34 @@
 | 
			
		||||
From b7dea38997e358b0f6505432a07647f250e36f0c Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Date: Fri, 5 May 2017 16:57:12 -0600
 | 
			
		||||
Subject: [PATCH 1/7] vmbus: vmbus_open(): reset onchannel_callback on error
 | 
			
		||||
 | 
			
		||||
No real issue is observed without the patch, but let's add this
 | 
			
		||||
just in case.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Cc: K. Y. Srinivasan <kys@microsoft.com>
 | 
			
		||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
 | 
			
		||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
 | 
			
		||||
Origin: git@github.com:dcui/linux.git
 | 
			
		||||
(cherry picked from commit d66cd1ea748e2e3250aeb58b35f9ad665e310db9)
 | 
			
		||||
---
 | 
			
		||||
 drivers/hv/channel.c | 2 ++
 | 
			
		||||
 1 file changed, 2 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
 | 
			
		||||
index 321b8833fa6f..628d6fde1887 100644
 | 
			
		||||
--- a/drivers/hv/channel.c
 | 
			
		||||
+++ b/drivers/hv/channel.c
 | 
			
		||||
@@ -220,6 +220,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 | 
			
		||||
 		     get_order(send_ringbuffer_size + recv_ringbuffer_size));
 | 
			
		||||
 error_set_chnstate:
 | 
			
		||||
 	newchannel->state = CHANNEL_OPEN_STATE;
 | 
			
		||||
+	newchannel->onchannel_callback = NULL;
 | 
			
		||||
+	newchannel->channel_callback_context = NULL;
 | 
			
		||||
 	return err;
 | 
			
		||||
 }
 | 
			
		||||
 EXPORT_SYMBOL_GPL(vmbus_open);
 | 
			
		||||
-- 
 | 
			
		||||
2.11.1
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,42 @@
 | 
			
		||||
From ab40355a804892b8977854220a07f055a14c8f5c Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Date: Fri, 5 May 2017 16:57:15 -0600
 | 
			
		||||
Subject: [PATCH 2/7] vmbus: add the matching tasklet_enable() in
 | 
			
		||||
 vmbus_close_internal()
 | 
			
		||||
 | 
			
		||||
If we disable a tasklet that is scheduled but hasn't started to run,
 | 
			
		||||
the tasklet has no chance to run any longer, so later we'll hang
 | 
			
		||||
in free_channel() -> tasklet_kill(), because the TASKLET_STATE_SCHED
 | 
			
		||||
can't be cleared in tasklet_action().
 | 
			
		||||
 | 
			
		||||
With the patch, before free_channel() -> tasklet_kill() returns, the
 | 
			
		||||
tasklet still has a chance to run with a NULL channel->onchannel_callback,
 | 
			
		||||
which will be ignored safely, e.g. by vmbus_on_event().
 | 
			
		||||
 | 
			
		||||
Fixes: dad72a1d2844 ("vmbus: remove hv_event_tasklet_disable/enable")
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Cc: K. Y. Srinivasan <kys@microsoft.com>
 | 
			
		||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
 | 
			
		||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
 | 
			
		||||
Origin: git@github.com:dcui/linux.git
 | 
			
		||||
(cherry picked from commit 2e653533181f0c70db04d2ca13a4ae60251d1a93)
 | 
			
		||||
---
 | 
			
		||||
 drivers/hv/channel.c | 1 +
 | 
			
		||||
 1 file changed, 1 insertion(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
 | 
			
		||||
index 628d6fde1887..7cd2bd9fd1f1 100644
 | 
			
		||||
--- a/drivers/hv/channel.c
 | 
			
		||||
+++ b/drivers/hv/channel.c
 | 
			
		||||
@@ -608,6 +608,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 | 
			
		||||
 		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
 | 
			
		||||
 
 | 
			
		||||
 out:
 | 
			
		||||
+	tasklet_enable(&channel->callback_event);
 | 
			
		||||
 	return ret;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
2.11.1
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,62 @@
 | 
			
		||||
From dade92a41fb20bc4224f412f4fb6b9de34f48c56 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Date: Fri, 5 May 2017 16:57:20 -0600
 | 
			
		||||
Subject: [PATCH 3/7] vmbus: remove "goto error_clean_msglist" in vmbus_open()
 | 
			
		||||
 | 
			
		||||
This is just a cleanup patch to simplify the code a little.
 | 
			
		||||
No semantic change.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Cc: K. Y. Srinivasan <kys@microsoft.com>
 | 
			
		||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
 | 
			
		||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
 | 
			
		||||
Origin: git@github.com:dcui/linux.git
 | 
			
		||||
(cherry picked from commit 2deba76a449075c5a05bd572bfdee660f710b207)
 | 
			
		||||
---
 | 
			
		||||
 drivers/hv/channel.c | 18 +++++++-----------
 | 
			
		||||
 1 file changed, 7 insertions(+), 11 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
 | 
			
		||||
index 7cd2bd9fd1f1..db5e6f8730d2 100644
 | 
			
		||||
--- a/drivers/hv/channel.c
 | 
			
		||||
+++ b/drivers/hv/channel.c
 | 
			
		||||
@@ -180,17 +180,18 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 | 
			
		||||
 	ret = vmbus_post_msg(open_msg,
 | 
			
		||||
 			     sizeof(struct vmbus_channel_open_channel), true);
 | 
			
		||||
 
 | 
			
		||||
-	if (ret != 0) {
 | 
			
		||||
-		err = ret;
 | 
			
		||||
-		goto error_clean_msglist;
 | 
			
		||||
-	}
 | 
			
		||||
-
 | 
			
		||||
-	wait_for_completion(&open_info->waitevent);
 | 
			
		||||
+	if (ret == 0)
 | 
			
		||||
+		wait_for_completion(&open_info->waitevent);
 | 
			
		||||
 
 | 
			
		||||
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 | 
			
		||||
 	list_del(&open_info->msglistentry);
 | 
			
		||||
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 | 
			
		||||
 
 | 
			
		||||
+	if (ret != 0) {
 | 
			
		||||
+		err = ret;
 | 
			
		||||
+		goto error_free_gpadl;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
 	if (newchannel->rescind) {
 | 
			
		||||
 		err = -ENODEV;
 | 
			
		||||
 		goto error_free_gpadl;
 | 
			
		||||
@@ -205,11 +206,6 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 | 
			
		||||
 	kfree(open_info);
 | 
			
		||||
 	return 0;
 | 
			
		||||
 
 | 
			
		||||
-error_clean_msglist:
 | 
			
		||||
-	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 | 
			
		||||
-	list_del(&open_info->msglistentry);
 | 
			
		||||
-	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 | 
			
		||||
-
 | 
			
		||||
 error_free_gpadl:
 | 
			
		||||
 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
 | 
			
		||||
 	kfree(open_info);
 | 
			
		||||
-- 
 | 
			
		||||
2.11.1
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,189 @@
 | 
			
		||||
From cc8ae5575e4e77b88c134f82161341a4d4f9b094 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Date: Fri, 5 May 2017 16:57:23 -0600
 | 
			
		||||
Subject: [PATCH 4/7] vmbus: dynamically enqueue/dequeue a channel on
 | 
			
		||||
 vmbus_open/close
 | 
			
		||||
MIME-Version: 1.0
 | 
			
		||||
Content-Type: text/plain; charset=UTF-8
 | 
			
		||||
Content-Transfer-Encoding: 8bit
 | 
			
		||||
 | 
			
		||||
A just-closed channel may have a pending interrupt, and later when a new
 | 
			
		||||
channel with the same channel ID is not yet fully initialized, the
 | 
			
		||||
pending interrupt of the previous channel with the same channel ID can run
 | 
			
		||||
the channel callback on the new channel data structure, causing a crash
 | 
			
		||||
due to a NULL pointer dereference.
 | 
			
		||||
 | 
			
		||||
Normally it’s pretty hard to reproduce the race condition, but it can
 | 
			
		||||
indeed happen with specially-designed hv_sock stress test cases.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
 | 
			
		||||
Tested-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
 | 
			
		||||
Cc: K. Y. Srinivasan <kys@microsoft.com>
 | 
			
		||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
 | 
			
		||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
 | 
			
		||||
Origin: git@github.com:dcui/linux.git
 | 
			
		||||
(cherry picked from commit fed51f8f6068cb953be480c6b6322a3a6e97745d)
 | 
			
		||||
---
 | 
			
		||||
 drivers/hv/channel.c      | 12 +++++++++---
 | 
			
		||||
 drivers/hv/channel_mgmt.c | 50 +++++++++++++++++++++--------------------------
 | 
			
		||||
 include/linux/hyperv.h    |  3 +++
 | 
			
		||||
 3 files changed, 34 insertions(+), 31 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
 | 
			
		||||
index db5e6f8730d2..f288e506fba0 100644
 | 
			
		||||
--- a/drivers/hv/channel.c
 | 
			
		||||
+++ b/drivers/hv/channel.c
 | 
			
		||||
@@ -177,6 +177,8 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 | 
			
		||||
 		      &vmbus_connection.chn_msg_list);
 | 
			
		||||
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 | 
			
		||||
 
 | 
			
		||||
+	hv_percpu_channel_enq(newchannel);
 | 
			
		||||
+
 | 
			
		||||
 	ret = vmbus_post_msg(open_msg,
 | 
			
		||||
 			     sizeof(struct vmbus_channel_open_channel), true);
 | 
			
		||||
 
 | 
			
		||||
@@ -189,23 +191,25 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 | 
			
		||||
 
 | 
			
		||||
 	if (ret != 0) {
 | 
			
		||||
 		err = ret;
 | 
			
		||||
-		goto error_free_gpadl;
 | 
			
		||||
+		goto error_deq_channel;
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
 	if (newchannel->rescind) {
 | 
			
		||||
 		err = -ENODEV;
 | 
			
		||||
-		goto error_free_gpadl;
 | 
			
		||||
+		goto error_deq_channel;
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
 	if (open_info->response.open_result.status) {
 | 
			
		||||
 		err = -EAGAIN;
 | 
			
		||||
-		goto error_free_gpadl;
 | 
			
		||||
+		goto error_deq_channel;
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
 	newchannel->state = CHANNEL_OPENED_STATE;
 | 
			
		||||
 	kfree(open_info);
 | 
			
		||||
 	return 0;
 | 
			
		||||
 
 | 
			
		||||
+error_deq_channel:
 | 
			
		||||
+	hv_percpu_channel_deq(newchannel);
 | 
			
		||||
 error_free_gpadl:
 | 
			
		||||
 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
 | 
			
		||||
 	kfree(open_info);
 | 
			
		||||
@@ -551,6 +555,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 | 
			
		||||
 		goto out;
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
+	hv_percpu_channel_deq(channel);
 | 
			
		||||
+
 | 
			
		||||
 	channel->state = CHANNEL_OPEN_STATE;
 | 
			
		||||
 	channel->sc_creation_callback = NULL;
 | 
			
		||||
 	/* Stop callback and cancel the timer asap */
 | 
			
		||||
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
 | 
			
		||||
index fbcb06352308..c5a01a4d589e 100644
 | 
			
		||||
--- a/drivers/hv/channel_mgmt.c
 | 
			
		||||
+++ b/drivers/hv/channel_mgmt.c
 | 
			
		||||
@@ -363,6 +363,17 @@ static void percpu_channel_enq(void *arg)
 | 
			
		||||
 	list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+void hv_percpu_channel_enq(struct vmbus_channel *channel)
 | 
			
		||||
+{
 | 
			
		||||
+	if (channel->target_cpu != get_cpu())
 | 
			
		||||
+		smp_call_function_single(channel->target_cpu,
 | 
			
		||||
+					 percpu_channel_enq, channel, true);
 | 
			
		||||
+	else
 | 
			
		||||
+		percpu_channel_enq(channel);
 | 
			
		||||
+
 | 
			
		||||
+	put_cpu();
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 static void percpu_channel_deq(void *arg)
 | 
			
		||||
 {
 | 
			
		||||
 	struct vmbus_channel *channel = arg;
 | 
			
		||||
@@ -370,6 +381,17 @@ static void percpu_channel_deq(void *arg)
 | 
			
		||||
 	list_del_rcu(&channel->percpu_list);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+void hv_percpu_channel_deq(struct vmbus_channel *channel)
 | 
			
		||||
+{
 | 
			
		||||
+	if (channel->target_cpu != get_cpu())
 | 
			
		||||
+		smp_call_function_single(channel->target_cpu,
 | 
			
		||||
+					 percpu_channel_deq, channel, true);
 | 
			
		||||
+	else
 | 
			
		||||
+		percpu_channel_deq(channel);
 | 
			
		||||
+
 | 
			
		||||
+	put_cpu();
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 
 | 
			
		||||
 static void vmbus_release_relid(u32 relid)
 | 
			
		||||
 {
 | 
			
		||||
@@ -390,15 +412,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 | 
			
		||||
 	BUG_ON(!channel->rescind);
 | 
			
		||||
 	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
 | 
			
		||||
 
 | 
			
		||||
-	if (channel->target_cpu != get_cpu()) {
 | 
			
		||||
-		put_cpu();
 | 
			
		||||
-		smp_call_function_single(channel->target_cpu,
 | 
			
		||||
-					 percpu_channel_deq, channel, true);
 | 
			
		||||
-	} else {
 | 
			
		||||
-		percpu_channel_deq(channel);
 | 
			
		||||
-		put_cpu();
 | 
			
		||||
-	}
 | 
			
		||||
-
 | 
			
		||||
 	if (channel->primary_channel == NULL) {
 | 
			
		||||
 		list_del(&channel->listentry);
 | 
			
		||||
 
 | 
			
		||||
@@ -491,16 +504,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 | 
			
		||||
 
 | 
			
		||||
 	init_vp_index(newchannel, dev_type);
 | 
			
		||||
 
 | 
			
		||||
-	if (newchannel->target_cpu != get_cpu()) {
 | 
			
		||||
-		put_cpu();
 | 
			
		||||
-		smp_call_function_single(newchannel->target_cpu,
 | 
			
		||||
-					 percpu_channel_enq,
 | 
			
		||||
-					 newchannel, true);
 | 
			
		||||
-	} else {
 | 
			
		||||
-		percpu_channel_enq(newchannel);
 | 
			
		||||
-		put_cpu();
 | 
			
		||||
-	}
 | 
			
		||||
-
 | 
			
		||||
 	/*
 | 
			
		||||
 	 * This state is used to indicate a successful open
 | 
			
		||||
 	 * so that when we do close the channel normally, we
 | 
			
		||||
@@ -549,15 +552,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 | 
			
		||||
 	list_del(&newchannel->listentry);
 | 
			
		||||
 	mutex_unlock(&vmbus_connection.channel_mutex);
 | 
			
		||||
 
 | 
			
		||||
-	if (newchannel->target_cpu != get_cpu()) {
 | 
			
		||||
-		put_cpu();
 | 
			
		||||
-		smp_call_function_single(newchannel->target_cpu,
 | 
			
		||||
-					 percpu_channel_deq, newchannel, true);
 | 
			
		||||
-	} else {
 | 
			
		||||
-		percpu_channel_deq(newchannel);
 | 
			
		||||
-		put_cpu();
 | 
			
		||||
-	}
 | 
			
		||||
-
 | 
			
		||||
 	vmbus_release_relid(newchannel->offermsg.child_relid);
 | 
			
		||||
 
 | 
			
		||||
 err_free_chan:
 | 
			
		||||
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
 | 
			
		||||
index 970771a5f739..a8bae2caa69f 100644
 | 
			
		||||
--- a/include/linux/hyperv.h
 | 
			
		||||
+++ b/include/linux/hyperv.h
 | 
			
		||||
@@ -1437,6 +1437,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
 | 
			
		||||
 				const int *srv_version, int srv_vercnt,
 | 
			
		||||
 				int *nego_fw_version, int *nego_srv_version);
 | 
			
		||||
 
 | 
			
		||||
+void hv_percpu_channel_enq(struct vmbus_channel *channel);
 | 
			
		||||
+void hv_percpu_channel_deq(struct vmbus_channel *channel);
 | 
			
		||||
+
 | 
			
		||||
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
 | 
			
		||||
 
 | 
			
		||||
 void vmbus_setevent(struct vmbus_channel *channel);
 | 
			
		||||
-- 
 | 
			
		||||
2.11.1
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,934 @@
 | 
			
		||||
From aa172b579ab1228c9d4364d2d21ede58927648f4 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Date: Fri, 5 May 2017 16:57:26 -0600
 | 
			
		||||
Subject: [PATCH 5/7] hv_sock: implements Hyper-V transport for Virtual Sockets
 | 
			
		||||
 (AF_VSOCK)
 | 
			
		||||
 | 
			
		||||
Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
 | 
			
		||||
mechanism between the host and the guest. It uses VMBus ringbuffer as the
 | 
			
		||||
transportation layer.
 | 
			
		||||
 | 
			
		||||
With hv_sock, applications between the host (Windows 10, Windows Server
 | 
			
		||||
2016 or newer) and the guest can talk with each other using the traditional
 | 
			
		||||
socket APIs.
 | 
			
		||||
 | 
			
		||||
More info about Hyper-V Sockets is available here:
 | 
			
		||||
 | 
			
		||||
"Make your own integration services":
 | 
			
		||||
https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-guide/make-integration-service
 | 
			
		||||
 | 
			
		||||
The patch implements the necessary support in Linux guest by introducing a new
 | 
			
		||||
vsock transport for AF_VSOCK.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Cc: K. Y. Srinivasan <kys@microsoft.com>
 | 
			
		||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
 | 
			
		||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
 | 
			
		||||
Cc: Andy King <acking@vmware.com>
 | 
			
		||||
Cc: Dmitry Torokhov <dtor@vmware.com>
 | 
			
		||||
Cc: George Zhang <georgezhang@vmware.com>
 | 
			
		||||
Cc: Jorgen Hansen <jhansen@vmware.com>
 | 
			
		||||
Cc: Reilly Grant <grantr@vmware.com>
 | 
			
		||||
Cc: Asias He <asias@redhat.com>
 | 
			
		||||
Cc: Stefan Hajnoczi <stefanha@redhat.com>
 | 
			
		||||
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
 | 
			
		||||
Cc: Cathy Avery <cavery@redhat.com>
 | 
			
		||||
Cc: Rolf Neugebauer <rolf.neugebauer@docker.com>
 | 
			
		||||
Origin: git@github.com:dcui/linux.git
 | 
			
		||||
(cherry picked from commit 691aff936c0fa6fc24e2662c0e8cc6ab65509600)
 | 
			
		||||
---
 | 
			
		||||
 MAINTAINERS                      |   1 +
 | 
			
		||||
 net/vmw_vsock/Kconfig            |  12 +
 | 
			
		||||
 net/vmw_vsock/Makefile           |   3 +
 | 
			
		||||
 net/vmw_vsock/hyperv_transport.c | 829 +++++++++++++++++++++++++++++++++++++++
 | 
			
		||||
 4 files changed, 845 insertions(+)
 | 
			
		||||
 create mode 100644 net/vmw_vsock/hyperv_transport.c
 | 
			
		||||
 | 
			
		||||
diff --git a/MAINTAINERS b/MAINTAINERS
 | 
			
		||||
index 38d3e4ed7208..53bf52ce3173 100644
 | 
			
		||||
--- a/MAINTAINERS
 | 
			
		||||
+++ b/MAINTAINERS
 | 
			
		||||
@@ -6077,6 +6077,7 @@ F:	drivers/net/hyperv/
 | 
			
		||||
 F:	drivers/scsi/storvsc_drv.c
 | 
			
		||||
 F:	drivers/uio/uio_hv_generic.c
 | 
			
		||||
 F:	drivers/video/fbdev/hyperv_fb.c
 | 
			
		||||
+F:	net/vmw_vsock/hyperv_transport.c
 | 
			
		||||
 F:	include/linux/hyperv.h
 | 
			
		||||
 F:	tools/hv/
 | 
			
		||||
 F:	Documentation/ABI/stable/sysfs-bus-vmbus
 | 
			
		||||
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
 | 
			
		||||
index 8831e7c42167..a24369d175fd 100644
 | 
			
		||||
--- a/net/vmw_vsock/Kconfig
 | 
			
		||||
+++ b/net/vmw_vsock/Kconfig
 | 
			
		||||
@@ -46,3 +46,15 @@ config VIRTIO_VSOCKETS_COMMON
 | 
			
		||||
 	  This option is selected by any driver which needs to access
 | 
			
		||||
 	  the virtio_vsock.  The module will be called
 | 
			
		||||
 	  vmw_vsock_virtio_transport_common.
 | 
			
		||||
+
 | 
			
		||||
+config HYPERV_VSOCKETS
 | 
			
		||||
+	tristate "Hyper-V transport for Virtual Sockets"
 | 
			
		||||
+	depends on VSOCKETS && HYPERV
 | 
			
		||||
+	help
 | 
			
		||||
+	  This module implements a Hyper-V transport for Virtual Sockets.
 | 
			
		||||
+
 | 
			
		||||
+	  Enable this transport if your Virtual Machine host supports Virtual
 | 
			
		||||
+	  Sockets over Hyper-V VMBus.
 | 
			
		||||
+
 | 
			
		||||
+	  To compile this driver as a module, choose M here: the module will be
 | 
			
		||||
+	  called hv_sock. If unsure, say N.
 | 
			
		||||
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
 | 
			
		||||
index bc27c70e0e59..f70f3e70ce9e 100644
 | 
			
		||||
--- a/net/vmw_vsock/Makefile
 | 
			
		||||
+++ b/net/vmw_vsock/Makefile
 | 
			
		||||
@@ -2,6 +2,7 @@ obj-$(CONFIG_VSOCKETS) += vsock.o
 | 
			
		||||
 obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
 | 
			
		||||
 obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
 | 
			
		||||
 obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
 | 
			
		||||
+obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
 | 
			
		||||
 
 | 
			
		||||
 vsock-y += af_vsock.o vsock_addr.o
 | 
			
		||||
 
 | 
			
		||||
@@ -11,3 +12,5 @@ vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
 | 
			
		||||
 vmw_vsock_virtio_transport-y += virtio_transport.o
 | 
			
		||||
 
 | 
			
		||||
 vmw_vsock_virtio_transport_common-y += virtio_transport_common.o
 | 
			
		||||
+
 | 
			
		||||
+hv_sock-y += hyperv_transport.o
 | 
			
		||||
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
 | 
			
		||||
new file mode 100644
 | 
			
		||||
index 000000000000..fd89bf357617
 | 
			
		||||
--- /dev/null
 | 
			
		||||
+++ b/net/vmw_vsock/hyperv_transport.c
 | 
			
		||||
@@ -0,0 +1,829 @@
 | 
			
		||||
+/*
 | 
			
		||||
+ * Hyper-V transport for vsock
 | 
			
		||||
+ *
 | 
			
		||||
+ * Hyper-V Sockets supplies a byte-stream based communication mechanism
 | 
			
		||||
+ * between the host and the VM. This driver implements the necessary
 | 
			
		||||
+ * support in the VM by introducing the new vsock transport.
 | 
			
		||||
+ *
 | 
			
		||||
+ * Copyright (c) 2017, Microsoft Corporation.
 | 
			
		||||
+ *
 | 
			
		||||
+ * This program is free software; you can redistribute it and/or modify it
 | 
			
		||||
+ * under the terms and conditions of the GNU General Public License,
 | 
			
		||||
+ * version 2, as published by the Free Software Foundation.
 | 
			
		||||
+ *
 | 
			
		||||
+ * This program is distributed in the hope it will be useful, but WITHOUT
 | 
			
		||||
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | 
			
		||||
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 | 
			
		||||
+ * more details.
 | 
			
		||||
+ *
 | 
			
		||||
+ */
 | 
			
		||||
+#include <linux/module.h>
 | 
			
		||||
+#include <linux/vmalloc.h>
 | 
			
		||||
+#include <linux/hyperv.h>
 | 
			
		||||
+#include <net/sock.h>
 | 
			
		||||
+#include <net/af_vsock.h>
 | 
			
		||||
+
 | 
			
		||||
+/* The host side's design of the feature requires exactly six 4KB pages for
 | 
			
		||||
+ * recv/send rings respectively -- this is suboptimal considering memory
 | 
			
		||||
+ * consumption, but unfortunately we have to live with it until the
 | 
			
		||||
+ * host comes up with a better design in the future.
 | 
			
		||||
+ */
 | 
			
		||||
+#define PAGE_SIZE_4K		4096
 | 
			
		||||
+#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
 | 
			
		||||
+#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
 | 
			
		||||
+
 | 
			
		||||
+/* The MTU is 16KB per the host side's design */
 | 
			
		||||
+#define HVS_MTU_SIZE		(1024 * 16)
 | 
			
		||||
+
 | 
			
		||||
+struct vmpipe_proto_header {
 | 
			
		||||
+	u32 pkt_type;
 | 
			
		||||
+	u32 data_size;
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
 | 
			
		||||
+ * data from the ringbuffer into the userspace buffer.
 | 
			
		||||
+ */
 | 
			
		||||
+struct hvs_recv_buf {
 | 
			
		||||
+	/* The header before the payload data */
 | 
			
		||||
+	struct vmpipe_proto_header hdr;
 | 
			
		||||
+
 | 
			
		||||
+	/* The payload */
 | 
			
		||||
+	u8 data[HVS_MTU_SIZE];
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
 | 
			
		||||
+ * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
 | 
			
		||||
+ * buffer, because tests show there is no significant performance difference.
 | 
			
		||||
+ *
 | 
			
		||||
+ * Note: the buffer can be eliminated in the future when we add new VMBus
 | 
			
		||||
+ * ringbuffer APIs that allow us to directly copy data from userspace buffer
 | 
			
		||||
+ * to VMBus ringbuffer.
 | 
			
		||||
+ */
 | 
			
		||||
+#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
 | 
			
		||||
+
 | 
			
		||||
+struct hvs_send_buf {
 | 
			
		||||
+	/* The header before the payload data */
 | 
			
		||||
+	struct vmpipe_proto_header hdr;
 | 
			
		||||
+
 | 
			
		||||
+	/* The payload */
 | 
			
		||||
+	u8 data[HVS_SEND_BUF_SIZE];
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+#define HVS_HEADER_LEN	(sizeof(struct vmpacket_descriptor) + \
 | 
			
		||||
+			 sizeof(struct vmpipe_proto_header))
 | 
			
		||||
+
 | 
			
		||||
+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
 | 
			
		||||
+ * __hv_pkt_iter_next().
 | 
			
		||||
+ */
 | 
			
		||||
+#define VMBUS_PKT_TRAILER	(sizeof(u64))
 | 
			
		||||
+
 | 
			
		||||
+#define HVS_PKT_LEN(payload_len)	(HVS_HEADER_LEN + \
 | 
			
		||||
+					 ALIGN((payload_len), 8) + \
 | 
			
		||||
+					 VMBUS_PKT_TRAILER)
 | 
			
		||||
+
 | 
			
		||||
+/* Per-socket state (accessed via vsk->trans) */
 | 
			
		||||
+struct hvsock {
 | 
			
		||||
+	struct vsock_sock *vsk;
 | 
			
		||||
+
 | 
			
		||||
+	uuid_le	vm_srv_id;
 | 
			
		||||
+	uuid_le	host_srv_id;
 | 
			
		||||
+
 | 
			
		||||
+	struct vmbus_channel *chan;
 | 
			
		||||
+	struct vmpacket_descriptor *recv_desc;
 | 
			
		||||
+
 | 
			
		||||
+	/* The length of the payload not delivered to userland yet */
 | 
			
		||||
+	u32 recv_data_len;
 | 
			
		||||
+	/* The offset of the payload */
 | 
			
		||||
+	u32 recv_data_off;
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
 | 
			
		||||
+ * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
 | 
			
		||||
+ * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
 | 
			
		||||
+ * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
 | 
			
		||||
+ * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
 | 
			
		||||
+ * as the local cid.
 | 
			
		||||
+ *
 | 
			
		||||
+ * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 | 
			
		||||
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-guide/make-integration-service
 | 
			
		||||
+ * and the endpoint is <VmID, ServiceId> with
 | 
			
		||||
+ * the below sockaddr:
 | 
			
		||||
+ *
 | 
			
		||||
+ * struct SOCKADDR_HV
 | 
			
		||||
+ * {
 | 
			
		||||
+ *    ADDRESS_FAMILY Family;
 | 
			
		||||
+ *    USHORT Reserved;
 | 
			
		||||
+ *    GUID VmId;
 | 
			
		||||
+ *    GUID ServiceId;
 | 
			
		||||
+ * };
 | 
			
		||||
+ * Note: VmID is not used by Linux VM and actually it isn't transmitted via
 | 
			
		||||
+ * VMBus, because here it's obvious the host and the VM can easily identify
 | 
			
		||||
+ * each other. Though the VmID is useful on the host, especially in the case
 | 
			
		||||
+ * of Windows container, Linux VM doesn't need it at all.
 | 
			
		||||
+ *
 | 
			
		||||
+ * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
 | 
			
		||||
+ * the available GUID space of SOCKADDR_HV so that we can create a mapping
 | 
			
		||||
+ * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
 | 
			
		||||
+ * Hyper-V Sockets apps on the host and in Linux VM is:
 | 
			
		||||
+ *
 | 
			
		||||
+ ****************************************************************************
 | 
			
		||||
+ * the only valid Service GUIDs, from the perspectives of both the host and *
 | 
			
		||||
+ * Linux VM, that can be connected by the other end, must conform to this   *
 | 
			
		||||
+ * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in    *
 | 
			
		||||
+ * this range [0, 0x7FFFFFFF].                                              *
 | 
			
		||||
+ ****************************************************************************
 | 
			
		||||
+ *
 | 
			
		||||
+ * When we write apps on the host to connect(), the GUID ServiceID is used.
 | 
			
		||||
+ * When we write apps in Linux VM to connect(), we only need to specify the
 | 
			
		||||
+ * port and the driver will form the GUID and use that to request the host.
 | 
			
		||||
+ *
 | 
			
		||||
+ * From the perspective of Linux VM:
 | 
			
		||||
+ * 1. the local ephemeral port (i.e. the local auto-bound port when we call
 | 
			
		||||
+ * connect() without explicit bind()) is generated by __vsock_bind_stream(),
 | 
			
		||||
+ * and the range is [1024, 0xFFFFFFFF).
 | 
			
		||||
+ * 2. the remote ephemeral port (i.e. the auto-generated remote port for
 | 
			
		||||
+ * a connect request initiated by the host's connect()) is generated by
 | 
			
		||||
+ * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
 | 
			
		||||
+ */
 | 
			
		||||
+
 | 
			
		||||
+#define MAX_LISTEN_PORT			((u32)0x7FFFFFFF)
 | 
			
		||||
+#define MAX_VM_LISTEN_PORT		MAX_LISTEN_PORT
 | 
			
		||||
+#define MAX_HOST_LISTEN_PORT		MAX_LISTEN_PORT
 | 
			
		||||
+#define MIN_HOST_EPHEMERAL_PORT		(MAX_HOST_LISTEN_PORT + 1)
 | 
			
		||||
+
 | 
			
		||||
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
 | 
			
		||||
+static const uuid_le srv_id_template =
 | 
			
		||||
+	UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
 | 
			
		||||
+		0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
 | 
			
		||||
+
 | 
			
		||||
+static inline bool is_valid_srv_id(const uuid_le *id)
 | 
			
		||||
+{
 | 
			
		||||
+	return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline unsigned int get_port_by_srv_id(const uuid_le *svr_id)
 | 
			
		||||
+{
 | 
			
		||||
+	return *((unsigned int *)svr_id);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline void hvs_addr_init(struct sockaddr_vm *addr,
 | 
			
		||||
+				 const uuid_le *svr_id)
 | 
			
		||||
+{
 | 
			
		||||
+	unsigned int port = get_port_by_srv_id(svr_id);
 | 
			
		||||
+
 | 
			
		||||
+	vsock_addr_init(addr, VMADDR_CID_ANY, port);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline void hvs_remote_addr_init(struct sockaddr_vm *remote,
 | 
			
		||||
+					struct sockaddr_vm *local)
 | 
			
		||||
+{
 | 
			
		||||
+	static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
 | 
			
		||||
+	struct sock *sk;
 | 
			
		||||
+
 | 
			
		||||
+	vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);
 | 
			
		||||
+
 | 
			
		||||
+	while (1) {
 | 
			
		||||
+		/* Wrap around ? */
 | 
			
		||||
+		if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
 | 
			
		||||
+		    host_ephemeral_port == VMADDR_PORT_ANY)
 | 
			
		||||
+			host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
 | 
			
		||||
+
 | 
			
		||||
+		remote->svm_port = host_ephemeral_port++;
 | 
			
		||||
+
 | 
			
		||||
+		sk = vsock_find_connected_socket(remote, local);
 | 
			
		||||
+		if (!sk) {
 | 
			
		||||
+			/* Found an available ephemeral port */
 | 
			
		||||
+			return;
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		/* Release refcnt got in vsock_find_connected_socket */
 | 
			
		||||
+		sock_put(sk);
 | 
			
		||||
+	}
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool hvs_channel_readable(struct vmbus_channel *chan)
 | 
			
		||||
+{
 | 
			
		||||
+	u32 readable = hv_get_bytes_to_read(&chan->inbound);
 | 
			
		||||
+
 | 
			
		||||
+	/* 0-size payload means FIN */
 | 
			
		||||
+	return readable >= HVS_PKT_LEN(0);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_channel_readable_payload(struct vmbus_channel *chan)
 | 
			
		||||
+{
 | 
			
		||||
+	u32 readable = hv_get_bytes_to_read(&chan->inbound);
 | 
			
		||||
+
 | 
			
		||||
+	if (readable > HVS_PKT_LEN(0)) {
 | 
			
		||||
+		/* At least we have 1 byte to read. We don't need to return
 | 
			
		||||
+		 * the exact readable bytes: see vsock_stream_recvmsg() ->
 | 
			
		||||
+		 * vsock_stream_has_data().
 | 
			
		||||
+		 */
 | 
			
		||||
+		return 1;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	if (readable == HVS_PKT_LEN(0)) {
 | 
			
		||||
+		/* 0-size payload means FIN */
 | 
			
		||||
+		return 0;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	/* No payload or FIN */
 | 
			
		||||
+	return -1;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
 | 
			
		||||
+{
 | 
			
		||||
+	u32 writeable = hv_get_bytes_to_write(&chan->outbound);
 | 
			
		||||
+	size_t ret;
 | 
			
		||||
+
 | 
			
		||||
+	/* The ringbuffer mustn't be 100% full, and we should reserve a
 | 
			
		||||
+	 * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
 | 
			
		||||
+	 * and hvs_shutdown().
 | 
			
		||||
+	 */
 | 
			
		||||
+	if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
 | 
			
		||||
+		return 0;
 | 
			
		||||
+
 | 
			
		||||
+	ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);
 | 
			
		||||
+
 | 
			
		||||
+	return round_down(ret, 8);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_send_data(struct vmbus_channel *chan,
 | 
			
		||||
+			 struct hvs_send_buf *send_buf, size_t to_write)
 | 
			
		||||
+{
 | 
			
		||||
+	send_buf->hdr.pkt_type = 1;
 | 
			
		||||
+	send_buf->hdr.data_size = to_write;
 | 
			
		||||
+	return vmbus_sendpacket(chan, &send_buf->hdr,
 | 
			
		||||
+				sizeof(send_buf->hdr) + to_write,
 | 
			
		||||
+				0, VM_PKT_DATA_INBAND, 0);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_channel_cb(void *ctx)
 | 
			
		||||
+{
 | 
			
		||||
+	struct sock *sk = (struct sock *)ctx;
 | 
			
		||||
+	struct vsock_sock *vsk = vsock_sk(sk);
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+	struct vmbus_channel *chan = hvs->chan;
 | 
			
		||||
+
 | 
			
		||||
+	if (hvs_channel_readable(chan))
 | 
			
		||||
+		sk->sk_data_ready(sk);
 | 
			
		||||
+
 | 
			
		||||
+	/* Mark it writable only if there is enough space */
 | 
			
		||||
+	if (hvs_channel_writable_bytes(chan) >= HVS_SEND_BUF_SIZE)
 | 
			
		||||
+		sk->sk_write_space(sk);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_close_connection(struct vmbus_channel *chan)
 | 
			
		||||
+{
 | 
			
		||||
+	struct sock *sk = get_per_channel_state(chan);
 | 
			
		||||
+	struct vsock_sock *vsk = vsock_sk(sk);
 | 
			
		||||
+
 | 
			
		||||
+	sk->sk_state = SS_UNCONNECTED;
 | 
			
		||||
+	sock_set_flag(sk, SOCK_DONE);
 | 
			
		||||
+	vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
 | 
			
		||||
+
 | 
			
		||||
+	sk->sk_state_change(sk);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_open_connection(struct vmbus_channel *chan)
 | 
			
		||||
+{
 | 
			
		||||
+	uuid_le *if_instance, *if_type;
 | 
			
		||||
+	unsigned char conn_from_host;
 | 
			
		||||
+
 | 
			
		||||
+	struct sockaddr_vm addr;
 | 
			
		||||
+	struct sock *sk, *new = NULL;
 | 
			
		||||
+	struct vsock_sock *vnew;
 | 
			
		||||
+	struct hvsock *hvs, *hvs_new;
 | 
			
		||||
+	int ret;
 | 
			
		||||
+
 | 
			
		||||
+	if_type = &chan->offermsg.offer.if_type;
 | 
			
		||||
+	if_instance = &chan->offermsg.offer.if_instance;
 | 
			
		||||
+	conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
 | 
			
		||||
+
 | 
			
		||||
+	/* The host or the VM should only listen on a port in
 | 
			
		||||
+	 * [0, MAX_LISTEN_PORT]
 | 
			
		||||
+	 */
 | 
			
		||||
+	if (!is_valid_srv_id(if_type) ||
 | 
			
		||||
+	    get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
+	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
 | 
			
		||||
+	sk = vsock_find_bound_socket(&addr);
 | 
			
		||||
+	if (!sk)
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
+	if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) ||
 | 
			
		||||
+	    (!conn_from_host && sk->sk_state != SS_CONNECTING))
 | 
			
		||||
+		goto out;
 | 
			
		||||
+
 | 
			
		||||
+	if (conn_from_host) {
 | 
			
		||||
+		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
 | 
			
		||||
+			goto out;
 | 
			
		||||
+
 | 
			
		||||
+		new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
 | 
			
		||||
+				     sk->sk_type, 0);
 | 
			
		||||
+		if (!new)
 | 
			
		||||
+			goto out;
 | 
			
		||||
+
 | 
			
		||||
+		new->sk_state = SS_CONNECTING;
 | 
			
		||||
+		vnew = vsock_sk(new);
 | 
			
		||||
+		hvs_new = vnew->trans;
 | 
			
		||||
+		hvs_new->chan = chan;
 | 
			
		||||
+	} else {
 | 
			
		||||
+		hvs = vsock_sk(sk)->trans;
 | 
			
		||||
+		hvs->chan = chan;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	set_channel_read_mode(chan, HV_CALL_DIRECT);
 | 
			
		||||
+	ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
 | 
			
		||||
+			 RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
 | 
			
		||||
+			 hvs_channel_cb, conn_from_host ? new : sk);
 | 
			
		||||
+	if (ret != 0) {
 | 
			
		||||
+		if (conn_from_host) {
 | 
			
		||||
+			hvs_new->chan = NULL;
 | 
			
		||||
+			sock_put(new);
 | 
			
		||||
+		} else {
 | 
			
		||||
+			hvs->chan = NULL;
 | 
			
		||||
+		}
 | 
			
		||||
+		goto out;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	set_per_channel_state(chan, conn_from_host ? new : sk);
 | 
			
		||||
+	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
 | 
			
		||||
+
 | 
			
		||||
+	/* See hvs_channel_cb() and hvs_notify_poll_out()  */
 | 
			
		||||
+	set_channel_pending_send_size(chan,
 | 
			
		||||
+				      HVS_PKT_LEN(HVS_SEND_BUF_SIZE) + 1);
 | 
			
		||||
+
 | 
			
		||||
+	if (conn_from_host) {
 | 
			
		||||
+		new->sk_state = SS_CONNECTED;
 | 
			
		||||
+		sk->sk_ack_backlog++;
 | 
			
		||||
+
 | 
			
		||||
+		hvs_addr_init(&vnew->local_addr, if_type);
 | 
			
		||||
+		hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
 | 
			
		||||
+
 | 
			
		||||
+		hvs_new->vm_srv_id = *if_type;
 | 
			
		||||
+		hvs_new->host_srv_id = *if_instance;
 | 
			
		||||
+
 | 
			
		||||
+		vsock_insert_connected(vnew);
 | 
			
		||||
+		vsock_enqueue_accept(sk, new);
 | 
			
		||||
+	} else {
 | 
			
		||||
+		sk->sk_state = SS_CONNECTED;
 | 
			
		||||
+		sk->sk_socket->state = SS_CONNECTED;
 | 
			
		||||
+
 | 
			
		||||
+		vsock_insert_connected(vsock_sk(sk));
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	sk->sk_state_change(sk);
 | 
			
		||||
+
 | 
			
		||||
+out:
 | 
			
		||||
+	/* Release refcnt obtained when we called vsock_find_bound_socket() */
 | 
			
		||||
+	sock_put(sk);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static u32 hvs_get_local_cid(void)
 | 
			
		||||
+{
 | 
			
		||||
+	return VMADDR_CID_ANY;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs;
 | 
			
		||||
+
 | 
			
		||||
+	hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
 | 
			
		||||
+	if (!hvs)
 | 
			
		||||
+		return -ENOMEM;
 | 
			
		||||
+
 | 
			
		||||
+	vsk->trans = hvs;
 | 
			
		||||
+	hvs->vsk = vsk;
 | 
			
		||||
+
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_connect(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *h = vsk->trans;
 | 
			
		||||
+
 | 
			
		||||
+	h->vm_srv_id = srv_id_template;
 | 
			
		||||
+	h->host_srv_id = srv_id_template;
 | 
			
		||||
+
 | 
			
		||||
+	*((u32 *)&h->vm_srv_id) = vsk->local_addr.svm_port;
 | 
			
		||||
+	*((u32 *)&h->host_srv_id) = vsk->remote_addr.svm_port;
 | 
			
		||||
+
 | 
			
		||||
+	return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_shutdown(struct vsock_sock *vsk, int mode)
 | 
			
		||||
+{
 | 
			
		||||
+	struct vmpipe_proto_header hdr;
 | 
			
		||||
+	struct hvs_send_buf *send_buf;
 | 
			
		||||
+	struct hvsock *hvs;
 | 
			
		||||
+
 | 
			
		||||
+	if (!(mode & SEND_SHUTDOWN))
 | 
			
		||||
+		return 0;
 | 
			
		||||
+
 | 
			
		||||
+	hvs = vsk->trans;
 | 
			
		||||
+
 | 
			
		||||
+	send_buf = (struct hvs_send_buf *)&hdr;
 | 
			
		||||
+
 | 
			
		||||
+	/* It can't fail: see hvs_channel_writable_bytes(). */
 | 
			
		||||
+	(void)hvs_send_data(hvs->chan, send_buf, 0);
 | 
			
		||||
+
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_release(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+	struct vmbus_channel *chan = hvs->chan;
 | 
			
		||||
+
 | 
			
		||||
+	if (chan)
 | 
			
		||||
+		hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
 | 
			
		||||
+
 | 
			
		||||
+	vsock_remove_sock(vsk);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_destruct(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+	struct vmbus_channel *chan = hvs->chan;
 | 
			
		||||
+
 | 
			
		||||
+	if (chan)
 | 
			
		||||
+		vmbus_hvsock_device_unregister(chan);
 | 
			
		||||
+
 | 
			
		||||
+	kfree(hvs);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
 | 
			
		||||
+{
 | 
			
		||||
+	return -EOPNOTSUPP;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 | 
			
		||||
+			     size_t len, int flags)
 | 
			
		||||
+{
 | 
			
		||||
+	return -EOPNOTSUPP;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_dgram_enqueue(struct vsock_sock *vsk,
 | 
			
		||||
+			     struct sockaddr_vm *remote, struct msghdr *msg,
 | 
			
		||||
+			     size_t dgram_len)
 | 
			
		||||
+{
 | 
			
		||||
+	return -EOPNOTSUPP;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool hvs_dgram_allow(u32 cid, u32 port)
 | 
			
		||||
+{
 | 
			
		||||
+	return false;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 | 
			
		||||
+				  size_t len, int flags)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+	bool need_refill = !hvs->recv_desc;
 | 
			
		||||
+	struct hvs_recv_buf *recv_buf;
 | 
			
		||||
+	u32 payload_len, to_read;
 | 
			
		||||
+	int ret;
 | 
			
		||||
+
 | 
			
		||||
+	if (flags & MSG_PEEK)
 | 
			
		||||
+		return -EOPNOTSUPP;
 | 
			
		||||
+
 | 
			
		||||
+	if (need_refill) {
 | 
			
		||||
+		hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
 | 
			
		||||
+		recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
 | 
			
		||||
+
 | 
			
		||||
+		payload_len = recv_buf->hdr.data_size;
 | 
			
		||||
+		if (payload_len == 0 || payload_len > HVS_MTU_SIZE)
 | 
			
		||||
+			return -EIO;
 | 
			
		||||
+
 | 
			
		||||
+		hvs->recv_data_len = payload_len;
 | 
			
		||||
+		hvs->recv_data_off = 0;
 | 
			
		||||
+	} else {
 | 
			
		||||
+		recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	to_read = min_t(u32, len, hvs->recv_data_len);
 | 
			
		||||
+	ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
 | 
			
		||||
+	if (ret != 0)
 | 
			
		||||
+		return ret;
 | 
			
		||||
+
 | 
			
		||||
+	hvs->recv_data_len -= to_read;
 | 
			
		||||
+
 | 
			
		||||
+	if (hvs->recv_data_len == 0)
 | 
			
		||||
+		hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
 | 
			
		||||
+	else
 | 
			
		||||
+		hvs->recv_data_off += to_read;
 | 
			
		||||
+
 | 
			
		||||
+	return to_read;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
 | 
			
		||||
+				  size_t len)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+	struct vmbus_channel *chan = hvs->chan;
 | 
			
		||||
+	struct hvs_send_buf *send_buf;
 | 
			
		||||
+	ssize_t to_write, max_writable, ret;	/* signed: ret may hold -errno */
 | 
			
		||||
+
 | 
			
		||||
+	BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
 | 
			
		||||
+
 | 
			
		||||
+	send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
 | 
			
		||||
+	if (!send_buf)
 | 
			
		||||
+		return -ENOMEM;
 | 
			
		||||
+
 | 
			
		||||
+	max_writable = hvs_channel_writable_bytes(chan);
 | 
			
		||||
+	to_write = min_t(size_t, len, max_writable);
 | 
			
		||||
+	to_write = min_t(size_t, to_write, HVS_SEND_BUF_SIZE);
 | 
			
		||||
+
 | 
			
		||||
+	ret = memcpy_from_msg(send_buf->data, msg, to_write);
 | 
			
		||||
+	if (ret < 0)	/* copy from the caller's msghdr failed */
 | 
			
		||||
+		goto out;
 | 
			
		||||
+
 | 
			
		||||
+	ret = hvs_send_data(hvs->chan, send_buf, to_write);
 | 
			
		||||
+	if (ret < 0)	/* propagate the send error instead of a byte count */
 | 
			
		||||
+		goto out;
 | 
			
		||||
+
 | 
			
		||||
+	ret = to_write;	/* success: number of payload bytes queued */
 | 
			
		||||
+out:
 | 
			
		||||
+	kfree(send_buf);
 | 
			
		||||
+	return ret;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static s64 hvs_stream_has_data(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+	s64 ret;
 | 
			
		||||
+
 | 
			
		||||
+	switch (hvs_channel_readable_payload(hvs->chan)) {
 | 
			
		||||
+	case 1:
 | 
			
		||||
+		ret = 1;
 | 
			
		||||
+		break;
 | 
			
		||||
+	case 0:
 | 
			
		||||
+		vsk->peer_shutdown |= SEND_SHUTDOWN;
 | 
			
		||||
+		ret = 0;
 | 
			
		||||
+		break;
 | 
			
		||||
+	default: /* -1 */
 | 
			
		||||
+		ret = 0;
 | 
			
		||||
+		break;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return ret;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static s64 hvs_stream_has_space(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+
 | 
			
		||||
+	return hvs_channel_writable_bytes(hvs->chan);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	return HVS_MTU_SIZE + 1;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool hvs_stream_is_active(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+
 | 
			
		||||
+	return hvs->chan != NULL;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool hvs_stream_allow(u32 cid, u32 port)
 | 
			
		||||
+{
 | 
			
		||||
+	static const u32 valid_cids[] = {
 | 
			
		||||
+		VMADDR_CID_ANY,
 | 
			
		||||
+		VMADDR_CID_HOST,
 | 
			
		||||
+	};
 | 
			
		||||
+	int i;
 | 
			
		||||
+
 | 
			
		||||
+	/* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
 | 
			
		||||
+	 * reserved as ephemeral ports, which are used as the host's ports
 | 
			
		||||
+	 * when the host initiates connections.
 | 
			
		||||
+	 */
 | 
			
		||||
+	if (port > MAX_HOST_LISTEN_PORT)
 | 
			
		||||
+		return false;
 | 
			
		||||
+
 | 
			
		||||
+	for (i = 0; i < ARRAY_SIZE(valid_cids); i++) {
 | 
			
		||||
+		if (cid == valid_cids[i])
 | 
			
		||||
+			return true;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return false;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
 | 
			
		||||
+{
 | 
			
		||||
+	struct hvsock *hvs = vsk->trans;
 | 
			
		||||
+
 | 
			
		||||
+	*readable = hvs_channel_readable(hvs->chan);
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
 | 
			
		||||
+{
 | 
			
		||||
+	/* Report writable only if there is enough space */
 | 
			
		||||
+	*writable = hvs_stream_has_space(vsk) >= HVS_SEND_BUF_SIZE;
 | 
			
		||||
+
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
 | 
			
		||||
+			 struct vsock_transport_recv_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
 | 
			
		||||
+			      struct vsock_transport_recv_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
 | 
			
		||||
+				struct vsock_transport_recv_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
 | 
			
		||||
+				 ssize_t copied, bool data_read,
 | 
			
		||||
+				 struct vsock_transport_recv_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_send_init(struct vsock_sock *vsk,
 | 
			
		||||
+			 struct vsock_transport_send_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_send_pre_block(struct vsock_sock *vsk,
 | 
			
		||||
+			      struct vsock_transport_send_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
 | 
			
		||||
+				struct vsock_transport_send_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static
 | 
			
		||||
+int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
 | 
			
		||||
+				 struct vsock_transport_send_notify_data *d)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
 | 
			
		||||
+{
 | 
			
		||||
+	/* Ignored. */
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
 | 
			
		||||
+{
 | 
			
		||||
+	/* Ignored. */
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
 | 
			
		||||
+{
 | 
			
		||||
+	/* Ignored. */
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	return -ENOPROTOOPT;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	return -ENOPROTOOPT;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
 | 
			
		||||
+{
 | 
			
		||||
+	return -ENOPROTOOPT;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static struct vsock_transport hvs_transport = {
 | 
			
		||||
+	.get_local_cid            = hvs_get_local_cid,
 | 
			
		||||
+
 | 
			
		||||
+	.init                     = hvs_sock_init,
 | 
			
		||||
+	.destruct                 = hvs_destruct,
 | 
			
		||||
+	.release                  = hvs_release,
 | 
			
		||||
+	.connect                  = hvs_connect,
 | 
			
		||||
+	.shutdown                 = hvs_shutdown,
 | 
			
		||||
+
 | 
			
		||||
+	.dgram_bind               = hvs_dgram_bind,
 | 
			
		||||
+	.dgram_dequeue            = hvs_dgram_dequeue,
 | 
			
		||||
+	.dgram_enqueue            = hvs_dgram_enqueue,
 | 
			
		||||
+	.dgram_allow              = hvs_dgram_allow,
 | 
			
		||||
+
 | 
			
		||||
+	.stream_dequeue           = hvs_stream_dequeue,
 | 
			
		||||
+	.stream_enqueue           = hvs_stream_enqueue,
 | 
			
		||||
+	.stream_has_data          = hvs_stream_has_data,
 | 
			
		||||
+	.stream_has_space         = hvs_stream_has_space,
 | 
			
		||||
+	.stream_rcvhiwat          = hvs_stream_rcvhiwat,
 | 
			
		||||
+	.stream_is_active         = hvs_stream_is_active,
 | 
			
		||||
+	.stream_allow             = hvs_stream_allow,
 | 
			
		||||
+
 | 
			
		||||
+	.notify_poll_in           = hvs_notify_poll_in,
 | 
			
		||||
+	.notify_poll_out          = hvs_notify_poll_out,
 | 
			
		||||
+	.notify_recv_init         = hvs_notify_recv_init,
 | 
			
		||||
+	.notify_recv_pre_block    = hvs_notify_recv_pre_block,
 | 
			
		||||
+	.notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
 | 
			
		||||
+	.notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
 | 
			
		||||
+	.notify_send_init         = hvs_notify_send_init,
 | 
			
		||||
+	.notify_send_pre_block    = hvs_notify_send_pre_block,
 | 
			
		||||
+	.notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
 | 
			
		||||
+	.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
 | 
			
		||||
+
 | 
			
		||||
+	.set_buffer_size          = hvs_set_buffer_size,
 | 
			
		||||
+	.set_min_buffer_size      = hvs_set_min_buffer_size,
 | 
			
		||||
+	.set_max_buffer_size      = hvs_set_max_buffer_size,
 | 
			
		||||
+	.get_buffer_size          = hvs_get_buffer_size,
 | 
			
		||||
+	.get_min_buffer_size      = hvs_get_min_buffer_size,
 | 
			
		||||
+	.get_max_buffer_size      = hvs_get_max_buffer_size,
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_probe(struct hv_device *hdev,
 | 
			
		||||
+		     const struct hv_vmbus_device_id *dev_id)
 | 
			
		||||
+{
 | 
			
		||||
+	struct vmbus_channel *chan = hdev->channel;
 | 
			
		||||
+
 | 
			
		||||
+	hvs_open_connection(chan);
 | 
			
		||||
+
 | 
			
		||||
+	/* Always return success to suppress the unnecessary error message
 | 
			
		||||
+	 * in vmbus_probe(): on error the host will rescind the device in
 | 
			
		||||
+	 * 30 seconds and we can do cleanup at that time in
 | 
			
		||||
+	 * vmbus_onoffer_rescind().
 | 
			
		||||
+	 */
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int hvs_remove(struct hv_device *hdev)
 | 
			
		||||
+{
 | 
			
		||||
+	struct vmbus_channel *chan = hdev->channel;
 | 
			
		||||
+
 | 
			
		||||
+	vmbus_close(chan);
 | 
			
		||||
+
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/* This isn't really used. See vmbus_match() and vmbus_probe() */
 | 
			
		||||
+static const struct hv_vmbus_device_id id_table[] = {
 | 
			
		||||
+	{},
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static struct hv_driver hvs_drv = {
 | 
			
		||||
+	.name		= "hv_sock",
 | 
			
		||||
+	.hvsock		= true,
 | 
			
		||||
+	.id_table	= id_table,
 | 
			
		||||
+	.probe		= hvs_probe,
 | 
			
		||||
+	.remove		= hvs_remove,
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static int __init hvs_init(void)
 | 
			
		||||
+{
 | 
			
		||||
+	int ret;
 | 
			
		||||
+
 | 
			
		||||
+	if (vmbus_proto_version < VERSION_WIN10)
 | 
			
		||||
+		return -ENODEV;
 | 
			
		||||
+
 | 
			
		||||
+	ret = vmbus_driver_register(&hvs_drv);
 | 
			
		||||
+	if (ret != 0)
 | 
			
		||||
+		return ret;
 | 
			
		||||
+
 | 
			
		||||
+	ret = vsock_core_init(&hvs_transport);
 | 
			
		||||
+	if (ret) {
 | 
			
		||||
+		vmbus_driver_unregister(&hvs_drv);
 | 
			
		||||
+		return ret;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void __exit hvs_exit(void)
 | 
			
		||||
+{
 | 
			
		||||
+	vsock_core_exit();
 | 
			
		||||
+	vmbus_driver_unregister(&hvs_drv);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+module_init(hvs_init);
 | 
			
		||||
+module_exit(hvs_exit);
 | 
			
		||||
+
 | 
			
		||||
+MODULE_DESCRIPTION("Hyper-V sockets");
 | 
			
		||||
+MODULE_VERSION("1.0.0");
 | 
			
		||||
+MODULE_LICENSE("GPL");
 | 
			
		||||
-- 
 | 
			
		||||
2.11.1
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,64 @@
 | 
			
		||||
From 56e37d5939cd7da4e7b9dd8a699e11f0b004c999 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Date: Fri, 5 May 2017 16:57:29 -0600
 | 
			
		||||
Subject: [PATCH 6/7] VMCI: only try to load on VMware hypervisor
 | 
			
		||||
 | 
			
		||||
Without the patch, vmw_vsock_vmci_transport.ko and vmw_vmci.ko can
 | 
			
		||||
automatically load when an application creates an AF_VSOCK socket.
 | 
			
		||||
 | 
			
		||||
This is the expected good behavior on VMware hypervisor, but as we
 | 
			
		||||
are adding hv_sock.ko (i.e. Hyper-V transport for AF_VSOCK), we should
 | 
			
		||||
make sure vmw_vsock_vmci_transport.ko can't load on Hyper-V, otherwise
 | 
			
		||||
there is a -EBUSY conflict when both vmw_vsock_vmci_transport.ko and
 | 
			
		||||
hv_sock.ko try to call vsock_core_init().
 | 
			
		||||
 | 
			
		||||
On the other hand, hv_sock.ko can only load on Hyper-V, because it
 | 
			
		||||
depends on hv_vmbus.ko, which detects Hyper-V in hv_acpi_init().
 | 
			
		||||
 | 
			
		||||
KVM's vsock_virtio_transport doesn't have the issue because it doesn't
 | 
			
		||||
define MODULE_ALIAS_NETPROTO(PF_VSOCK).
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Cc: Alok Kataria <akataria@vmware.com>
 | 
			
		||||
Cc: Andy King <acking@vmware.com>
 | 
			
		||||
Cc: Adit Ranadive <aditr@vmware.com>
 | 
			
		||||
Cc: George Zhang <georgezhang@vmware.com>
 | 
			
		||||
Cc: Jorgen Hansen <jhansen@vmware.com>
 | 
			
		||||
Cc: K. Y. Srinivasan <kys@microsoft.com>
 | 
			
		||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
 | 
			
		||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
 | 
			
		||||
Origin: git@github.com:dcui/linux.git
 | 
			
		||||
(cherry picked from commit d643b934352ffea88cd9ce50a709667574523a8d)
 | 
			
		||||
---
 | 
			
		||||
 drivers/misc/vmw_vmci/vmci_driver.c | 8 ++++++++
 | 
			
		||||
 1 file changed, 8 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c
 | 
			
		||||
index d7eaf1eb11e7..1789ea71ff5d 100644
 | 
			
		||||
--- a/drivers/misc/vmw_vmci/vmci_driver.c
 | 
			
		||||
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
 | 
			
		||||
@@ -19,6 +19,7 @@
 | 
			
		||||
 #include <linux/kernel.h>
 | 
			
		||||
 #include <linux/module.h>
 | 
			
		||||
 #include <linux/init.h>
 | 
			
		||||
+#include <linux/hypervisor.h>
 | 
			
		||||
 
 | 
			
		||||
 #include "vmci_driver.h"
 | 
			
		||||
 #include "vmci_event.h"
 | 
			
		||||
@@ -58,6 +59,13 @@ static int __init vmci_drv_init(void)
 | 
			
		||||
 	int vmci_err;
 | 
			
		||||
 	int error;
 | 
			
		||||
 
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Check if we are running on VMware's hypervisor and bail out
 | 
			
		||||
+	 * if we are not.
 | 
			
		||||
+	 */
 | 
			
		||||
+	if (x86_hyper != &x86_hyper_vmware)
 | 
			
		||||
+		return -ENODEV;
 | 
			
		||||
+
 | 
			
		||||
 	vmci_err = vmci_event_init();
 | 
			
		||||
 	if (vmci_err < VMCI_SUCCESS) {
 | 
			
		||||
 		pr_err("Failed to initialize VMCIEvent (result=%d)\n",
 | 
			
		||||
-- 
 | 
			
		||||
2.11.1
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,30 @@
 | 
			
		||||
From 6b2c2ccb78ab847ac1db6009abc9cc4954a562ef Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Date: Fri, 5 May 2017 16:57:35 -0600
 | 
			
		||||
Subject: [PATCH 7/7] hv_sock: add the support of auto-loading
 | 
			
		||||
 | 
			
		||||
After we disable the VMware virtual sockets driver's auto-loading on Hyper-V,
 | 
			
		||||
we can enable hv_sock's auto-loading now.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Dexuan Cui <decui@microsoft.com>
 | 
			
		||||
Cc: K. Y. Srinivasan <kys@microsoft.com>
 | 
			
		||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
 | 
			
		||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
 | 
			
		||||
Origin: git@github.com:dcui/linux.git
 | 
			
		||||
(cherry picked from commit d02cd7376ce780f3d46b72a2f553f266ad558cd4)
 | 
			
		||||
---
 | 
			
		||||
 net/vmw_vsock/hyperv_transport.c | 1 +
 | 
			
		||||
 1 file changed, 1 insertion(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
 | 
			
		||||
index fd89bf357617..f465b0b662df 100644
 | 
			
		||||
--- a/net/vmw_vsock/hyperv_transport.c
 | 
			
		||||
+++ b/net/vmw_vsock/hyperv_transport.c
 | 
			
		||||
@@ -827,3 +827,4 @@ module_exit(hvs_exit);
 | 
			
		||||
 MODULE_DESCRIPTION("Hyper-V sockets");
 | 
			
		||||
 MODULE_VERSION("1.0.0");
 | 
			
		||||
 MODULE_LICENSE("GPL");
 | 
			
		||||
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
 | 
			
		||||
-- 
 | 
			
		||||
2.11.1
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user